X-Git-Url: https://www.fleuret.org/cgi-bin/gitweb/gitweb.cgi?p=tex.git;a=blobdiff_plain;f=elbo.tex;h=563ec3c3bf98e3e250d2fc5fa4deffb26dc74ac8;hp=4c6cb24cc1dd7475f2f8c1a24054f5d0b3e707e8;hb=HEAD;hpb=4b8c58903baa9ff8c508bda798492e10dde9cb7f diff --git a/elbo.tex b/elbo.tex index 4c6cb24..563ec3c 100644 --- a/elbo.tex +++ b/elbo.tex @@ -148,4 +148,20 @@ $\theta$ and $\alpha$ to maximize it maximizes $\log \, p_\theta(x_n)$ and brings $q_\alpha(z \mid x_n)$ close to $p_\theta(z \mid x_n)$. +\medskip + +A point that may be important in practice is the decomposition +% +\begin{align*} + & \expect_{Z \sim q_\alpha(z \mid x_n)} \left[ \log \frac{p_\theta(x_n,Z)}{q_\alpha(Z \mid x_n)} \right] \\ + & = \expect_{Z \sim q_\alpha(z \mid x_n)} \left[ \log \frac{p_\theta(x_n \mid Z) p_\theta(Z)}{q_\alpha(Z \mid x_n)} \right] \\ + & = \expect_{Z \sim q_\alpha(z \mid x_n)} \left[ \log \, p_\theta(x_n \mid Z) \right] \\ + & \hspace*{7em} - \dkl(q_\alpha(z \mid x_n) \, \| \, p_\theta(z)). +\end{align*} +% +This form is useful because for certain $p_\theta(z)$ and $q_\alpha(z \mid x_n)$, for +instance if they are Gaussian, the KL term can be computed exactly +instead of being estimated through sampling, which removes one source of noise in the +optimization process. + \end{document}