X-Git-Url: https://www.fleuret.org/cgi-bin/gitweb/gitweb.cgi?p=tex.git;a=blobdiff_plain;f=elbo.tex;fp=elbo.tex;h=239a657f8c438ebf50e970b52d049c2b22a5b498;hp=6875ddf7bd096d28c78a7dcaa857354f2315e62a;hb=43b0cb04eae4537d95775038d9e700e642087d6d;hpb=119ad14a2072217edf3e2315154614815b72ccbd diff --git a/elbo.tex b/elbo.tex index 6875ddf..239a657 100644 --- a/elbo.tex +++ b/elbo.tex @@ -71,16 +71,23 @@ \begin{document} -\vspace*{0ex} +\setlength{\abovedisplayskip}{2ex} +\setlength{\belowdisplayskip}{2ex} +\setlength{\abovedisplayshortskip}{2ex} +\setlength{\belowdisplayshortskip}{2ex} + +\vspace*{-4ex} \begin{center} {\Large The Evidence Lower Bound} +\vspace*{1ex} + Fran\c cois Fleuret \today -\vspace*{1ex} +\vspace*{-1ex} \end{center} @@ -102,12 +109,14 @@ p_\theta(x_n) & = \int_z p_\theta(x_n,z) dz \\ & = \expect_{Z \sim q(z)} \left[\frac{p_\theta(x_n,Z)}{q(Z)}\right]. \end{align*} % -So if we wanted to maximize $p_\theta(x_n)$ alone, we could sample a +So if we sample a $Z$ with $q$ and maximize % \begin{equation*} -\frac{p_\theta(x_n,Z)}{q(Z)}.\label{eq:estimator} +\frac{p_\theta(x_n,Z)}{q(Z)}, \end{equation*} +% +we do maximize $p_\theta(x_n)$ on average. But we want to maximize $\sum_n \log \, p_\theta(x_n)$. If we use the $\log$ of the previous expression, we can decompose its average value