Update. master
authorFrançois Fleuret <francois@fleuret.org>
Wed, 23 Oct 2024 11:45:36 +0000 (13:45 +0200)
committerFrançois Fleuret <francois@fleuret.org>
Wed, 23 Oct 2024 11:45:36 +0000 (13:45 +0200)
dlscore.tex [new file with mode: 0644]
elbo.tex
sliderule.pdf [new file with mode: 0644]
sliderule.tex [new file with mode: 0644]

diff --git a/dlscore.tex b/dlscore.tex
new file mode 100644 (file)
index 0000000..6fd06ac
--- /dev/null
@@ -0,0 +1,165 @@
+%% -*- mode: latex; mode: reftex; mode: flyspell; coding: utf-8; tex-command: "pdflatex.sh" -*-
+
+\documentclass[11pt,a4paper,twocolumn,twoside]{article}
+\usepackage[a4paper,top=2cm,bottom=2cm,left=2.5cm,right=2.5cm]{geometry}
+\usepackage[utf8]{inputenc}
+\usepackage{cmbright}
+
+\begin{document}
+
+\noindent One point per item if you know the precise meaning of the
+listed word(s).
+
+\section{Machine Learning}
+
+\begin{enumerate}
+
+  \item VC dimension
+  \item over-fitting, under-fitting
+  \item logistic regression
+  \item Q-value
+  \item kernel trick
+  \item boosting
+  \item PCA
+  \item feature design
+  \item linear regression
+  \item expectation-maximization, GMM
+  \item SVM
+  \item Bellman equation
+  \item decision tree
+  \item train/validation/test sets
+  \item naive Bayesian model
+  \item autoregressive model
+  \item bias-variance dilemma
+  \item policy gradient
+  \item random forest
+  \item k-NN
+  \item perceptron algorithm
+
+\end{enumerate}
+
+
+\section{Deep Learning}
+
+\begin{enumerate}
+
+  \item Adam
+  \item softmax
+  \item residual connections
+  \item autograd
+  \item ReLU
+  \item dropout
+  \item CLIP
+  \item Xavier's initialization
+  \item Vanishing gradient
+  \item LeNet
+  \item ViT
+  \item transposed convolution layer
+  \item checkpoint (during the forward pass)
+  \item minibatch
+  \item masked model
+  \item supervised / unsupervised
+  \item data augmentation
+  \item attention block
+  \item SGD
+  \item batchnorm
+  \item gradient clipping
+  \item tokenizer
+  \item VAE
+  \item weight decay
+  \item GELU
+  \item LSTM, GRU
+  \item GAN
+  \item resnet
+  \item straight-through estimator
+  \item convolution layer
+  \item pre-training / fine-tuning
+  \item perplexity
+  \item logits
+  \item CLS token
+  \item forward pass
+  \item Transformer (original one), GPT
+  \item backward pass
+  \item autoencoder, denoising autoencoder
+  \item layer norm
+  \item GNN
+  \item learning rate schedule
+  \item diffusion model
+  \item cross-entropy
+  \item max pooling, average pooling
+  \item RNN
+  \item contrastive loss
+  \item positional encoding
+  \item causal model
+  \item attention layer
+  \item SSL
+  \item MSE
+  \item tensor
+
+\end{enumerate}
+
+\section{Math}
+
+\begin{enumerate}
+
+  \item Hessian
+  \item random variable
+  \item matrix
+  \item entropy, mutual information
+  \item dot product
+  \item mean, variance
+  \item L2 norm
+  \item chain rule (differentiation)
+  \item Fourier transform
+  \item continuity, Lipschitz continuity
+  \item chain rule (probability)
+  \item polynomial
+  \item Cantor's diagonal argument
+  \item Jacobian
+  \item linear operator
+  \item gradient
+  \item Bayes' theorem
+  \item vector
+  \item joint law, product law
+  \item Gaussian distribution
+  \item distribution
+  \item determinant, rank
+  \item eigen-decomposition, SVD
+  \item maximum likelihood
+  \item Central Limit Theorem
+
+\end{enumerate}
+
+\section{Computer Science}
+
+\begin{enumerate}
+
+  \item polymorphism
+  \item recursion
+  \item value passed by reference
+  \item binary search
+  \item quick sort
+  \item parallel scan
+  \item mutability
+  \item Turing machine
+  \item FP32
+  \item iterator
+  \item interpreter, compiler
+  \item anonymous function
+  \item set
+  \item binary heap
+  \item mutex
+  \item cache memory
+  \item scope of a variable or function
+  \item dynamic programming
+  \item hash table
+  \item big-O notation
+  \item Turing complete
+  \item class inheritance
+  \item closure
+  \item loop unrolling
+  \item complexity
+
+\end{enumerate}
+
+\end{document}
diff --git a/elbo.tex b/elbo.tex
index 4c6cb24..563ec3c 100644 (file)
--- a/elbo.tex
+++ b/elbo.tex
@@ -148,4 +148,32 @@ $\theta$ and $\alpha$ to maximize
 it maximizes $\log \, p_\theta(x_n)$ and brings $q_\alpha(z \mid
 x_n)$ close to $p_\theta(z \mid x_n)$.
 
+\medskip
+
+A point that may be important in practice is
+%
+\begin{align*}
+ & \expect_{Z \sim q_\alpha(z \mid x_n)} \left[ \log \frac{p_\theta(x_n,Z)}{q_\alpha(Z \mid x_n)} \right]                      \\
+ & = \expect_{Z \sim q_\alpha(z \mid x_n)} \left[ \log \frac{p_\theta(x_n \mid Z) p_\theta(Z)}{q_\alpha(Z \mid x_n)} \right] \\
+ & = \expect_{Z \sim q_\alpha(z \mid x_n)} \left[ \log \, p_\theta(x_n \mid Z) \right]                                            \\
+ & \hspace*{7em} - \dkl(q_\alpha(z \mid x_n) \, \| \, p_\theta(z)).
+\end{align*}
+%
+This form is useful because for certain $p_\theta$ and $q_\alpha$, for
+instance if they are Gaussian, the KL term can be computed exactly
+instead of through sampling, which removes one source of noise in the
+optimization process.
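+%
+For instance, taking one-dimensional Gaussians
+$q_\alpha(z \mid x_n) = \mathcal{N}(\mu, \sigma^2)$ and
+$p_\theta(z) = \mathcal{N}(0, 1)$, the KL term has the well-known
+closed form
+%
+\begin{equation*}
+\dkl(q_\alpha(z \mid x_n) \, \| \, p_\theta(z))
+= \frac{1}{2} \left( \sigma^2 + \mu^2 - 1 - \log \sigma^2 \right),
+\end{equation*}
+%
+so only the reconstruction term remains to be estimated by sampling.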
+
 \end{document}
diff --git a/sliderule.pdf b/sliderule.pdf
new file mode 100644 (file)
index 0000000..7746196
Binary files /dev/null and b/sliderule.pdf differ
diff --git a/sliderule.tex b/sliderule.tex
new file mode 100644 (file)
index 0000000..9c5a784
--- /dev/null
@@ -0,0 +1,41 @@
+%% -*- mode: latex; mode: reftex; mode: flyspell; coding: utf-8; tex-command: "pdflatex.sh" -*-
+
+\documentclass[11pt,a4paper,twoside]{article}
+\usepackage[a4paper,top=1cm,bottom=1cm,left=1cm,right=1cm]{geometry}
+\usepackage[utf8]{inputenc}
+\usepackage{amsmath,amssymb,dsfont}
+\usepackage[pdftex]{graphicx}
+\usepackage[colorlinks=true,linkcolor=blue,urlcolor=blue,citecolor=blue]{hyperref}
+\usepackage[round]{natbib}
+\usepackage{tikz}
+\usetikzlibrary{arrows,arrows.meta,calc}
+\usetikzlibrary{patterns,backgrounds}
+\usetikzlibrary{positioning,fit}
+\usetikzlibrary{shapes.geometric,shapes.multipart}
+\usetikzlibrary{patterns.meta,decorations.pathreplacing,calligraphy}
+\usetikzlibrary{tikzmark}
+\usetikzlibrary{decorations.pathmorphing}
+
+\pagestyle{empty}
+
+\begin{document}
+
+\makebox[\textwidth][c]{
+\begin{tikzpicture}
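+% invisible path that fixes the bounding box of the picture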
+\draw[draw=none] (-7,-2)--(7,25);
+\draw (0,-1) -- (0,23);
+% tick positions follow a log scale: \y is approximately 3.18*ln(10*\x)
+\foreach \x/\y in {%
+{1/10}/0.000,{1/8}/0.710,{1/6}/1.625,{1/4}/2.914,{1/3}/3.829,{1/2}/5.118,1/7.323,2/9.527,3/10.817,4/11.732,5/12.441,6/13.021,7/13.511,9/14.310,11/14.949,13/15.480,15/15.935,18/16.515,21/17.005,25/17.560,29/18.032,34/18.537,40/19.054,47/19.567,55/20.067,64/20.549,74/21.011,86/21.489,100/21.968}{
+  \draw (0,\y)--+(-1,0) node[left] {$\x$};
+  \draw (0,\y)--+(1,0) node[right] {$\x$};
+}
+% draw the A and B labels once, outside of the tick loop
+\node[draw,circle] at (-5, 10) {\Huge A};
+\node[draw,circle] at (5, 10) {\Huge B};
+
+\end{tikzpicture}
+}
+
+\end{document}