X-Git-Url: https://www.fleuret.org/cgi-bin/gitweb/gitweb.cgi?p=tex.git;a=blobdiff_plain;f=attention.tex;fp=attention.tex;h=276759acbf59f11fe258a527d68afb788dcb6dca;hp=b6f15dd57d9e652bb4480fa81d8ca5626ca085f3;hb=6e541e7102264b99f1a4aa72325a2b4b81fcb3eb;hpb=b94c6b870ce9282f924b043bedac4c62920e7c6a diff --git a/attention.tex b/attention.tex index b6f15dd..276759a 100644 --- a/attention.tex +++ b/attention.tex @@ -1,5 +1,7 @@ % -*- mode: latex; mode: reftex; mode: auto-fill; mode: flyspell; -*- +% Written by Francois Fleuret + \documentclass[c,8pt]{beamer} \usepackage{tikz} @@ -22,6 +24,7 @@ weight $V$ with it to get $Y$. \[ \uncover<2,4,6->{ +% A_{i,j} = \softmax \left( \frac{Q_i \cdot K_j}{\sqrt{d}} \right) A_i = \softmax \left( \frac{Q_i \, K\transpose}{\sqrt{d}} \right) } % @@ -37,30 +40,34 @@ weight $V$ with it to get $Y$. \makebox[\textwidth][c]{ \begin{tikzpicture} - \node[cm={0.5, 0.5, 0.0, 1.0, (0.0, 0.0)}] (V) at (-2, 2.35) { + \node[xscale=0.5,yslant=0.5] (V) at (-2, 2.35) { \begin{tikzpicture} \draw[fill=green!20] (0, 0) rectangle (4, 1.4); \uncover<3,5>{\draw[fill=yellow] (0, 0) rectangle (4, 1.4);} \foreach \x in { 0.2, 0.4, ..., 3.8 } \draw (\x, 0) -- ++(0, 1.4); + %% \foreach \y in { 0.0, 0.2, ..., 1.4 } \draw (0, \y) -- ++(4, 0); \end{tikzpicture} }; - \node[cm={1.0, 0.0, 0.5, 0.5, (0.0, 0.0)}] (A) at (0.5, 1.6) { + \node[yscale=0.5,xslant=0.5] (A) at (0.5, 1.6) { \begin{tikzpicture} \draw (0, 0) rectangle ++(3, 4); + %% \uncover<4->{\draw[fill=green!20] (0, 0) rectangle ++(0.2, 4);} + %% \uncover<6->{\draw[fill=green!20] (0.2, 0) rectangle ++(0.2, 4);} \end{tikzpicture} }; \uncover<2-3>{ - \node[cm={0.5, 0.5, 0.0, 1.0, (0.0, 0.0)}] (a1) at (-0.9, 2.1) { + \node[xscale=0.5,yslant=0.5] (a1) at (-0.9, 2.1) { \begin{tikzpicture} \draw[draw=none] (0, 0) rectangle (4, 1); \foreach \x/\y in { 0.00/0.03, 0.20/0.04, 0.40/0.07, 0.60/0.35, 0.80/0.52, 1.00/1.00, 1.20/0.82, 1.40/0.25, 1.60/0.08, 1.80/0.03, 2.00/0.15, 2.20/0.24, 2.40/0.70, 2.60/0.05, 2.80/0.03, - 3.00/0.03, 3.20/0.03, 3.40/0.00, 3.60/0.03, 3.80/0.00 }{ - \uncover<2>{\draw[black,fill=red] (\x, 0) rectangle ++(0.2, \y);} + 3.00/0.03, 3.20/0.03, 3.40/0.00, 3.60/0.03, 3.80/0.00 + }{ + \uncover<2>{\draw[black,fill=orange] (\x, 0) rectangle ++(0.2, \y);} \uncover<3>{\draw[black,fill=yellow] (\x, 0) rectangle ++(0.2, \y);} }; \end{tikzpicture} @@ -68,54 +75,60 @@ weight $V$ with it to get $Y$. } \uncover<4-5>{ - \node[cm={0.5, 0.5, 0.0, 1.0, (0.0, 0.0)}] (a2) at (-0.7, 2.1) { + \node[xscale=0.5,yslant=0.5] (a2) at (-0.7, 2.1) { \begin{tikzpicture} \draw[draw=none] (0, 0) rectangle (4, 1); \foreach \x/\y in { 0.00/0.03, 0.20/0.04, 0.40/0.07, 0.60/0.03, 0.80/0.03, 1.00/0.05, 1.20/0.02, 1.40/0.08, 1.60/0.35, 1.80/0.85, 2.00/0.05, 2.20/0.04, 2.40/0.03, 2.60/0.05, 2.80/0.03, - 3.00/0.03, 3.20/0.03, 3.40/0.00, 3.60/0.03, 3.80/0.00 }{ - \uncover<4>{\draw[black,fill=red] (\x, 0) rectangle ++(0.2, \y);} + 3.00/0.03, 3.20/0.03, 3.40/0.00, 3.60/0.03, 3.80/0.00 + }{ + \uncover<4>{\draw[black,fill=orange] (\x, 0) rectangle ++(0.2, \y);} \uncover<5>{\draw[black,fill=yellow] (\x, 0) rectangle ++(0.2, \y);} }; \end{tikzpicture} }; } - \node[cm={1.0, 0.0, 0.0, 1.0, (0.0, 0.0)}] (Q) at (-0.5, -0.05) { + \node (Q) at (-0.5, -0.05) { \begin{tikzpicture} \draw[fill=green!20] (0, 0) rectangle (3, 1.0); \foreach \x in { 0.2, 0.4, ..., 2.8 } \draw (\x, 0) -- ++(0, 1.0); \uncover<2>{\draw[fill=yellow] (0.0, 0) rectangle ++(0.2, 1);} \uncover<4>{\draw[fill=yellow] (0.2, 0) rectangle ++(0.2, 1);} + %% \foreach \y in { 0.0, 0.2, ..., 1.0 } \draw (0, \y) -- ++(3, 0); \end{tikzpicture} }; - \node[cm={1.0, 0.0, 0.0, 1.0, (0.0, 0.0)}] (Y) at (1.5, 3.45) { + \node (Y) at (1.5, 3.45) { \begin{tikzpicture} - \uncover<3>{\draw[fill=red] (0.0, 0) rectangle ++(0.2, 1.4);} + \uncover<3>{\draw[fill=orange] (0.0, 0) rectangle ++(0.2, 1.4);} \uncover<4->{\draw[fill=green!20] (0.0, 0) rectangle ++(0.2, 1.4);} \uncover<6->{\draw[fill=green!20] (0.0, 0) rectangle ++(3, 1.4);} - \uncover<5>{\draw[fill=red] (0.2, 0) rectangle ++(0.2, 1.4);} + \uncover<5>{\draw[fill=orange] (0.2, 0) rectangle ++(0.2, 1.4);} \draw (0, 0) rectangle (3, 1.4); \foreach \x in { 0.2, 0.4, ..., 2.8 } \draw (\x, 0) -- ++(0, 1.4); + %% \foreach \y in { 0.0, 0.2, ..., 1.4 } \draw (0, \y) -- ++(3, 0); \end{tikzpicture} }; - \node[cm={0.5, 0.5, 0.0, 1.0, (0.0, 0.0)}] (K) at (3, 1.1) { + \node[xscale=0.5,yslant=0.5] (K) at (3, 1.1) { \begin{tikzpicture} \draw[fill=green!20] (0, 0) rectangle (4, 1); \uncover<2,4>{\draw[fill=yellow] (0, 0) rectangle (4, 1);} \foreach \x in { 0.2, 0.4, ..., 3.8 } \draw (\x, 0) -- ++(0, 1); + %% \foreach \y in { 0.0, 0.2, ..., 1.0 } \draw (0, \y) -- ++(4, 0); \end{tikzpicture} }; - \node[left of=V,xshift=0.5cm,yshift=0.7cm] (Vl) {V}; - \node[left of=Q,xshift=-0.8cm] (Ql) {Q}; - \node (Al) at (A) {A}; - \node[right of=K,xshift=-0.6cm,yshift=-0.6cm] (Kl) {K}; - \node[right of=Y,xshift=0.8cm] (Yl) {Y}; + \node[left of=V,xshift=0.5cm,yshift=0.7cm] (Vl) {$V$}; + \node[left of=Q,xshift=-0.8cm] (Ql) {$Q$}; + \node (Al) at (A) {$A$}; + \node[right of=K,xshift=-0.6cm,yshift=-0.6cm] (Kl) {$K$}; + \node[right of=Y,xshift=0.8cm] (Yl) {$Y$}; + + % \uncover<1>{\draw[<->] (2, 0) -- ++ (0, 1) node[midway,right]{$d$};} \end{tikzpicture} }