From: François Fleuret Date: Sat, 22 Jul 2023 20:59:10 +0000 (+0200) Subject: Update. X-Git-Url: https://www.fleuret.org/cgi-bin/gitweb/gitweb.cgi?a=commitdiff_plain;h=8492656cf0cc5de4f7e2c4aa8ccb717193293b40;p=picoclvr.git Update. --- diff --git a/graph.py b/graph.py index a819283..c286388 100755 --- a/graph.py +++ b/graph.py @@ -161,7 +161,7 @@ if __name__ == "__main__": nb_heads=2, nb_blocks=5, dropout=0.1, - causal=True, + #causal=True, ) model.eval() @@ -171,6 +171,8 @@ if __name__ == "__main__": attention_matrices = [m[0, 0] for m in model.retrieve_attention()] + + # attention_matrices = [ torch.rand(3,5), torch.rand(8,3), torch.rand(5,8) ] # for a in attention_matrices: a=a/a.sum(-1,keepdim=True) diff --git a/mygpt.py b/mygpt.py index 0400b48..0cf70e0 100755 --- a/mygpt.py +++ b/mygpt.py @@ -46,7 +46,7 @@ class BracketedSequence: return self.x[:, self.first : self.first + self.nb] def complete(self): - return self.first == 0 and self.nb == x.size(1) + return self.first == 0 and self.nb == self.x.size(1) ######################################################################