projects
/
mygpt.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
e36adb5
)
Update.
author
Francois Fleuret
<francois@fleuret.org>
Tue, 26 Jul 2022 21:05:46 +0000
(23:05 +0200)
committer
Francois Fleuret
<francois@fleuret.org>
Tue, 26 Jul 2022 21:05:46 +0000
(23:05 +0200)
main.py
patch
|
blob
|
history
diff --git a/main.py b/main.py
index cd0e1ea..82caebe 100755 (executable)
--- a/main.py
+++ b/main.py
@@ -170,6 +170,7 @@ class TaskPicoCLVR(Task):
descr = [ s.strip().split(' ') for s in descr ]
l = max([ len(s) for s in descr ])
descr = [ s.strip().split(' ') for s in descr ]
l = max([ len(s) for s in descr ])
+ #descr = [ [ '<unk>' ] * (l - len(s)) + s for s in descr ]
descr = [ s + [ '<unk>' ] * (l - len(s)) for s in descr ]
return descr
descr = [ s + [ '<unk>' ] * (l - len(s)) for s in descr ]
return descr
@@ -191,6 +192,7 @@ class TaskPicoCLVR(Task):
self.token2id = dict([ (t, n) for n, t in enumerate(tokens) ])
self.id2token = dict([ (n, t) for n, t in enumerate(tokens) ])
self.token2id = dict([ (t, n) for n, t in enumerate(tokens) ])
self.id2token = dict([ (n, t) for n, t in enumerate(tokens) ])
+ # Tokenize the train and test sets
t = [ [ self.token2id[u] for u in s ] for s in self.train_descr ]
self.train_input = torch.tensor(t, device = self.device)
t = [ [ self.token2id[u] for u in s ] for s in self.test_descr ]
t = [ [ self.token2id[u] for u in s ] for s in self.train_descr ]
self.train_input = torch.tensor(t, device = self.device)
t = [ [ self.token2id[u] for u in s ] for s in self.test_descr ]