Update.
author François Fleuret <francois@fleuret.org>
Sat, 13 Jan 2024 21:25:09 +0000 (22:25 +0100)
committer François Fleuret <francois@fleuret.org>
Sat, 13 Jan 2024 21:25:09 +0000 (22:25 +0100)
main.py
mygpt.py

diff --git a/main.py b/main.py
index c22ae57..3e67a73 100755 (executable)
--- a/main.py
+++ b/main.py
@@ -465,15 +465,15 @@ with os.popen("sha256sum *.py") as f:
         log_string(f"sha256sum {l.strip()}")
 
 now = time.strftime("%Y%m%d-%H%M%S", time.localtime())
-os.system(f"tar --ignore-failed-read zcvf {args.result_dir}/src-{now}.tgz *.py *.sh")
+os.system(f"tar zcvf {args.result_dir}/src-{now}.tgz *.py *.sh")
 
 log_string(f"argv {' '.join(sys.argv)}")
 
 for n in vars(args):
     log_string(f"args.{n} {getattr(args, n)}")
 
-for n in vars(sup_args):
-    log_string(f"sup_args.{n} {getattr(sup_args, n)}")
+for k, v in sup_args.items():
+    log_string(f'sup_args["{k}"] "{v}"')
 
 
 ######################################################################
diff --git a/mygpt.py b/mygpt.py
index 7c9991f..099847c 100755 (executable)
--- a/mygpt.py
+++ b/mygpt.py
@@ -493,14 +493,16 @@ class Caterpillar(nn.Module):
 
         self.proba_gate_dropout = 0.0
 
-        default_b_G = kwargs.get("default_b_G")
-        if default_b_G is None:
-            default_b_G = -math.log(caterpillar_height - 1)
+        default_bg = kwargs.get("default_bg")
+        if default_bg is None:
+            default_bg = -math.log(caterpillar_height - 1)
+        else:
+            default_bg = float(default_bg)
 
-        logger(f"default_b_G {default_b_G}")
+        logger(f"default_bg {default_bg}")
 
         self.w_G = randw(nb_heads, caterpillar_height, dim_model)
-        self.b_G = nn.Parameter(torch.full((nb_heads, caterpillar_height), default_b_G))
+        self.b_G = nn.Parameter(torch.full((nb_heads, caterpillar_height), default_bg))
 
         self.w_K = randw(nb_heads, dim_qk, dim_model)
         self.w_V = randw(nb_heads, dim_v, dim_model)