dropout=0.0,
len_max=1e5,
):
-
super().__init__()
assert dim_model % nb_heads == 0
m.bias.zero_()
m.weight.fill_(1.0)
def forward(self, bs, mode="standard"):
    """Run the model on a bracketed sequence.

    Args:
        bs: input BracketedSequence; ``bs.x`` is shifted one position to
            the right before embedding (causal shift).
        mode: one of
            "standard" -- full pass: trunk then readout (default);
            "head"     -- trunk only, no readout;
            "deep"     -- run every trunk layer and concatenate each
                          layer's output along the last dimension.

    Returns:
        A BracketedSequence holding the processed values.

    Raises:
        ValueError: if ``mode`` is not one of the recognized values.
    """
    # Shift input right by one: prepend a zero, drop the last token, so
    # position t is predicted from tokens strictly before t.
    bs.x = F.pad(bs.x, (1, -1))
    bs = self.embedding(bs)
    if mode == "standard":
        bs = self.readout(self.trunk(bs))
    elif mode == "head":
        bs = self.trunk(bs)
    elif mode == "deep":
        # Collect every trunk layer's output and stack them on the
        # feature axis. NOTE(review): assumes bs.slice() returns the
        # current values tensor -- confirm against BracketedSequence.
        per_layer = []
        for layer in self.trunk:
            bs = layer(bs)
            per_layer.append(bs.slice())
        bs = BracketedSequence(torch.cat(per_layer, -1))
    else:
        # Name the offending value so the caller can diagnose the typo.
        raise ValueError(f"unknown forward mode {mode!r}")
    return bs
######################################################################
if __name__ == "__main__":
-
print("Basic check.")
vocabulary_size = 10