# iterations.
#
# The environment is a rectangular area with walls "#" dispatched
-# randomly. The agent "@" can perform five actions: move NESW or do
-# not move.
+# randomly. The agent "@" can perform five actions: move "N", "E",
+# "S", "W", or stay immobile "I".
#
# There are monsters "$" moving randomly. The agent gets hit by every
# monster present in one of the 4 direct neighborhoods at the end of
# "B", "C"). The keys and vault can only be used in sequence:
# initially the agent can move only to free spaces, or to the "a", in
# which case the key is removed from the environment and the agent now
-# carries it, and can move to free spaces or the "A". When it moves to
-# the "A", it gets a reward, loses the "a", the "A" is removed from
+# carries it (it then appears in the inventory at the bottom of the
+# frame) and can now move to free spaces or the "A". When it moves to
+# the "A", it gets a reward, loses the "a", the "A" is removed from
# the environment, but the agent can now move to the "b", etc. Rewards
# are 1 for "A" and "B" and 10 for "C".
r = u.sort(dim=-1, descending=True).indices[:, : len(z)]
q *= self.tile2id["#"]
- q[
- torch.arange(q.size(0), device=q.device)[:, None].expand_as(r), r
- ] = torch.tensor([self.tile2id[c] for c in z], device=q.device)[None, :]
+ q[torch.arange(q.size(0), device=q.device)[:, None].expand_as(r), r] = (
+ torch.tensor([self.tile2id[c] for c in z], device=q.device)[None, :]
+ )
if world_margin > 0:
r = m.new_full(
def action2str(self, n):
if n >= 0 and n < 5:
- return "XNESW"[n]
+ return "INESW"[n]
else:
return "?"