Update.

[pytorch.git] / picocrafter.py
diff --git a/picocrafter.py b/picocrafter.py

index 5bd6a48..001bb81 100755 (executable)
--- a/picocrafter.py
+++ b/picocrafter.py
@@ -23,8 +23,8 @@
  # iterations.
  #
  # The environment is a rectangular area with walls "#" dispatched
-# randomly. The agent "@" can perform five actions: move NESW or do
-# not move.
+# randomly. The agent "@" can perform five actions: move "NESW" or be
+# immobile "I".
  #
  # There are monsters "$" moving randomly. The agent gets hit by every
  # monster present in one of the 4 direct neighborhoods at the end of
@@ -39,8 +39,9 @@
  # "B", "C"). The keys and vault can only be used in sequence:
  # initially the agent can move only to free spaces, or to the "a", in
  # which case the key is removed from the environment and the agent now
-# carries it, and can move to free spaces or the "A". When it moves to
-# the "A", it gets a reward, loses the "a", the "A" is removed from
+# carries it (it appears in the inventory at the bottom of the frame),
+# and the agent can now move to free spaces or the "A". When it moves
+# to the "A", it gets a reward, loses the "a", the "A" is removed from
  # the environment, but the agent can now move to the "b", etc. Rewards
  # are 1 for "A" and "B" and 10 for "C".
  
@@ -226,9 +227,9 @@ class PicroCrafterEnvironment:
          r = u.sort(dim=-1, descending=True).indices[:, : len(z)]
  
          q *= self.tile2id["#"]
-        q[
-            torch.arange(q.size(0), device=q.device)[:, None].expand_as(r), r
-        ] = torch.tensor([self.tile2id[c] for c in z], device=q.device)[None, :]
+        q[torch.arange(q.size(0), device=q.device)[:, None].expand_as(r), r] = (
+            torch.tensor([self.tile2id[c] for c in z], device=q.device)[None, :]
+        )
  
          if world_margin > 0:
              r = m.new_full(
@@ -244,7 +245,7 @@ class PicroCrafterEnvironment:
  
      def action2str(self, n):
          if n >= 0 and n < 5:
-            return "XNESW"[n]
+            return "INESW"[n]
          else:
              return "?"