X-Git-Url: https://www.fleuret.org/cgi-bin/gitweb/gitweb.cgi?p=pytorch.git;a=blobdiff_plain;f=picocrafter.py;fp=picocrafter.py;h=23d93b2cdd0d2c9a52bd76a6289dcf0e2d5df135;hp=5bd6a48e1d8a0906fb03b183c0b83b76a879fdd0;hb=c951f0b1b425dc91ba74e9cb75425b0ad2f481ac;hpb=679c210fdde56edfaf2a1859881139e9d6e81664

diff --git a/picocrafter.py b/picocrafter.py
index 5bd6a48..23d93b2 100755
--- a/picocrafter.py
+++ b/picocrafter.py
@@ -23,8 +23,8 @@
 # iterations.
 #
 # The environment is a rectangular area with walls "#" dispatched
-# randomly. The agent "@" can perform five actions: move NESW or do
-# not move.
+# randomly. The agent "@" can perform five actions: move "NESW" or be
+# immobile "I".
 #
 # There are monsters "$" moving randomly. The agent gets hit by every
 # monster present in one of the 4 direct neighborhoods at the end of
@@ -39,8 +39,9 @@
 # "B", "C"). The keys and vault can only be used in sequence:
 # initially the agent can move only to free spaces, or to the "a", in
 # which case the key is removed from the environment and the agent now
-# carries it, and can move to free spaces or the "A". When it moves to
-# the "A", it gets a reward, loses the "a", the "A" is removed from
+# carries it, it appears in the inventory at the bottom of the frame,
+# and the agent can now move to free spaces or the "A". When it moves
+# to the "A", it gets a reward, loses the "a", the "A" is removed from
 # the environment, but the agent can now move to the "b", etc. Rewards
 # are 1 for "A" and "B" and 10 for "C".
 
@@ -244,7 +245,7 @@ class PicroCrafterEnvironment:
 
     def action2str(self, n):
         if n >= 0 and n < 5:
-            return "XNESW"[n]
+            return "INESW"[n]
         else:
             return "?"
 