Пример #1
0
    def test_run(self):
        """Play a short scripted game and check observations and process cleanup.

        For every scripted action the test verifies that exactly one "@" is
        on the map, that it sits at the cursor position reported by the
        status block, and that the glyph at that position decodes to the
        "monk" monster entry. Finally it checks that deleting the game object
        reaps the NetHack child process.
        """
        # The ".zip" extension belongs in `suffix`; `prefix` is the start of
        # the generated file name.
        # NOTE(review): tempfile.mktemp is deprecated because the returned
        # name can be claimed by another process before it is used.
        archivefile = tempfile.mktemp(prefix="nethack_test", suffix=".zip")
        game = nethack.NetHack(archivefile=archivefile)

        response = game.reset()
        actions = [
            nethack.MiscAction.MORE,
            nethack.MiscAction.MORE,
            nethack.MiscAction.MORE,
            nethack.MiscAction.MORE,
            nethack.MiscAction.MORE,
            nethack.MiscAction.MORE,
        ]

        for action in actions:
            # Keep sending MORE until the program state reports the move loop.
            while not response.ProgramState().InMoveloop():
                response, done, info = game.step(nethack.MiscAction.MORE)

            response, done, info = game.step(action)
            if done:
                # Only the good die young.
                response = game.reset()

            obs = response.Observation()
            chars = _fb_ndarray_to_np(obs.Chars())
            glyphs = _fb_ndarray_to_np(obs.Glyphs())

            status = response.Blstats()
            x, y = status.CursX(), status.CursY()

            # Exactly one "@" on the map, located at the cursor.
            self.assertEqual(np.count_nonzero(chars == ord("@")), 1)
            self.assertEqual(chars[y, x], ord("@"))

            mon = nethack.permonst(nethack.glyph_to_mon(glyphs[y][x]))
            self.assertEqual(mon.mname, "monk")
            self.assertEqual(mon.mlevel, 10)

            class_sym = nethack.class_sym.from_mlet(mon.mlet)
            self.assertEqual(class_sym.sym, "@")
            self.assertEqual(class_sym.explain, "human or elf")

        # With WNOHANG, (0, 0) means the child exists and has not exited yet.
        self.assertEqual(os.waitpid(info["pid"], os.WNOHANG), (0, 0))

        del game  # Should kill process.

        # Once the child is gone, waiting on its pid must fail.
        with self.assertRaisesRegex(OSError, "No (child|such)? process"):
            os.waitpid(info["pid"], 0)
Пример #2
0
def main():
    """Play episodes in an endless loop and print progress after each one.

    Creates one NetHack instance that archives to "random.zip" and calls
    ``play(game, False)`` repeatedly. After every episode it prints the
    cumulative episode count, the cumulative step count, and the
    steps-per-second rate of the episode that just finished.

    The loop has no exit condition; the function only ends when an exception
    (e.g. KeyboardInterrupt) propagates.
    """
    episodes = 0
    steps = 0

    game = nethack.NetHack(archivefile="random.zip")

    start = timeit.default_timer()
    while True:
        # NOTE(review): `play` is defined outside this snippet; its return
        # value is used as the number of steps taken in the episode.
        steps_delta = play(game, False)
        time_delta = timeit.default_timer() - start

        episodes += 1
        steps += steps_delta

        # SPS is computed from the latest episode only, not the running total.
        print("Episode: %i. Steps: %i. SPS: %f" %
              (episodes, steps, steps_delta / time_delta))
        start = timeit.default_timer()
Пример #3
0
def play(should_stop, queue):
    """Drive a NetHack game with random actions until asked to stop.

    Args:
        should_stop: object whose ``is_set()`` is polled before every step
            (presumably a threading/multiprocessing Event -- confirm against
            the caller).
        queue: object whose ``put()`` receives the step count of the episode
            that just ended, each time the game is reset.
    """
    game = nethack.NetHack(archivefile=None)

    # Starting out "done" forces a reset on the very first iteration, which
    # also reports an initial count of 0 steps to the queue.
    steps, done = 0, True

    while not should_stop.is_set():
        episode_over = done or steps >= 1000
        if episode_over:
            queue.put(steps)
            steps = 0
            observation = game.reset()

        # When the game is waiting for a key press, acknowledge with MORE;
        # otherwise pick a random action.
        awaiting_space = (
            observation.Internal() and observation.Internal().Xwaitforspace()
        )
        action = (
            nethack.MiscAction.MORE if awaiting_space else random.choice(ACTIONS)
        )

        observation, done, _ = game.step(action)
        steps += 1
Пример #4
0
    def __init__(
        self,
        savedir=None,
        archivefile="nethack.%(pid)i.%(time)s.zip",
        character="mon-hum-neu-mal",
        max_episode_steps=5000,
        observation_keys=("glyphs", "status", "message", "inventory"),
        actions=None,
        options=None,
    ):
        """Constructs a new NLE environment.

        Args:
            savedir (str or None): path to save archives into. If None, a
                fresh timestamped subdirectory of ``<cwd>/nle_data`` is
                created. Ignored when ``archivefile`` is None. Defaults to
                None.
            archivefile (str or None): Template for the zip archive filename of
                NetHack ttyrec files. Use "%(pid)i" for the process id of the
                NetHack process, "%(time)s" for the creation time. Use None to
                disable writing archivefiles.
            character (str): name of character. Defaults to "mon-hum-neu-mal".
            max_episode_steps (int): maximum amount of steps allowed before the
                game is forcefully quit. In such cases, ``info["end_status"]``
                will be equal to ``StepStatus.ABORTED``. Defaults to 5000.
            observation_keys (list): keys to use when creating the observation.
                Defaults to all.
            actions (list): list of actions. If None, the full action space will
                be used, i.e. ``nle.nethack.ACTIONS``. Defaults to None.
            options (list): list of game options to initialize NetHack. If None,
                NetHack will be initialized with the options found in
                ``nle.nethack.NETHACKOPTIONS``. Defaults to None.

        Raises:
            KeyError: if ``observation_keys`` contains a key other than
                "glyphs", "status", "message" or "inventory".
        """

        def full_range_box(dtype, shape):
            # Box spanning every value representable by the integer dtype.
            limits = np.iinfo(dtype)
            return gym.spaces.Box(
                low=limits.min, high=limits.max, shape=shape, dtype=dtype
            )

        self.character = character
        self._max_episode_steps = max_episode_steps

        if actions is None:
            actions = FULL_ACTIONS
        self._actions = actions

        self.response = None

        if archivefile is not None:
            if savedir is None:
                parent_dir = os.path.join(os.getcwd(), "nle_data")
                os.makedirs(parent_dir, exist_ok=True)
                # Create a unique subdirectory for us.
                self.savedir = tempfile.mkdtemp(
                    prefix=time.strftime("%Y%m%d-%H%M%S_"), dir=parent_dir)
                created = True
            else:
                self.savedir = savedir
                # Only this call may legitimately find the directory already
                # present. Keeping the try this narrow ensures self.savedir is
                # always set by the time it is logged, and that errors from
                # the default-directory branch are not masked.
                try:
                    os.makedirs(self.savedir)
                except FileExistsError:
                    created = False
                else:
                    created = True

            if created:
                logger.info("Created savedir: %s", self.savedir)
            else:
                logger.info("Using existing savedir: %s", self.savedir)

            self.archivefile = os.path.join(self.savedir, archivefile)
        else:
            self.savedir = None
            self.archivefile = None
            self._stats_file = None
            self._stats_logger = None

        self._setup_statsfile = archivefile is not None

        self.env = nethack.NetHack(
            archivefile=self.archivefile,
            options=options,
            playername="Agent%(pid)i-" + self.character,
        )

        self._random = random.SystemRandom()

        # -1 so that it's 0-based on first reset
        self._episode = -1

        space_dict = {
            "glyphs": full_range_box(np.int16, DUNGEON_SHAPE),
            "status": full_range_box(np.int32, (23,)),
            "message": full_range_box(np.uint8, (DEFAULT_MSG_PAD,)),
            "inventory": gym.spaces.Tuple((
                full_range_box(np.int16, (DEFAULT_INV_PAD,)),
                full_range_box(np.uint8, (DEFAULT_INV_PAD, DEFAULT_INVSTR_PAD)),
                full_range_box(np.uint8, (DEFAULT_INV_PAD,)),
                full_range_box(np.uint8, (DEFAULT_INV_PAD,)),
            )),
        }

        # Only the requested keys become part of the observation space.
        self.observation_space = gym.spaces.Dict(
            {key: space_dict[key]
             for key in observation_keys})

        # Extractor function per observation key, restricted to the
        # requested keys.
        key_functions = {
            "glyphs": _get_glyphs,
            "status": _get_status_fast,
            "message": _get_padded_message,
            "inventory": _get_padded_inv,
        }
        self._key_functions = {
            key: function
            for key, function in key_functions.items()
            if key in observation_keys
        }

        self.action_space = gym.spaces.Discrete(len(self._actions))
Пример #5
0
 def test_forking_with_nethack_in_parent(self, num_procs=NUM_SUBPROCESSES):
     """Subprocess runs must all report 0 while the parent holds a NetHack."""
     # Breaks for pyzmq <= 18.0.0, fixed in
     # https://github.com/zeromq/pyzmq/commit/28c2a36836fc45c09ede4d9962498db449b642d1 # noqa
     # The instance is bound to a name only so that it stays alive while the
     # subprocesses run.
     env = nethack.NetHack(archivefile=None)  # noqa: F841
     results = _run_nethack_in_subprocesses(num_procs)
     self.assertEqual(results, [0] * num_procs)
Пример #6
0
 def test_forking_with_nethack_in_parent_new_context(
     self, num_procs=NUM_SUBPROCESSES
 ):
     """Same as the in-parent forking test, with an explicit new zmq context."""
     # The instance is bound to a name only so that it stays alive while the
     # subprocesses run.
     env = nethack.NetHack(archivefile=None, context=zmq.Context())  # noqa: F841
     results = _run_nethack_in_subprocesses(num_procs)
     self.assertEqual(results, [0] * num_procs)
Пример #7
0
def _run_nethack():
    """Start a non-archiving NetHack, reset it, and take three steps with action 0."""
    env = nethack.NetHack(archivefile=None)
    env.reset()
    for _ in range(3):
        env.step(0)
Пример #8
0
def play(env_name, play_mode, ngames, max_steps, seeds, no_clear, no_render):
    """Run a play session, rendering each step and reporting each episode.

    Args:
        env_name: "nethack" selects the raw ``nethack.NetHack`` interface;
            any other value is passed to ``gym.make``.
        play_mode: forwarded to ``get_action`` to decide how actions are
            obtained.
        ngames: the session ends once this many episodes have finished.
        max_steps: an episode is treated as done after this many steps.
        seeds: if not None, passed to ``env.seed`` (gym environments only).
        no_clear: if true, the terminal is not cleared before each render.
        no_render: if true, per-step rendering and the raw-env tombstone
            printout are skipped.

    Returns:
        None when the session ends normally (``get_action`` returned None or
        ``ngames`` episodes finished). The current episode's step count when
        a raw game is over but there is no tombstone window to print.
    """
    is_raw_env = env_name == "nethack"

    if is_raw_env:
        # TODO save data somewhere reasonable
        env = nethack.NetHack(archivefile="./nle_data/play_data.zip")
    else:
        env = gym.make(env_name)
        if seeds is not None:
            env.seed(seeds)

    obs = env.reset()
    last_obs = None  # needed for "nethack" env

    steps = 0
    episodes = 0
    reward = 0.0
    action = None

    start_time = timeit.default_timer()
    while True:
        if not no_render:
            if not no_clear:
                os.system("cls" if os.name == "nt" else "clear")

            if not is_raw_env:
                print("Previous reward:", reward)
                print("Available actions:", env._actions)
                print("Previous action: {!r}".format(
                    env._actions[action] if action is not None else None))
                env.render()
            else:
                print("Available actions:", nle.env.base.FULL_ACTIONS)
                print("Previous actions:", action)
                print_message.print_message(obs)

        action = get_action(env, play_mode, is_raw_env)
        if action is None:
            break

        if is_raw_env:
            # Keep the pre-step observation; it is inspected for the
            # game-over state once the episode ends.
            last_obs = obs
            obs, done, info = env.step(action)
        else:
            obs, reward, done, info = env.step(action)
        steps += 1

        done = done or steps >= max_steps
        if not done:
            continue

        if not is_raw_env:
            print("Final reward:", reward)
            print("End status:", info["end_status"].name)
            print("Env stats:")
            pprint.pprint(info["stats"])
        else:
            if last_obs.ProgramState().Gameover():
                # Print tombstone.
                # Index 1 is read below, so at least two windows must exist.
                if last_obs.WindowsLength() < 2:
                    return steps
                window = last_obs.Windows(1)

                if not no_render:
                    for i in range(window.StringsLength()):
                        print(window.Strings(i).decode("ascii"))

        time_delta = timeit.default_timer() - start_time
        print("Episode: {}. Steps: {}. SPS: {:f}".format(
            episodes, steps, steps / time_delta))
        start_time = timeit.default_timer()

        episodes += 1
        steps = 0
        if episodes == ngames:
            break
        # Keep the fresh observation so the next render shows the new
        # episode rather than the final state of the previous one.
        obs = env.reset()