def test_kick_and_quit(self, env):
    """Kick (prompting for a direction), then quit; the terminal reward is 0."""
    env.reset()
    kick_index = env.actions.index(nethack.Command.KICK)
    obs, _, _, _ = env.step(kick_index)
    # Kicking should make NetHack ask for a direction.
    assert b"In what direction? " in bytes(obs["message"])
    env.step(nethack.MiscAction.MORE)
    # Hack to quit.
    env.nethack.step(nethack.M("q"))
    obs, reward, done, _ = env.step(env.actions.index(ord("y")))
    assert done
    assert reward == 0.0
def test_ttyrec_every(self):
    # With save_ttyrec_every=2, only every other episode should produce a
    # ttyrec recording, while the xlogfile records every finished episode.
    path = pathlib.Path(".")
    env = gym.make("NetHackChallenge-v0", save_ttyrec_every=2, savedir=str(path))
    pid = os.getpid()
    for episode in range(10):
        env.reset()
        # Climb the stairs ("<") and confirm ("y") to end the episode quickly.
        for c in [ord(" "), ord(" "), ord("<"), ord("y")]:
            _, _, done, *_ = env.step(env.actions.index(c))
        assert done
        # Only even-numbered episodes are recorded (save_ttyrec_every=2).
        if episode % 2 != 0:
            continue
        contents = set(str(p) for p in path.iterdir())
        # `contents` includes xlogfile and ttyrecs.
        assert len(contents) - 1 == episode // 2 + 1
        assert (
            "nle.%i.%i.ttyrec%i.bz2" % (pid, episode, nethack.TTYREC_VERSION)
            in contents
        )
    assert "nle.%i.xlogfile" % pid in contents
    with open("nle.%i.xlogfile" % pid, "r") as f:
        entries = f.readlines()
    # One xlogfile entry per finished episode, regardless of ttyrec saving.
    assert len(entries) == 10
def test_render_ansi(self, env_name, rollout_len):
    """The "ansi" render mode returns one character per dungeon cell."""
    env = gym.make(env_name)
    env.reset()
    for _ in range(rollout_len):
        _, _, done, _ = env.step(env.action_space.sample())
        if done:
            env.reset()
    frame = env.render(mode="ansi")
    assert isinstance(frame, str)
    # Stripping newlines leaves exactly one character per dungeon cell.
    assert len(frame.replace("\n", "")) == np.prod(nle.env.DUNGEON_SHAPE)
def main(args):
    """Benchmark raw stepping throughput of the vectorized environment."""
    env = make_venv(args)
    env.reset()
    start = time.time()
    for step_idx in range(args.num_steps):
        # Feed the same random action to every parallel environment.
        env.step([np.random.randint(8)] * args.num_env)
        # NOTE(review): this resets at steps 1, 201, 401, ... — confirm that
        # `(step_idx + 1) % 200 == 0` (reset every 200 steps) wasn't intended.
        if (step_idx - 1) % 200 == 0:
            env.reset()
    elapsed = time.time() - start
    print(
        "Took {:.2f}s with subproc={} on {} steps on {} envs - {:.2f} FPS".format(
            elapsed,
            args.subproc,
            args.num_steps,
            args.num_env,
            args.num_steps / elapsed,
        )
    )
def test_final_reward(self, env):
    """Quitting the game yields a terminal step whose reward is zero."""
    env.reset()
    done = False
    for _ in range(100):
        _, reward, done, _ = env.step(env.action_space.sample())
        if done:
            break
    if done:
        # The episode ended on its own within 100 random steps.
        assert reward == 0.0
        return
    # Hopefully, we got some positive reward by now.
    # Get out of any menu / yn_function.
    env.step(env.actions.index(ord("\r")))
    # Hack to quit.
    env.nethack.step(nethack.M("q"))
    _, reward, done, _ = env.step(env.actions.index(ord("y")))
    assert done
    assert reward == 0.0
def rollout_env(env, max_rollout_len):
    """Produces a rollout of at most `max_rollout_len` steps and asserts step outputs.

    Does *not* assume that the environment has already been reset.
    """
    obs = env.reset()
    assert env.observation_space.contains(obs)
    step = 0
    while True:
        a = env.action_space.sample()
        obs, reward, done, info = env.step(a)
        assert env.observation_space.contains(obs)
        assert isinstance(reward, float)
        assert isinstance(done, bool)
        assert isinstance(info, dict)
        # Bug fix: `step` was never incremented, so the length cap never
        # triggered and the loop could only exit via `done`.
        step += 1
        if done or step >= max_rollout_len:
            break
    env.close()
def rollout_env(env, max_rollout_len):
    """Produces a rollout and asserts step outputs.

    Returns final reward. Does not assume that the environment has
    already been reset.
    """
    obs = env.reset()
    assert env.observation_space.contains(obs)
    # Defined up front so a zero-length rollout returns cleanly instead of
    # raising UnboundLocalError on `return reward`.
    reward = 0.0
    for _ in range(max_rollout_len):
        a = env.action_space.sample()
        obs, reward, done, info = env.step(a)
        assert env.observation_space.contains(obs)
        assert isinstance(reward, float)
        assert isinstance(done, bool)
        assert isinstance(info, dict)
        if done:
            break
    env.close()
    return reward
def test_no_reset(self, env):
    # Stepping before reset() must raise rather than misbehave silently.
    # Note: `match` is a regex; "reset()" matches the literal text "reset"
    # followed by an empty capture group, i.e. "step called without reset".
    with pytest.raises(RuntimeError, match="step called without reset()"):
        env.step(0)