class TestStateTracking(unittest.TestCase):
    """Check the ``StateTracking`` wrapper against the ``.ulx`` and ``.z8`` builds of one game.

    A single game is built once per class and compiled to both formats; every
    test then runs the same scenario on a ``GitGlulxEnv``-backed and a
    ``JerichoEnv``-backed environment.

    NOTE(review): this file defines a class named ``TestStateTracking`` more
    than once; a later definition rebinds the module-level name, so only one
    of them is likely to be collected. Confirm which one should remain.
    """

    @classmethod
    def setUpClass(cls):
        """Build the shared game and compile it to ``.ulx`` and ``.z8`` in a temp dir."""
        g_rng.set_seed(201809)
        cls.tmpdir = tempfile.mkdtemp()
        try:
            cls.options = textworld.GameOptions()
            cls.options.path = pjoin(cls.tmpdir, "tw-game.ulx")
            cls.game, cls.gamefile_ulx = testing.build_and_compile_game(cls.options)

            # Same options object, retargeted at the second output file.
            cls.options.path = pjoin(cls.tmpdir, "tw-game.z8")
            cls.gamefile_z8 = textworld.generator.compile_game(cls.game, cls.options)

            cls.infos = EnvInfos(
                facts=True,
                policy_commands=True,
                admissible_commands=True,
                intermediate_reward=True,
            )
        except Exception:
            # unittest does not call tearDownClass when setUpClass raises, so
            # the temporary directory has to be removed here.
            shutil.rmtree(cls.tmpdir, ignore_errors=True)
            raise

    @classmethod
    def tearDownClass(cls):
        """Remove the temporary directory holding the compiled games."""
        shutil.rmtree(cls.tmpdir)

    def setUp(self):
        """Create and load one environment per compiled game.

        Each ``close`` is registered with ``addCleanup`` as soon as the
        environment is loaded: ``tearDown`` is skipped when ``setUp`` raises,
        so a failure on the second environment would otherwise leave the
        first one open. Cleanups also run even if an earlier one raises.
        """
        self.env_z8 = StateTracking(JerichoEnv(self.infos))
        self.env_z8.load(self.gamefile_z8)
        self.addCleanup(self.env_z8.close)

        self.env_ulx = StateTracking(GitGlulxEnv(self.infos))
        self.env_ulx.load(self.gamefile_ulx)
        self.addCleanup(self.env_ulx.close)

    def _backends(self):
        """Return ``(label, env)`` pairs in the order the tests exercise them."""
        return (("ulx", self.env_ulx), ("z8", self.env_z8))

    def test_intermediate_reward(self):
        """Check the ``intermediate_reward`` reported along a fixed command sequence."""
        for backend, env in self._backends():
            # subTest labels a failure with the backend it happened on.
            with self.subTest(backend=backend):
                initial_state = env.reset()
                assert initial_state.intermediate_reward == 0

                game_state, _, _ = env.step("drop carrot")
                assert game_state.intermediate_reward == -1
                game_state, _, _ = env.step("go west")
                assert game_state.intermediate_reward == 0

                game_state, _, _ = env.step("go east")
                game_state, _, _ = env.step("close chest")
                game_state, _, _ = env.step("go west")
                game_state, _, _ = env.step("take carrot")
                game_state, _, _ = env.step("go east")
                game_state, _, _ = env.step("open chest")
                game_state, _, _ = env.step("close wooden door")
                assert game_state.intermediate_reward == 0

                game_state, _, _ = env.step("insert carrot into chest")
                game_state, _, done = env.step("close chest")
                assert done
                assert game_state.won
                assert game_state.intermediate_reward == 1

    def test_policy_commands(self):
        """Check ``policy_commands`` after on-track, off-track and losing actions."""
        quest_commands = self.game.main_quest.commands
        for backend, env in self._backends():
            with self.subTest(backend=backend):
                initial_state = env.reset()
                assert tuple(initial_state.policy_commands) == quest_commands

                game_state, _, _ = env.step("drop carrot")
                expected = ("take carrot",) + quest_commands
                assert tuple(game_state.policy_commands) == expected, game_state.policy_commands

                game_state, _, _ = env.step("take carrot")
                expected = quest_commands
                assert tuple(game_state.policy_commands) == expected

                game_state, _, _ = env.step("go east")
                expected = quest_commands[1:]
                assert tuple(game_state.policy_commands) == expected

                game_state, _, _ = env.step("insert carrot into chest")
                game_state, _, _ = env.step("close chest")
                assert game_state.policy_commands == [], game_state.policy_commands

                # Test parallel subquests.
                game_state = env.reset()
                commands = list(quest_commands)
                assert game_state.policy_commands == commands

                game_state, _, _ = env.step("close wooden door")
                assert game_state.policy_commands == ["open wooden door"] + commands

                game_state, _, _ = env.step("drop carrot")
                # Either ordering of the two recovery actions is accepted.
                is_policy1 = (game_state.policy_commands
                              == ["take carrot", "open wooden door"] + commands)
                is_policy2 = (game_state.policy_commands
                              == ["open wooden door", "take carrot"] + commands)
                assert is_policy1 or is_policy2, game_state.policy_commands

                game_state, _, _ = env.step("open wooden door")
                assert game_state.policy_commands == ["take carrot"] + commands

                game_state, _, _ = env.step("go east")
                assert game_state.policy_commands == ["go west", "take carrot"] + commands

                # Irreversible action.
                game_state = env.reset()
                assert tuple(game_state.policy_commands) == quest_commands

                game_state, _, done = env.step("eat carrot")
                assert done
                assert game_state.lost
                assert len(game_state.policy_commands) == 0

    def test_admissible_commands(self):
        """Check ``admissible_commands`` while replaying the main quest commands."""
        for backend, env in self._backends():
            with self.subTest(backend=backend):
                game_state = env.reset()
                # Make sure examine, look and inventory are in the admissible commands.
                assert "examine carrot" in game_state.admissible_commands
                assert "examine wooden door" in game_state.admissible_commands

                # Pre-set so an empty command list fails the assertion below
                # instead of raising NameError.
                done = False
                for command in self.game.main_quest.commands:
                    assert "look" in game_state.admissible_commands
                    assert "inventory" in game_state.admissible_commands
                    assert command in game_state.admissible_commands
                    game_state, _, done = env.step(command)

                assert done
                # Can't examine objects that are inside closed containers.
                assert "examine chest" in game_state.admissible_commands
                assert "examine carrot" not in game_state.admissible_commands

    def test_missing_game_infos_file(self):
        """Loading an empty game file through ``TWInform7`` raises ``MissingGameInfosError``."""
        with make_temp_directory() as tmpdir:
            for ext, env_class in [(".ulx", GitGlulxEnv), (".z8", JerichoEnv)]:
                with self.subTest(ext=ext):
                    gamefile = pjoin(tmpdir, "tmp" + ext)
                    with open(gamefile, "w"):
                        pass  # Empty file

                    env = TWInform7(env_class())
                    npt.assert_raises(MissingGameInfosError, env.load, gamefile)
def setUp(self):
    """Create the two ``StateTracking`` environments and load their game files.

    ``self.env_z8`` wraps a ``JerichoEnv`` and loads ``self.gamefile_z8``;
    ``self.env_ulx`` wraps a ``GitGlulxEnv`` and loads ``self.gamefile_ulx``.
    Both are built from ``self.infos``. Each attribute is assigned before the
    corresponding ``load`` call.
    """
    z8_env = StateTracking(JerichoEnv(self.infos))
    self.env_z8 = z8_env
    z8_env.load(self.gamefile_z8)

    ulx_env = StateTracking(GitGlulxEnv(self.infos))
    self.env_ulx = ulx_env
    ulx_env.load(self.gamefile_ulx)
class TestStateTracking(unittest.TestCase):
    """Check the ``StateTracking`` wrapper against the ``.ulx`` and ``.z8`` builds of one game.

    A single game is built once per class and compiled to both formats; most
    tests run the same scenario on a ``GitGlulxEnv``-backed and a
    ``JerichoEnv``-backed environment. Expected command sequences come from
    ``self.game.metadata["walkthrough"]``.

    NOTE(review): this file defines a class named ``TestStateTracking`` more
    than once; a later definition rebinds the module-level name, so only one
    of them is likely to be collected. Confirm which one should remain.
    """

    @classmethod
    def setUpClass(cls):
        """Build the shared game and compile it to ``.ulx`` and ``.z8`` in a temp dir."""
        g_rng.set_seed(201809)
        cls.tmpdir = tempfile.mkdtemp()
        try:
            cls.options = textworld.GameOptions()
            cls.options.path = pjoin(cls.tmpdir, "tw-game.ulx")
            cls.game, cls.gamefile_ulx = testing.build_and_compile_game(cls.options)

            # Same options object, retargeted at the second output file.
            cls.options.path = pjoin(cls.tmpdir, "tw-game.z8")
            cls.gamefile_z8 = textworld.generator.compile_game(cls.game, cls.options)

            cls.infos = EnvInfos(
                facts=True,
                policy_commands=True,
                admissible_commands=True,
                intermediate_reward=True,
            )
        except Exception:
            # unittest does not call tearDownClass when setUpClass raises, so
            # the temporary directory has to be removed here.
            shutil.rmtree(cls.tmpdir, ignore_errors=True)
            raise

    @classmethod
    def tearDownClass(cls):
        """Remove the temporary directory holding the compiled games."""
        shutil.rmtree(cls.tmpdir)

    def setUp(self):
        """Create and load one environment per compiled game.

        Each ``close`` is registered with ``addCleanup`` as soon as the
        environment is loaded: ``tearDown`` is skipped when ``setUp`` raises,
        so a failure on the second environment would otherwise leave the
        first one open. Cleanups also run even if an earlier one raises.
        """
        self.env_z8 = StateTracking(JerichoEnv(self.infos))
        self.env_z8.load(self.gamefile_z8)
        self.addCleanup(self.env_z8.close)

        self.env_ulx = StateTracking(GitGlulxEnv(self.infos))
        self.env_ulx.load(self.gamefile_ulx)
        self.addCleanup(self.env_ulx.close)

    def _backends(self):
        """Return ``(label, env)`` pairs in the order the tests exercise them."""
        return (("ulx", self.env_ulx), ("z8", self.env_z8))

    def test_intermediate_reward(self):
        """Check the ``intermediate_reward`` reported along a fixed command sequence."""
        for backend, env in self._backends():
            # subTest labels a failure with the backend it happened on.
            with self.subTest(backend=backend):
                initial_state = env.reset()
                assert initial_state.intermediate_reward == 0

                game_state, _, _ = env.step("drop carrot")
                assert game_state.intermediate_reward == -1
                game_state, _, _ = env.step("go west")
                assert game_state.intermediate_reward == 0

                game_state, _, _ = env.step("go east")
                game_state, _, _ = env.step("close chest")
                game_state, _, _ = env.step("go west")
                game_state, _, _ = env.step("take carrot")
                game_state, _, _ = env.step("go east")
                game_state, _, _ = env.step("open chest")
                game_state, _, _ = env.step("close wooden door")
                assert game_state.intermediate_reward == 0

                game_state, _, _ = env.step("insert carrot into chest")
                game_state, _, done = env.step("close chest")
                assert done
                assert game_state.won
                assert game_state.intermediate_reward == 1

    def test_policy_commands(self):
        """Check ``policy_commands`` after on-track, off-track and losing actions."""
        for backend, env in self._backends():
            with self.subTest(backend=backend):
                initial_state = env.reset()
                walkthrough = tuple(self.game.metadata["walkthrough"])
                assert tuple(initial_state.policy_commands) == walkthrough

                game_state, _, _ = env.step("drop carrot")
                assert tuple(game_state.policy_commands) == ("take carrot",) + walkthrough

                game_state, _, _ = env.step("take carrot")
                assert tuple(game_state.policy_commands) == walkthrough

                game_state, _, _ = env.step("go east")
                assert tuple(game_state.policy_commands) == walkthrough[1:]

                game_state, _, _ = env.step("insert carrot into chest")
                game_state, _, _ = env.step("close chest")
                assert game_state.policy_commands == [], game_state.policy_commands

                # Test parallel subquests.
                game_state = env.reset()
                # List form, because the comparisons below are against lists.
                walkthrough_list = list(walkthrough)
                assert game_state.policy_commands == walkthrough_list

                game_state, _, _ = env.step("close wooden door")
                assert game_state.policy_commands == ["open wooden door"] + walkthrough_list

                game_state, _, _ = env.step("drop carrot")
                # Either ordering of the two recovery actions is accepted.
                is_policy1 = (game_state.policy_commands
                              == ["take carrot", "open wooden door"] + walkthrough_list)
                is_policy2 = (game_state.policy_commands
                              == ["open wooden door", "take carrot"] + walkthrough_list)
                assert is_policy1 or is_policy2, game_state.policy_commands

                game_state, _, _ = env.step("open wooden door")
                assert game_state.policy_commands == ["take carrot"] + walkthrough_list

                game_state, _, _ = env.step("go east")
                assert game_state.policy_commands == ["go west", "take carrot"] + walkthrough_list

                # Irreversible action.
                game_state = env.reset()
                assert tuple(game_state.policy_commands) == walkthrough

                game_state, _, done = env.step("eat carrot")
                assert done
                assert game_state.lost
                assert len(game_state.policy_commands) == 0

    def test_admissible_commands(self):
        """Check ``admissible_commands`` while replaying the walkthrough."""
        for backend, env in self._backends():
            with self.subTest(backend=backend):
                game_state = env.reset()
                # Make sure examine, look and inventory are in the admissible commands.
                assert "examine carrot" in game_state.admissible_commands
                assert "examine wooden door" in game_state.admissible_commands

                # Pre-set so an empty walkthrough fails the assertion below
                # instead of raising NameError.
                done = False
                for command in self.game.metadata["walkthrough"]:
                    assert "look" in game_state.admissible_commands
                    assert "inventory" in game_state.admissible_commands
                    assert command in game_state.admissible_commands
                    game_state, _, done = env.step(command)

                assert done
                # Can't examine objects that are inside closed containers.
                assert "examine chest" in game_state.admissible_commands
                assert "examine carrot" not in game_state.admissible_commands

    def test_missing_game_infos_file(self):
        """Loading an empty game file through ``TWInform7`` raises ``MissingGameInfosError``."""
        with make_temp_directory() as tmpdir:
            for ext, env_class in [(".ulx", GitGlulxEnv), (".z8", JerichoEnv)]:
                with self.subTest(ext=ext):
                    gamefile = pjoin(tmpdir, "tmp" + ext)
                    with open(gamefile, "w"):
                        pass  # Empty file

                    env = TWInform7(env_class())
                    npt.assert_raises(MissingGameInfosError, env.load, gamefile)

    def test_copy(self):
        """Check ``copy()``: rejected on the ulx env, independent state on the z8 env."""
        npt.assert_raises(NotImplementedError, self.env_ulx.copy)
        source = self.env_z8

        # Copy before env.reset.
        env = source.copy()
        assert env._gamefile == source._gamefile
        assert env._game == source._game
        assert env._inform7 == source._inform7
        assert env._last_action == source._last_action
        assert env._previous_winning_policy == source._previous_winning_policy
        assert env._current_winning_policy == source._current_winning_policy
        assert env._moves == source._moves
        assert env._game_progression == source._game_progression

        # Copy after env.reset.
        source.reset()
        env = source.copy()
        assert env._gamefile == source._gamefile
        assert env._game is source._game  # Reference
        assert env._inform7 is source._inform7  # Reference
        assert env._last_action == source._last_action
        assert env._previous_winning_policy == source._previous_winning_policy
        assert tuple(env._current_winning_policy) == tuple(source._current_winning_policy)
        assert env._moves == source._moves
        # The progression must be a distinct object with an equal state.
        assert env._game_progression is not source._game_progression
        assert env._game_progression.state == source._game_progression.state

        # Keep a copy of some information for later use.
        current_winning_policy = list(env._current_winning_policy)
        game_progression = env._game_progression.copy()

        # Check copy after a few env.step.
        source.step("go east")
        assert env._game_progression.state != source._game_progression.state
        source.step("drop carrot")
        assert env._game_progression.state != source._game_progression.state

        # Check the copied env didn't change after calling env.step.
        assert tuple(env._current_winning_policy) == tuple(current_winning_policy)
        assert tuple(env._current_winning_policy) != tuple(source._current_winning_policy)
        assert env._game_progression.state == game_progression.state