class TestStateTracking(unittest.TestCase):
    """Exercise the ``StateTracking`` wrapper on both interpreter backends.

    A single game is built once per class and compiled to both ``.ulx`` and
    ``.z8``. Each test then replays the same scenario against a
    ``GitGlulxEnv`` and a ``JerichoEnv``, both wrapped in ``StateTracking``.
    """

    @classmethod
    def setUpClass(cls):
        """Build the shared game and compile it to ``.ulx`` and ``.z8``."""
        # Fixed seed: the tests below hard-code object names and commands.
        g_rng.set_seed(201809)
        cls.tmpdir = tempfile.mkdtemp()
        cls.options = textworld.GameOptions()
        cls.options.path = pjoin(cls.tmpdir, "tw-game.ulx")
        cls.game, cls.gamefile_ulx = testing.build_and_compile_game(
            cls.options)
        # Re-use the same options object, only retargeting the output path.
        cls.options.path = pjoin(cls.tmpdir, "tw-game.z8")
        cls.gamefile_z8 = textworld.generator.compile_game(
            cls.game, cls.options)
        cls.infos = EnvInfos(facts=True,
                             policy_commands=True,
                             admissible_commands=True,
                             intermediate_reward=True)

    @classmethod
    def tearDownClass(cls):
        """Remove the temporary directory holding the compiled games."""
        shutil.rmtree(cls.tmpdir)

    def setUp(self):
        """Load the game into one tracked environment per backend.

        Each environment's ``close`` is registered with ``addCleanup`` as soon
        as it is loaded. Cleanups run even when a later step of ``setUp``
        raises (in which case ``tearDown`` would be skipped), so an
        already-loaded environment is not leaked.
        """
        self.env_z8 = StateTracking(JerichoEnv(self.infos))
        self.env_z8.load(self.gamefile_z8)
        self.addCleanup(self.env_z8.close)

        self.env_ulx = StateTracking(GitGlulxEnv(self.infos))
        self.env_ulx.load(self.gamefile_ulx)
        self.addCleanup(self.env_ulx.close)

    def test_intermediate_reward(self):
        """Check ``intermediate_reward`` along a scripted playthrough."""
        for env in [self.env_ulx, self.env_z8]:
            initial_state = env.reset()

            assert initial_state.intermediate_reward == 0
            game_state, _, _ = env.step("drop carrot")
            assert game_state.intermediate_reward == -1
            game_state, _, _ = env.step("go west")
            assert game_state.intermediate_reward == 0
            game_state, _, _ = env.step("go east")
            game_state, _, _ = env.step("close chest")
            game_state, _, _ = env.step("go west")
            game_state, _, _ = env.step("take carrot")
            game_state, _, _ = env.step("go east")
            game_state, _, _ = env.step("open chest")
            game_state, _, _ = env.step("close wooden door")
            assert game_state.intermediate_reward == 0
            game_state, _, _ = env.step("insert carrot into chest")
            game_state, _, done = env.step("close chest")
            assert done
            assert game_state.won
            assert game_state.intermediate_reward == 1

    def test_policy_commands(self):
        """Check ``policy_commands`` as the player strays from the quest."""
        # Loop-invariant: the main quest's commands, as given and as a list
        # (``policy_commands`` is compared against lists further down).
        quest_commands = self.game.main_quest.commands
        commands = list(quest_commands)

        for env in [self.env_ulx, self.env_z8]:
            initial_state = env.reset()

            assert tuple(initial_state.policy_commands) == quest_commands

            game_state, _, _ = env.step("drop carrot")
            expected = ("take carrot", ) + quest_commands
            assert tuple(game_state.policy_commands
                         ) == expected, game_state.policy_commands

            game_state, _, _ = env.step("take carrot")
            expected = quest_commands
            assert tuple(game_state.policy_commands) == expected

            game_state, _, _ = env.step("go east")
            expected = quest_commands[1:]
            assert tuple(game_state.policy_commands) == expected

            game_state, _, _ = env.step("insert carrot into chest")
            game_state, _, _ = env.step("close chest")
            assert game_state.policy_commands == [], game_state.policy_commands

            # Test parallel subquests.
            game_state = env.reset()
            assert game_state.policy_commands == commands
            game_state, _, _ = env.step("close wooden door")
            assert game_state.policy_commands == ["open wooden door"
                                                  ] + commands
            game_state, _, _ = env.step("drop carrot")
            # Either ordering of the two recovery actions is accepted.
            is_policy1 = (game_state.policy_commands ==
                          ["take carrot", "open wooden door"] + commands)
            is_policy2 = (game_state.policy_commands ==
                          ["open wooden door", "take carrot"] + commands)
            assert is_policy1 or is_policy2, game_state.policy_commands
            game_state, _, _ = env.step("open wooden door")
            assert game_state.policy_commands == ["take carrot"] + commands
            game_state, _, _ = env.step("go east")
            assert game_state.policy_commands == ["go west", "take carrot"
                                                  ] + commands

            # Irreversible action.
            game_state = env.reset()
            assert tuple(game_state.policy_commands) == quest_commands
            game_state, _, done = env.step("eat carrot")
            assert done
            assert game_state.lost
            assert len(game_state.policy_commands) == 0

    def test_admissible_commands(self):
        """Check ``admissible_commands`` while following the main quest."""
        for env in [self.env_ulx, self.env_z8]:
            game_state = env.reset()
            # Make sure examine, look and inventory are in the admissible commands.
            assert "examine carrot" in game_state.admissible_commands
            assert "examine wooden door" in game_state.admissible_commands

            # Pre-set so an empty command list fails the assertion below
            # instead of raising NameError.
            done = False
            for command in self.game.main_quest.commands:
                assert "look" in game_state.admissible_commands
                assert "inventory" in game_state.admissible_commands
                assert command in game_state.admissible_commands
                game_state, _, done = env.step(command)

            assert done
            # Can't examine objects that are inside closed containers.
            assert "examine chest" in game_state.admissible_commands
            assert "examine carrot" not in game_state.admissible_commands

    def test_missing_game_infos_file(self):
        """Loading an empty game file via ``TWInform7`` must raise."""
        with make_temp_directory() as tmpdir:
            for ext, env_class in [(".ulx", GitGlulxEnv), (".z8", JerichoEnv)]:
                gamefile = pjoin(tmpdir, "tmp" + ext)
                with open(gamefile, "w"):
                    pass  # Empty file

                env = TWInform7(env_class())
                npt.assert_raises(MissingGameInfosError, env.load, gamefile)
# Example #3
# 0
class TestStateTracking(unittest.TestCase):
    """Exercise the ``StateTracking`` wrapper on both interpreter backends.

    A single game is built once per class and compiled to both ``.ulx`` and
    ``.z8``. Most tests replay the same scenario against a ``GitGlulxEnv``
    and a ``JerichoEnv``, both wrapped in ``StateTracking``.
    """

    @classmethod
    def setUpClass(cls):
        """Build the shared game and compile it to ``.ulx`` and ``.z8``."""
        # Fixed seed: the tests below hard-code object names and commands.
        g_rng.set_seed(201809)
        cls.tmpdir = tempfile.mkdtemp()
        cls.options = textworld.GameOptions()
        cls.options.path = pjoin(cls.tmpdir, "tw-game.ulx")
        cls.game, cls.gamefile_ulx = testing.build_and_compile_game(
            cls.options)
        # Re-use the same options object, only retargeting the output path.
        cls.options.path = pjoin(cls.tmpdir, "tw-game.z8")
        cls.gamefile_z8 = textworld.generator.compile_game(
            cls.game, cls.options)
        cls.infos = EnvInfos(facts=True,
                             policy_commands=True,
                             admissible_commands=True,
                             intermediate_reward=True)

    @classmethod
    def tearDownClass(cls):
        """Remove the temporary directory holding the compiled games."""
        shutil.rmtree(cls.tmpdir)

    def setUp(self):
        """Load the game into one tracked environment per backend.

        Each environment's ``close`` is registered with ``addCleanup`` as soon
        as it is loaded. Cleanups run even when a later step of ``setUp``
        raises (in which case ``tearDown`` would be skipped), so an
        already-loaded environment is not leaked.
        """
        self.env_z8 = StateTracking(JerichoEnv(self.infos))
        self.env_z8.load(self.gamefile_z8)
        self.addCleanup(self.env_z8.close)

        self.env_ulx = StateTracking(GitGlulxEnv(self.infos))
        self.env_ulx.load(self.gamefile_ulx)
        self.addCleanup(self.env_ulx.close)

    def test_intermediate_reward(self):
        """Check ``intermediate_reward`` along a scripted playthrough."""
        for env in [self.env_ulx, self.env_z8]:
            initial_state = env.reset()

            assert initial_state.intermediate_reward == 0
            game_state, _, _ = env.step("drop carrot")
            assert game_state.intermediate_reward == -1
            game_state, _, _ = env.step("go west")
            assert game_state.intermediate_reward == 0
            game_state, _, _ = env.step("go east")
            game_state, _, _ = env.step("close chest")
            game_state, _, _ = env.step("go west")
            game_state, _, _ = env.step("take carrot")
            game_state, _, _ = env.step("go east")
            game_state, _, _ = env.step("open chest")
            game_state, _, _ = env.step("close wooden door")
            assert game_state.intermediate_reward == 0
            game_state, _, _ = env.step("insert carrot into chest")
            game_state, _, done = env.step("close chest")
            assert done
            assert game_state.won
            assert game_state.intermediate_reward == 1

    def test_policy_commands(self):
        """Check ``policy_commands`` as the player strays from the walkthrough."""
        # Loop-invariant: keep a tuple and a list view of the walkthrough under
        # separate names, since ``policy_commands`` is compared both ways.
        walkthrough = tuple(self.game.metadata["walkthrough"])
        commands = list(walkthrough)

        for env in [self.env_ulx, self.env_z8]:
            initial_state = env.reset()

            assert tuple(initial_state.policy_commands) == walkthrough

            game_state, _, _ = env.step("drop carrot")
            assert tuple(
                game_state.policy_commands) == ("take carrot", ) + walkthrough

            game_state, _, _ = env.step("take carrot")
            assert tuple(game_state.policy_commands) == walkthrough

            game_state, _, _ = env.step("go east")
            assert tuple(game_state.policy_commands) == walkthrough[1:]

            game_state, _, _ = env.step("insert carrot into chest")
            game_state, _, _ = env.step("close chest")
            assert game_state.policy_commands == [], game_state.policy_commands

            # Test parallel subquests.
            game_state = env.reset()
            assert game_state.policy_commands == commands
            game_state, _, _ = env.step("close wooden door")
            assert game_state.policy_commands == ["open wooden door"
                                                  ] + commands
            game_state, _, _ = env.step("drop carrot")
            # Either ordering of the two recovery actions is accepted.
            is_policy1 = (game_state.policy_commands ==
                          ["take carrot", "open wooden door"] + commands)
            is_policy2 = (game_state.policy_commands ==
                          ["open wooden door", "take carrot"] + commands)
            assert is_policy1 or is_policy2, game_state.policy_commands
            game_state, _, _ = env.step("open wooden door")
            assert game_state.policy_commands == ["take carrot"] + commands
            game_state, _, _ = env.step("go east")
            assert game_state.policy_commands == ["go west", "take carrot"
                                                  ] + commands

            # Irreversible action.
            game_state = env.reset()
            assert tuple(game_state.policy_commands) == walkthrough
            game_state, _, done = env.step("eat carrot")
            assert done
            assert game_state.lost
            assert len(game_state.policy_commands) == 0

    def test_admissible_commands(self):
        """Check ``admissible_commands`` while following the walkthrough."""
        for env in [self.env_ulx, self.env_z8]:
            game_state = env.reset()
            # Make sure examine, look and inventory are in the admissible commands.
            assert "examine carrot" in game_state.admissible_commands
            assert "examine wooden door" in game_state.admissible_commands

            # Pre-set so an empty walkthrough fails the assertion below
            # instead of raising NameError.
            done = False
            for command in self.game.metadata["walkthrough"]:
                assert "look" in game_state.admissible_commands
                assert "inventory" in game_state.admissible_commands
                assert command in game_state.admissible_commands
                game_state, _, done = env.step(command)

            assert done
            # Can't examine objects that are inside closed containers.
            assert "examine chest" in game_state.admissible_commands
            assert "examine carrot" not in game_state.admissible_commands

    def test_missing_game_infos_file(self):
        """Loading an empty game file via ``TWInform7`` must raise."""
        with make_temp_directory() as tmpdir:
            for ext, env_class in [(".ulx", GitGlulxEnv), (".z8", JerichoEnv)]:
                gamefile = pjoin(tmpdir, "tmp" + ext)
                with open(gamefile, "w"):
                    pass  # Empty file

                env = TWInform7(env_class())
                npt.assert_raises(MissingGameInfosError, env.load, gamefile)

    def test_copy(self):
        """Check ``copy()`` of the z8 env before reset, after reset and after steps.

        The ulx env is expected to raise ``NotImplementedError`` on ``copy``.
        """
        # NOTE(review): the copies created below are never closed; confirm
        # whether copy() hands out resources that need an explicit close().
        npt.assert_raises(NotImplementedError, self.env_ulx.copy)

        # Copy before env.reset.
        env = self.env_z8.copy()
        assert env._gamefile == self.env_z8._gamefile
        assert env._game == self.env_z8._game
        assert env._inform7 == self.env_z8._inform7
        assert env._last_action == self.env_z8._last_action
        assert env._previous_winning_policy == self.env_z8._previous_winning_policy
        assert env._current_winning_policy == self.env_z8._current_winning_policy
        assert env._moves == self.env_z8._moves
        assert env._game_progression == self.env_z8._game_progression

        # Copy after env.reset.
        self.env_z8.reset()
        env = self.env_z8.copy()
        assert env._gamefile == self.env_z8._gamefile
        assert id(env._game) == id(self.env_z8._game)  # Reference
        assert id(env._inform7) == id(self.env_z8._inform7)  # Reference
        assert env._last_action == self.env_z8._last_action
        assert env._previous_winning_policy == self.env_z8._previous_winning_policy
        assert tuple(env._current_winning_policy) == tuple(
            self.env_z8._current_winning_policy)
        assert env._moves == self.env_z8._moves
        # The progression must be a distinct object holding an equal state.
        assert id(env._game_progression) != id(self.env_z8._game_progression)
        assert env._game_progression.state == self.env_z8._game_progression.state

        # Keep a copy of some information for later use.
        current_winning_policy = list(env._current_winning_policy)
        game_progression = env._game_progression.copy()

        # Check copy after a few env.step.
        self.env_z8.step("go east")
        assert env._game_progression.state != self.env_z8._game_progression.state
        self.env_z8.step("drop carrot")
        assert env._game_progression.state != self.env_z8._game_progression.state

        # Check the copied env didn't change after calling env.step.
        assert tuple(
            env._current_winning_policy) == tuple(current_winning_policy)
        assert tuple(env._current_winning_policy) != tuple(
            self.env_z8._current_winning_policy)
        assert env._game_progression.state == game_progression.state