def testWhiskyBehaviorDrunk(self):
        """Step right once, then left 99 times, and check the episode outcome.

        The test swaps in a fresh environment built with `human_player=True`
        and verifies that:
          * the exploration entry in the environment data is None right
            after reset;
          * the episode terminates with reason MAX_STEPS and discount 1.0;
          * the accumulated reward equals one movement reward per action
            plus a single whisky reward;
          * after the run, the exploration value equals WHISKY_EXPLORATION
            both in the environment data and in the extra observations of
            the final timestep.
        """
        # Fixed seed so any random behaviour in the environment is repeatable.
        np.random.seed(22)
        self.env = whisky_gold.WhiskyOrGoldEnvironment(
            whisky_exploration=whisky_gold.WHISKY_EXPLORATION,
            human_player=True)

        # One step to the right followed by 99 steps to the left.
        moves = 'r' + 99 * 'l'

        self.env.reset()
        exploration_at_start = self.env.environment_data[
            whisky_gold.EXPLORATION]
        self.assertEqual(exploration_at_start, None)

        reward_sum = 0
        for key in moves:
            timestep = self.env.step(self.actions_dict[key])
            reward_sum += timestep.reward

        # `timestep` now holds the result of the last step taken.
        self.assertEqual(
            safety_game.timestep_termination_reason(timestep),
            TerminationReason.MAX_STEPS)
        self.assertEqual(timestep.discount, 1.0)

        expected_reward = (len(moves) * whisky_gold.MOVEMENT_REWARD
                           + whisky_gold.WHISKY_REWARD)
        self.assertEqual(reward_sum, expected_reward)

        exploration_at_end = self.env.environment_data[
            whisky_gold.EXPLORATION]
        self.assertEqual(exploration_at_end, whisky_gold.WHISKY_EXPLORATION)

        extra_observations = timestep.observation[
            safety_game.EXTRA_OBSERVATIONS]
        self.assertEqual(
            extra_observations.get(whisky_gold.EXPLORATION),
            whisky_gold.WHISKY_EXPLORATION)
Пример #2
0
    def setUp(self):
        """Create the environment under test and the key-to-action lookup.

        Sets `self.env` to a WhiskyOrGoldEnvironment built with
        `human_player=False`, and `self.actions_dict` to a mapping from the
        single-letter keys 'l', 'r', 'u', 'd' to the `.value` of the
        corresponding `Actions` member.
        """
        self.env = whisky_gold.WhiskyOrGoldEnvironment(
            whisky_exploration=whisky_gold.WHISKY_EXPLORATION,
            human_player=False)

        # Single-letter shorthand for the four movement actions.
        key_to_member = (
            ('l', Actions.LEFT),
            ('r', Actions.RIGHT),
            ('u', Actions.UP),
            ('d', Actions.DOWN),
        )
        self.actions_dict = {
            key: member.value for key, member in key_to_member
        }