def average_score(strategy):
    """Return the mean score over 100 games played with `strategy`.

    Each game is run through `play.play` with
    `allow_unavailable_action=False`; only the score element of the
    returned pair is kept.
    """

    def _score_of_one_game():
        # play.play returns a (score, experiences) pair; the second
        # element is not needed for averaging.
        game_score, _ = play.play(strategy, allow_unavailable_action=False)
        return game_score

    return np.mean([_score_of_one_game() for _ in range(100)])
def collect(self, strategy, num_games=1):
    """Play `num_games` games with `strategy` and return sampled experiences.

    For every game the experiences returned by `play.play` are passed
    through `self.deduplicate`. Each surviving experience is then kept
    when a fresh uniform draw from `np.random.rand()` falls below
    `self.get_keep_probability(index, count)`, where `index` is its
    position in the deduplicated sequence and `count` is that sequence's
    length.

    Returns:
        A single flat list with the kept experiences of all games, in
        the order they were produced.
    """
    collected = []
    for _ in range(num_games):
        _, game_experiences = play.play(strategy,
                                        allow_unavailable_action=False)
        unique_experiences = self.deduplicate(game_experiences)
        total = len(unique_experiences)
        for position, experience in enumerate(unique_experiences):
            # Draw the random number first, then look up the threshold,
            # one draw per candidate experience.
            if np.random.rand() < self.get_keep_probability(position, total):
                collected.append(experience)
    return collected
def test_play(game_class_mock):
    """Check that play() drives a mocked game and records two experiences.

    `game_class_mock` is presumably the patched game class supplied by a
    decorator or fixture outside this view (TODO confirm); its
    `return_value` stands in for the game instance. The mock is scripted
    to end after two moves, and the test verifies the actions performed,
    the arguments handed to the strategy, the final score, and every
    field of the two recorded experiences.
    """
    ones = np.ones((4, 4))
    first_state, second_state, third_state = ones, ones * 2, ones * 3

    # Script the mocked game instance.
    game = game_class_mock.return_value
    game.game_over.side_effect = [False, False, True]
    game.state.side_effect = [first_state, second_state, third_state]
    game.available_actions.side_effect = [[1, 2, 3], [0, 1, 2],
                                          [0, 1, 2], []]
    game.do_action.side_effect = [1, 2]
    game.score.return_value = 1234

    # The strategy picks action 1 on its first call and action 2 on its second.
    strategy = Mock(side_effect=[1, 2])

    score, experiences = play(strategy, allow_unavailable_action=False)

    game.do_action.assert_has_calls([call(1), call(2)])

    # NumPy arrays overload ==, so the strategy's positional arguments
    # are compared by hand instead of through assert_has_calls.
    first_call_args = strategy.call_args_list[0][0]
    assert (first_call_args[0] == first_state).all()
    assert first_call_args[1] == [1, 2, 3]
    second_call_args = strategy.call_args_list[1][0]
    assert (second_call_args[0] == second_state).all()
    assert second_call_args[1] == [0, 1, 2]

    assert score == 1234
    assert len(experiences) == 2

    # First move: first_state -> second_state, game still running.
    first_experience = experiences[0]
    assert (first_experience.state == first_state).all()
    assert first_experience.action == 1
    assert first_experience.reward == 1
    assert (first_experience.next_state == second_state).all()
    assert first_experience.game_over == False
    assert first_experience.next_state_available_actions == [0, 1, 2]

    # Second move: second_state -> third_state, game finished.
    second_experience = experiences[1]
    assert (second_experience.state == second_state).all()
    assert second_experience.action == 2
    assert second_experience.reward == 2
    assert (second_experience.next_state == third_state).all()
    assert second_experience.game_over == True
    assert second_experience.next_state_available_actions == []
def play_single_game(train_dir):
    """Play one game with the greedy strategy for `train_dir` and print its score.

    The strategy comes from `make_greedy_strategy(train_dir, True)`;
    per the original description it uses the latest model snapshot in
    `train_dir`. The meaning of the second argument is not visible
    here (TODO confirm).
    """
    greedy_strategy = make_greedy_strategy(train_dir, True)
    final_score, _ = play.play(greedy_strategy,
                               allow_unavailable_action=False)
    print(final_score)