def __init__(self, x_player, o_player):
    """Set up a game between two players on a freshly created board.

    Args:
        x_player: Player that is given ``Board.X``; stored first in
            ``self.players``.
        o_player: Player that is given ``Board.O``; stored second in
            ``self.players``.
    """
    self.players = [x_player, o_player]
    self.board = Board()
    self.player_number = 0

    assignments = zip(self.players, (Board.X, Board.O))
    for participant, assigned_piece in assignments:
        # The lambda keeps the ``reset`` lookup lazy: it is only evaluated
        # if run_if_learner decides to invoke the callable.
        run_if_learner(participant, lambda: participant.reset())
        participant.set_board(self.board)
        participant.set_piece(assigned_piece)
def _train_batch(self, player1, player2, stat_type):
    """Train the two given players against each other and tally winners.

    Learning is enabled on the players passed in (via ``run_if_learner``),
    then ``self.num_batches`` training games are played between them.

    Args:
        player1: First player handed to every training game.
        player2: Second player handed to every training game.
        stat_type: Value forwarded unchanged to ``self._show_stats``.

    Returns:
        The stats container produced by ``self._init_stats()``, with the
        entry for each game's winner incremented once per game played.
    """
    # Operate on the arguments rather than self.player1 / self.player2 so
    # that the players which get learning enabled are exactly the ones
    # that play the games below.
    run_if_learner(player1, lambda: player1.enable_learning())
    run_if_learner(player2, lambda: player2.enable_learning())

    stats = self._init_stats()
    for _ in range(self.num_batches):
        winner = self._train_game(player1, player2)
        stats[winner] += 1

    # NOTE(review): the original source line was flattened; reporting once
    # after all games (rather than after every game) is assumed -- confirm.
    self._show_stats(stat_type, stats)
    return stats
def _train_game(self, player1, player2):
    """Play one game between the two players and return its winner.

    A ``GameController`` drives the game move by move until ``make_move``
    reports a winner that is not ``None``. Through ``run_if_learner``,
    each player is asked to store its state after every move and is given
    the final winner as its reward once the game is over.

    Args:
        player1: First player passed to the ``GameController``.
        player2: Second player passed to the ``GameController``.

    Returns:
        The first non-``None`` winner value reported by ``make_move``.
    """
    # NOTE(review): the original source line was flattened; it is assumed
    # that store_state runs after every move and set_reward runs once
    # after the loop -- confirm against the real file.

    def record_state(player):
        run_if_learner(player, lambda: player.store_state())

    def assign_reward(player, result):
        run_if_learner(player, lambda: player.set_reward(result))

    game = GameController(player1, player2)
    while True:
        outcome, _ = game.make_move()
        record_state(player1)
        record_state(player2)
        if outcome is not None:
            break

    assign_reward(player1, outcome)
    assign_reward(player2, outcome)
    return outcome
def test_run_if_learner_does_not_run_function_if_human(self):
    # Arrange: self.player and self.func come from the test fixture, which
    # is not visible here; per the test name the player is a human one.
    player, callback = self.player, self.func

    # Act
    run_if_learner(player, callback)

    # Assert: the callable must be left untouched.
    callback.assert_not_called()
def test_run_if_learner_runs_function_if_td_learner(self):
    # Arrange: self.player and self.func come from the test fixture, which
    # is not visible here; per the test name the player is a TD learner.
    player, callback = self.player, self.func

    # Act
    run_if_learner(player, callback)

    # Assert: the callable is invoked exactly once, with no arguments.
    callback.assert_called_once_with()
def get_and_load_player(self, player_type, piece):
    """Create a player of the given type and load it for ``piece``.

    Args:
        player_type: Passed to ``player_types.get_player`` to obtain the
            player.
        piece: Passed to the player's ``load`` method; the load is routed
            through ``run_if_learner``.

    Returns:
        The player obtained from ``player_types.get_player``.
    """
    new_player = player_types.get_player(player_type)

    def load_for_piece():
        # Looked up lazily so ``load`` is only touched if this is called.
        return new_player.load(piece)

    run_if_learner(new_player, load_for_piece)
    return new_player
def swap_players(self, player1, player2):
    """Reload both players for the opposite piece and swap their order.

    Through ``run_if_learner``, ``player1`` is loaded with ``Board.O`` and
    ``player2`` with ``Board.X``.

    Args:
        player1: Player to be loaded with ``Board.O``.
        player2: Player to be loaded with ``Board.X``.

    Returns:
        The tuple ``(player2, player1)``.
    """

    def load_as(player, piece):
        run_if_learner(player, lambda: player.load(piece))

    load_as(player1, Board.O)
    load_as(player2, Board.X)

    swapped = (player2, player1)
    return swapped