def __init__(self, logger: Callable = noop_print, random_seed: int = 54):
    """Build the test-bed: game, environment, both SARSA agents, and RNG.

    Args:
        logger: callable used for per-episode logging (defaults to a no-op).
        random_seed: seed for the private RNG so runs are reproducible.
    """
    # Game first, then the two players, then the environment that ties
    # the game and the opponent together.
    self.game = TicTacToeGame()
    self.agent = SarsaAgent(1)        # learning agent, marker 1
    self.opponent = SarsaAgent(-1)    # opponent, marker -1
    self.env = Environment(self.game, self.opponent)
    # Bookkeeping and reproducible randomness.
    self.winners = []
    self.logger = logger
    self._random = random.Random(random_seed)
class TestBed(object):
    """Drives SARSA self-play episodes of tic-tac-toe and records winners."""

    def __init__(self, logger: Callable = noop_print, random_seed: int = 54):
        """Set up the game, both agents, the environment, and a seeded RNG.

        Args:
            logger: callable used for per-episode logging (defaults to a no-op).
            random_seed: seed for the private RNG so runs are reproducible.
        """
        self.winners = []
        self.logger = logger
        self.agent = SarsaAgent(1)        # learning agent, marker 1
        self.opponent = SarsaAgent(-1)    # opponent, marker -1
        self.game = TicTacToeGame()
        self.env = Environment(self.game, self.opponent)
        self._random = random.Random(random_seed)

    def play_episodes(self, n=100):
        """Play ``n`` episodes, appending each episode's winner to ``self.winners``."""
        print('Episode: ')
        for i in range(n):
            if i % 100 == 0:
                # Progress marker every 100 episodes (1, 101, 201, ...).
                print(f'{i+1}', end=' ')
            winner = self.play_episode()
            self.winners.append(winner)

    def play_episode(self):
        """Play one full episode and return its result (winner marker, or falsy for a draw).

        Resets the game and both agents before returning.
        """
        if self._random.random() < 0.5:
            # Let the opponent start 50% of the time
            self.env.opponent_play()
        state = self.env.state_rep()
        # BUG FIX: the first action must also be restricted to the free
        # positions — the opponent may already have played above, so acting
        # without the constraint could pick an occupied cell. This matches
        # the act() call made on every subsequent turn inside the loop.
        action = self.agent.act(state, self.game.available_positions())
        for turn in range(1, 10):  # Should be done in 5
            reward = self.env.interact(action)
            new_state = self.env.state_rep()
            new_action = self.agent.act(new_state, self.game.available_positions())
            # Standard SARSA update tuple: (s, a, r, s', a').
            self.agent.get_feedback(state, action, reward, new_state, new_action)
            state = new_state
            action = new_action
            if self.env.is_episode_complete:
                break
        self.logger(f"Episode complete. {turn} turns taken.", end=' ')
        result = self.game.winner()
        if result:
            self.logger(f"{self.game.repr_marker(result)} won!")
        else:
            self.logger("It was a draw")
        self.logger(self.env, end='\n\n\n\n')
        self.reset()
        return result

    def reset(self):
        """Reset the game and both agents so the next episode starts clean."""
        self.game.reset()
        self.agent.reset()
        self.opponent.reset()
def test_check_game_over_1(self):
    """check_game_over reports a finished game when player 1 fills a column."""
    game = TicTacToeGame()
    game.state = [
        [1, 0, -1],
        [1, 0, -1],
        [1, 0, -1],
    ]
    is_over, outcome = game.check_game_over(1)
    self.assertEqual(is_over, True)
    self.assertEqual(outcome, 1)
def test_check_game_over_4(self):
    """check_game_over reports an unfinished game with no winner yet."""
    game = TicTacToeGame()
    game.state = [
        [-1, 0, 0],
        [0, -1, 0],
        [0, 1, 0],
    ]
    is_over, outcome = game.check_game_over(-1)
    self.assertEqual(is_over, False)
    self.assertEqual(outcome, 0)
CFG.dirichlet_alpha = arguments.dirichlet_alpha CFG.epsilon = arguments.epsilon CFG.model_directory = arguments.model_directory CFG.num_eval_games = arguments.num_eval_games CFG.eval_win_rate = arguments.eval_win_rate CFG.load_model = arguments.load_model CFG.human_play = arguments.human_play CFG.resnet_blocks = arguments.resnet_blocks CFG.record_loss = arguments.record_loss CFG.loss_file = arguments.loss_file CFG.game = arguments.game # Initialize the game object with the chosen game. game = object if CFG.game == 0: game = TicTacToeGame() elif CFG.game == 1: game = OthelloGame() elif CFG.game == 2: game = ConnectFourGame() elif CFG.game == 3: game = RubiksCubeGame() net = NeuralNetworkWrapper(game) # Initialize the network with the best model. if CFG.load_model: file_path = CFG.model_directory + "best_model.meta" if os.path.exists(file_path): net.load_model("best_model") else: