def test_diagonal2(self):
    """Anti-diagonal wins are detected on both 3x3 and 4x4 boards."""
    game3 = TTT(3)
    board3 = np.array([1, 0, -1,
                       1, -1, 0,
                       -1, 1, 0])
    self.assertEqual(
        game3.check_winner(board3),
        {'winner': -1, 'lines': [[2, 4, 6]]},
    )
    self.assertTrue(game3.is_terminated(board3))

    game4 = TTT(4)
    board4 = np.array([-1, 0, 0, 1,
                       -1, 0, 1, 0,
                       -1, 1, 0, 0,
                       1, 0, 0, 0])
    self.assertTrue(game4.is_terminated(board4))
    self.assertEqual(
        game4.check_winner(board4),
        {'winner': 1, 'lines': [[3, 6, 9, 12]]},
    )
def test_alphabeta_vs_penalty(self):
    """An exact alpha-beta player (X) never loses to a penalty-discounted one (O).

    Plays 11 games and checks that no final score in {-4, -2}
    (wins for the penalised second player) was ever recorded.
    """
    game = TTT(3)
    exact = ABPruning(3)
    discounted = ABPruning(3)
    discounted.set_penalty(0.2)
    # Tally of observed final scores; negative keys mean player -1 won.
    scores = {-4: 0, -2: 0, 0: 0, 1: 0, 3: 0, 5: 0}
    game_played = 0
    while game_played < 11:
        if game.is_terminated():
            scores[game.get_score()] += 1
            game_played += 1
            game = TTT(3)
        mover = game.get_mover()
        if mover == 1:
            [_, move] = exact.get(game.get_state(), mover)
            game.put(move)
        elif mover == -1:
            [_, move] = discounted.get(game.get_state(), mover)
            game.put(move)
    print(scores)
    wrong_cases = scores[-4] + scores[-2]
    self.assertTrue(wrong_cases == 0)
def test_diagonal1(self):
    """Main-diagonal wins are detected on both 3x3 and 4x4 boards."""
    game3 = TTT(3)
    board3 = np.array([1, 0, -1,
                       0, 1, -1,
                       0, 0, 1])
    self.assertTrue(game3.is_terminated(board3))
    self.assertEqual(
        game3.check_winner(board3),
        {'winner': 1, 'lines': [[0, 4, 8]]},
    )

    game4 = TTT(4)
    board4 = np.array([-1, 0, 0, 1,
                       0, -1, 0, 1,
                       0, 1, -1, 0,
                       1, 0, 0, -1])
    self.assertTrue(game4.is_terminated(board4))
    self.assertEqual(
        game4.check_winner(board4),
        {'winner': -1, 'lines': [[0, 5, 10, 15]]},
    )
def minimax(state, mover: int, t: TTT) -> [Score, Move]:
    """Exhaustively search the game tree and pick the optimal move.

    Args:
        state: flat board array; 0 marks an empty cell.
        mover: player to move, +1 (maximiser) or -1 (minimiser).
        t: game-rules object used for move generation, termination
           checks and scoring.

    Returns:
        [best_score, best_move] for the player to move; [0, None] if
        `mover` is neither +1 nor -1.
    """
    # BUG FIX: was `mover is 1` — identity comparison with an int literal
    # only works because CPython interns small ints; use equality.
    next_mover = -1 if mover == 1 else 1
    possible_moves = t.get_available_positions(state)
    corresponding_scores = []
    best_score = 0
    best_move = None
    for index in possible_moves:
        # Try the move on a copy so the caller's state is untouched.
        next_state = state.copy()
        next_state[index] = mover
        if t.is_terminated(next_state):
            score = t.get_score(next_state)
        else:
            # Recurse for the opponent's best reply.
            [score, _] = minimax(next_state, next_mover, t)
        corresponding_scores.append(score)
    if mover == 1:
        # Maximising player picks the highest achievable score.
        best_score = max(corresponding_scores)
        best_move = possible_moves[corresponding_scores.index(best_score)]
    elif mover == -1:
        # Minimising player picks the lowest achievable score.
        best_score = min(corresponding_scores)
        best_move = possible_moves[corresponding_scores.index(best_score)]
    return [best_score, best_move]
def minimax_save(state, mover: int, t: TTT, table) -> (Score, Move):
    """Memoised minimax search: identical to `minimax` but caches results.

    Args:
        state: flat board array; 0 marks an empty cell.
        mover: player to move, +1 (maximiser) or -1 (minimiser).
        t: game-rules object used for move generation and scoring.
        table: dict mapping encoded states to (best_score, best_move).

    Returns:
        (best_score, best_move) for the player to move.
    """
    encoded_state = encode_state(state)
    # BUG FIX: was `if encode_state in table` — that tested whether the
    # *function object* was a key, which is always False, so the memo
    # table was written but never read.
    if encoded_state in table:
        return table[encoded_state]
    # BUG FIX: was `mover is 1` — identity comparison with an int literal;
    # use equality.
    next_mover = -1 if mover == 1 else 1
    possible_moves = t.get_available_positions(state)
    corresponding_scores = []
    best_score = 0
    best_move = None
    for index in possible_moves:
        # Try the move on a copy so the caller's state is untouched.
        next_state = state.copy()
        next_state[index] = mover
        if t.is_terminated(next_state):
            score = t.get_score(next_state)
        else:
            [score, _] = minimax_save(next_state, next_mover, t, table)
        corresponding_scores.append(score)
    if mover == 1:
        best_score = max(corresponding_scores)
        best_move = possible_moves[corresponding_scores.index(best_score)]
    elif mover == -1:
        best_score = min(corresponding_scores)
        best_move = possible_moves[corresponding_scores.index(best_score)]
    # Cache before returning so repeated positions are solved once.
    table[encoded_state] = (best_score, best_move)
    return (best_score, best_move)
def _train_against(self,opponent_agent:Callable[[np.ndarray],int],numOfGames:int)->None:
    """Play `numOfGames` full games against `opponent_agent`, applying a
    Q-update after each opponent turn.

    The agent acts on its own turns via epsilon-greedy selection; the
    opponent is treated as part of the environment. A transition
    (prev encoded state, action, next encoded state, score) is formed
    from one agent turn followed by one opponent turn.
    """
    # True on the turns where the Q-agent moves (it may be first or second mover).
    agent_q_turn = self._is_first_mover
    for _ in tqdm(range(numOfGames)):
        game = TTT(self._size)
        # `turn` is True on the first mover's plies and flips every loop pass.
        turn = True
        # one complete game :
        # prev state, action taken are from agent's turn
        # next state is from opponent's turn.
        # update in opponent's turn
        encoded_prev_state = None
        move_taken = None
        encoded_next_state = None
        while True:
            if turn is agent_q_turn:
                # Q turn :
                if game.is_terminated():
                    # Terminal state reached on the agent's turn — game over.
                    break
                else:
                    possible_moves = game.get_available_positions()
                    encoded_prev_state = game.get_encoded_state()
                    move_taken = self._epsilon_greedy_train(encoded_prev_state,possible_moves)
                    game.put(move_taken)
                    pass
                pass
            else:
                # opponent's turn :
                if not game.is_terminated():
                    state = game.get_state()
                    # move below is considered as random (sampling procedure) :
                    move = opponent_agent(state)
                    game.put(move)
                    pass
                # Next state and score are read even when the game has already
                # ended, so the final transition is still used for the update.
                encoded_next_state = game.get_encoded_state()
                score = game.get_score()
                if encoded_prev_state is not None:
                    # : to avoid just after first move case ( in case of Q is second mover )
                    self.update(encoded_prev_state,move_taken,encoded_next_state,score)
                pass
            turn = not turn
            pass
    return None
def test_row(self):
    """A completed bottom row for X is reported as the winning line."""
    game = TTT(3)
    board = np.array([0, 0, 0,
                      -1, -1, 0,
                      1, 1, 1])
    self.assertEqual(
        game.check_winner(board),
        {'winner': 1, 'lines': [[6, 7, 8]]},
    )
    self.assertTrue(game.is_terminated(board))
def test_score1(self):
    """A main-diagonal win for X on a 3x3 board scores 5."""
    game = TTT(3)
    rows = [[1, -1, 0],
            [-1, 1, 0],
            [0, 0, 1]]
    board = np.array(rows).reshape(-1)
    terminated = game.is_terminated(board)
    score = game.get_score(board)
    self.assertTrue(terminated)
    self.assertEqual(score, 5)
def test_column(self):
    """A completed last column for O on a 4x4 board is detected."""
    game = TTT(4)
    board = np.array([1, 0, 0, -1,
                      0, 1, 0, -1,
                      1, 0, 0, -1,
                      0, 1, 0, -1])
    self.assertEqual(
        game.check_winner(board),
        {'winner': -1, 'lines': [[3, 7, 11, 15]]},
    )
    self.assertTrue(game.is_terminated(board))
def run_game(self, agent1, agent2, size=3):
    """Play one game: agent1 moves on even plies (X), agent2 on odd plies (O).

    Each agent is a callable mapping the current state to a move index.
    Returns the game's result object.
    """
    game = TTT(size)
    for ply in range(size * size):
        current = agent1 if ply % 2 == 0 else agent2
        chosen = current(game.get_state())
        game.put(chosen)
        if game.is_terminated():
            break
    return game.get_result()
def test_minimax_vs_minimax(self):
    """Self-play with the persisted minimax table always ends in a draw."""
    board_size = 3
    game = TTT(board_size)
    play = minimax_load('results/minimax.pk')
    move_count = 0
    while not game.is_terminated():
        [_, chosen] = play(game.get_state())
        game.put(chosen)
        move_count += 1
    self.assertEqual(game.check_winner()['winner'], 0)
def test_alphabeta_vs_alphabeta(self):
    """Alpha-beta self-play on a 3x3 board ends in a draw."""
    game = TTT(3)
    engine = ABPruning(3)
    moves = 0
    print('Moves : 0 ', end='')
    while not game.is_terminated():
        [_, chosen] = engine.get(game.get_state(), game.get_mover())
        game.put(chosen)
        moves += 1
        print(f'{moves} ', end='')
    print('final state')
    print(game)
    self.assertEqual(game.check_winner()['winner'], 0)
def test_penalty_vs_penalty(self):
    """Two equally penalised alpha-beta players both blunder, so wins,
    losses and draws all eventually occur across repeated games."""
    game = TTT(3)
    first = ABPruning(3)
    first.set_penalty(0.7)
    second = ABPruning(3)
    second.set_penalty(0.7)
    games_played = 1
    seen = set()
    win_scores = {1, 2, 3, 4}
    lose_scores = {-1, -2, -3}
    draw_scores = {0}
    while True:
        if game.is_terminated():
            seen.add(game.get_score())
            # check whether if win,draw,lose all happened
            wins = win_scores & seen
            loses = lose_scores & seen
            draws = draw_scores & seen
            if wins and loses and draws:
                break
            game = TTT(3)
            games_played += 1
        mover = game.get_mover()
        if mover == 1:
            [_, chosen] = first.get(game.get_state(), mover)
            game.put(chosen)
        elif mover == -1:
            [_, chosen] = second.get(game.get_state(), mover)
            game.put(chosen)
    self.assertTrue(len(seen) > 2)
def test_alphabeta_vs_minimax(self):
    """Alpha-beta (X) versus the persisted minimax table (O) is a draw."""
    game = TTT(3)
    table_player = minimax_load('results/minimax.pk')
    pruning_player = ABPruning(3)
    moves = 0
    print('Moves : 0 ', end='')
    while True:
        if game.get_mover() == 1:
            [_, chosen] = pruning_player.get(game.get_state(), game.get_mover())
        elif game.get_mover() == -1:
            [_, chosen] = table_player(game.get_state())
        game.put(chosen)
        moves += 1
        print(f'{moves} ', end='')
        if game.is_terminated():
            break
    print('final state')
    print(game)
    self.assertEqual(game.check_winner()['winner'], 0)