def test_alphabeta_vs_penalty(self):
    """Play 11 games of a plain alpha-beta player (X) vs a penalized one (O).

    The first player plays optimally, so it must never end a game with a
    losing score (-4 or -2), even against the weakened second player.
    """
    board = TTT(3)
    plain = ABPruning(3)
    penalized = ABPruning(3)
    penalized.set_penalty(0.2)
    # mover value -> agent responsible for that side
    agents = {1: plain, -1: penalized}
    # tally of final game scores, keyed by every score we expect to see
    tally = {-4: 0, -2: 0, 0: 0, 1: 0, 3: 0, 5: 0}
    finished = 0
    while finished < 11:
        if board.is_terminated():
            tally[board.get_score()] += 1
            finished += 1
            board = TTT(3)  # fresh board for the next game
        side = board.get_mover()
        if side in agents:
            [_, move] = agents[side].get(board.get_state(), side)
            board.put(move)
    print(tally)
    # losing scores for the first player must never occur
    losses = tally[-4] + tally[-2]
    self.assertTrue(losses == 0)
def test_as_second_mover(self):
    """Train a tabular-Q agent as the minimizer only and verify that every
    Q-table row it touched corresponds to a state where player -1 moves.
    """
    params = {
        "ep_train": 0.5,
        "ep_infer": 0,
        "gamma": 1,
        "alpha": 1,
        "agent_for": 'minimizer',
    }
    learner = TabularQ(3)
    learner.set_params(**params)
    opponent_agent = load('minimax')
    learner.train(numOfGames=500, opponent_agent=opponent_agent)

    game = TTT(3)
    q_table = learner._Q
    # np.where against a zero row yields row indices in [0]
    touched_rows = set(np.where(q_table != [0, 0, 0, 0, 0, 0, 0, 0, 0])[0])
    for row in touched_rows:
        decoded = learner.get_state(row)
        # the minimizer (-1) must be the one to move in every updated state
        self.assertEqual(game.get_mover(state=decoded), -1)
    return
def initialize_minimax(filepath: str, size=3):
    """Build the full minimax table for a fresh size x size board and
    pickle it to *filepath*.
    """
    game = TTT(size)
    lookup = {}
    # populate `lookup` in place, starting from the empty board
    minimax_save(game.get_state(), game.get_mover(), game, lookup)
    with open(filepath, 'wb') as f:
        pickle.dump(lookup, f)
    return
def test_get_mover(self):
    """With one X and one O already placed, X (player 1) moves next."""
    game = TTT(3)
    # flattened 3x3 board: X at (0,1), O at (1,1)
    board = np.array([[0, 1, 0], [0, -1, 0], [0, 0, 0]]).reshape(-1)
    self.assertTrue(game.get_mover(state=board) == 1)
def test_set_state(self):
    """set_state must restore mover, move order, move count and the board."""
    game = TTT(3)
    board = [1, 0, 0, 1, 1, 0, -1, -1, 0]  # three X's, two O's -> O to move
    game.set_state(board)
    self.assertEqual(game.get_mover(), -1)
    self.assertEqual(game._order, False)
    self.assertEqual(game._num_moves, 5)
    expected = np.array(board, dtype=int)
    self.assertTrue(np.array_equal(expected, game._state))
def initialize_state_indices(filepath: str, size=3):
    """Enumerate every reachable non-terminal state of a size x size TTT game
    via depth-first search, assign each a unique integer index, and pickle the
    resulting {encoded_state: index} table to *filepath*.

    Fixes: `mover is -1` compared an int by identity (a SyntaxWarning since
    Python 3.8 that only worked via CPython small-int caching) — now `== -1`;
    `not x in table` rewritten as the idiomatic `x not in table`.
    """
    table = {'current': 0}  # 'current' holds the next free index
    t = TTT(size)

    def dfs(state, mover: int, table=table) -> None:
        # store the state if it is a new one:
        encoded_state = t.get_encoded_state(state)
        if encoded_state not in table:
            table[encoded_state] = table['current']
            table['current'] += 1
        assert type(table[encoded_state]) is int
        next_mover = 1 if mover == -1 else -1  # equality, not identity
        available_moves = t.get_available_positions(state)
        for i in available_moves:
            next_state = state.copy()
            next_state[i] = mover
            # only non-terminal states are indexed, so stop at terminal boards
            if not t.is_terminated(next_state):
                dfs(next_state, next_mover)
        return

    # indexing start :
    initial_mover = t.get_mover()
    initial_state = t.get_state()
    print('indexing start :')
    dfs(initial_state, initial_mover)

    # simple validation: every visited state got exactly one distinct index
    num_visited = table['current']
    del table['current']
    num_stored = len(table)
    print(f'visited states : {num_visited}')
    print(f'stored states : {num_stored}')
    assert num_stored == num_visited
    indices = set(table.values())
    assert len(indices) == len(table)
    sample_index = list(table.values())[1]
    assert type(sample_index) is int

    # save :
    print('saving... ', end='')
    with open(filepath, 'wb') as f:
        pickle.dump(table, f)
    print('done')
    return
def test_alphabeta_vs_minimax(self):
    """Alpha-beta (X) vs a precomputed minimax table (O) must end in a draw."""
    board = TTT(3)
    minimax_player = minimax_load('results/minimax.pk')
    alphabeta_player = ABPruning(3)
    move_count = 0
    print('Moves : 0 ', end='')
    while True:
        side = board.get_mover()
        if side == 1:
            [_, best_move] = alphabeta_player.get(board.get_state(), side)
        elif side == -1:
            [_, best_move] = minimax_player(board.get_state())
        board.put(best_move)
        move_count += 1
        print(f'{move_count} ', end='')
        if board.is_terminated():
            break
    print('final state')
    print(board)
    # two optimal players always draw
    self.assertEqual(board.check_winner()['winner'], 0)
def test_alphabeta_vs_alphabeta(self):
    """One alpha-beta agent playing both sides of a game must draw itself."""
    board = TTT(3)
    agent = ABPruning(3)
    move_count = 0
    print('Moves : 0 ', end='')
    while True:
        [_, choice] = agent.get(board.get_state(), board.get_mover())
        board.put(choice)
        move_count += 1
        print(f'{move_count} ', end='')
        if board.is_terminated():
            break
    print('final state')
    print(board)
    # optimal self-play always ends with no winner
    self.assertEqual(board.check_winner()['winner'], 0)
def test_penalty_vs_penalty(self):
    """Two heavily penalized (randomized) players should eventually produce
    a win, a loss AND a draw; keep playing games until all three appear.
    """
    board = TTT(3)
    first = ABPruning(3)
    first.set_penalty(0.7)
    second = ABPruning(3)
    second.set_penalty(0.7)
    agents = {1: first, -1: second}
    games_played = 1
    observed = set()
    win_scores = {1, 2, 3, 4}
    loss_scores = {-1, -2, -3}
    draw_scores = {0}
    while True:
        if board.is_terminated():
            observed.add(board.get_score())
            # stop once a win, a loss and a draw have all been seen
            if (win_scores & observed) and (loss_scores & observed) \
                    and (draw_scores & observed):
                break
            board = TTT(3)
            games_played += 1
        side = board.get_mover()
        if side in agents:
            [_, move] = agents[side].get(board.get_state(), side)
            board.put(move)
    # at least one score from each of the three outcome classes
    self.assertTrue(len(observed) > 2)
def test_deterministic_vs_minimax(self):
    """Check sampled Q-values against the minimax ground truth.

    gamma, alpha == 1 guarantees that for endstates s and optimal move a,
    Q(s,a) = R(s,a) IF Q(s,a) IS NOT 0.
    Here, R(s,a) is the score of the terminated state.

    Fix: the two `int(...) is not 0` checks compared an int by identity — a
    SyntaxWarning since Python 3.8 that only worked because CPython caches
    small ints. Replaced with the value comparison `!= 0`.
    """
    parameters = {
        "ep_train": 0.5,
        "ep_infer": 0,
        "gamma": 1,
        "alpha": 1,
        "agent_for": 'both',
    }
    q = TabularQ(3)
    q.set_params(**parameters)
    q.train(numOfGames=500)
    s = Settings()
    minimax = minimax_load(s.path('minimax'))
    t = TTT(3)
    Q = q._Q
    # rows with any nonzero entry were updated during training
    to_check_state_indices = np.where(Q != [0, 0, 0, 0, 0, 0, 0, 0, 0])[0]
    to_check_state_indices = map(int, to_check_state_indices)
    for state_index in to_check_state_indices:
        self.assertFalse(
            np.array_equal(Q[state_index],
                           np.array([0, 0, 0, 0, 0, 0, 0, 0, 0])))
        state = q.get_state(state_index)
        encoded_state = t.get_encoded_state(state)
        mover = t.get_mover(state=state)
        possible_moves = t.get_available_positions(state)
        if mover == 1:
            # maximizer: greedy argmax must match what infer() picks
            best_move_q = np.argmax(Q[state_index])
            if int(Q[state_index, best_move_q]) != 0:
                move_inferred = q.infer(encoded_state, possible_moves, mover)
                q_value_1 = Q[state_index, best_move_q]
                q_value_2 = Q[state_index, move_inferred]
                self.assertEqual(q_value_1, q_value_2)
        elif mover == -1:
            # minimizer: greedy argmin must match what infer() picks
            best_move_q = np.argmin(Q[state_index])
            if int(Q[state_index, best_move_q]) != 0:
                move_inferred = q.infer(encoded_state, possible_moves, mover)
                q_value_1 = Q[state_index, best_move_q]
                q_value_2 = Q[state_index, move_inferred]
                self.assertEqual(q_value_1, q_value_2)
        next_state = state.copy()
        next_state[best_move_q] = mover
        result = t.get_result(next_state)
        if result['terminated']:
            best_score, _ = minimax(state)
            q_value = Q[state_index, best_move_q]
            if best_score != q_value:
                # not yet sampled (s,a) or withdraw case
                self.assertEqual(q_value, 0)
            else:
                # sampled (s,a): Q equals the minimax score
                self.assertEqual(best_score, q_value)
    pass