示例#1
0
    def test_diagonal2(self):
        """Anti-diagonal wins are detected on both 3x3 and 4x4 boards."""
        board3 = TTT(3)
        state3 = np.array([1, 0, -1, 1, -1, 0, -1, 1, 0])
        expected3 = {
            'winner': -1,
            'lines': [[2, 4, 6]]
        }
        self.assertEqual(board3.check_winner(state3), expected3)
        self.assertTrue(board3.is_terminated(state3))

        board4 = TTT(4)
        state4 = np.array([-1, 0, 0, 1, -1, 0, 1, 0, -1, 1, 0, 0, 1, 0, 0, 0])
        self.assertTrue(board4.is_terminated(state4))
        expected4 = {
            'winner': 1,
            'lines': [[3, 6, 9, 12]]
        }
        self.assertEqual(board4.check_winner(state4), expected4)
示例#2
0
    def test_alphabeta_vs_penalty(self):
        """A plain alpha-beta player must never lose to a penalized one.

        Plays 11 games; negative totals (-4, -2) would mean the
        unpenalized player lost, which the final assertion forbids.
        """
        t = TTT(3)
        player1 = ABPruning(3)
        player2 = ABPruning(3)
        player2.set_penalty(0.2)

        # Tally of final game scores, keyed by every score the engine can emit.
        scores = {-4: 0, -2: 0, 0: 0, 1: 0, 3: 0, 5: 0}
        game_played = 0

        while game_played < 11:
            if t.is_terminated():
                scores[t.get_score()] += 1
                game_played += 1
                t = TTT(3)  # fresh board for the next game

            mover = t.get_mover()
            if mover == 1:
                [_, move] = player1.get(t.get_state(), mover)
                t.put(move)
            elif mover == -1:
                [_, move] = player2.get(t.get_state(), mover)
                t.put(move)

        print(scores)
        wrong_cases = scores[-4] + scores[-2]
        self.assertTrue(wrong_cases == 0)
示例#3
0
    def test_diagonal1(self):
        """Main-diagonal wins are detected on both 3x3 and 4x4 boards."""
        board3 = TTT(3)
        state3 = np.array([1, 0, -1, 0, 1, -1, 0, 0, 1])
        self.assertTrue(board3.is_terminated(state3))
        expected3 = {
            'winner': 1,
            'lines': [[0, 4, 8]]
        }
        self.assertEqual(board3.check_winner(state3), expected3)

        board4 = TTT(4)
        state4 = np.array([-1, 0, 0, 1, 0, -1, 0, 1, 0, 1, -1, 0, 1, 0, 0, -1])
        self.assertTrue(board4.is_terminated(state4))
        expected4 = {
            'winner': -1,
            'lines': [[0, 5, 10, 15]]
        }
        self.assertEqual(board4.check_winner(state4), expected4)
示例#4
0
def minimax(state, mover: int, t: TTT) -> [Score, Move]:
    """Exhaustive minimax search over the remaining moves of the game.

    Parameters
    ----------
    state : board-state array accepted by the TTT helper methods.
    mover : 1 or -1; player 1 maximizes the score, player -1 minimizes it.
    t     : TTT game object used for move generation, termination checks
            and scoring.

    Returns
    -------
    [best_score, best_move] for *mover*.

    Precondition: *state* must not already be terminal — with no available
    moves the max()/min() calls below would raise on an empty list.
    """
    # Fix: the original used `mover is 1`, an identity comparison against
    # an int literal; equality is the correct test.
    next_mover = -1 if mover == 1 else 1
    possible_moves = t.get_available_positions(state)
    corresponding_scores = []
    best_score = 0
    best_move = None

    for index in possible_moves:
        next_state = state.copy()
        next_state[index] = mover
        if t.is_terminated(next_state):
            # Leaf: take the final score directly.
            corresponding_scores.append(t.get_score(next_state))
        else:
            # Recurse for the opponent's best reply.
            [score, _] = minimax(next_state, next_mover, t)
            corresponding_scores.append(score)

    if mover == 1:
        best_score = max(corresponding_scores)
        best_move = possible_moves[corresponding_scores.index(best_score)]
    elif mover == -1:
        best_score = min(corresponding_scores)
        best_move = possible_moves[corresponding_scores.index(best_score)]

    return [best_score, best_move]
示例#5
0
def minimax_save(state, mover: int, t: TTT, table) -> (Score, Move):
    """Minimax search with memoization of already-solved states.

    Parameters
    ----------
    state : board-state array accepted by the TTT helper methods.
    mover : 1 or -1; player 1 maximizes the score, player -1 minimizes it.
    t     : TTT game object used for move generation, termination checks
            and scoring.
    table : dict cache mapping encoded states to (best_score, best_move);
            mutated in place so results persist across calls.

    Returns
    -------
    (best_score, best_move) for *mover*.

    Precondition: *state* must not already be terminal.
    """
    encoded_state = encode_state(state)
    # Fix: the original tested `encode_state in table` — the *function*
    # object, not the encoded key — so the cache was written but never
    # read and every position was recomputed from scratch.
    if encoded_state in table:
        return table[encoded_state]

    # Fix: `mover is 1` (identity on an int literal) replaced with `==`.
    next_mover = -1 if mover == 1 else 1
    possible_moves = t.get_available_positions(state)
    corresponding_scores = []
    best_score = 0
    best_move = None

    for index in possible_moves:
        next_state = state.copy()
        next_state[index] = mover
        if t.is_terminated(next_state):
            # Leaf: take the final score directly.
            corresponding_scores.append(t.get_score(next_state))
        else:
            # Recurse (memoized) for the opponent's best reply.
            [score, _] = minimax_save(next_state, next_mover, t, table)
            corresponding_scores.append(score)

    if mover == 1:
        best_score = max(corresponding_scores)
        best_move = possible_moves[corresponding_scores.index(best_score)]
    elif mover == -1:
        best_score = min(corresponding_scores)
        best_move = possible_moves[corresponding_scores.index(best_score)]

    table[encoded_state] = (best_score, best_move)
    return (best_score, best_move)
示例#6
0
    def _train_against(self,opponent_agent:Callable[[np.ndarray],int],numOfGames:int)->None:
        """Train the Q agent by playing *numOfGames* games against *opponent_agent*.

        opponent_agent: callable mapping a raw board-state array to a move index.
        numOfGames: number of complete games to play.

        The Q-update for a (prev_state, action) pair is deferred until the
        opponent's following turn, when the resulting next state and score
        are known. NOTE(review): this ordering is load-bearing — do not
        reorder the turn bookkeeping below.
        """
        # True iff the Q agent moves on the first ply of each game
        # (turn starts at True and flips every ply).
        agent_q_turn = self._is_first_mover
        for _ in tqdm(range(numOfGames)):
            game = TTT(self._size)
            turn = True

            # one complete game :
            # prev state, action taken are from agent's turn
            # next state is from opponent's turn.
            # update in opponent's turn
            encoded_prev_state = None
            move_taken = None
            encoded_next_state = None
            while True:

                # NOTE(review): `is` comparison works here only because both
                # sides are expected to be real bools (True/False singletons);
                # `==` would be the safer spelling.
                if turn is agent_q_turn:
                    # Q turn :
                    if game.is_terminated():
                        break
                    else:
                        possible_moves = game.get_available_positions()
                        encoded_prev_state = game.get_encoded_state()
                        move_taken = self._epsilon_greedy_train(encoded_prev_state,possible_moves)
                        game.put(move_taken)
                        pass
                    pass
                else:
                    # opponent's turn :
                    if not game.is_terminated():
                        state = game.get_state()
                        # move below is considered as random (sampling procedure) :
                        move = opponent_agent(state)
                        game.put(move)
                        pass
                    # Even when the game ended before the opponent could move,
                    # the terminal state/score still feed the deferred update.
                    encoded_next_state = game.get_encoded_state()
                    score = game.get_score()
                    if encoded_prev_state is not None:
                        # : to avoid just after first move case ( in case of Q is second mover )
                        self.update(encoded_prev_state,move_taken,encoded_next_state,score)
                    
                    pass
                
                turn = not turn
            pass
        
        return None
示例#7
0
 def test_row(self):
     """A completed bottom row is reported as a win for player 1."""
     board = TTT(3)
     state = np.array([0, 0, 0, -1, -1, 0, 1, 1, 1])
     expected = {
         'winner': 1,
         'lines': [[6, 7, 8]]
     }
     self.assertEqual(board.check_winner(state), expected)
     self.assertTrue(board.is_terminated(state))
示例#8
0
 def test_score1(self):
     """A finished 3x3 game with a main-diagonal win scores 5."""
     board = TTT(3)
     grid = [[1, -1, 0], [-1, 1, 0], [0, 0, 1]]
     flat = np.array(grid).reshape(-1)
     finished = board.is_terminated(flat)
     result = board.get_score(flat)
     self.assertTrue(finished)
     self.assertEqual(result, 5)
示例#9
0
 def test_column(self):
     """A completed rightmost column is reported as a win for player -1."""
     board = TTT(4)
     state = np.array([1, 0, 0, -1, 0, 1, 0, -1, 1, 0, 0, -1, 0, 1, 0, -1])
     expected = {
         'winner': -1,
         'lines': [[3, 7, 11, 15]]
     }
     self.assertEqual(board.check_winner(state), expected)
     self.assertTrue(board.is_terminated(state))
示例#10
0
    def run_game(self, agent1, agent2, size=3):
        """Play one game between two agent callables and return its result.

        agent1 moves on even plies (first mover), agent2 on odd plies;
        each agent maps a board state to a move index.
        """
        game = TTT(size)
        for ply in range(size * size):
            current = agent2 if ply % 2 else agent1
            move = current(game.get_state())
            game.put(move)
            if game.is_terminated():
                break

        return game.get_result()
示例#11
0
    def test_minimax_vs_minimax(self):
        """Two perfect minimax players playing each other always draw."""
        size = 3
        t = TTT(size)
        minimax_loaded = minimax_load('results/minimax.pk')
        moves = 0
        finished = False
        while not finished:
            [_, best_move] = minimax_loaded(t.get_state())
            t.put(best_move)
            moves += 1
            finished = t.is_terminated()

        self.assertEqual(t.check_winner()['winner'], 0)
示例#12
0
    def test_alphabeta_vs_alphabeta(self):
        """A single alpha-beta player moving for both sides must draw."""
        t = TTT(3)
        player = ABPruning(3)
        moves = 0
        print('Moves : 0 ', end='')
        while not t.is_terminated():
            [_, best_move] = player.get(t.get_state(), t.get_mover())
            t.put(best_move)
            moves += 1
            print(f'{moves} ', end='')

        print('final state')
        print(t)
        self.assertEqual(t.check_winner()['winner'], 0)
示例#13
0
    def test_penalty_vs_penalty(self):
        """With equal penalties, wins, losses and draws should all occur.

        Games are replayed until the set of observed final scores contains
        at least one winning, one losing and one drawn outcome.
        """
        t = TTT(3)
        player1 = ABPruning(3)
        player1.set_penalty(0.7)
        player2 = ABPruning(3)
        player2.set_penalty(0.7)

        games_played = 1
        scores = set()
        win_scores = {1, 2, 3, 4}
        lose_scores = {-1, -2, -3}
        draw_scores = {0}

        while True:
            if t.is_terminated():
                scores.add(t.get_score())

                # Stop once a win, a loss and a draw have all been seen.
                if (win_scores & scores) and (lose_scores & scores) \
                        and (draw_scores & scores):
                    break
                t = TTT(3)
                games_played += 1

            mover = t.get_mover()
            if mover == 1:
                [_, move] = player1.get(t.get_state(), mover)
                t.put(move)
            elif mover == -1:
                [_, move] = player2.get(t.get_state(), mover)
                t.put(move)

        self.assertTrue(len(scores) > 2)
示例#14
0
    def test_alphabeta_vs_minimax(self):
        """Alpha-beta (player 1) against stored minimax (player -1) draws."""
        t = TTT(3)
        minimax_player = minimax_load('results/minimax.pk')
        alphabeta_player = ABPruning(3)

        moves = 0
        print('Moves : 0 ', end='')
        while not t.is_terminated():
            mover = t.get_mover()
            if mover == 1:
                [_, best_move] = alphabeta_player.get(t.get_state(), mover)
            elif mover == -1:
                [_, best_move] = minimax_player(t.get_state())
            t.put(best_move)
            moves += 1
            print(f'{moves} ', end='')

        print('final state')
        print(t)
        self.assertEqual(t.check_winner()['winner'], 0)