示例#1
0
    def test_alphabeta_vs_penalty(self):
        """Play 11 games of plain ABPruning (mover 1) against a penalised ABPruning (mover -1).

        The final score of every game is tallied, and the test asserts that
        no game ended with a score of -4 or -2.
        NOTE(review): negative scores are presumably wins for the penalised
        second player -- confirm against TTT.get_score.
        """
        t = TTT(3)
        player1 = ABPruning(3)
        player2 = ABPruning(3)
        player2.set_penalty(0.2)

        # Tally of final scores; a score outside these keys raises KeyError.
        scores = {-4: 0, -2: 0, 0: 0, 1: 0, 3: 0, 5: 0}
        game_played = 0

        while game_played < 11:
            if t.is_terminated():
                scores[t.get_score()] += 1
                game_played += 1
                t = TTT(3)
                # Re-check the loop condition first, so that no search is run
                # on the fresh board once the last game has been recorded.
                continue

            mover = t.get_mover()
            if mover == 1:
                _, move = player1.get(t.get_state(), mover)
                t.put(move)
            elif mover == -1:
                _, move = player2.get(t.get_state(), mover)
                t.put(move)

        print(scores)
        wrong_cases = scores[-4] + scores[-2]
        # assertEqual reports the offending count when the check fails.
        self.assertEqual(wrong_cases, 0)
示例#2
0
def minimax(state, mover: int, t: TTT) -> [Score, Move]:
    """Exhaustively search the game tree below ``state`` for ``mover``'s best move.

    Every available position is tried on a copy of ``state``. A child that
    is terminal is scored with ``t.get_score``; any other child is scored
    recursively with the opposite mover. Mover ``1`` maximises the score and
    mover ``-1`` minimises it; ties go to the first such move in the order
    returned by ``t.get_available_positions``.

    Args:
        state: Board supporting ``.copy()`` and item assignment. It is not
            mutated.
        mover: ``1`` or ``-1``, the side to move in ``state``.
        t: Rules object providing ``get_available_positions``,
            ``is_terminated`` and ``get_score``.

    Returns:
        ``[best_score, best_move]``; ``[0, None]`` when ``mover`` is neither
        ``1`` nor ``-1``.

    Raises:
        ValueError: If ``state`` has no available position for a valid mover.
    """
    # Compare by value: ``mover is 1`` is False for a value-equal integer that
    # is not the builtin int object (e.g. a numpy integer), which would hand
    # the turn to the same side again.
    next_mover = -1 if mover == 1 else 1
    possible_moves = t.get_available_positions(state)

    corresponding_scores = []
    for index in possible_moves:
        next_state = state.copy()
        next_state[index] = mover
        if t.is_terminated(next_state):
            score = t.get_score(next_state)
        else:
            score, _ = minimax(next_state, next_mover, t)
        corresponding_scores.append(score)

    best_score = 0
    best_move = None
    pick = max if mover == 1 else min if mover == -1 else None
    if pick is not None:
        best_score = pick(corresponding_scores)
        best_move = possible_moves[corresponding_scores.index(best_score)]

    return [best_score, best_move]
示例#3
0
def minimax_save(state, mover: int, t: TTT, table) -> (Score, Move):
    """Minimax search that memoises results in ``table``.

    The result for each visited state is stored in ``table`` under
    ``encode_state(state)``, and a later call on a state with the same
    encoding returns the stored result without searching again.

    Args:
        state: Board supporting ``.copy()`` and item assignment. It is not
            mutated.
        mover: ``1`` (maximising side) or ``-1`` (minimising side).
        t: Rules object providing ``get_available_positions``,
            ``is_terminated`` and ``get_score``.
        table: Mutable mapping from encoded state to
            ``(best_score, best_move)``; updated in place.

    Returns:
        ``(best_score, best_move)``; ``(0, None)`` when ``mover`` is neither
        ``1`` nor ``-1``.

    Raises:
        ValueError: If ``state`` has no available position for a valid mover.
    """
    encoded_state = encode_state(state)
    # Look up the encoded key. The previous code tested the ``encode_state``
    # function object itself, so the table was never hit.
    if encoded_state in table:
        return table[encoded_state]

    # Compare by value: ``mover is 1`` is False for a value-equal integer that
    # is not the builtin int object (e.g. a numpy integer).
    next_mover = -1 if mover == 1 else 1
    possible_moves = t.get_available_positions(state)

    corresponding_scores = []
    for index in possible_moves:
        next_state = state.copy()
        next_state[index] = mover
        if t.is_terminated(next_state):
            score = t.get_score(next_state)
        else:
            score, _ = minimax_save(next_state, next_mover, t, table)
        corresponding_scores.append(score)

    best_score = 0
    best_move = None
    pick = max if mover == 1 else min if mover == -1 else None
    if pick is not None:
        best_score = pick(corresponding_scores)
        best_move = possible_moves[corresponding_scores.index(best_score)]

    table[encoded_state] = (best_score, best_move)
    return (best_score, best_move)
示例#4
0
 def test_score1(self):
     """A board with mover 1 on the whole main diagonal is terminal with score 5."""
     game = TTT(3)
     rows = [
         [1, -1, 0],
         [-1, 1, 0],
         [0, 0, 1],
     ]
     # The game API is given the board as a flat 1-D array.
     flat_board = np.array(rows).reshape(-1)
     is_over = game.is_terminated(flat_board)
     final_score = game.get_score(flat_board)
     self.assertTrue(is_over)
     self.assertEqual(final_score, 5)
示例#5
0
    def _train_against(self,opponent_agent:Callable[[np.ndarray],int],numOfGames:int)->None:
        """Play ``numOfGames`` training games against ``opponent_agent``.

        Each game starts from a fresh ``TTT(self._size)``. A boolean ``turn``
        starts as True and flips after every half-step; the agent acts when
        ``turn`` is ``self._is_first_mover``, and the opponent acts otherwise.

        On its half-step the agent picks a move with
        ``self._epsilon_greedy_train`` and remembers the encoded state and
        the move. On the opponent's half-step, which is also entered when the
        game is already over, the resulting encoded state and score are fed
        to ``self.update`` together with the remembered state and move. A
        game ends when the agent's half-step finds the game terminated.

        Args:
            opponent_agent: Callable given ``game.get_state()``; its return
                value is passed to ``game.put``.
            numOfGames: Number of games to play.
        """
        agent_moves_when = self._is_first_mover
        for _ in tqdm(range(numOfGames)):
            game = TTT(self._size)
            turn = True

            # Encoded state and move from the agent's most recent half-step;
            # both stay None until the agent has moved once in this game.
            last_agent_state = None
            last_agent_move = None

            while True:
                if turn is not agent_moves_when:
                    # Opponent's half-step: its move is treated as part of the
                    # environment's (random) transition.
                    if not game.is_terminated():
                        game.put(opponent_agent(game.get_state()))
                    resulting_state = game.get_encoded_state()
                    reward = game.get_score()
                    # Skip the update when the opponent opened the game and the
                    # agent has not acted yet.
                    if last_agent_state is not None:
                        self.update(last_agent_state,last_agent_move,resulting_state,reward)
                elif game.is_terminated():
                    # Agent's half-step on a finished game: this game is done.
                    break
                else:
                    legal_moves = game.get_available_positions()
                    last_agent_state = game.get_encoded_state()
                    last_agent_move = self._epsilon_greedy_train(last_agent_state,legal_moves)
                    game.put(last_agent_move)

                turn = not turn

        return None
示例#6
0
    def test_penalty_vs_penalty(self):
        """Let two penalised ABPruning players play until a win, a loss and a draw were all seen.

        Both players use penalty 0.7. Final scores are collected in a set,
        and the loop exits once that set intersects each of the three outcome
        sets below.
        NOTE(review): there is no cap on the number of games, so this test
        does not finish if one outcome never occurs.
        """
        board = TTT(3)
        first = ABPruning(3)
        first.set_penalty(0.7)
        second = ABPruning(3)
        second.set_penalty(0.7)

        games_played = 1
        seen_scores = set()
        # NOTE(review): confirm these sets cover every value TTT.get_score
        # can return for each outcome.
        win_scores = {1, 2, 3, 4}
        lose_scores = {-1, -2, -3}
        draw_scores = {0}

        while True:
            if board.is_terminated():
                seen_scores.add(board.get_score())

                # Stop as soon as a win, a loss and a draw have all happened.
                if win_scores & seen_scores and lose_scores & seen_scores and draw_scores & seen_scores:
                    break
                board = TTT(3)
                games_played += 1

            mover = board.get_mover()
            if mover == 1:
                current = first
            elif mover == -1:
                current = second
            else:
                continue
            _, move = current.get(board.get_state(), mover)
            board.put(move)

        self.assertTrue(len(seen_scores) > 2)