示例#1
0
    def test_alphabeta_vs_penalty(self):
        """Play 11 games of optimal alpha-beta (player1, mover 1) against a
        penalized alpha-beta (player2, mover -1) and assert the penalized
        player never wins.

        Negative final scores (-4, -2) correspond to player2 winning, which
        an optimal first player must always prevent.
        """
        t = TTT(3)
        player1 = ABPruning(3)
        player2 = ABPruning(3)
        player2.set_penalty(0.2)

        # tally of final scores; the negative keys are player2 victories
        scores = {-4: 0, -2: 0, 0: 0, 1: 0, 3: 0, 5: 0}
        games_played = 0

        while games_played < 11:
            if t.is_terminated():
                score = t.get_score()
                # .get() guards against a score value outside the expected
                # keys (the original indexed directly and could KeyError)
                scores[score] = scores.get(score, 0) + 1
                games_played += 1
                t = TTT(3)
                # BUG FIX: re-check the loop condition immediately so no
                # extra move is played on a fresh board after game 11 ends
                continue

            mover = t.get_mover()
            if mover == 1:
                [_, move] = player1.get(t.get_state(), mover)
                t.put(move)
            elif mover == -1:
                [_, move] = player2.get(t.get_state(), mover)
                t.put(move)

        print(scores)
        # scores[-4] + scores[-2] counts games the penalized player won
        wrong_cases = scores[-4] + scores[-2]
        self.assertTrue(wrong_cases == 0)
示例#2
0
    def test_as_second_mover(self):
        """Train a tabular-Q agent as the minimizer against a fixed minimax
        opponent; every state whose Q-row was touched during training must
        have mover == -1 (the agent only ever acts as the second mover)."""
        config = {
            "ep_train": 0.5,
            "ep_infer": 0,
            "gamma": 1,
            "alpha": 1,
            "agent_for": 'minimizer',
        }
        q = TabularQ(3)
        q.set_params(**config)
        opponent_agent = load('minimax')
        q.train(numOfGames=500, opponent_agent=opponent_agent)

        t = TTT(3)

        # a row with any nonzero entry was updated during training;
        # np.nonzero(...)[0] yields the row index of each nonzero element
        q_table = q._Q
        visited_rows = set(np.nonzero(q_table != 0)[0])

        for row in visited_rows:
            state = q.get_state(row)
            self.assertEqual(t.get_mover(state=state), -1)

        return
示例#3
0
def initialize_minimax(filepath: str, size=3):
    """Build the full minimax lookup table for a size x size board and
    pickle it to *filepath*."""
    board = TTT(size)
    lookup = {}
    minimax_save(board.get_state(), board.get_mover(), board, lookup)
    with open(filepath, 'wb') as out:
        pickle.dump(lookup, out)
    return
示例#4
0
    def test_get_mover(self):
        """With one X (1) and one O (-1) placed, X moves next."""
        board = TTT(3)
        grid = [[0, 1, 0], [0, -1, 0], [0, 0, 0]]
        flat_state = np.array(grid).reshape(-1)

        self.assertTrue(board.get_mover(state=flat_state) == 1)
示例#5
0
 def test_set_state(self):
     """Loading a mid-game state must update mover, order and move count."""
     board = TTT(3)
     layout = [1, 0, 0, 1, 1, 0, -1, -1, 0]
     board.set_state(layout)

     self.assertEqual(board.get_mover(), -1)
     self.assertEqual(board._order, False)
     self.assertEqual(board._num_moves, 5)
     expected = np.array(layout, dtype=int)
     self.assertTrue(np.array_equal(expected, board._state))
示例#6
0
def initialize_state_indices(filepath: str, size=3):
    """Enumerate every reachable non-terminal game state via DFS, assign
    each a unique integer index, validate the result and pickle it.

    The saved table maps encoded_state -> index. The auxiliary 'current'
    key holds the next index to hand out and is removed before saving.
    """
    table = {'current': 0}  # store state:index pair
    t = TTT(size)

    def dfs(state, mover: int, table=table) -> None:
        """Index *state* and recurse into all its non-terminal successors."""
        # store if the state is new one :
        encoded_state = t.get_encoded_state(state)
        if encoded_state not in table:
            table[encoded_state] = table['current']
            table['current'] += 1

        assert isinstance(table[encoded_state], int)

        # BUG FIX: the original used `mover is -1`, an identity comparison
        # with an int literal — it only works via CPython's small-int
        # caching and raises SyntaxWarning on Python 3.8+. Compare by value.
        next_mover = 1 if mover == -1 else -1
        available_moves = t.get_available_positions(state)
        for i in available_moves:
            next_state = state.copy()
            next_state[i] = mover
            if not t.is_terminated(next_state):
                dfs(next_state, next_mover)

        return

    # indexing start :
    initial_mover = t.get_mover()
    initial_state = t.get_state()
    print('indexing start :')
    dfs(initial_state, initial_mover)

    # simple validate :
    num_visited = table['current']
    del table['current']
    num_stored = len(table)
    print(f'visited states : {num_visited}')
    print(f'stored states : {num_stored}')
    # every visited state got exactly one entry...
    assert num_stored == num_visited
    # ...and every entry got a distinct integer index
    indices = set(table.values())
    assert len(indices) == len(table)
    sample_index = list(table.values())[1]
    assert isinstance(sample_index, int)

    # save :
    print('saving... ', end='')
    with open(filepath, 'wb') as f:
        pickle.dump(table, f)
    print('done')

    return
示例#7
0
    def test_alphabeta_vs_minimax(self):
        """Alpha-beta (as mover 1) against a precomputed minimax table
        (as mover -1) must end in a draw (winner == 0)."""
        t = TTT(3)
        minimax_player = minimax_load('results/minimax.pk')
        alphabeta_player = ABPruning(3)

        moves = 0
        print('Moves : 0 ', end='')
        # loop until the board reports a terminal position
        while not t.is_terminated():
            current = t.get_mover()
            if current == 1:
                [_, best_move] = alphabeta_player.get(t.get_state(), current)
            elif current == -1:
                [_, best_move] = minimax_player(t.get_state())
            t.put(best_move)
            moves += 1
            print(f'{moves} ', end='')

        print('final state')
        print(t)
        self.assertEqual(t.check_winner()['winner'], 0)
示例#8
0
    def test_alphabeta_vs_alphabeta(self):
        """One alpha-beta agent playing both sides against itself must
        end in a draw (winner == 0)."""
        t = TTT(3)
        player = ABPruning(3)

        moves = 0
        print('Moves : 0 ', end='')
        # loop until the board reports a terminal position
        while not t.is_terminated():
            [_, best_move] = player.get(t.get_state(), t.get_mover())
            t.put(best_move)
            moves += 1
            print(f'{moves} ', end='')

        print('final state')
        print(t)
        self.assertEqual(t.check_winner()['winner'], 0)
示例#9
0
    def test_penalty_vs_penalty(self):
        """Two equally-penalized alpha-beta players are non-deterministic
        enough that repeated games eventually produce a player-1 win, a
        player-1 loss, AND a draw — so at least 3 distinct scores occur."""
        t = TTT(3)
        player1 = ABPruning(3)
        player1.set_penalty(0.7)
        player2 = ABPruning(3)
        player2.set_penalty(0.7)

        games_played = 1
        scores = set()
        win_scores = {1, 2, 3, 4}
        lose_scores = {-1, -2, -3}
        draw_scores = {0}

        while True:
            if t.is_terminated():
                scores.add(t.get_score())

                # stop once a win, a loss and a draw have each been observed
                seen_win = win_scores & scores
                seen_loss = lose_scores & scores
                seen_draw = draw_scores & scores
                if seen_win and seen_loss and seen_draw:
                    break

                t = TTT(3)
                games_played += 1

            mover = t.get_mover()
            if mover == 1:
                [_, move] = player1.get(t.get_state(), mover)
                t.put(move)
            elif mover == -1:
                [_, move] = player2.get(t.get_state(), mover)
                t.put(move)

        self.assertTrue(len(scores) > 2)
示例#10
0
    def test_deterministic_vs_minimax(self):
        """Check trained Q-values against minimax on terminal transitions.

        With gamma == alpha == 1, for an endstate-producing pair (s, a)
        with optimal a, Q(s,a) == R(s,a) whenever Q(s,a) != 0; here
        R(s,a) is the score of the terminated state.
        """
        parameters = {
            "ep_train": 0.5,
            "ep_infer": 0,
            "gamma": 1,
            "alpha": 1,
            "agent_for": 'both',
        }
        q = TabularQ(3)
        q.set_params(**parameters)
        q.train(numOfGames=500)

        s = Settings()
        minimax = minimax_load(s.path('minimax'))
        t = TTT(3)

        Q = q._Q
        # rows with any nonzero entry were updated during training
        to_check_state_indices = np.where(Q != [0, 0, 0, 0, 0, 0, 0, 0, 0])[0]
        to_check_state_indices = map(int, to_check_state_indices)

        for state_index in to_check_state_indices:

            self.assertFalse(
                np.array_equal(Q[state_index],
                               np.array([0, 0, 0, 0, 0, 0, 0, 0, 0])))
            state = q.get_state(state_index)
            encoded_state = t.get_encoded_state(state)
            mover = t.get_mover(state=state)
            possible_moves = t.get_available_positions(state)

            # BUG FIX (both branches): the original tested
            # `int(...) is not 0` — an identity comparison with an int
            # literal that relies on CPython small-int caching and raises
            # SyntaxWarning on 3.8+. Compare by value with `!= 0`.
            if mover == 1:
                # maximizer: the greedy move maximizes the Q-row
                best_move_q = np.argmax(Q[state_index])
                if int(Q[state_index, best_move_q]) != 0:
                    move_inferred = q.infer(encoded_state, possible_moves,
                                            mover)
                    q_value_1 = Q[state_index, best_move_q]
                    q_value_2 = Q[state_index, move_inferred]
                    self.assertEqual(q_value_1, q_value_2)
            elif mover == -1:
                # minimizer: the greedy move minimizes the Q-row
                best_move_q = np.argmin(Q[state_index])
                if int(Q[state_index, best_move_q]) != 0:
                    move_inferred = q.infer(encoded_state, possible_moves,
                                            mover)
                    q_value_1 = Q[state_index, best_move_q]
                    q_value_2 = Q[state_index, move_inferred]
                    self.assertEqual(q_value_1, q_value_2)

            next_state = state.copy()
            next_state[best_move_q] = mover

            result = t.get_result(next_state)
            if result['terminated']:
                best_score, _ = minimax(state)
                q_value = Q[state_index, best_move_q]
                if best_score != q_value:
                    # not yet sampled (s,a)
                    # or withdraw case
                    self.assertEqual(q_value, 0)
                else:
                    # sampled (s,a)
                    self.assertEqual(best_score, q_value)