Example No. 1
    def deep_q_search(self, board, depth=0):
        #print(depth)
        curr_player = board.player_turn
        board_state = Gameplay.board_state_from_board(board)
        possible_board_states = Gameplay.board_states_from_possible_moves(
            board)
        invert = (curr_player == 2)

        if invert:
            board_state = Gameplay.invert_board(board_state)
            possible_board_states = np.array(
                list(
                    map(lambda x: Gameplay.invert_board(x),
                        possible_board_states)))

        if (len(possible_board_states) == 0):
            return None, 0

        q_values = []
        if (depth == 0):
            q_values = self.get_moves_Q_values(board_state,
                                               possible_board_states)
        else:
            moves = board.get_possible_moves()
            for move in moves:
                new_board = board.create_new_board_from_move(move)
                new_board_state = Gameplay.board_state_from_board(new_board)
                if invert:
                    new_board_state = Gameplay.invert_board(new_board_state)
                move_q_value = self.ddqn.predict_Q(board_state,
                                                   new_board_state)
                _, q_val = self.deep_q_search(new_board, depth - 1)
                if curr_player != new_board.player_turn:
                    q_val = -q_val
                q_values.append(move_q_value + q_val)

        best_q = np.max(q_values)
        best_move_idx = np.argmax(q_values)

        return best_move_idx, best_q
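
A minimal usage sketch of the search above, assuming an already-constructed agent instance (hypothetical variable name) and a board object exposing the same get_possible_moves / create_new_board_from_move API the method relies on:

# Hypothetical driver code, not part of the listing above.
move_idx, q_val = agent.deep_q_search(board, depth=1)
if move_idx is not None:                        # None means no legal moves
    move = board.get_possible_moves()[move_idx]
    board = board.create_new_board_from_move(move)
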
Example No. 2
def display_agent_memory(agt, size=None):
    # Print a random sample of transitions stored in the agent's replay memory.
    if size is None:
        size = len(agt.memory)
    minibatch = random.sample(agt.memory, size)
    for board_state, board_state_action, reward, next_board_state, next_possible_board_states, done in minibatch:
        if not done:
            print('board_state')
            Gameplay.show_board(board_state)
            print('board_state_action')
            Gameplay.show_board(board_state_action)
            print('next_board_state')
            Gameplay.show_board(next_board_state)
            print('next_possible_board_states')
            for poss in next_possible_board_states:
                Gameplay.show_board(poss)
            print('reward {}'.format(reward))
            print('done {}'.format(done))
        else:
            print('board_state')
            Gameplay.show_board(board_state)
            print('board_state_action')
            Gameplay.show_board(board_state_action)
            print('reward {}'.format(reward))
            print('done {}'.format(done))
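
A minimal usage sketch, assuming an agent object whose .memory holds the six-field transitions unpacked above (hypothetical variable name):

# Hypothetical driver code, not part of the listing above.
display_agent_memory(agent)            # dump every stored transition
display_agent_memory(agent, size=25)   # or only a random sample of 25
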
Example No. 3
    def auto_play(self, n_episodes):
        plt.ion()
        plt.xlabel('Episodes')
        plt.ylabel('{} mean error'.format(self.name))
        x, y = [], []
        line, = plt.plot(x, y)
        plt.xlim(0, n_episodes)
        plt.ylim(0, config.PLOT_Y_LIM)

        for i in range(n_episodes):
            print("Episode {}".format(i))
            turns_hist = {
                1: [],
                2: []
            }
            gm = game.Game()
            boardState = Gameplay.board_state_from_board(gm.board)

            while (not gm.is_over()):
                player = gm.whose_turn()

                possible_board_states = Gameplay.board_states_from_possible_moves(gm.board)
                move_idx, q_val = Gameplay.get_QAgent_move_pp(self, gm)

                draw_counter = gm.moves_since_last_capture

                if (player == 2):
                    boardState = Gameplay.invert_board(boardState)
                    possible_board_states = np.array(
                        list(map(lambda x: Gameplay.invert_board(x), possible_board_states)))

                # Updating previous history
                if len(turns_hist[player]) > 0:
                    turns_hist[player][-1]['next_board_state'] = boardState
                    turns_hist[player][-1]['next_draw_counter'] = draw_counter
                    turns_hist[player][-1]['next_possible_board_states'] = possible_board_states

                move = gm.get_possible_moves()[move_idx]

                reward = 0
                if (move in gm.board.get_possible_capture_moves()):
                    reward += config.CAPTURE_REWARD

                piece_was_king = gm.board.searcher.get_piece_by_position(move[0]).king
                new_boardState = Gameplay.make_move(gm, move)

                if (not piece_was_king) and gm.board.searcher.get_piece_by_position(move[1]).king:
                    reward += config.KING_REWARD

                if len(turns_hist[Gameplay.get_other_player(player)]) > 0:
                    turns_hist[Gameplay.get_other_player(player)][-1]['reward'] -= reward

                # New history
                turns_hist[player].append({
                    'board_state': boardState,
                    'draw_counter': draw_counter,
                    'board_state_action': new_boardState,
                    'reward': reward,
                    'next_board_state': None,
                    'next_draw_counter': None,
                    'next_possible_board_states': None,
                    'done': False
                })
                if (player == 2):
                    turns_hist[player][-1]['board_state_action'] = Gameplay.invert_board(new_boardState)

                boardState = new_boardState

            print("Game Over! ")
            if gm.move_limit_reached():
                print("It's a tie!!")
                for j in range(2):
                    turns_hist[j + 1][-1]['reward'] += config.DRAW_REWARD
                    turns_hist[j + 1][-1]['done'] = True
            else:
                print("Winner is: {}".format(gm.get_winner()))
                turns_hist[gm.get_winner()][-1]['reward'] += config.WIN_REWARD
                turns_hist[gm.get_winner()][-1]['done'] = True
                turns_hist[Gameplay.get_other_player(gm.get_winner())][-1]['reward'] -= config.WIN_REWARD
                turns_hist[Gameplay.get_other_player(gm.get_winner())][-1]['done'] = True

            for k, v in turns_hist.items():
                print("Reward sum for {}: {}".format(k, sum(list(map(lambda x: x['reward'], v)))))

            for k, v in turns_hist.items():
                for turn_hist in v:
                    self.remember(turn_hist['board_state'], turn_hist['draw_counter'],
                                  turn_hist['board_state_action'], turn_hist['reward'],
                                  turn_hist['next_board_state'], turn_hist['next_draw_counter'],
                                  turn_hist['next_possible_board_states'], turn_hist['done'])

            if (len(self.memory) > self.replay_batch_size):
                self.replay_memory()
                y.append(self.loss_mean)
                x.append(i)
                line.set_data(x, y)
                plt.draw()
                plt.pause(0.000000001)

        return self
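
Each turn above is handed to self.remember(...) with eight fields, and Example No. 4 unpacks the same eight fields when replaying (Example No. 2 shows an older six-field layout without the draw counters). The repository's remember implementation is not shown in these listings; a minimal sketch consistent with that field order (an assumption, not the project's actual code) would be:

    def remember(self, board_state, draw_counter, board_state_action, reward,
                 next_board_state, next_draw_counter, next_possible_board_states,
                 done):
        # Sketch only: store one transition in the same 8-field order that
        # replay_memory() unpacks in Example No. 4.
        self.memory.append((board_state, draw_counter, board_state_action, reward,
                            next_board_state, next_draw_counter,
                            next_possible_board_states, done))
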
Example No. 4
    def replay_memory(self):
        print('Replay memory!')
        samples = random.sample(self.memory, self.replay_batch_size)

        done_not_draw = [m for m in self.memory if LearningUtils.is_done_not_draw(m[3], m[7])]
        if (len(done_not_draw) > config.DONE_NOT_DRAW_LIM):
            done_not_draw = random.sample(done_not_draw, config.DONE_NOT_DRAW_LIM)

        # Plain list concatenation; np.concatenate on these heterogeneous
        # transition tuples can fail or silently build an object array.
        samples = list(samples) + list(done_not_draw)

        avg_loss = 0
        minibatch_X = []
        minibatch_y = []
        for board_state, draw_counter, board_state_action, reward, next_board_state, next_draw_counter, next_possible_board_states, done in samples:
            if(self.replay_count == self.target_update_threshold):
                self.update_target_weights()
                self.replay_count = 0

            draw_counter_norm = draw_counter / 40

            if done or next_possible_board_states is None or not next_possible_board_states.size > self.state_size:
                q_value = reward
            else:
                next_draw_counter_norm = next_draw_counter / 40
                targets = []
                for possible_board_state in next_possible_board_states:
                    targets.append(self.ddqn.predict_Q(next_board_state, next_draw_counter_norm, possible_board_state))

                targets = np.array(targets)
                next_best_board = next_possible_board_states[np.argmax(targets)]
                q_value_t = self.ddqn_target.predict_Q(next_board_state, next_draw_counter_norm, next_best_board)
                if (self.replay_count % 13) == 0:
                    print('---------------------')
                    Gameplay.show_board(board_state)
                    print('---')
                    Gameplay.show_board(board_state_action)
                    print('draw counter: {}'.format(draw_counter))
                    print('reward: {}'.format(reward))
                    print('q_value: {}'.format(self.ddqn.predict_Q(board_state, draw_counter_norm, board_state_action)))

                q_value = reward + self.gamma * q_value_t

            board_state_reshaped = board_state.reshape(self.state_size)
            board_state_action_reshaped = board_state_action.reshape(self.state_size)
            minibatch_X.append(np.hstack((board_state_reshaped, board_state_action_reshaped, draw_counter_norm)))
            minibatch_y.append(q_value)
            self.replay_count += 1

        self.minibatch_count += 1
        #print(minibatch_y)
        hist = self.ddqn.train(np.array(minibatch_X), np.array(minibatch_y))
        avg_loss += np.mean(hist.history['loss'])

        # Incremental mean over all minibatches so far (minibatch_count was
        # already incremented above).
        self.loss_mean += (avg_loss - self.loss_mean) / self.minibatch_count

        print('{} minibatch average Q_value: {}'.format(self.name, np.mean(minibatch_y)))
        print('{} minibatch average loss: {}'.format(self.name, avg_loss))
        print('{} overall average loss: {}'.format(self.name, self.loss_mean))
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay_rate
        print('{} epsilon: {}'.format(self.name, self.epsilon))
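
The update above follows the double-DQN pattern: the online network (self.ddqn) picks the best next board, and the target network (self.ddqn_target) evaluates it. A self-contained sketch of that target computation (hypothetical helper, names not from the repository):

import numpy as np

def double_dqn_target(reward, gamma, q_online_next, q_target_next):
    # q_online_next / q_target_next: per-action Q estimates for the next state,
    # produced by the online and target networks respectively.
    a_best = int(np.argmax(q_online_next))           # online network selects
    return reward + gamma * q_target_next[a_best]    # target network evaluates
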
Example No. 5
import gc
from utils.gameplay import Gameplay
from utils.agent_utils import AgentUtils

gc.collect()

N_EPISODES = 10000
TRAIN_NEW_AGENT = False

agt_name = AgentUtils.V1_NAME

if TRAIN_NEW_AGENT:
    agent = AgentUtils.train_agent(N_EPISODES, version=1)
else:
    agent = AgentUtils.load_agent('agt_{}_test.h5'.format(agt_name),
                                  version=1,
                                  with_eps=True)
    agent = AgentUtils.train_agent(N_EPISODES,
                                   start_eps=0.99,
                                   agt=agent,
                                   resume=True)

agent.save_weights('agt_{}_test.h5'.format(agt_name))
agent.with_eps = False

#display_agent_memory(agent, 50)

Gameplay.run_game_with_agent(agent)
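
The script above turns agent.with_eps off before play. How that flag and epsilon typically gate exploration is not shown in these listings; a self-contained epsilon-greedy sketch (an illustration, not the repository's code):

import random
import numpy as np

def choose_move_idx(q_values, epsilon, with_eps):
    # Explore with probability epsilon only while exploration is enabled.
    if with_eps and random.random() < epsilon:
        return random.randrange(len(q_values))
    return int(np.argmax(q_values))   # otherwise exploit the best Q-value
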
Example No. 6
from tqdm import tqdm
import numpy

from utils.gameplay import Gameplay
from utils.agent_utils import AgentUtils

n_games = 100
eps = 0.01
with_eps = True

agent_v1 = AgentUtils.load_agent('agt_{}_test.h5'.format(AgentUtils.V1_NAME),
                                 version=1,
                                 with_eps=with_eps)
agent_v2 = AgentUtils.load_agent('agt_{}_test.h5'.format(AgentUtils.V2_NAME),
                                 version=2,
                                 with_eps=with_eps)
agent_v1.epsilon = eps
agent_v2.epsilon = eps

results = []
for i in tqdm(range(n_games)):
    res = Gameplay.run_agent_duel(agent_v1, agent_v2, verbose=False)
    results.append(res)

results = numpy.array(results)

agt1_wins = (results == agent_v1.name).sum()
agt2_wins = (results == agent_v2.name).sum()
ties = (results == 'tie').sum()

print('{} wins: {}'.format(agent_v1.name, agt1_wins))
print('{} wins: {}'.format(agent_v2.name, agt2_wins))
print('Ties: {}'.format(ties))
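
If a relative summary is also useful, win rates follow directly from the same counters (optional addition, not part of the original script):

print('{} win rate: {:.1f}%'.format(agent_v1.name, 100.0 * agt1_wins / n_games))
print('{} win rate: {:.1f}%'.format(agent_v2.name, 100.0 * agt2_wins / n_games))
print('Tie rate: {:.1f}%'.format(100.0 * ties / n_games))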