Example #1
    def test_play_matches_neural_network(self):
    
        memory = Memory(config.MEMORY_SIZE)

        # At the beginning, we use a random model. It behaves like an untrained CNN, but is quicker.
        # config.MCTS_SIMS is also kept low, which yields poor estimations from the MCTS.
        # The idea is to encourage exploration and to generate a lot of boards in memory, even if the
        # probabilities associated with their possible actions are wrong.
        # Memory is completed at the end of the game according to the final winner, in order to correct
        # the value of each move: all the moves of the winner receive value=1 and all the moves of the
        # loser receive value=-1.
        # The neural network will learn to predict these probabilities and values.
        # It will learn wrong probabilities and values at the beginning, but after some time the MCTS
        # and the neural network will improve from each other and converge.
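        # A minimal sketch (not part of the original snippet) of that final-winner backfill,
        # assuming each short-term memory entry records whose turn it was:
        #
        #     for move in memory.stmemory:
        #         move['value'] = 1 if move['playerTurn'] == winner else -1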
        player1 = Agent('cnn_agent_1', config.GRID_SHAPE[0] * config.GRID_SHAPE[1], config.GRID_SHAPE[1], config.MCTS_SIMS, config.CPUCT, GenRandomModel())
        player2 = Agent('cnn_agent_2', config.GRID_SHAPE[0] * config.GRID_SHAPE[1], config.GRID_SHAPE[1], config.MCTS_SIMS, config.CPUCT, GenRandomModel())
        
        scores, memory, points, sp_scores = play_matches.playMatches(player1, player2, config.EPISODES, lg.logger_main, turns_until_tau0 = config.TURNS_UNTIL_TAU0, memory = memory)

        # play_matches.playMatches() has copied stmemory to ltmemory, so we can clear stmemory safely
        memory.clear_stmemory()

        cnn1 = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (1,) + config.GRID_SHAPE, config.GRID_SHAPE[1], config.HIDDEN_CNN_LAYERS)
        cnn2 = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (1,) + config.GRID_SHAPE, config.GRID_SHAPE[1], config.HIDDEN_CNN_LAYERS)
        cnn2.model.set_weights(cnn1.model.get_weights())
        cnn1.plot_model()

        player1.model = cnn1

        ######## RETRAINING ########
        player1.replay(memory.ltmemory)

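        # range(1) runs a single extra self-play + retraining round; widen it for more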
        for _ in range(1):

            scores, memory, points, sp_scores = play_matches.playMatches(player1, player2, config.EPISODES, lg.logger_main, turns_until_tau0 = config.TURNS_UNTIL_TAU0, memory = memory)

            # play_matches.playMatches() has copied stmemory to ltmemory, so we can clear stmemory safely
            memory.clear_stmemory()

            player1.replay(memory.ltmemory)

        
        print('TOURNAMENT...')
        scores, _, points, sp_scores = play_matches.playMatches(player1, player2, config.EVAL_EPISODES, lg.logger_main, turns_until_tau0 = 0, memory = None)
        print('\nSCORES')
        print(scores)
        print('\nSTARTING PLAYER / NON-STARTING PLAYER SCORES')
        print(sp_scores)
Example #2
    print('ITERATION NUMBER ' + str(iteration))

    lg.logger_main.info('BEST PLAYER VERSION: %d', best_player_version)
    print('BEST PLAYER VERSION ' + str(best_player_version))

    # SELF PLAY
    print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
    _, memory, _, _ = playMatches(best_player,
                                  best_player,
                                  config.EPISODES,
                                  lg.logger_main,
                                  turns_until_tau0=config.TURNS_UNTIL_TAU0,
                                  memory=memory)
    print('\n')

    memory.clear_stmemory()

    if len(memory.ltmemory) >= config.MEMORY_SIZE:

        # RETRAINING
        print('RETRAINING...')
        current_player.replay(memory.ltmemory)
        print('')

        if iteration % 5 == 0:
            pickle.dump(
                memory,
                open(
                    run_folder + 'Model_' +
                    str(initialise.INITIAL_RUN_NUMBER) + "/memory/memory" +
                    str(iteration).zfill(4) + ".p", "wb"))
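
To restore a checkpointed memory later, reverse the dump above; a minimal sketch (the path mirrors the dump call, the variable names are illustrative):

import pickle

# reload the replay memory saved by the training loop above
memory = pickle.load(
    open(
        run_folder + 'Model_' + str(initialise.INITIAL_RUN_NUMBER) +
        "/memory/memory" + str(iteration).zfill(4) + ".p", "rb"))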
Example #3
            self_play_players.append(opponent_player)

    ######## SELF PLAY ########
    #epsilon = init_epsilon - iteration * (init_epsilon / 50.0)
    epsilon = 0

    print('Current epsilon: {}'.format(epsilon))
    print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
    _, memories = playMatches(self_play_players,
                              config.EPISODES,
                              lg.logger_main,
                              epsilon,
                              memory=memories)
    print('\n')

    memories.clear_stmemory()

    if len(memories.ltmemory) >= MIN_MEMORY_SIZE:
        #set_learning_phase(1) # tell keras backend that the model will be learning now

        trained = True
        ######## RETRAINING ########
        print('RETRAINING...')
        current_player.replay(memories.ltmemory)
        current_player.evaluate(memories.ltmemory)
        print('')

    if iteration != 0 and iteration % 4 == 0:
        pickle.dump(
            memories,
            open(
                run_folder + "memory/memory" + str(iteration).zfill(4) + ".p",
                "wb"))
Example #4
def evaluation_worker(conn):
    import config
    from config import PLAYER_COUNT, TEAM_SIZE, MEMORY_SIZE
    import initialise
    from model import Residual_CNN, import_tf
    import_tf(1024 * 3)
    from game import Game
    from agent import Agent
    from memory import Memory
    from funcs import playMatches
    import loggers as lg
    import logging
    import time
    import pickle

    # initialise new test memory
    test_memories = Memory(int(MEMORY_SIZE / 10))

    env = Game()

    # initialise new models
    # create an untrained neural network objects from the config file
    if len(env.grid_shape) == 2:
        shape = (1, ) + env.grid_shape
    else:
        shape = env.grid_shape

    if TEAM_SIZE > 1:
        current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                                  shape, int(PLAYER_COUNT / TEAM_SIZE),
                                  config.HIDDEN_CNN_LAYERS)
        best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                               int(PLAYER_COUNT / TEAM_SIZE),
                               config.HIDDEN_CNN_LAYERS)
    else:
        current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                                  shape, PLAYER_COUNT,
                                  config.HIDDEN_CNN_LAYERS)
        best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                               PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)

    current_player_version = 0
    best_player_version = 0
    # If loading an existing neural network, set the weights from that model
    if initialise.INITIAL_MODEL_VERSION is not None:
        best_player_version = initialise.INITIAL_MODEL_VERSION
        #print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
        m_tmp = best_NN.read(env.name, initialise.INITIAL_RUN_NUMBER,
                             initialise.INITIAL_MODEL_VERSION)
        current_NN.model.set_weights(m_tmp.get_weights())
        best_NN.model.set_weights(m_tmp.get_weights())
    # otherwise just ensure the weights on the two players are the same
    else:
        best_NN.model.set_weights(current_NN.model.get_weights())

    current_player = Agent('current_player', config.MCTS_SIMS, config.CPUCT,
                           current_NN)
    best_player = Agent('best_player', config.MCTS_SIMS, config.CPUCT, best_NN)

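    # brief pause, presumably to give the training/self-play processes time to start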
    time.sleep(20)

    while True:
        # request current_NN weights
        conn.send(current_player_version)
        # wait indefinitely for current_NN weights
        conn.poll(None)
        data = conn.recv()

        if data:

            # set current_NN weights
            current_NN.model.set_weights(data)
            current_player_version += 1

            # play tournament games
            tourney_players = []
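            # e.g. with PLAYER_COUNT = 4 and TEAM_SIZE = 1 this builds
            # [best_player, current_player, best_player, current_player]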
            if TEAM_SIZE > 1:
                for i in range(int(PLAYER_COUNT / TEAM_SIZE)):  # for each team
                    # alternate adding best_players and current_players up to TEAM_SIZE
                    for k in range(TEAM_SIZE):
                        if k % 2 == 0:
                            tourney_players.append(best_player)
                        else:
                            tourney_players.append(current_player)
            else:
                for i in range(PLAYER_COUNT):
                    if i % 2 == 0:
                        tourney_players.append(best_player)
                    else:
                        tourney_players.append(current_player)

            scores, test_memories = playMatches(tourney_players,
                                                config.EVAL_EPISODES,
                                                lg.logger_tourney,
                                                0.0,
                                                test_memories,
                                                evaluation=True)
            test_memories.clear_stmemory()

            # if the current player is significantly better than the best_player replace the best player
            # the replacement is made by just copying the weights of current_player's nn to best_player's nn
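            # e.g. with config.SCORING_THRESHOLD = 1.3, current_player must score more
            # than 1.3x best_player's tournament score to be promoted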
            if scores['current_player'] > scores['best_player'] * config.SCORING_THRESHOLD:
                # if current_NN won send message
                conn.send(((current_player_version, best_player_version),
                           str(scores)))

                best_player_version = best_player_version + 1
                best_NN.model.set_weights(current_NN.model.get_weights())
                best_NN.write(env.name, best_player_version)

            # NOTE: run_folder is assumed to be available at module scope
            if (len(test_memories.ltmemory) == test_memories.MEMORY_SIZE
                    and current_player_version % 5 == 0):
                pickle.dump(
                    test_memories,
                    open(
                        run_folder + "memory/test_memory" +
                        str(current_player_version).zfill(4) + ".p", "wb"))

                #print("Evaluating performance of current_NN")
                #current_player.evaluate_accuracy(test_memories.ltmemory)
                #print('\n')
        else:
            time.sleep(10)
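
The worker above talks to its parent over a multiprocessing connection: it announces the model version it holds, then blocks until either fresh weights or a falsy "no update" arrives, and it sends a ((current, best), scores) tuple back on a promotion. A minimal sketch of the parent side under those assumptions (current_version and current_NN are illustrative stand-ins for the trainer's state, not names from the original):

from multiprocessing import Pipe, Process

parent_conn, child_conn = Pipe()
Process(target=evaluation_worker, args=(child_conn,)).start()

while True:
    msg = parent_conn.recv()
    if isinstance(msg, int):
        # version announcement: ship new weights if we have trained since then
        if msg < current_version:
            parent_conn.send(current_NN.model.get_weights())
        else:
            parent_conn.send(None)  # falsy -> the worker sleeps 10s and retries
    else:
        # ((current_version, best_version), scores) promotion notice
        print('NEW BEST PLAYER:', msg)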
Example #5
scores, _, points, sp_scores = playMatches(best_player, current_player, config.EVAL_EPISODES, lg.logger_tourney, turns_until_tau0 = 0, memory = None)
best_player_version = 2
best_NN.model.set_weights(current_NN.model.get_weights())



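# Two ways to pick an action from a stored action-probability vector 'AV':
# deterministically (indices of the max) or stochastically (one multinomial draw)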
actions = np.argwhere(memory.ltmemory[8]['AV'] == max(memory.ltmemory[8]['AV']))
actions = np.random.multinomial(1, memory.ltmemory[8]['AV'])
actions
np.where(actions==1)

random.choice(actions)[0]
memory.ltmemory[8]['AV'][370:390]
memory.ltmemory[0]['board'] = None
memory.ltmemory[0]['state'].engine = None
memory.clear_stmemory()
best_player_version = best_player_version + 1
best_NN.model.set_weights(current_NN.model.get_weights())
len(memory.ltmemory)


pickle.dump(memory, open( run_folder + "memory/memory" + str(4).zfill(4) + ".p", "wb" ) )

memory.ltmemory[1]
mem = Memory(config.MEMORY_SIZE)

for m in memory.ltmemory:
    mem.commit_stmemory(m)

memory = pickle.load(open( run_folder + "memory/multiproc/memory" + str(2).zfill(4) + ".p",   "rb" ) )
len(memory[0].ltmemory)
memory[0].ltmemory[32]
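
Example #5 manipulates the Memory object directly, so its shape matters. The class itself is not shown in these snippets; a minimal sketch consistent with the calls above (commit_stmemory taking one entry, clear_stmemory, ltmemory, MEMORY_SIZE) could be:

from collections import deque

class Memory:
    def __init__(self, memory_size):
        self.MEMORY_SIZE = memory_size
        self.stmemory = deque(maxlen=memory_size)  # positions from the game in progress
        self.ltmemory = deque(maxlen=memory_size)  # accumulated training samples

    def commit_stmemory(self, entry):
        # record one position (board, state, action values, ...) for the current game
        self.stmemory.append(entry)

    def commit_ltmemory(self):
        # once the game ends, promote everything to long-term memory
        for entry in self.stmemory:
            self.ltmemory.append(entry)
        self.clear_stmemory()

    def clear_stmemory(self):
        self.stmemory = deque(maxlen=self.MEMORY_SIZE)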
Example #6
    iteration += 1
    reload(lg)
    reload(config)
    
    print('ITERATION NUMBER ' + str(iteration))
    
    lg.logger_main.info('BEST PLAYER VERSION: %d', best_player_version)
    print('BEST PLAYER VERSION ' + str(best_player_version))

    ######## SELF PLAY ########
    print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
    _, memory, _, _ = playMatches(best_player, best_player, config.EPISODES, lg.logger_main, turns_until_tau0 = config.TURNS_UNTIL_TAU0, memory = memory)
    print('\n')
    
    memory.clear_stmemory()
    
    if len(memory.ltmemory) >= config.MEMORY_SIZE:

        ######## RETRAINING ########
        print('RETRAINING...')
        current_player.replay(memory.ltmemory)
        print('')

        if iteration % 5 == 0:
            pickle.dump( memory, open( run_folder + "memory/memory" + str(iteration).zfill(4) + ".p", "wb" ) )

        lg.logger_memory.info('====================')
        lg.logger_memory.info('NEW MEMORIES')
        lg.logger_memory.info('====================')
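
All of the examples read their hyperparameters from a shared config module. The actual values are not shown in the snippets; a hypothetical config.py with the names referenced above (every number below is a placeholder, not the original setting) might look like:

# config.py - hyperparameters referenced by the examples (values illustrative)
EPISODES = 30              # self-play games per iteration
EVAL_EPISODES = 20         # tournament games between best and current player
MEMORY_SIZE = 30000        # replay-memory capacity
MCTS_SIMS = 50             # MCTS simulations per move
CPUCT = 1                  # exploration constant in the PUCT formula
TURNS_UNTIL_TAU0 = 10      # moves played stochastically before turning greedy
SCORING_THRESHOLD = 1.3    # win margin required to promote the current player
REG_CONST = 0.0001         # L2 regularisation for the residual CNN
LEARNING_RATE = 0.1        # optimiser learning rate
GRID_SHAPE = (6, 7)        # board dimensions
HIDDEN_CNN_LAYERS = [{'filters': 75, 'kernel_size': (4, 4)}] * 6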