def test_play_matches_neural_network(self):
    """Integration test: generate self-play memories with a random model,
    retrain a Residual_CNN on them, then run an evaluation tournament.

    NOTE(review): SOURCE arrived with all newlines collapsed; the statement
    grouping below (in particular the extent of the `for` loop) was
    reconstructed from context — confirm against the original file.
    """
    memory = Memory(config.MEMORY_SIZE)
    # At the beginning, we set a random model. It will be similar to an untrained CNN, and quicker.
    # We also set config.MCTS_SIMS, which is rather low, and will produce poor estimations from the MCTS.
    # The idea is to encourage exploration and generate a lot of boards in memory, even if the
    # probabilities associated to their possible actions are wrong.
    # Memory is completed at the end of the game according to the final winner, in order to correct the
    # values of each move. All the moves of the winner receive value=1 and all the moves of the loser
    # receive value=-1.
    # The neural network will learn to predict the probabilities and the values.
    # It will learn wrong probas and values at the beginning, but after some time, the MCTS and the
    # neural network will improve from each other and converge.
    player1 = Agent('cnn_agent_1', config.GRID_SHAPE[0] * config.GRID_SHAPE[1],
                    config.GRID_SHAPE[1], config.MCTS_SIMS, config.CPUCT,
                    GenRandomModel())
    player2 = Agent('cnn_agent_2', config.GRID_SHAPE[0] * config.GRID_SHAPE[1],
                    config.GRID_SHAPE[1], config.MCTS_SIMS, config.CPUCT,
                    GenRandomModel())
    scores, memory, points, sp_scores = play_matches.playMatches(
        player1, player2, config.EPISODES, lg.logger_main,
        turns_until_tau0 = config.TURNS_UNTIL_TAU0, memory = memory)
    # play_matches.playMatches() has copied stmemory to ltmemory, so we can clear stmemory safely
    memory.clear_stmemory()
    cnn1 = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                        (1,) + config.GRID_SHAPE, config.GRID_SHAPE[1],
                        config.HIDDEN_CNN_LAYERS)
    cnn2 = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                        (1,) + config.GRID_SHAPE, config.GRID_SHAPE[1],
                        config.HIDDEN_CNN_LAYERS)
    # NOTE(review): cnn2 receives cnn1's weights but is never used afterwards —
    # player2 keeps the random model. Presumably intentional (weak opponent); confirm.
    cnn2.model.set_weights(cnn1.model.get_weights())
    cnn1.plot_model()
    # player1 now plays with the CNN instead of the random model
    player1.model = cnn1

    ######## RETRAINING ########
    player1.replay(memory.ltmemory)
    # one extra self-play / retrain cycle; raise the range bound to train longer
    for _ in range(1):
        scores, memory, points, sp_scores = play_matches.playMatches(
            player1, player2, config.EPISODES, lg.logger_main,
            turns_until_tau0 = config.TURNS_UNTIL_TAU0, memory = memory)
        # play_matches.playMatches() has copied stmemory to ltmemory, so we can clear stmemory safely
        memory.clear_stmemory()
        player1.replay(memory.ltmemory)
    print('TOURNAMENT...')
    # deterministic play (tau=0 from the first move), no memory collection
    scores, _, points, sp_scores = play_matches.playMatches(
        player1, player2, config.EVAL_EPISODES, lg.logger_main,
        turns_until_tau0 = 0, memory = None)
    print('\nSCORES')
    print(scores)
    print('\nSTARTING PLAYER / NON-STARTING PLAYER SCORES')
    print(sp_scores)
# One iteration of the self-play / retrain loop (fragment — the enclosing
# loop header is outside this chunk).
# NOTE(review): SOURCE arrived with newlines collapsed; the nesting below was
# reconstructed from context — confirm against the original file.
print('ITERATION NUMBER ' + str(iteration))
lg.logger_main.info('BEST PLAYER VERSION: %d', best_player_version)
print('BEST PLAYER VERSION ' + str(best_player_version))

# SELF PLAY
print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
# best_player plays against itself to generate fresh training positions
_, memory, _, _ = playMatches(best_player, best_player, config.EPISODES,
                              lg.logger_main,
                              turns_until_tau0=config.TURNS_UNTIL_TAU0,
                              memory=memory)
print('\n')
memory.clear_stmemory()

# only start retraining once the replay buffer has filled up
if len(memory.ltmemory) >= config.MEMORY_SIZE:
    # RETRAINING
    print('RETRAINING...')
    current_player.replay(memory.ltmemory)
    print('')

    # checkpoint the replay buffer every 5 iterations
    if iteration % 5 == 0:
        pickle.dump(
            memory,
            open(
                run_folder + 'Model_' + str(initialise.INITIAL_RUN_NUMBER) +
                "/memory/memory" + str(iteration).zfill(4) + ".p", "wb"))
self_play_players.append(opponent_player) ######## SELF PLAY ######## #epsilon = init_epsilon - iteration * (init_epsilon / 50.0) epsilon = 0 print('Current epsilon: {}'.format(epsilon)) print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...') _, memories = playMatches(self_play_players, config.EPISODES, lg.logger_main, epsilon, memory=memories) print('\n') memories.clear_stmemory() if len(memories.ltmemory) >= MIN_MEMORY_SIZE: #set_learning_phase(1) # tell keras backend that the model will be learning now trained = True ######## RETRAINING ######## print('RETRAINING...') current_player.replay(memories.ltmemory) current_player.evaluate(memories.ltmemory) print('') if iteration != 0 and iteration % 4 == 0: pickle.dump( memories, open(run_folder + "memory/memory" + str(iteration).zfill(4) + ".p",
def evaluation_worker(conn):
    """Worker process that evaluates candidate networks against the current best.

    Repeatedly requests the latest ``current_NN`` weights over *conn*, plays an
    evaluation tournament between ``best_player`` and ``current_player``, and
    promotes the current network to "best" when its score beats
    ``config.SCORING_THRESHOLD`` times the best player's score, persisting the
    new best weights and (periodically) the evaluation memories.

    Args:
        conn: multiprocessing ``Connection`` used to request/receive weights
            and to report promotion results back to the parent process.

    Never returns (infinite loop).
    """
    # Imports are local so that they happen inside the spawned worker process.
    import pickle  # fix: was missing — pickle.dump below raised NameError
    import time

    import config
    from config import PLAYER_COUNT, TEAM_SIZE, MEMORY_SIZE
    import initialise
    from model import Residual_CNN, import_tf
    import_tf(1024 * 3)  # cap TensorFlow GPU memory for this worker
    from game import Game
    from agent import Agent
    from memory import Memory
    from funcs import playMatches
    import loggers as lg
    import logging

    # small memory buffer used only for evaluation games
    test_memories = Memory(int(MEMORY_SIZE / 10))

    env = Game()

    # Create untrained network objects from the config file. For 2-D boards a
    # channel dimension is prepended; the output head has one entry per team
    # (or per player when there are no teams).
    if len(env.grid_shape) == 2:
        shape = (1,) + env.grid_shape
    else:
        shape = env.grid_shape

    output_size = int(PLAYER_COUNT / TEAM_SIZE) if TEAM_SIZE > 1 else PLAYER_COUNT
    current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                              output_size, config.HIDDEN_CNN_LAYERS)
    best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                           output_size, config.HIDDEN_CNN_LAYERS)

    current_player_version = 0
    best_player_version = 0

    # If loading an existing neural network, set the weights from that model
    if initialise.INITIAL_MODEL_VERSION is not None:
        best_player_version = initialise.INITIAL_MODEL_VERSION
        #print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
        m_tmp = best_NN.read(env.name, initialise.INITIAL_RUN_NUMBER,
                             initialise.INITIAL_MODEL_VERSION)
        current_NN.model.set_weights(m_tmp.get_weights())
        best_NN.model.set_weights(m_tmp.get_weights())
    else:
        # otherwise just ensure the weights on the two players are the same
        best_NN.model.set_weights(current_NN.model.get_weights())

    current_player = Agent('current_player', config.MCTS_SIMS, config.CPUCT, current_NN)
    best_player = Agent('best_player', config.MCTS_SIMS, config.CPUCT, best_NN)

    time.sleep(20)  # give the training process a head start

    while True:
        # request current_NN weights, then wait indefinitely for them
        conn.send(current_player_version)
        conn.poll(None)
        data = conn.recv()

        if data:
            # set current_NN weights
            current_NN.model.set_weights(data)
            current_player_version += 1

            # Assemble the tournament line-up, alternating best_players and
            # current_players (within each team when TEAM_SIZE > 1).
            tourney_players = []
            if TEAM_SIZE > 1:
                for _team in range(int(PLAYER_COUNT / TEAM_SIZE)):
                    for k in range(TEAM_SIZE):
                        tourney_players.append(
                            best_player if k % 2 == 0 else current_player)
            else:
                for i in range(PLAYER_COUNT):
                    tourney_players.append(
                        best_player if i % 2 == 0 else current_player)

            scores, test_memories = playMatches(tourney_players,
                                                config.EVAL_EPISODES,
                                                lg.logger_tourney,
                                                0.0,
                                                test_memories,
                                                evaluation=True)
            test_memories.clear_stmemory()

            # If the current player is significantly better than the best player,
            # replace the best player by copying current_NN's weights over.
            if scores['current_player'] > scores['best_player'] * config.SCORING_THRESHOLD:
                # current_NN won — notify the parent process before promoting
                conn.send(((current_player_version, best_player_version), str(scores)))
                best_player_version = best_player_version + 1
                best_NN.model.set_weights(current_NN.model.get_weights())
                best_NN.write(env.name, best_player_version)

            # Periodically checkpoint the evaluation memories once the buffer is full.
            # NOTE(review): run_folder must be provided by module scope — confirm.
            if (len(test_memories.ltmemory) == test_memories.MEMORY_SIZE
                    and current_player_version % 5 == 0):
                # fix: previously dumped the undefined name `memories` (NameError)
                # and leaked the file handle
                with open(run_folder + "memory/test_memory" +
                          str(current_player_version).zfill(4) + ".p", "wb") as f:
                    pickle.dump(test_memories, f)

            #print("Evaluating performance of current_NN")
            #current_player.evaluate_accuracy(test_memories.ltmemory)
            #print('\n')
        else:
            # no new weights yet — back off before asking again
            time.sleep(10)
# NOTE(review): this chunk is notebook/REPL-style scratch code (bare
# expressions used for inspection, several statements glued together in the
# collapsed SOURCE). It is not a runnable unit; statements were split back
# apart below with tokens unchanged — confirm against the original file.
scores, _, points, sp_scores = playMatches(best_player, current_player,
                                           config.EVAL_EPISODES,
                                           lg.logger_tourney,
                                           turns_until_tau0 = 0, memory = None)
best_player_version = 2
best_NN.model.set_weights(current_NN.model.get_weights())
# inspect the action-value distribution of one stored memory entry
actions = np.argwhere(memory.ltmemory[8]['AV'] == max(memory.ltmemory[8]['AV']))
actions = np.random.multinomial(1, memory.ltmemory[8]['AV'])
actions
np.where(actions == 1)
random.choice(actions)[0]
memory.ltmemory[8]['AV'][370:390]
# strip un-picklable references before dumping
memory.ltmemory[0]['board'] = None
# NOTE(review): indexing ltmemory with a string looks wrong — this raises on a
# list/deque; an integer index was presumably intended. Confirm.
memory.ltmemory['state'].engine = None
memory.clear_stmemory()
best_player_version = best_player_version + 1
best_NN.model.set_weights(current_NN.model.get_weights())
len(memory.ltmemory)
pickle.dump(memory, open(run_folder + "memory/memory" + str(4).zfill(4) + ".p", "wb"))
memory.ltmemory[1]
# rebuild a fresh Memory from the old one's long-term entries
mem = Memory(config.MEMORY_SIZE)
for m in memory.ltmemory:
    mem.commit_stmemory(m)
# reload a multiprocessing memory checkpoint and inspect it
memory = pickle.load(open(run_folder + "memory/multiproc/memory" + str(2).zfill(4) + ".p", "rb"))
len(memory[0].ltmemory)
memory[0].ltmemory[32]
# One iteration of the self-play / retrain loop (fragment — the enclosing
# loop header is outside this chunk).
iteration += 1
# re-read loggers and config so edits take effect without restarting the run
reload(lg)
reload(config)

print('ITERATION NUMBER ' + str(iteration))
lg.logger_main.info('BEST PLAYER VERSION: %d', best_player_version)
print('BEST PLAYER VERSION ' + str(best_player_version))

######## SELF PLAY ########
print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
# best_player plays against itself to generate fresh training positions
_, memory, _, _ = playMatches(best_player, best_player, config.EPISODES,
                              lg.logger_main,
                              turns_until_tau0 = config.TURNS_UNTIL_TAU0,
                              memory = memory)
print('\n')
memory.clear_stmemory()

# only start retraining once the replay buffer has filled up
if len(memory.ltmemory) >= config.MEMORY_SIZE:
    ######## RETRAINING ########
    print('RETRAINING...')
    current_player.replay(memory.ltmemory)
    print('')

    # checkpoint the replay buffer every 5 iterations
    if iteration % 5 == 0:
        # BUG FIX: `iteration` is an int — concatenating it directly into the
        # path raised TypeError; format it like the sibling checkpoint paths
        # (str(iteration).zfill(4)) and close the file deterministically.
        with open(run_folder + "memory/memory" + str(iteration).zfill(4) + ".p",
                  "wb") as f:
            pickle.dump(memory, f)

    lg.logger_memory.info('====================')
    lg.logger_memory.info('NEW MEMORIES')
    lg.logger_memory.info('====================')