Example #1
def playMatchesBetweenVersions(env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, goes_first = 0):
    
    if player1version == -1:
        player1 = User('player1', env.state_size, env.action_size)
    else:
        player1_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape,   env.action_size, config.HIDDEN_CNN_LAYERS)

        if player1version > 0:
            player1_network = player1_NN.read(env.name, run_version, player1version)
            player1_NN.model.set_weights(player1_network.get_weights())   
        player1 = Agent('player1', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player1_NN)

    if player2version == -1:
        player2 = User('player2', env.state_size, env.action_size)
    else:
        player2_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape,   env.action_size, config.HIDDEN_CNN_LAYERS)
        
        if player2version > 0:
            player2_network = player2_NN.read(env.name, run_version, player2version)
            player2_NN.model.set_weights(player2_network.get_weights())
        player2 = Agent('player2', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player2_NN)

    scores, memory, points, sp_scores = playMatches(player1, player2, EPISODES, logger, turns_until_tau0, None, goes_first)

    return (scores, memory, points, sp_scores)
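
A usage sketch for the function above (the Game environment and loggers module are the ones used throughout these examples; the run, version and episode numbers are purely illustrative):

# Hedged usage sketch: pit saved model version 3 against a human player.
# player2version=-1 selects the interactive User class; the numbers are illustrative.
from game import Game
import loggers as lg

env = Game()
scores, memory, points, sp_scores = playMatchesBetweenVersions(
    env, 1, 3, -1, 5, lg.logger_main, 0)
print(scores)
print(sp_scores)
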
def _selfplay(n):
    chessenv = Game()
    memory = Memory(config.MEMORY_SIZE)
    current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                              (119, ) + chessenv.grid_shape,
                              chessenv.action_size, config.HIDDEN_CNN_LAYERS)
    best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                           (119, ) + chessenv.grid_shape, chessenv.action_size,
                           config.HIDDEN_CNN_LAYERS)
    best_NN.model.set_weights(current_NN.model.get_weights())
    current_player = Agent('current_player', chessenv.state_size,
                           chessenv.action_size, config.MCTS_SIMS,
                           config.CPUCT, current_NN)
    best_player = Agent('best_player', chessenv.state_size,
                        chessenv.action_size, config.MCTS_SIMS, config.CPUCT,
                        best_NN)

    t0 = time.perf_counter()
    print('Proc {0} start'.format(n))
    _, memory, _, _ = playMatches(best_player,
                                  best_player,
                                  config.EPISODES,
                                  lg.logger_main,
                                  turns_until_tau0=config.TURNS_UNTIL_TAU0,
                                  memory=memory)
    t1 = time.perf_counter() - t0
    print('Proc {0} done in {1} seconds'.format(n, t1))
    return memory
  def start(self, data):

    # Generate Game from initial json
    board_json = data[BOARD_KEY]

    # Find grid shape
    self.grid_shape = (board_json[WIDTH_KEY], board_json[HEIGHT_KEY])

    self.w, self.h = self.grid_shape

    # Find the snake positions.
    snakes = board_json[SNAKES_KEY]

    # Find your position
    your_id = data[YOU_KEY][ID_KEY]

    starting_pos_json = []
    for snake in snakes:
      if snake[ID_KEY] == your_id:
        starting_pos_json = [snake[BODY_KEY][0]] + starting_pos_json
      else:
        starting_pos_json = starting_pos_json + [snake[BODY_KEY][0]]

    starting_pos = []
    for pos_json in starting_pos_json:
      starting_pos += [(pos_json['x'], pos_json['y'])]

    self.num_players = len(snakes) + 1

    # Food layer immediately after players
    self.food_layer = self.num_players * 2

    # Other Layer immediately after food
    self.other_layer = self.food_layer + 1

    # Total layers
    self.num_layers = self.other_layer + 1

    # Find the food positions.
    foods = board_json[FOOD_KEY]
    starting_food = []
    for pos_json in foods:
      starting_food += [(pos_json['x'], pos_json['y'])]

    print("Creating new Game")
    # Create Game
    env = Game(self.grid_shape, self.num_players, starting_pos, starting_food)

    print("Creating new Agent")
    # Create Agent
    player_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)
    player_network = player_NN.read(env.name, self.run_version, self.playerversion)
    player_NN.model.set_weights(player_network.get_weights())   
    # self.player = Agent('player', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player_NN)
    self.player = Agent('player', env.state_size, env.action_size, 50, config.CPUCT, player_NN)
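
The *_KEY constants used by start() are not defined in this snippet. Assuming they map to the standard Battlesnake field names ('board', 'width', 'height', 'snakes', 'food', 'you', 'id', 'body'), a minimal payload might look like the sketch below (key names and coordinates are assumptions, not taken from the code):

# Hedged sample payload for start(); `handler` stands for whatever object defines it.
sample_data = {
    'board': {
        'width': 11,
        'height': 11,
        'snakes': [
            {'id': 'snake-a', 'body': [{'x': 1, 'y': 1}]},
            {'id': 'snake-b', 'body': [{'x': 9, 'y': 9}]},
        ],
        'food': [{'x': 5, 'y': 5}],
    },
    'you': {'id': 'snake-a'},
}
handler.start(sample_data)
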
Example #4
def TTA_fitting(tr_gen, val_gen, test_gen, dummy_test_gen):
    fold_val, fold_test = [], []
    for i in range(N_ENSMBLE):
        callbacks = [
            # Callback that stops training when val_loss stops improving
            EarlyStopping(monitor='val_loss', patience=30, verbose=1),
            # Callback that reduces the learning rate when val_loss stops improving
            ReduceLROnPlateau(monitor='val_loss',
                              factor=0.1,
                              patience=10,
                              min_lr=1e-9),
            # Callback that saves the model with the highest val_categorical_accuracy
            ModelCheckpoint(savePath,
                            monitor='val_categorical_accuracy',
                            save_best_only=True)
        ]

        # load data
        RCNN = Residual_CNN(N_BLOCK, N_LAYER, FILTER, KERNEL)
        model = RCNN.build_model()

        # model training
        model.fit_generator(tr_gen,
                            callbacks=callbacks,
                            steps_per_epoch=tr_x.shape[0] // BATCH_SIZE,
                            epochs=EPOCH,
                            validation_data=val_gen,
                            validation_steps=val_x.shape[0] // BATCH_SIZE)
        # TTA dummy test data
        val_pred = TTA(model, dummy_test_gen, sub_epoch=10)
        tta_val_acc = accuracy_score(np.argmax(val_y, 1),
                                     np.argmax(val_pred, 1))
        print(f"TTA_vaild_accuracy : {tta_val_acc}")

        # TTA test data
        test_pred = TTA(model, test_gen, sub_epoch=10)

        # stacking TTA prediction
        fold_val.append(val_pred)
        fold_test.append(test_pred)

        # TTA ensemble accuracy
        if i != 0:
            mean_val_pred = np.mean(fold_val, axis=0)
            mean_test_pred = np.mean(fold_test, axis=0)
            ensmble_tta_val_acc = accuracy_score(np.argmax(val_y, 1),
                                                 np.argmax(mean_val_pred, 1))
            print(f"ensmbleTTA_vaild_accuracy : {ensmble_tta_val_acc}")

            # saving TTA ensemble prediction of test data
            submission = pd.Series(np.argmax(mean_test_pred, 1), name='label')
            submission.to_csv(os.path.join(
                PATH, 'submission_ensemble_' + str(i + 1) + ".csv"),
                              header=True,
                              index_label='id')
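
The TTA() helper called above is not shown. A minimal sketch of what such test-time augmentation over a Keras generator could look like (assuming the generator is a finite, non-shuffling Keras iterator so that predictions stay aligned with the labels):

import numpy as np

def TTA(model, generator, sub_epoch=10):
    # Hedged sketch: run the augmenting generator several times and average
    # the predicted class probabilities over the passes.
    preds = []
    for _ in range(sub_epoch):
        if hasattr(generator, 'reset'):
            generator.reset()
        preds.append(model.predict_generator(generator, steps=len(generator)))
    return np.mean(preds, axis=0)
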
Example #5
    def __init__(self, enable_cache=False):
        self.nn_start = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                                     config.INPUT_START_DIM,
                                     config.OUTPUT_START_DIM,
                                     config.HIDDEN_CNN_LAYERS)
        self.nn = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                               config.INPUT_DIM, config.OUTPUT_DIM,
                               config.HIDDEN_CNN_LAYERS)

        self.enable_cache = enable_cache
        self.cache = {}
Example #6
def Fold_fitting(train_x, train_y, test):
    N_FOLD = 5
    RCNN = Residual_CNN(N_BLOCK, N_LAYER, FILTER, KERNEL)
    model = RCNN.build_model()

    fold = StratifiedKFold(n_splits=N_FOLD, random_state=1103)
    pred = []
    acc = []
    for i, (tr_idx,
            val_idx) in enumerate(fold.split(train_x, np.argmax(train_y, 1))):
        callbacks = [
            # Callback that stops training when val_loss stops improving
            EarlyStopping(monitor='val_loss', patience=30, verbose=1),
            # Callback that reduces the learning rate when val_loss stops improving
            ReduceLROnPlateau(monitor='val_loss',
                              factor=0.1,
                              patience=10,
                              min_lr=1e-9),
            # Callback that saves the model with the highest val_categorical_accuracy
            ModelCheckpoint(savePath,
                            monitor='val_categorical_accuracy',
                            save_best_only=True)
        ]

        print(f"==========={i}_fold start ================= ")
        train_split = train_x[tr_idx], train_y[tr_idx]
        val_split = train_x[val_idx], train_y[val_idx]
        tr_gen, val_gen, test_gen, dummy_test_gen = get_argment_generator(
            train_split, val_split, train, test, BATCH_SIZE)

        h = model.fit_generator(tr_gen,
                                epochs=EPOCH,
                                steps_per_epoch=len(train_split[0]) // BATCH_SIZE,
                                validation_data=val_gen,
                                validation_steps=len(val_split[0]) // BATCH_SIZE,
                                callbacks=callbacks)
        val_pred = model.predict(dummy_test_gen)
        test_pred = model.predict(test_gen)
        val_score = accuracy_score(np.argmax(val_split[1], 1), np.argmax(val_pred, 1))
        print(f"fold_{i} val accuracy : {val_score}")
        pred.append(test_pred)

    mean_pred = np.mean(pred, axis=0)
    pred_y = np.argmax(mean_pred, 1)
    submission = pd.Series(pred_y, name='label')
    submission.to_csv(os.path.join(PATH, f"submission_fold{N_FOLD}.csv"),
                      header=True,
                      index_label='id')
    def __init__(self, model_number, nbMatches, alphaMaxDepth=4):

        # Determines both probability of choosing correct move and time to complete.
        self.alphaMaxDepth = alphaMaxDepth

        # Environments
        self.zeroEnv, self.alphaEnv = None, None
        self.reset_envs()

        # Players
        self.zeroPlayer = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                                       (2, ) + self.zeroEnv.grid_shape,
                                       self.zeroEnv.action_size,
                                       config.HIDDEN_CNN_LAYERS)
        self.alphaPlayer = AlphaBeta(board=self.alphaEnv,
                                     max_depth=alphaMaxDepth)

        # Setting weights and initiating agent.
        m_tmp = self.zeroPlayer.read(initialise.INITIAL_RUN_NUMBER,
                                     model_number)
        self.zeroPlayer.model.set_weights(m_tmp.get_weights())
        self.zeroPlayer = Agent('player1', self.zeroEnv.state_size,
                                self.zeroEnv.action_size, config.MCTS_SIMS,
                                config.CPUCT, self.zeroPlayer)

        self.results = 0

        # Playing matches
        for x in range(nbMatches):
            self.results += self.playMatch()
            self.reset_envs()
Example #8
    def test_play_matches_neural_network(self):
    
        memory = Memory(config.MEMORY_SIZE)

        # At the beginning we use a random model. It behaves much like an untrained CNN, but is quicker.
        # We also use config.MCTS_SIMS, which is rather low and will produce poor estimates from the MCTS.
        # The idea is to encourage exploration and generate a lot of boards in memory, even if the probabilities
        # associated with their possible actions are wrong.
        # Memory is completed at the end of the game according to the final winner, in order to correct the value
        # of each move: all the moves of the winner receive value=1 and all the moves of the loser receive value=-1.
        # The neural network will learn to predict the probabilities and the values.
        # It will learn wrong probabilities and values at the beginning, but after some time the MCTS and the
        # neural network will improve from each other and converge.
        player1 = Agent('cnn_agent_1', config.GRID_SHAPE[0] * config.GRID_SHAPE[1], config.GRID_SHAPE[1], config.MCTS_SIMS, config.CPUCT, GenRandomModel())
        player2 = Agent('cnn_agent_2', config.GRID_SHAPE[0] * config.GRID_SHAPE[1], config.GRID_SHAPE[1], config.MCTS_SIMS, config.CPUCT, GenRandomModel())
        
        scores, memory, points, sp_scores = play_matches.playMatches(player1, player2, config.EPISODES, lg.logger_main, turns_until_tau0 = config.TURNS_UNTIL_TAU0, memory = memory)

        # play_matches.playMatches() has copied stmemory to ltmemory, so we can clear stmemory safely
        memory.clear_stmemory()

        cnn1 = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (1,) + config.GRID_SHAPE, config.GRID_SHAPE[1], config.HIDDEN_CNN_LAYERS)
        cnn2 = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (1,) + config.GRID_SHAPE, config.GRID_SHAPE[1], config.HIDDEN_CNN_LAYERS)
        cnn2.model.set_weights(cnn1.model.get_weights())
        cnn1.plot_model()

        player1.model = cnn1

        ######## RETRAINING ########
        player1.replay(memory.ltmemory)

        for _ in range(1):

            scores, memory, points, sp_scores = play_matches.playMatches(player1, player2, config.EPISODES, lg.logger_main, turns_until_tau0 = config.TURNS_UNTIL_TAU0, memory = memory)

            # play_matches.playMatches() has copied stmemory to ltmemory, so we can clear stmemory safely
            memory.clear_stmemory()

            player1.replay(memory.ltmemory)

        
        print('TOURNAMENT...')
        scores, _, points, sp_scores = play_matches.playMatches(player1, player2, config.EVAL_EPISODES, lg.logger_main, turns_until_tau0 = 0, memory = None)
        print('\nSCORES')
        print(scores)
        print('\nSTARTING PLAYER / NON-STARTING PLAYER SCORES')
        print(sp_scores)
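
GenRandomModel is referenced in the test above but not defined in this snippet. A hedged sketch of such a stand-in follows; it assumes the Agent only needs the convertToModelInput()/predict() interface that Residual_CNN exposes, with predict() returning a [value, policy_logits] pair:

import numpy as np

class GenRandomModel:
    # Hedged sketch of a random stand-in model: cheap to evaluate and
    # deliberately uninformative, so early self-play relies on exploration.
    def convertToModelInput(self, state):
        # The random model ignores the board, so no conversion is needed.
        return state

    def predict(self, model_input):
        value = np.random.uniform(-1.0, 1.0, size=(1, 1))
        logits = np.random.uniform(-1.0, 1.0, size=(1, config.GRID_SHAPE[1]))
        return [value, logits]
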
    def __init__(self,
                 env,
                 run_version,
                 player1version,
                 player2version,
                 EPISODES,
                 logger,
                 turns_until_tau0,
                 goes_first=0):
        self.EPISODES = EPISODES
        self.turns_until_tau0 = turns_until_tau0
        self.logger = logger
        self.goes_first = goes_first

        if player1version == -1:
            self.player1 = User('player1', env.state_size, env.action_size)
        else:
            self.player1_NN = Residual_CNN(config.REG_CONST,
                                           config.LEARNING_RATE,
                                           env.input_shape, env.action_size,
                                           config.HIDDEN_CNN_LAYERS)

            if player1version > 0:
                self.player1_network = self.player1_NN.read(
                    env.name, run_version, player1version)
                self.player1_NN.model.set_weights(
                    self.player1_network.get_weights())
            self.player1 = Agent('player1', env.state_size, env.action_size,
                                 config.MCTS_SIMS, config.CPUCT,
                                 self.player1_NN)

        if player2version == -1:
            self.player2 = User('player2', env.state_size, env.action_size)
        else:
            self.player2_NN = Residual_CNN(config.REG_CONST,
                                           config.LEARNING_RATE,
                                           env.input_shape, env.action_size,
                                           config.HIDDEN_CNN_LAYERS)

            if player2version > 0:
                self.player2_network = self.player2_NN.read(
                    env.name, run_version, player2version)
                self.player2_NN.model.set_weights(
                    self.player2_network.get_weights())
            self.player2 = Agent('player2', env.state_size, env.action_size,
                                 config.MCTS_SIMS, config.CPUCT,
                                 self.player2_NN)
Example #10
def main():
    env = Game()
    if initial.INITIAL_MEMORY_VERSION == None:
        memory =  Memory(config.MEMORY_SIZE)

    # load neural network
    current_NN = Residual_CNN(config.REG_CONST,config.LEARNING_RATE,(2,) + env.grid_shape, env.action_size, config.HIDDEN_CNN_LAYERS) #????
    best_NN = Residual_CNN(config.REG_CONST,config.LEARNING_RATE,(2,) + env.grid_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

    best_player_version = 0
    best_NN.model.set_weights(current_NN.model.get_weights())

    #create players
    current_player = Agent('current_player', env.state_size, env.action_size,
                           config.MCTS_SIMS, config.CPUCT, current_NN)
    best_player = Agent('best_player', env.state_size, env.action_size,
                        config.MCTS_SIMS, config.CPUCT, best_NN)
    iter = 0

    while 1:
        pass
def playMatchesBetweenVersions(env, run_version, player1version, player2version, EPISODES, logger, turns_until_tau0, goes_first = 0):
    
    if player1version == -1:
        player1 = User('player1', env.state_size, env.action_size)
    else:
        player1_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape,   env.action_size, config.HIDDEN_CNN_LAYERS)

        if player1version > 0:
            player1_network = player1_NN.read(env.name, run_version, player1version)
            player1_NN.model.set_weights(player1_network.get_weights())   
        player1 = Agent('player1', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player1_NN)

    if player2version == -1:
        player2 = User('player2', env.state_size, env.action_size)
    else:
        player2_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, env.input_shape,   env.action_size, config.HIDDEN_CNN_LAYERS)
        
        if player2version > 0:
            player2_network = player2_NN.read(env.name, run_version, player2version)
            player2_NN.model.set_weights(player2_network.get_weights())
        player2 = Agent('player2', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, player2_NN)

    printmoves = player1version == -1 or player2version == -1
    scores, memory, points, sp_scores = playMatches(player1, player2, EPISODES, logger, turns_until_tau0, None, goes_first, printmoves)

    return (scores, memory, points, sp_scores)
Example #12
def loadAgent(env, config, name):
    agent_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                            env.input_shape, env.action_size,
                            config.HIDDEN_CNN_LAYERS)
    agent_network = load_model("./models/" + name + '.h5',
                               custom_objects={
                                   'softmax_cross_entropy_with_logits':
                                   softmax_cross_entropy_with_logits
                               })
    agent_NN.model.set_weights(agent_network.get_weights())
    agent = Agent(name, env.state_size, env.action_size, config.MCTS_SIMS,
                  config.CPUCT, agent_NN)
    return agent
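
A possible usage sketch for loadAgent() (the model names under ./models/ are assumptions; playMatches is the same helper used in the other examples, with the Game/config/loggers imports used elsewhere):

# Hedged usage sketch: load two saved agents by name and let them play.
env = Game()
agent_a = loadAgent(env, config, 'best_model')
agent_b = loadAgent(env, config, 'candidate_model')
scores, memory, points, sp_scores = playMatches(
    agent_a, agent_b, 10, lg.logger_main, turns_until_tau0=0, memory=None)
print(scores)
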
Example #13
def predict():

    current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                              (2, ) + (6, 7), 42, config.HIDDEN_CNN_LAYERS)
    current_NN.model.set_weights(
        current_NN.read('connect4', 2, 74).get_weights())
    current_player = Agent('current_player', 84, 42, config.MCTS_SIMS,
                           config.CPUCT, current_NN)

    if request.method == 'POST':
        try:
            data = request.get_json()
            gs = GameState(np.array(json.loads(data["gameState"])), 1)
            #print(gs)
            preds = current_player.get_preds(gs)
            preds = np.array(preds[1]).reshape(6, 7)
            pred_arg = np.unravel_index(preds.argmax(), preds.shape)

        except ValueError:
            return jsonify("Please enter a proper GameState.")

        return jsonify([int(x) for x in pred_arg])
    if request.method == 'GET':
        return "Hello World! GET request"
Example #14
def train_network(agent, train_phase):
    if train_phase[0] == 'start':
        net = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                           config.INPUT_START_DIM, config.OUTPUT_START_DIM,
                           config.HIDDEN_CNN_LAYERS)
        net_str = 's'
    elif train_phase[0] == 'general':
        net = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                           config.INPUT_DIM, config.OUTPUT_DIM,
                           config.HIDDEN_CNN_LAYERS)
        net_str = 'g'

    net.read(agent, net_str)

    #validation_file = choice(listdir('validation_states'))
    #with open('validation_states\\' + validation_file, 'rb') as input_file:
    #    validation = pickle.load(input_file)

    min_val_error = 10000.0
    for i in range(config.TRAINING_LOOPS):
        print("Iteration #" + str(i))

        game_file = choice(listdir('train_states'))
        with open('train_states\\' + game_file, 'rb') as input_file:
            game_memory = pickle.load(input_file)
        remove('train_states\\' + game_file)

        hist = net.fit(game_memory['batch_states'],
                       game_memory['batch_targets'], config.EPOCHS, 2, 0.0, 32)

        #metric = hist.history['val_loss'][config.EPOCHS - 1]

        #if metric < min_val_error:
        #    min_val_error = metric
        net.write(agent, net_str)

        print("Min Loss: " + str(min_val_error))
Example #15
def playMatchesBetweenVersions(env,
                               run_version,
                               player1version,
                               player2version,
                               EPISODES,
                               logger,
                               turns_until_tau0,
                               goes_first=0):
    env = Game()
    if player1version == -1:
        player1 = User("user1", env.state_size, env.action_size)
    else:
        player1_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                                  env.input_shape, env.action_size,
                                  config.HIDDEN_CNN_LAYERS)

        if player1version > 0:
            name = env.name + "{0:0>4}".format(player1version)
            if Provider.getNetByName(name) == None:
                return
            player1_network = player1_NN.read(env.name, run_version,
                                              player1version)
            player1_NN.model.set_weights(player1_network.get_weights())
        netName = env.name + "{0:0>4}".format(player1version)
        player1 = Agent(netName, env.state_size, env.action_size,
                        config.MCTS_SIMS, config.CPUCT, player1_NN)

    if player2version == -1:
        name = input('enter username: ')
        user2 = Provider.getPersonByName(name)
        player2 = User(user2.name, env.state_size, env.action_size)
    else:
        player2_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                                  env.input_shape, env.action_size,
                                  config.HIDDEN_CNN_LAYERS)

        if player2version > 0:
            name = env.name + "{0:0>4}".format(player2version)
            if Provider.getNetByName(name) == None:
                return
            player2_network = player2_NN.read(env.name, run_version,
                                              player2version)
            player2_NN.model.set_weights(player2_network.get_weights())
        net2Name = env.name + "{0:0>4}".format(player2version)
        player2 = Agent(net2Name, env.state_size, env.action_size,
                        config.MCTS_SIMS, config.CPUCT, player2_NN)

    scores, memory, points, sp_scores = playMatches(player1, player2, EPISODES,
                                                    logger, turns_until_tau0,
                                                    None, goes_first)

    return (scores, memory, points, sp_scores)
Example #16
def playMatchesBetweenVersions(env,
                               run_version,
                               player1version,
                               player2version,
                               EPISODES,
                               logger,
                               turns_until_tau0,
                               goes_first=0):
    # -1 means this player is a human
    if player1version == -1:
        player1 = User('player1', env.state_size, env.action_size)
    else:
        # Residual_CNN returns a network object
        player1_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                                  env.input_shape, env.action_size,
                                  config.HIDDEN_CNN_LAYERS)
        if player1version > 0:
            # If this is not a human player, load the trained version and its weights
            player1_network = player1_NN.read(env.name, run_version,
                                              player1version)
            player1_NN.model.set_weights(player1_network.get_weights())

        # Run simulations with it and build the MCTS tree
        player1 = Agent('player1', env.state_size, env.action_size,
                        config.MCTS_SIMS, config.CPUCT, player1_NN)

    if player2version == -1:
        player2 = User('player2', env.state_size, env.action_size)
    else:
        player2_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                                  env.input_shape, env.action_size,
                                  config.HIDDEN_CNN_LAYERS)

        if player2version > 0:
            player2_network = player2_NN.read(env.name, run_version,
                                              player2version)
            player2_NN.model.set_weights(player2_network.get_weights())
        player2 = Agent('player2', env.state_size, env.action_size,
                        config.MCTS_SIMS, config.CPUCT, player2_NN)

    scores, memory, points, sp_scores = playMatches(player1, player2, EPISODES,
                                                    logger, turns_until_tau0,
                                                    None, goes_first)

    return (scores, memory, points, sp_scores)
Example #17
chessenv = Game()
memory = Memory(config.MEMORY_SIZE)
memory.commit_stmemory(chessenv.identities, chessenv.gameState, chessenv.actionSpace)
memory.stmemory


pool = multiprocessing.Pool(2)
out = zip(pool.map(_selfplay, range(0, 2)))
t = tuple(out)

len(t)

chessenv.action_size
chessenv.state_size
chessenv.grid_shape
current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (119,)+chessenv.grid_shape, chessenv.action_size, config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (119,)+chessenv.grid_shape, chessenv.action_size, config.HIDDEN_CNN_LAYERS)
best_NN.model.set_weights(current_NN.model.get_weights())

best_player_version  = 2
print('LOADING MODEL VERSION ' + str(2) + '...')
m_tmp = best_NN.read(chessenv.name, 2, best_player_version)
current_NN.model.set_weights(m_tmp.get_weights())
best_NN.model.set_weights(m_tmp.get_weights())

current_player = Agent('current_player', chessenv.state_size, chessenv.action_size, config.MCTS_SIMS, config.CPUCT, current_NN)
best_player = Agent('best_player', chessenv.state_size, chessenv.action_size, config.MCTS_SIMS, config.CPUCT, best_NN)


state = chessenv.reset()
state.render(None)
Example #18

lg.logger_main.info('=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*')
lg.logger_main.info('=*=*=*=*=*=.      NEW LOG      =*=*=*=*=*')
lg.logger_main.info('=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*')

env = Game()

import config

memory = Memory(config.MEMORY_SIZE)

######## LOAD MODEL IF NECESSARY ########

# create an untrained neural network objects from the config file
current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, Game.InputShape, Game.ActionSize, config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, Game.InputShape, Game.ActionSize, config.HIDDEN_CNN_LAYERS)

#If loading an existing neural network, set the weights from that model
if initialise.INITIAL_MODEL_VERSION != None:
    best_player_version  = initialise.INITIAL_MODEL_VERSION
    print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
    m_tmp = current_NN.read(Game.Name, initialise.INITIAL_RUN_NUMBER, best_player_version)
    current_NN.model.set_weights(m_tmp.get_weights())
    m_tmp = best_NN.read(Game.Name, initialise.INITIAL_RUN_NUMBER, best_player_version)
    best_NN.model.set_weights(m_tmp.get_weights())
#otherwise just ensure the weights on the two players are the same
else:
    best_player_version = 0
    best_NN.model.set_weights(current_NN.model.get_weights())
Example #19
                open(
                    run_archive_folder + env.name + '/run' +
                    str(initialise.INITIAL_RUN_NUMBER).zfill(4) +
                    "/memory/decision_" + str(d_t) + "_memory" +
                    str(MEM_VERSION).zfill(4) + ".p", "rb")))

######## LOAD MODEL IF NECESSARY ########

current_NN = []
best_NN = []

# create an untrained neural network objects from the config file
for i in range(DECISION_TYPES):
    current_NN.append(
        Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                     (1, ) + env.grid_shape, env.action_size[i],
                     config.HIDDEN_CNN_LAYERS, i))
    best_NN.append(
        Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                     (1, ) + env.grid_shape, env.action_size[i],
                     config.HIDDEN_CNN_LAYERS, i))

best_player_version = []
# If loading an existing neural network, set the weights from that model
if initialise.INITIAL_MODEL_VERSION != [None] * DECISION_TYPES:
    for i, version in enumerate(initialise.INITIAL_MODEL_VERSION):
        best_player_version.append(initialise.INITIAL_MODEL_VERSION[i])
        print('LOADING MODEL VERSION ' +
              str(initialise.INITIAL_MODEL_VERSION[i]) + '...')
        m_tmp = best_NN[i].read(env.name, initialise.INITIAL_RUN_NUMBER,
                                version)
Example #20
            str(initialise.INITIAL_MEMORY_VERSION).zfill(4) + ".p", "rb"))

    if memories.MEMORY_SIZE != MEMORY_SIZE:
        memories.extension(MEMORY_SIZE)

######## LOAD MODEL IF NECESSARY ########

# create an untrained neural network objects from the config file
if len(env.grid_shape) == 2:
    shape = (1, ) + env.grid_shape
else:
    shape = env.grid_shape

if TEAM_SIZE > 1:
    current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                              int(PLAYER_COUNT / TEAM_SIZE),
                              config.HIDDEN_CNN_LAYERS)
    best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                           int(PLAYER_COUNT / TEAM_SIZE),
                           config.HIDDEN_CNN_LAYERS)
    opponent_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                               int(PLAYER_COUNT / TEAM_SIZE),
                               config.HIDDEN_CNN_LAYERS)
else:
    current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                              PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)
    best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                           PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)
    opponent_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                               PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)
Example #21
env = Game()

# # If loading an existing neural network, copy the config file to root
if initialise.INITIAL_RUN_NUMBER != None:
    copyfile(
        run_archive_folder + env.name + '/run' +
        str(initialise.INITIAL_RUN_NUMBER).zfill(4) + '/config.py',
        './config.py')

import config

# ######## LOAD MEMORIES IF NECESSARY ########

if initialise.INITIAL_MEMORY_VERSION == None:
    memory = Memory(config.MEMORY_SIZE)
else:
    print('LOADING MEMORY VERSION ' + str(initialise.INITIAL_MEMORY_VERSION) +
          '...')
    memory = pickle.load(
        open(
            run_archive_folder + env.name + '/run' +
            str(initialise.INITIAL_RUN_NUMBER).zfill(4) + "/memory/memory" +
            str(initialise.INITIAL_MEMORY_VERSION).zfill(4) + ".p", "rb"))

current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                          (2, ) + env.grid_shape, env.action_size,
                          config.HIDDEN_CNN_LAYERS)

current_NN.write(1, 1)
def evaluation_worker(conn):
    import config
    from config import PLAYER_COUNT, TEAM_SIZE, MEMORY_SIZE
    import initialise
    from model import Residual_CNN, import_tf
    import_tf(1024 * 3)
    from game import Game
    from agent import Agent
    from memory import Memory
    from funcs import playMatches
    import loggers as lg
    import logging
    import time

    # initialise new test memory
    test_memories = Memory(int(MEMORY_SIZE / 10))

    env = Game()

    # initialise new models
    # create an untrained neural network objects from the config file
    if len(env.grid_shape) == 2:
        shape = (1, ) + env.grid_shape
    else:
        shape = env.grid_shape

    if TEAM_SIZE > 1:
        current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                                  shape, int(PLAYER_COUNT / TEAM_SIZE),
                                  config.HIDDEN_CNN_LAYERS)
        best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                               int(PLAYER_COUNT / TEAM_SIZE),
                               config.HIDDEN_CNN_LAYERS)
    else:
        current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                                  shape, PLAYER_COUNT,
                                  config.HIDDEN_CNN_LAYERS)
        best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                               PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)

    current_player_version = 0
    best_player_version = 0
    # If loading an existing neural network, set the weights from that model
    if initialise.INITIAL_MODEL_VERSION != None:
        best_player_version = initialise.INITIAL_MODEL_VERSION
        #print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
        m_tmp = best_NN.read(env.name, initialise.INITIAL_RUN_NUMBER,
                             initialise.INITIAL_MODEL_VERSION)
        current_NN.model.set_weights(m_tmp.get_weights())
        best_NN.model.set_weights(m_tmp.get_weights())
    # otherwise just ensure the weights on the two players are the same
    else:
        best_NN.model.set_weights(current_NN.model.get_weights())

    current_player = Agent('current_player', config.MCTS_SIMS, config.CPUCT,
                           current_NN)
    best_player = Agent('best_player', config.MCTS_SIMS, config.CPUCT, best_NN)

    time.sleep(20)

    while 1:
        # request current_NN weights
        conn.send(current_player_version)
        # wait indefinitely for current_NN weights
        conn.poll(None)
        data = conn.recv()

        if data:

            # set current_NN weights
            current_NN.model.set_weights(data)
            current_player_version += 1

            # play tournament games
            tourney_players = []
            if TEAM_SIZE > 1:
                for i in range(int(PLAYER_COUNT / TEAM_SIZE)):  # for each team
                    for k in range(
                            TEAM_SIZE
                    ):  # alternate adding best_players and current_players up to the TEAM_SIZE
                        if k % 2 == 0:
                            tourney_players.append(best_player)
                        else:
                            tourney_players.append(current_player)
            else:
                for i in range(PLAYER_COUNT):
                    if i % 2 == 0:
                        tourney_players.append(best_player)
                    else:
                        tourney_players.append(current_player)

            scores, test_memories = playMatches(tourney_players,
                                                config.EVAL_EPISODES,
                                                lg.logger_tourney,
                                                0.0,
                                                test_memories,
                                                evaluation=True)
            test_memories.clear_stmemory()

            # if the current player is significantly better than the best_player replace the best player
            # the replacement is made by just copying the weights of current_player's nn to best_player's nn
            if scores['current_player'] > scores[
                    'best_player'] * config.SCORING_THRESHOLD:
                # if current_NN won send message
                conn.send(((current_player_version, best_player_version),
                           str(scores)))

                best_player_version = best_player_version + 1
                best_NN.model.set_weights(current_NN.model.get_weights())
                best_NN.write(env.name, best_player_version)

            if len(
                    test_memories.ltmemory
            ) == test_memories.MEMORY_SIZE and current_player_version % 5 == 0:
                pickle.dump(
                    test_memories,
                    open(
                        run_folder + "memory/test_memory" +
                        str(current_player_version).zfill(4) + ".p", "wb"))

                #print("Evaluating performance of current_NN")
                #current_player.evaluate_accuracy(test_memories.ltmemory)
                #print('\n')
        else:
            time.sleep(10)
Example #23
def self_play_worker(conn):
    import os
    import config
    from config import PLAYER_COUNT, TEAM_SIZE, MEMORY_SIZE
    from memory import Memory
    from settings import run_folder, run_archive_folder
    import initialise
    from game import Game, GameState
    from agent import Agent
    from model import Residual_CNN, import_tf
    import_tf(1024 * 3)
    from shutil import copyfile
    from funcs import playMatches
    import loggers as lg
    import logging
    import random

    env = Game()

    ######## LOAD MODEL IF NECESSARY ########
    # create an untrained neural network objects from the config file
    if len(env.grid_shape) == 2:
        shape = (1, ) + env.grid_shape
    else:
        shape = env.grid_shape

    if TEAM_SIZE > 1:
        best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                               int(PLAYER_COUNT / TEAM_SIZE),
                               config.HIDDEN_CNN_LAYERS)
        opponent_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                                   shape, int(PLAYER_COUNT / TEAM_SIZE),
                                   config.HIDDEN_CNN_LAYERS)
    else:
        best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                               PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)
        opponent_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                                   shape, PLAYER_COUNT,
                                   config.HIDDEN_CNN_LAYERS)

    best_player_version = 0
    best_NN.model.set_weights(opponent_NN.model.get_weights())

    best_player = Agent('best_player', config.MCTS_SIMS, config.CPUCT, best_NN)
    opponent_player = Agent('selected_opponent', config.MCTS_SIMS,
                            config.CPUCT, opponent_NN)

    if initialise.INITIAL_ITERATION != None:
        iteration = initialise.INITIAL_ITERATION
    else:
        iteration = 0

    memories = Memory(150 * config.EPISODES)
    while 1:
        iteration += 1

        # request best_NN weights
        conn.send(best_player_version)
        # wait indefinitely for best_NN weights
        conn.poll(None)
        data = conn.recv()
        #print('recieved: {}'.format(data))

        # if weights different set weights
        if data:
            best_NN.model.set_weights(data[1])
            best_player_version = data[0]

        if len(memories.ltmemory) != 0:  # send new memories (skip first loop)
            conn.send(memories.ltmemory)

        memories = Memory(150 * config.EPISODES)
        ######## CREATE LIST OF PLAYERS #######
        # for training it is just 2 copies of the best_player vs. 2 copies of another randomly selected model
        filenames = os.listdir('run/models/')
        filenames = [name for name in filenames if '.h5' == name[-3:]]

        if filenames:
            opponent = random.choice(filenames)
            m_tmp = opponent_NN.read_specific('run/models/' + opponent)
            opponent_NN.model.set_weights(m_tmp.get_weights())

            self_play_players = []
            for i in range(PLAYER_COUNT):
                if i % 2 == 0:
                    self_play_players.append(best_player)
                else:
                    self_play_players.append(opponent_player)
        else:
            self_play_players = []
            for i in range(PLAYER_COUNT):
                self_play_players.append(best_player)

        #print("Version {} randomly selected to play against version {}".format(int(opponent[-7:-3]), best_player_version))

        ######## SELF PLAY ########
        #epsilon = init_epsilon - iteration * (init_epsilon / 50.0)
        epsilon = 0

        #print('Current epsilon: {}'.format(epsilon))
        print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
        _, memories = playMatches(self_play_players,
                                  config.EPISODES,
                                  lg.logger_main,
                                  epsilon,
                                  memory=memories)
Example #24
class Agent_NN:
    def __init__(self, enable_cache=False):
        self.nn_start = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                                     config.INPUT_START_DIM,
                                     config.OUTPUT_START_DIM,
                                     config.HIDDEN_CNN_LAYERS)
        self.nn = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                               config.INPUT_DIM, config.OUTPUT_DIM,
                               config.HIDDEN_CNN_LAYERS)

        self.enable_cache = enable_cache
        self.cache = {}

    def purge_cache(self):
        self.cache = {}

    def nn_read(self, name):
        self.nn_start.read(name, 's')
        self.nn.read(name, 'g')

    def nn_write(self, name):
        self.nn_start.write(name, 's')
        self.nn.write(name, 'g')

    def predict(self, state, perspective, mcts):
        network = self.build_nn_input(state, perspective, mcts=mcts)

        if network.shape[1] == config.INPUT_DIM[0]:
            return self.nn.predict(network)
        else:
            return self.nn_start.predict(network)

    def build_start_nn_input(self, state, perspective):
        nn_input = np.zeros(
            (1, config.INPUT_START_DIM[0], config.INPUT_START_DIM[1],
             config.INPUT_START_DIM[2]),
            dtype=np.float32)

        numbers_output = {
            2: 1,
            3: 2,
            4: 3,
            5: 4,
            6: 5,
            8: 5,
            9: 4,
            10: 3,
            11: 2,
            12: 1
        }
        rotation = np.random.randint(12)

        if self.enable_cache is True and rotation in self.cache:
            nn_input[:, :11, :, :] = self.cache[rotation]

        else:
            # Resources outputs
            for number, tile in state.numbers:
                resource = state.tiles[tile]
                for vertex in config.tiles_vertex[tile]:
                    nn_input[0, resource - 2,
                             config.vertex_to_nn_input[rotation][vertex][0],
                             config.vertex_to_nn_input[rotation][vertex]
                             [1]] += numbers_output[number] / 15.0

            # Ports
            for key, r in enumerate([
                    config.SHEEP, config.ORE, config.BRICK, config.WHEAT,
                    config.WOOD, config.GENERIC
            ]):
                indices = [i for i, x in enumerate(state.ports) if x == r]
                for i in indices:
                    for vertex in config.ports_vertex[i]['vert']:
                        nn_input[
                            0, key + 5,
                            config.vertex_to_nn_input[rotation][vertex][0],
                            config.vertex_to_nn_input[rotation][vertex][1]] = 1

            if self.enable_cache is True:
                self.cache[rotation] = nn_input[:, :11, :, :]

        # Settlements and roads (no city layer in the initial-placement input)
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            for s in state.players[p].settlements:
                nn_input[0, 11 + 2 * p_order,
                         config.vertex_to_nn_input[rotation][s][0],
                         config.vertex_to_nn_input[rotation][s][1]] = 1
            for r in state.players[p].roads:
                nn_input[
                    0, 12 + 2 * p_order,
                    config.vertex_to_nn_input[rotation][r[0]][0],
                    config.vertex_to_nn_input[rotation][r[0]][1]] += 1 / 3.0
                nn_input[
                    0, 12 + 2 * p_order,
                    config.vertex_to_nn_input[rotation][r[1]][0],
                    config.vertex_to_nn_input[rotation][r[1]][1]] += 1 / 3.0

        # Cards
        for p in range(4):
            p_order = (4 + p - perspective) % 4

            for key, r in enumerate([
                    config.SHEEP, config.ORE, config.BRICK, config.WHEAT,
                    config.WOOD
            ]):
                nn_input[0, 19 + key +
                         5 * p_order, :, :] = state.players[p].cards[r] / 10.0

        # State
        if (state.game_phase == config.PHASE_INITIAL_SETTLEMENT
                or state.game_phase == config.PHASE_INITIAL_ROAD
            ) and state.initial_phase_decrease == 0:
            nn_input[0, 39, :, :] = 1
        if (state.game_phase == config.PHASE_INITIAL_SETTLEMENT
                or state.game_phase == config.PHASE_INITIAL_ROAD
            ) and state.initial_phase_decrease == 1:
            nn_input[0, 40, :, :] = 1

        # Player turn
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            if p == state.player_turn:
                nn_input[0, 41 + p_order, :, :] = 1

        return nn_input

    def build_nn_input(self, state, perspective, mcts=None):
        if state.game_phase == config.PHASE_INITIAL_SETTLEMENT or state.game_phase == config.PHASE_INITIAL_ROAD:
            return self.build_start_nn_input(state, perspective)

        nn_input = np.zeros(
            (1, config.INPUT_DIM[0], config.INPUT_DIM[1], config.INPUT_DIM[2]),
            dtype=np.float32)

        numbers_output = {
            2: 1,
            3: 2,
            4: 3,
            5: 4,
            6: 5,
            8: 5,
            9: 4,
            10: 3,
            11: 2,
            12: 1
        }
        rotation = np.random.randint(12)

        if self.enable_cache is True and rotation in self.cache:
            nn_input[:, :11, :, :] = self.cache[rotation]

        else:
            # Resources outputs
            for number, tile in state.numbers:
                resource = state.tiles[tile]
                for vertex in config.tiles_vertex[tile]:
                    nn_input[0, resource - 2,
                             config.vertex_to_nn_input[rotation][vertex][0],
                             config.vertex_to_nn_input[rotation][vertex]
                             [1]] += numbers_output[number] / 15.0

            # Ports
            for key, r in enumerate([
                    config.SHEEP, config.ORE, config.BRICK, config.WHEAT,
                    config.WOOD, config.GENERIC
            ]):
                indices = [i for i, x in enumerate(state.ports) if x == r]
                for i in indices:
                    for vertex in config.ports_vertex[i]['vert']:
                        nn_input[
                            0, key + 5,
                            config.vertex_to_nn_input[rotation][vertex][0],
                            config.vertex_to_nn_input[rotation][vertex][1]] = 1

            if self.enable_cache is True:
                self.cache[rotation] = nn_input[:, :11, :, :]

        # Settlements, cities, roads
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            for s in state.players[p].settlements:
                nn_input[0, 11 + 3 * p_order,
                         config.vertex_to_nn_input[rotation][s][0],
                         config.vertex_to_nn_input[rotation][s][1]] = 1
            for c in state.players[p].cities:
                nn_input[0, 12 + 3 * p_order,
                         config.vertex_to_nn_input[rotation][c][0],
                         config.vertex_to_nn_input[rotation][c][1]] = 1
            for r in state.players[p].roads:
                nn_input[
                    0, 13 + 3 * p_order,
                    config.vertex_to_nn_input[rotation][r[0]][0],
                    config.vertex_to_nn_input[rotation][r[0]][1]] += 1 / 3.0
                nn_input[
                    0, 13 + 3 * p_order,
                    config.vertex_to_nn_input[rotation][r[1]][0],
                    config.vertex_to_nn_input[rotation][r[1]][1]] += 1 / 3.0

        # Cards
        for p in range(4):
            p_order = (4 + p - perspective) % 4

            for key, r in enumerate([
                    config.SHEEP, config.ORE, config.BRICK, config.WHEAT,
                    config.WOOD
            ]):
                nn_input[0, 23 + key +
                         5 * p_order, :, :] = state.players[p].cards[r] / 10.0

        # Robber
        for vertex in config.tiles_vertex[state.robber_tile]:
            nn_input[0, 43, config.vertex_to_nn_input[rotation][vertex][0],
                     config.vertex_to_nn_input[rotation][vertex][1]] = 1

        # Army Cards Played
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            nn_input[0,
                     44 + p_order, :, :] = state.players[p].used_knights / 5.0

        # Army Holder
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            nn_input[0,
                     48 + p_order, :, :] = state.players[p].largest_army_badge

        # Longest Road Holder
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            nn_input[0,
                     52 + p_order, :, :] = state.players[p].longest_road_badge

        # Special Cards
        for p in range(4):
            p_order = (4 + p - perspective) % 4

            for key, r in enumerate([
                    config.VICTORY_POINT, config.KNIGHT, config.MONOPOLY,
                    config.ROAD_BUILDING, config.YEAR_OF_PLENTY
            ]):
                nn_input[0, 56 + key + 5 *
                         p_order, :, :] = state.players[p].special_cards.count(
                             r) / 3.0

        # Discarding, initial game phase
        if state.game_phase == config.PHASE_DISCARD:
            nn_input[0, 76, :, :] = 1

        # Player turn
        for p in range(4):
            p_order = (4 + p - perspective) % 4
            if p == state.player_turn:
                nn_input[0, 77 + p_order, :, :] = 1

        # Other game phases
        if state.game_phase == config.PHASE_THROW_DICE:
            nn_input[0, 81, :, :] = 1
        if state.game_phase == config.PHASE_MOVE_ROBBER:
            nn_input[0, 82, :, :] = 1
        if state.game_phase == config.PHASE_STEAL_CARD:
            nn_input[0, 83, :, :] = 1
        if state.game_phase == config.PHASE_ROAD_BUILDING:
            nn_input[0, 84, :, :] = 1
        if state.game_phase == config.PHASE_YEAR_OF_PLENTY:
            nn_input[0, 85, :, :] = 1
        if state.game_phase == config.PHASE_TRADE_RESPOND:
            nn_input[0, 86, :, :] = 1

        for s in range(54):
            if state.available_settlement_spot(s):
                nn_input[0, 87, config.vertex_to_nn_input[rotation][s][0],
                         config.vertex_to_nn_input[rotation][s][1]] = 1

        return nn_input
Example #25
# If loading an existing neural network, copy the config file to root
#if INITIAL_RUN_NUMBER != None:
#copyfile(run_archive_folder + env.name + '/run' + str(INITIAL_RUN_NUMBER).zfill(4) + '/config.py', './config.py')

if INITIAL_MEMORY_VERSION == None:
    memory = Memory(config.MEMORY_SIZE)
else:
    print('LOADING MEMORY VERSION ' + str(INITIAL_MEMORY_VERSION) + '...')
    memory = pickle.load(
        open(
            run_archive_folder + env.name + '/run' +
            str(INITIAL_RUN_NUMBER).zfill(4) + "/memory/memory" +
            str(INITIAL_MEMORY_VERSION).zfill(4) + ".p", "rb"))

current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                          (2, ) + env.grid_shape, env.action_size,
                          config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                       (2, ) + env.grid_shape, env.action_size,
                       config.HIDDEN_CNN_LAYERS)

best_player_version = 0
best_NN.model.set_weights(current_NN.model.get_weights())

copyfile('./config.py', run_folder + 'config.py')
# plot_model(current_NN.model, to_file=run_folder + 'models/model.png', show_shapes = True)

current_player = Agent('current_player', env.state_size, env.action_size,
                       config.MCTS_SIMS, config.CPUCT, current_NN)
best_player = Agent('best_player', env.state_size, env.action_size,
                    config.MCTS_SIMS, config.CPUCT, best_NN)
Example #26
                idx_end = mem_size
            memory_x[idx_start:idx_end] = x
            memory_y_value[idx_start:idx_end] = y_value
            memory_y_policy[idx_start:idx_end] = y_policy

            idx_start += len(x)
            n_games += 1
            pbar.update(len(x))
            pbar.set_postfix(game=n_games, tot_games=i + 1)

    # Build Model
    model = Residual_CNN(
        reg_const=0.0001,
        learning_rate=0.01,
        input_dim=(9, 9, 7),
        output_dim=9 * 9 + 1,
        hidden_layers=[{
            'filters': 128,
            'kernel_size': (3, 3)
        }] * 10,
    )
    try:
        model.load('Go', '0.1')
        print("Loaded Model")
    except:
        print("Training New Model")

    # Train Model
    model.model.compile(
        loss={
            'value_head': 'binary_crossentropy',
            'policy_head': 'categorical_crossentropy'
Example #27
import config

######## LOAD MEMORIES IF NECESSARY ########

if initialise.INITIAL_MEMORY_VERSION == None:
    memory = Memory(config.MEMORY_SIZE)
else:
    print('LOADING MEMORY VERSION ' + str(initialise.INITIAL_MEMORY_VERSION) + '...')
    memory = pickle.load( open( run_archive_folder + env.name + '/run' + str(initialise.INITIAL_RUN_NUMBER).zfill(4) + "/memory/memory" + str(initialise.INITIAL_MEMORY_VERSION).zfill(4) + ".p",   "rb" ) )

######## LOAD MODEL IF NECESSARY ########

# create an untrained neural network objects from the config file
current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + env.grid_shape,   env.action_size, config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) +  env.grid_shape,   env.action_size, config.HIDDEN_CNN_LAYERS)

#If loading an existing neural network, set the weights from that model
if initialise.INITIAL_MODEL_VERSION != None:
    best_player_version  = initialise.INITIAL_MODEL_VERSION
    print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
    m_tmp = best_NN.read(env.name, initialise.INITIAL_RUN_NUMBER, best_player_version)
    current_NN.model.set_weights(m_tmp.get_weights())
    best_NN.model.set_weights(m_tmp.get_weights())
#otherwise just ensure the weights on the two players are the same
else:
    best_player_version = 0
    best_NN.model.set_weights(current_NN.model.get_weights())

#copy the config file to the run folder
copyfile('./config.py', run_folder + 'config.py')
def retraining_worker(conn):
    from game import Game
    import initialise
    import config
    from config import PLAYER_COUNT, TEAM_SIZE, BATCH_SIZE, TRAINING_LOOPS
    from model import Residual_CNN, import_tf
    import_tf(1024 * 2)
    import numpy as np
    import time
    
    env = Game()
    
    ######## LOAD MODEL IF NECESSARY ########

    # create an untrained neural network objects from the config file
    if len(env.grid_shape) == 2:
        shape = (1,) + env.grid_shape
    else:
        shape = env.grid_shape

    if TEAM_SIZE > 1:
        current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, int(PLAYER_COUNT / TEAM_SIZE),
                            config.HIDDEN_CNN_LAYERS)
    else:
        current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape, PLAYER_COUNT,
                            config.HIDDEN_CNN_LAYERS)
    
    # If loading an existing neural network, set the weights from that model
    if initialise.INITIAL_MODEL_VERSION != None:
        m_tmp = current_NN.read(env.name, initialise.INITIAL_RUN_NUMBER, initialise.INITIAL_MODEL_VERSION)
        current_NN.model.set_weights(m_tmp.get_weights())
        
    train_overall_loss = []
        
    while 1:
        # request memory samples
        conn.send((TRAINING_LOOPS, BATCH_SIZE))
        
        # wait for memory samples
        conn.poll(None)
        data = conn.recv()
        
        if data:
            # train on sampled memories
            for i, minibatch in enumerate(data):
                training_states = np.array([current_NN.convertToModelInput(row['state']) for row in minibatch])
                training_targets = {'value_head': np.array([row['value'] for row in minibatch])}
                
                fit = current_NN.fit(training_states, training_targets, epochs=config.EPOCHS, verbose=1,\
                                        validation_split=0, batch_size=32)
                
                if i == 0:
                    init_loss = fit.history['loss'][0]
                
                train_overall_loss.append(round(fit.history['loss'][config.EPOCHS - 1], 4))
            
            """display.clear_output(wait=True)
            display.display(pl.gcf())
            pl.gcf().clear()
            time.sleep(.25)

            print('\n')
            current_NN.printWeightAverages()

            print("Max = {0}, Min = {1}, latest = {2}".format(max(self.train_overall_loss), min(self.train_overall_loss), self.train_overall_loss[-1]))
            print("Loss reduction: {}".format(init_loss - fit.history['loss'][0]))"""
            
            # send new current_NN weights
            conn.send((current_NN.model.get_weights(), train_overall_loss[-1]))
        else:
            time.sleep(10)
Example #29

# The idea is as follows:
# player1 has to play. Its Monte Carlo Tree Search runs N simulations in order to evaluate the best possible move.
# If N is very big, like 5000+, the estimate should be quite accurate. But if N is smaller, like 50, the estimate
# will be wrong, because the MCTS will stop after 2 or 3 moves at most, and the "expectations" at the leaves corresponding
# to these game states will not be estimated correctly.
# This is where the neural network comes in: if trained on a large number of states (with correct expectations),
# it will be able to predict the states at the leaves of the MCTS correctly, and the overall estimate of the MCTS
# will be much better.
# At the beginning, the predictions of the neural network will be wrong, so the results of the MCTS will be wrong as well,
# but after enough games both the neural network and the MCTS will improve and converge.
env = Game()

# create an untrained neural network objects from the config file
current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (1,) + config.GRID_SHAPE,   config.GRID_SHAPE[1], config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (1,) +  config.GRID_SHAPE,   config.GRID_SHAPE[1], config.HIDDEN_CNN_LAYERS)
best_NN.model.set_weights(current_NN.model.get_weights())

current_player = Agent(CURRENT_PLAYER_NAME, config.GRID_SHAPE[0] * config.GRID_SHAPE[1], config.GRID_SHAPE[1], config.MCTS_SIMS, config.CPUCT, current_NN)
best_player = Agent(BEST_PLAYER_NAME, config.GRID_SHAPE[0] * config.GRID_SHAPE[1], config.GRID_SHAPE[1], config.MCTS_SIMS, config.CPUCT, best_NN)

best_player_version = 0

iteration = 0

while 1:

    iteration += 1
    
    lg.logger_main.info('ITERATION NUMBER ' + str(iteration))
Example #30
        mem_version = initialise.INITIAL_MEMORY_VERSION

        if memories.MEMORY_SIZE != MEMORY_SIZE:
            memories.extension(MEMORY_SIZE)

    ######## LOAD MODEL IF NECESSARY ########
    # create an untrained neural network objects from the config file
    if len(env.grid_shape) == 2:
        shape = (1, ) + env.grid_shape
    else:
        shape = env.grid_shape

    if TEAM_SIZE > 1:
        tmp_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                              int(PLAYER_COUNT / TEAM_SIZE),
                              config.HIDDEN_CNN_LAYERS)
    else:
        tmp_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, shape,
                              PLAYER_COUNT, config.HIDDEN_CNN_LAYERS)

    # If loading an existing neural network, set the weights from that model
    if initialise.INITIAL_MODEL_VERSION != None:
        best_player_version = initialise.INITIAL_MODEL_VERSION
        print('LOADING MODEL VERSION ' +
              str(initialise.INITIAL_MODEL_VERSION) + '...')
        m_tmp = tmp_NN.read(env.name, initialise.INITIAL_RUN_NUMBER,
                            initialise.INITIAL_MODEL_VERSION)

        current_weights = {0: m_tmp.get_weights()}
        best_weights = m_tmp.get_weights()
# Load memory if necessary
if initialise.INITIAL_MEMORY_VERSION is None:
    memory = Memory(config.MEMORY_SIZE)
else:
    print('LOADING MEMORY VERSION ' + str(initialise.INITIAL_MEMORY_VERSION) +
          '...')
    memory = pickle.load(
        open(
            run_archive_folder + 'Model_' +
            str(initialise.INITIAL_RUN_NUMBER) + "/memory/memory" +
            str(initialise.INITIAL_MEMORY_VERSION).zfill(4) + ".p", "rb"))

# Create an untrained neural network objects from the config file
current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                          (2, ) + env.grid_shape, env.action_size,
                          config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                       (2, ) + env.grid_shape, env.action_size,
                       config.HIDDEN_CNN_LAYERS)

# Load existing neural network if needed
if initialise.INITIAL_MODEL_VERSION is not None:
    best_player_version = initialise.INITIAL_MODEL_VERSION
    print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) +
          '...')
    m_tmp = best_NN.read(initialise.INITIAL_RUN_NUMBER, best_player_version)
    current_NN.model.set_weights(m_tmp.get_weights())
    best_NN.model.set_weights(m_tmp.get_weights())

# Otherwise ensure the initial weights are the same for both players
Example #32
import os
import re
import pickle
import numpy as np
import initialise


def grp(pat, txt):
    r = re.search(pat, txt)
    return r.group(0) if r else '&'


color = ['b', 'r', 'c', 'm', 'g']

env = Game()
# create an untrained neural network objects from the config file
player1_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE,
                          (2, ) + env.grid_shape, env.action_size,
                          config.HIDDEN_CNN_LAYERS)
# player2_NN = CNN(config.REG_CONST, config.LEARNING_RATE, (2,) +  env.grid_shape,   env.action_size, config.HIDDEN_CNN_LAYERS)

path1 = './run/models/'
path3 = './Versions/'
path2 = './results/figures/valeur_double_menace/'
EPISODES = 5
try:
    os.mkdir(path2)
except:
    pass

version_list_CNN = os.listdir(path1)
version_list_CNN.sort(key=lambda l: grp('[0-9]+', l))