Example #1
def combine_prev_iters_train_data(board_x, pi_y, v_y, iteration_count):
    all_board_x, all_pi_y, all_v_y = [], [], []

    if len(board_x) > 0 and len(pi_y) > 0 and len(v_y) > 0:
        all_board_x.append(board_x)
        all_pi_y.append(pi_y)
        all_v_y.append(v_y)

    # Read data from previous iterations
    for i in range(iteration_count - PAST_ITER_COUNT, iteration_count):
        if i >= 0:
            filename = '{}/{}{}.h5'.format(SAVE_TRAIN_DATA_DIR,
                                           SAVE_TRAIN_DATA_PREF, i)

            if not os.path.exists(filename):
                utils.stress_message('{} does not exist!'.format(filename))
                continue

            with h5py.File(filename, 'r') as H:
                all_board_x.append(np.copy(H['board_x']))
                all_pi_y.append(np.copy(H['pi_y']))
                all_v_y.append(np.copy(H['v_y']))

    if len(all_board_x) > 0 and len(all_pi_y) > 0 and len(all_v_y) > 0:
        # Make a pool of training data from previous iterations
        board_x = np.vstack(all_board_x)
        pi_y = np.vstack(all_pi_y)
        v_y = np.hstack(all_v_y)  # hstack as v_y is 1D array

        return board_x, pi_y, v_y, len(
            all_board_x)  # Last retval is total iterations used

    # If no data at all: return empty training data
    return [], [], [], 0
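
Example #1 reads back files written by utils.save_train_data (used in Example #10). A minimal sketch of a compatible writer, assuming the same HDF5 keys and naming convention (the project's real helper is not shown):

import h5py
import numpy as np

def save_train_data(board_x, pi_y, v_y, version):
    # Assumed writer: stores the arrays under the keys read above
    # ('board_x', 'pi_y', 'v_y'); SAVE_TRAIN_DATA_DIR and SAVE_TRAIN_DATA_PREF
    # come from the project config.
    filename = '{}/{}{}.h5'.format(SAVE_TRAIN_DATA_DIR, SAVE_TRAIN_DATA_PREF, version)
    with h5py.File(filename, 'w') as H:
        H.create_dataset('board_x', data=np.asarray(board_x))
        H.create_dataset('pi_y', data=np.asarray(pi_y))
        H.create_dataset('v_y', data=np.asarray(v_y))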
Example #2
def save_weights(self, save_dir, prefix, version):
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    self.model.save_weights('{}/{}{:0>4}-weights.h5'.format(
        save_dir, prefix, version))
    utils.stress_message(
        'Saved model weights "{}{:0>4}-weights" to "{}"'.format(
            prefix, version, save_dir), True)
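
Example #10 later resolves these files with get_weights_path_from_version; a plausible sketch that simply mirrors the filename format above (an assumption, the real helper is not shown):

def get_weights_path_from_version(version):
    # Assumed helper mirroring save_weights' '{prefix}{version:0>4}-weights.h5' naming;
    # SAVE_WEIGHTS_DIR and MODEL_PREFIX come from the project config.
    return '{}/{}{:0>4}-weights.h5'.format(SAVE_WEIGHTS_DIR, MODEL_PREFIX, version)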
Example #3
def agent_greedy_match(model_path,
                       num_games,
                       verbose=False,
                       tree_tau=DET_TREE_TAU):
    player1 = 'ai'
    player2 = 'greedy'
    win_count = {player1: 0, player2: 0}
    model = load_agent(model_path)

    for i in range(num_games):
        if verbose:
            utils.stress_message('Game {}'.format(i + 1))

        if player1 == 'ai':
            game = Game(p1_type=player1,
                        p2_type=player2,
                        verbose=verbose,
                        model1=model)
        else:
            game = Game(p1_type=player1,
                        p2_type=player2,
                        verbose=verbose,
                        model2=model)

        winner = game.start()
        if winner is not None:
            if winner == PLAYER_ONE:
                win_count[player1] += 1
            else:
                win_count[player2] += 1
        # Swap
        player1, player2 = player2, player1

    if verbose:
        utils.stress_message(
            'Agent wins {} games and Greedy wins {} games out of {} total games'.
            format(win_count['ai'], win_count['greedy'], num_games))

    if win_count['ai'] > win_count['greedy']:
        return model_path
    elif win_count['greedy'] > win_count['ai']:
        return 'greedy'
    else:
        return None
Example #4
def train(model_path, board_x, pi_y, v_y, data_retention, version):
    # Configure TF to allocate GPU memory on demand
    import tensorflow as tf
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    session = tf.Session(config=tf_config)

    from keras.backend.tensorflow_backend import set_session
    set_session(session=session)

    np.random.seed()
    random.seed()

    message = 'At {}, Training Version {}, Number of examples: {} (retaining {:.1f}%)' \
        .format(utils.cur_time(), version, len(board_x), data_retention * 100)

    utils.stress_message(message, True)

    # Make sure path is not null if we are not training from scratch
    cur_model = ResidualCNN()
    if version > 0:
        assert model_path is not None
        cur_model.load_weights(model_path)

    # Sample a portion of training data before training
    sampled_idx = np.random.choice(len(board_x),
                                   int(data_retention * len(board_x)),
                                   replace=False)
    sampled_board_x = board_x[sampled_idx]
    sampled_pi_y = pi_y[sampled_idx]
    sampled_v_y = v_y[sampled_idx]

    cur_model.model.fit(sampled_board_x, [sampled_pi_y, sampled_v_y],
                        batch_size=BATCH_SIZE,
                        validation_split=0.05,
                        epochs=EPOCHS,
                        shuffle=True)

    cur_model.save_weights(SAVE_WEIGHTS_DIR, MODEL_PREFIX, version)
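
The GPU setup in this example uses the TensorFlow 1.x ConfigProto/Session API; a rough TensorFlow 2 equivalent of the allow_growth setting (a sketch, not part of the original code) would be:

import tensorflow as tf

# TF2 replacement for ConfigProto.gpu_options.allow_growth = True:
# enable on-demand memory growth on every visible GPU.
for gpu in tf.config.list_physical_devices('GPU'):
    tf.config.experimental.set_memory_growth(gpu, True)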
Example #5
def generate_self_play_in_parallel(model_path,
                                   num_self_play,
                                   num_workers,
                                   model2_path=None):
    # Process pool for parallelism
    process_pool = mp.Pool(processes=num_workers)
    work_share = num_self_play // num_workers
    worker_results = []

    # Send processes to generate self plays
    for i in range(num_workers):
        if i == num_workers - 1:
            work_share += (num_self_play % num_workers)

        # Send workers
        result_async = process_pool.apply_async(generate_self_play,
                                                args=(i + 1, model_path,
                                                      work_share, model2_path))
        worker_results.append(result_async)

    try:
        # Join processes and summarise the generated final list of games
        game_list = []
        for result in worker_results:
            game_list += result.get()

        process_pool.close()

    # Exit early if needed
    except KeyboardInterrupt:
        utils.stress_message('SIGINT caught, exiting')
        process_pool.terminate()
        process_pool.join()
        exit()

    process_pool.join()

    return game_list
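
The generate_self_play worker itself is not shown; from the arguments passed via apply_async and the way results are concatenated, its expected shape is roughly the following stub (the real function would run the actual self-play games):

def generate_self_play(worker_id, model_path, num_games, model2_path=None):
    # Stub only: each worker loads its model(s), plays num_games self-play games,
    # and returns a list of finished game records for the parent to concatenate.
    games = []
    # ... self-play loop would go here ...
    return games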
Example #6
def train(num_games, model, version):
    # print some useful message
    message = 'At {}, Starting to generate {} greedy self-play games for version {}'.format(
        utils.cur_time(), num_games, version)
    utils.stress_message(message, True)

    # Generate games
    games = generate_self_play_in_parallel(num_games, NUM_WORKERS)

    utils.stress_message('Preparing training examples from {} games'.format(
        len(games)))

    # Convert self-play games to training data
    board_x, pi_y, v_y = utils.convert_to_train_data(games)
    board_x, pi_y, v_y = utils.augment_train_data(board_x, pi_y, v_y)
    assert len(board_x) == len(pi_y) == len(v_y)

    print('\nNumber of training examples (Total): {}'.format(len(board_x)))

    # Sample a portion of training data
    num_train_data = int(G_DATA_RETENTION_RATE * len(board_x))

    sampled_idx = np.random.choice(len(board_x), num_train_data, replace=False)
    board_x_train = np.array(
        [board_x[sampled_idx[i]] for i in range(num_train_data)])
    pi_y_train = np.array(
        [pi_y[sampled_idx[i]] for i in range(num_train_data)])
    v_y_train = np.array([v_y[sampled_idx[i]] for i in range(num_train_data)])

    # board_x_val = np.array([board_x[sampled_idx[i]] for i in range(num_train_data, num_train_data + G_NUM_VAL_DATA)])
    # pi_y_val = np.array([pi_y[sampled_idx[i]] for i in range(num_train_data, num_train_data + G_NUM_VAL_DATA)])
    # v_y_val = np.array([v_y[sampled_idx[i]] for i in range(num_train_data, num_train_data + G_NUM_VAL_DATA)])

    assert len(board_x_train) == len(pi_y_train) == len(v_y_train)
    print('Number of training examples (Sampled): {}\n'.format(
        len(board_x_train)))

    # Make sure that the directory is available
    if not os.path.exists(SAVE_WEIGHTS_DIR):
        os.makedirs(SAVE_WEIGHTS_DIR)

    model.model.fit(
        board_x_train,
        [pi_y_train, v_y_train],
        # validation_data=((board_x_val, [pi_y_val, v_y_val]) if G_NUM_VAL_DATA > 0 else None),
        validation_split=G_VAL_SPLIT,
        batch_size=G_BATCH_SIZE,
        epochs=G_ITER_PER_EPOCH,
        shuffle=True)
    # callbacks=[EarlyStopping(monitor='val_loss', min_delta=0.001, patience=5),
    #            ModelCheckpoint(filepath=SAVE_WEIGHTS_DIR+'GreedyWeights-ep{epoch:02d}-val{val_loss:.2f}.h5',
    #                            save_best_only=True, save_weights_only=True)])

    model.save_weights(SAVE_WEIGHTS_DIR, G_MODEL_PREFIX, version=version)
    utils.stress_message(
        'GreedyModel Weights version {} saved to {}'.format(
            version, SAVE_WEIGHTS_DIR), True)
Example #7
def evaluate_in_parallel(model1, model2, num_games, num_workers):
    if model2 is not None:
        utils.stress_message(
            'Evaluating model "{}" against model "{}" on {} games'.format(
                model1, model2, num_games), True)

    # Process pool for parallelism
    process_pool = mp.Pool(processes=num_workers)
    work_share = num_games // num_workers
    worker_results = []

    # Send processes to run evaluation games
    for i in range(num_workers):
        if i == num_workers - 1:
            work_share += (num_games % num_workers)

        # Send workers
        result_async = process_pool.apply_async(evaluate,
                                                args=(i + 1, model1, model2,
                                                      work_share))
        worker_results.append(result_async)

    try:
        # Join processes and count games
        model1_wincount = model2_wincount = draw_count = 0
        for result in worker_results:
            game_stats = result.get()
            model1_wincount += game_stats[0]
            model2_wincount += game_stats[1]
            draw_count += game_stats[2]

        process_pool.close()

    # Exit early if needed
    except KeyboardInterrupt:
        utils.stress_message('SIGINT caught, exiting')
        process_pool.terminate()
        process_pool.join()
        exit()

    process_pool.join()

    utils.stress_message(
        'Overall, model1 "{}" wins {}/{} against model2 "{}"'.format(
            model1, model1_wincount, num_games, model2), True)

    return model1_wincount, model2_wincount, draw_count
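
Note that this variant expects the evaluate worker to return a (model1 wins, model2 wins, draws) triple, whereas Example #8 below expects a single win count. A stub of the triple-returning form, assumed from the indexing above (the real worker is not shown):

def evaluate(worker_id, model1, model2, num_games):
    # Stub only: play num_games evaluation games between model1 and model2
    # and return the triple unpacked by the caller above.
    model1_wincount = model2_wincount = draw_count = 0
    # ... evaluation games would be played here ...
    return model1_wincount, model2_wincount, draw_count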
Example #8
def evaluate_in_parallel(best_model, cur_model, num_games, num_workers):
    utils.stress_message(
        'Evaluating model "{}" against current best model "{}" on {} games'.
        format(cur_model, best_model, num_games), True)

    # Process pool for parallelism
    process_pool = mp.Pool(processes=num_workers)
    work_share = num_games // num_workers
    worker_results = []

    # Send processes to run evaluation games
    for i in range(num_workers):
        if i == num_workers - 1:
            work_share += (num_games % num_workers)

        # Send workers
        result_async = process_pool.apply_async(evaluate,
                                                args=(i + 1, best_model,
                                                      cur_model, work_share))
        worker_results.append(result_async)

    try:
        # Join processes and count games
        cur_model_wincount = 0
        for result in worker_results:
            cur_model_wincount += result.get()

        process_pool.close()

    # Exit early if needed
    except KeyboardInterrupt:
        utils.stress_message('SIGINT caught, exiting')
        process_pool.terminate()
        process_pool.join()
        exit()

    process_pool.join()

    utils.stress_message(
        'Overall, cur_model "{}" wins {}/{} against best_model "{}"'.format(
            cur_model, cur_model_wincount, num_games, best_model), True)

    return cur_model_wincount
Example #9
import utils
import numpy as np
from game import Game
from config import *
"""
Run this file directly from terminal if you
want to play human-vs-greedy game
"""

if __name__ == '__main__':
    count = {PLAYER_ONE: 0, PLAYER_TWO: 0}
    num_games = 50
    end_states = []
    for i in range(num_games):
        utils.stress_message('Game {}'.format(i + 1))
        game = Game(p1_type='greedy', p2_type='greedy', verbose=False)
        winner = game.start()
        if winner is not None:
            count[winner] += 1

        end_states.append(game.board.board[..., 0])

    unique_states = np.unique(np.array(end_states), axis=0)
    print('\n{} end game states, {} of them are unique\n'.format(
        num_games, len(unique_states)))

    print('Player {} wins {} matches'.format(PLAYER_ONE, count[PLAYER_ONE]))
    print('Player {} wins {} matches'.format(PLAYER_TWO, count[PLAYER_TWO]))
Example #10
def evolve(cur_model_path, other_opponent_for_selfplay, iteration_count,
           best_model):

    while True:
        # print some useful message
        message = 'At {}, Starting to generate self-plays for Version {}'.format(
            utils.cur_time(), iteration_count)
        utils.stress_message(message, True)

        ##########################
        ##### GENERATE PLAYS #####
        ##########################

        if other_opponent_for_selfplay is not None:
            print(
                '(Generating games using current model {} and other model {})'.
                format(cur_model_path, other_opponent_for_selfplay))
            games = generate_self_play_in_parallel(
                cur_model_path,
                NUM_SELF_PLAY,
                NUM_WORKERS,
                model2_path=other_opponent_for_selfplay)
        elif best_model is not None:
            print('(Generating games using given best model: {})'.format(
                best_model))
            games = generate_self_play_in_parallel(best_model, NUM_SELF_PLAY,
                                                   NUM_WORKERS)
        else:
            # # Use previous version to generate selfplay if necessary
            # model2_path = None
            # if SELF_PLAY_DIFF_MODEL and ITERATION_COUNT > 1:
            #     model2_version = get_rand_prev_version(ITERATION_COUNT)
            #     model2_path = get_weights_path_from_version(model2_version)
            #     utils.stress_message('.. and vs. Version {}'.format(model2_version))
            #
            # games = generate_self_play_in_parallel(cur_model_path, NUM_SELF_PLAY, NUM_WORKERS, model2_path)
            games = generate_self_play_in_parallel(cur_model_path,
                                                   NUM_SELF_PLAY, NUM_WORKERS)

        ##########################
        ##### PREPARING DATA #####
        ##########################

        # Convert self-play games to training data
        board_x, pi_y, v_y = utils.convert_to_train_data(games)
        board_x, pi_y, v_y = utils.augment_train_data(board_x, pi_y, v_y)

        # Numpyify and save for later iterations
        board_x, pi_y, v_y = np.array(board_x), np.array(pi_y), np.array(v_y)
        if len(board_x) > 0 and len(pi_y) > 0 and len(v_y) > 0:
            utils.save_train_data(board_x, pi_y, v_y, version=iteration_count)

        # Get prev iters training data
        board_x, pi_y, v_y, data_iters_used = combine_prev_iters_train_data(
            board_x, pi_y, v_y, iteration_count)
        assert len(board_x) == len(pi_y) == len(v_y)

        # Train only if there is data
        if data_iters_used == 0:
            utils.stress_message(
                'No training data for iteration {}! Re-iterating...'.format(
                    iteration_count))
            continue

        # Calculate training set retention rate including current iteration; use default if too high
        data_retention_rate = min(1. / data_iters_used,
                                  DEF_DATA_RETENTION_RATE)

        #################
        ##### TRAIN #####
        #################

        # Use a *new process* to train since we do NOT want to load TF in the parent process
        training_process = mp.Process(target=train,
                                      args=(cur_model_path, board_x, pi_y, v_y,
                                            data_retention_rate,
                                            iteration_count))
        training_process.start()
        training_process.join()

        # Update path variable since we made a new version
        # cur_model_path = get_model_path_from_version(ITERATION_COUNT)
        cur_model_path = get_weights_path_from_version(iteration_count)

        ####################
        ##### EVALUATE #####
        ####################

        if best_model is not None:
            cur_model_wincount = evaluate_in_parallel(best_model,
                                                      cur_model_path,
                                                      EVAL_GAMES, NUM_WORKERS)
            if cur_model_wincount > int(0.55 * EVAL_GAMES):
                best_model = cur_model_path
                utils.stress_message(
                    'Now using {} as the best model'.format(best_model))
            else:
                utils.stress_message(
                    'Output model of this iteration is not better; retaining {} as the best model'
                    .format(best_model), True)

        # Update version number
        iteration_count += 1
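
Note the strict inequality in the 55% acceptance gate above: if, for instance, EVAL_GAMES were 100, then int(0.55 * 100) = 55, so the candidate model must win at least 56 of the 100 evaluation games before it replaces the best model.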
Example #11
        dest='other_opponent_for_selfplay',
        help='whether self-play is generated by both the current model and the best model; '
             'if not, generate with only the best model')
    return parser


if __name__ == '__main__':
    parser = build_parser()
    args = parser.parse_args()
    model_path = args.model_path
    best_model = args.best_model_path
    other_opponent_for_selfplay = args.other_opponent_for_selfplay

    try:
        # Read the count from file name
        iteration_count = utils.find_version_given_filename(model_path) + 1
    except Exception:
        iteration_count = 0

    if best_model is not None:
        print('\nBest model {} specified!\n'.format(best_model))
    if other_opponent_for_selfplay is not None:
        print('\nOpponent {} for selfplay specified\n'.format(
            other_opponent_for_selfplay))

    utils.stress_message(
        'Starting training from version {}'.format(iteration_count), True)
    evolve(model_path, other_opponent_for_selfplay, iteration_count,
           best_model)
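
The snippet above begins mid-way through build_parser. A plausible reconstruction, assuming argparse and only the dest names that are visible in the code (the actual option strings, defaults, and help texts may differ):

import argparse

def build_parser():
    # Hypothetical reconstruction: only the dest names are known from the snippet.
    parser = argparse.ArgumentParser()
    parser.add_argument('--model-path', dest='model_path', default=None,
                        help='path to the current model weights')
    parser.add_argument('--best-model-path', dest='best_model_path', default=None,
                        help='path to the best model weights used for self-play and evaluation')
    parser.add_argument('--other-opponent-for-selfplay',
                        dest='other_opponent_for_selfplay', default=None,
                        help='optional opponent model used alongside the current model for '
                             'self-play; if not given, generate with only the best model')
    return parser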
Example #12
def train():
    # Get Data
    images, labels = load_all_images_and_labels()

    # Shuffle
    utils.shuffle_data(images, labels)

    print()
    print('Total number of examples:', len(images))

    # Split for validation
    train_images, val_images = np.split(images, [-NUM_VAL_DATA])
    train_labels, val_labels = np.split(labels, [-NUM_VAL_DATA])

    val_images, val_labels = np.copy(val_images), np.copy(val_labels)
    val_images, val_labels = utils.preprocess(val_images, val_labels)

    print('Number of training examples:', len(train_images))
    print('Number of validation examples:', len(val_images))
    print()

    # Grid search
    learning_rates = sorted(set([LEARNING_RATE, *LR_SEARCH]))
    batch_sizes = [
        BATCH_SIZE, BATCH_SIZE // 2, BATCH_SIZE // 4, BATCH_SIZE * 2,
        BATCH_SIZE * 4
    ]

    for lr in learning_rates:
        for bs in batch_sizes:
            message = 'At {}, Training model with LR {} and Batch Size {}'.format(
                utils.cur_time(), lr, bs)
            utils.stress_message(message, extra_newline=True)

            # Create data generator: preprocessing is done in the generator
            datagen = train_data_generator(train_images, train_labels, bs)

            # Build model and train
            model = build_resnet_model()

            model.compile(optimizer=SGD(lr=lr, momentum=0.9, nesterov=True),
                          loss='categorical_crossentropy',
                          metrics=['accuracy'])

            model.fit_generator(
                datagen,
                steps_per_epoch=len(train_images) // bs,
                epochs=NUM_EPOCHS,
                validation_data=(val_images, val_labels),
                callbacks=[
                    ReduceLROnPlateau(monitor='val_loss',
                                      patience=4,
                                      verbose=1),
                    EarlyStopping(monitor='val_loss',
                                  min_delta=1e-5,
                                  patience=8,
                                  verbose=1),
                    ModelCheckpoint(
                        'checkpoints/resnet-lr{}-bs{}-epoch{{epoch:02d}}-val_loss{{val_loss:.3f}}-val_acc{{val_acc:.2f}}.h5'
                        .format(lr, bs),
                        save_best_only=True,
                        verbose=1)
                ],
                shuffle=True)

            model.save('../saved_models/resnet_v4_lr{}_bs{}.h5'.format(lr, bs))

            # Load test data
            X_test, y_test = utils.preprocess(*load_val_data())
            test_loss, test_acc = model.evaluate(X_test, y_test)
            utils.stress_message(
                'Test Loss: {}, Test Accuracy: {}'.format(test_loss, test_acc),
                True)


def load_agent(model_path, verbose=False):
    # Assumed signature and model class: the original snippet starts mid-function,
    # so the header and ResidualCNN() construction are inferred from how
    # load_agent is called in the other examples.
    model = ResidualCNN()
    if verbose:
        print('\nLoading model from path {}'.format(model_path))
    model.load_weights(model_path)
    if verbose:
        print('Model is loaded successfully\n')

    return model


def human_agent_match(model_path, verbose=False, tree_tau=DET_TREE_TAU):
    model = load_agent(model_path)
    game = Game(p1_type='ai', p2_type='human', verbose=verbose, model1=model)
    winner = game.start()
    return winner


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('\nUsage: python3 human_vs_ai.py <Model Path> [<tree tau>]\n')
        exit()

    model_path = sys.argv[1]
    tt = DET_TREE_TAU

    if len(sys.argv) == 3:
        tt = float(sys.argv[2])
        utils.stress_message('Using tree_tau {} initially'.format(tt))


    human_agent_match(model_path, verbose=True, tree_tau=tt)