def combine_prev_iters_train_data(board_x, pi_y, v_y, iteration_count):
    all_board_x, all_pi_y, all_v_y = [], [], []
    if len(board_x) > 0 and len(pi_y) > 0 and len(v_y) > 0:
        all_board_x.append(board_x)
        all_pi_y.append(pi_y)
        all_v_y.append(v_y)

    # Read data from previous iterations
    for i in range(iteration_count - PAST_ITER_COUNT, iteration_count):
        if i >= 0:
            filename = '{}/{}{}.h5'.format(SAVE_TRAIN_DATA_DIR, SAVE_TRAIN_DATA_PREF, i)
            if not os.path.exists(filename):
                utils.stress_message('{} does not exist!'.format(filename))
                continue
            with h5py.File(filename, 'r') as H:
                all_board_x.append(np.copy(H['board_x']))
                all_pi_y.append(np.copy(H['pi_y']))
                all_v_y.append(np.copy(H['v_y']))

    if len(all_board_x) > 0 and len(all_pi_y) > 0 and len(all_v_y) > 0:
        # Make a pool of training data from previous iterations
        board_x = np.vstack(all_board_x)
        pi_y = np.vstack(all_pi_y)
        v_y = np.hstack(all_v_y)    # hstack as v_y is a 1D array
        return board_x, pi_y, v_y, len(all_board_x)    # Last retval is the total number of iterations used

    # If no data at all: return empty training data
    return [], [], [], 0
def save_weights(self, save_dir, prefix, version):
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    self.model.save_weights('{}/{}{:0>4}-weights.h5'.format(save_dir, prefix, version))
    utils.stress_message(
        'Saved model weights "{}{:0>4}-weights" to "{}"'.format(prefix, version, save_dir), True)
def agent_greedy_match(model_path, num_games, verbose=False, tree_tau=DET_TREE_TAU):
    player1 = 'ai'
    player2 = 'greedy'
    win_count = {player1: 0, player2: 0}
    model = load_agent(model_path)

    for i in range(num_games):
        if verbose:
            utils.stress_message('Game {}'.format(i + 1))

        if player1 == 'ai':
            game = Game(p1_type=player1, p2_type=player2, verbose=verbose, model1=model)
        else:
            game = Game(p1_type=player1, p2_type=player2, verbose=verbose, model2=model)

        winner = game.start()
        if winner is not None:
            if winner == PLAYER_ONE:
                win_count[player1] += 1
            else:
                win_count[player2] += 1

        # Swap sides so the agent alternates between playing first and second
        player1, player2 = player2, player1

    if verbose:
        utils.stress_message('Agent wins {} games and Greedy wins {} games out of {} total'.format(
            win_count['ai'], win_count['greedy'], num_games))

    if win_count['ai'] > win_count['greedy']:
        return model_path
    elif win_count['greedy'] > win_count['ai']:
        return 'greedy'
    else:
        return None
def train(model_path, board_x, pi_y, v_y, data_retention, version):
    # Limit TF GPU memory usage: allocate on demand instead of grabbing all memory up front
    import tensorflow as tf
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    session = tf.Session(config=tf_config)
    from keras.backend.tensorflow_backend import set_session
    set_session(session=session)

    # Re-seed the RNGs since this function runs in a freshly spawned process
    np.random.seed()
    random.seed()

    message = 'At {}, Training Version {}, Number of examples: {} (retaining {:.1f}%)' \
        .format(utils.cur_time(), version, len(board_x), data_retention * 100)
    utils.stress_message(message, True)

    # Make sure the path is not null if we are not training from scratch
    cur_model = ResidualCNN()
    if version > 0:
        assert model_path is not None
        cur_model.load_weights(model_path)

    # Sample a portion of the training data before training
    sampled_idx = np.random.choice(len(board_x), int(data_retention * len(board_x)), replace=False)
    sampled_board_x = board_x[sampled_idx]
    sampled_pi_y = pi_y[sampled_idx]
    sampled_v_y = v_y[sampled_idx]

    cur_model.model.fit(sampled_board_x, [sampled_pi_y, sampled_v_y],
                        batch_size=BATCH_SIZE,
                        validation_split=0.05,
                        epochs=EPOCHS,
                        shuffle=True)

    cur_model.save_weights(SAVE_WEIGHTS_DIR, MODEL_PREFIX, version)
def generate_self_play_in_parallel(model_path, num_self_play, num_workers, model2_path=None):
    # Process pool for parallelism
    process_pool = mp.Pool(processes=num_workers)
    work_share = num_self_play // num_workers
    worker_results = []

    # Dispatch workers to generate self-play games
    for i in range(num_workers):
        if i == num_workers - 1:
            work_share += (num_self_play % num_workers)

        result_async = process_pool.apply_async(
            generate_self_play, args=(i + 1, model_path, work_share, model2_path))
        worker_results.append(result_async)

    try:
        # Join processes and merge the generated games into one list
        game_list = []
        for result in worker_results:
            game_list += result.get()
        process_pool.close()

    # Exit early if needed
    except KeyboardInterrupt:
        utils.stress_message('SIGINT caught, exiting')
        process_pool.terminate()
        process_pool.join()
        exit()

    process_pool.join()
    return game_list
def train(num_games, model, version):
    # Print some useful information
    message = 'At {}, Starting to generate {} greedy self-play games for version {}'.format(
        utils.cur_time(), num_games, version)
    utils.stress_message(message, True)

    # Generate games
    games = generate_self_play_in_parallel(num_games, NUM_WORKERS)
    utils.stress_message('Preparing training examples from {} games'.format(len(games)))

    # Convert self-play games to training data
    board_x, pi_y, v_y = utils.convert_to_train_data(games)
    board_x, pi_y, v_y = utils.augment_train_data(board_x, pi_y, v_y)
    assert len(board_x) == len(pi_y) == len(v_y)
    print('\nNumber of training examples (Total): {}'.format(len(board_x)))

    # Sample a portion of the training data
    num_train_data = int(G_DATA_RETENTION_RATE * len(board_x))
    sampled_idx = np.random.choice(len(board_x), num_train_data, replace=False)
    board_x_train = np.array([board_x[sampled_idx[i]] for i in range(num_train_data)])
    pi_y_train = np.array([pi_y[sampled_idx[i]] for i in range(num_train_data)])
    v_y_train = np.array([v_y[sampled_idx[i]] for i in range(num_train_data)])
    # board_x_val = np.array([board_x[sampled_idx[i]] for i in range(num_train_data, num_train_data + G_NUM_VAL_DATA)])
    # pi_y_val = np.array([pi_y[sampled_idx[i]] for i in range(num_train_data, num_train_data + G_NUM_VAL_DATA)])
    # v_y_val = np.array([v_y[sampled_idx[i]] for i in range(num_train_data, num_train_data + G_NUM_VAL_DATA)])

    assert len(board_x_train) == len(pi_y_train) == len(v_y_train)
    print('Number of training examples (Sampled): {}\n'.format(len(board_x_train)))

    # Make sure that the save directory is available
    if not os.path.exists(SAVE_WEIGHTS_DIR):
        os.makedirs(SAVE_WEIGHTS_DIR)

    model.model.fit(board_x_train, [pi_y_train, v_y_train],
                    # validation_data=((board_x_val, [pi_y_val, v_y_val]) if G_NUM_VAL_DATA > 0 else None),
                    validation_split=G_VAL_SPLIT,
                    batch_size=G_BATCH_SIZE,
                    epochs=G_ITER_PER_EPOCH,
                    shuffle=True)
    # callbacks=[EarlyStopping(monitor='val_loss', min_delta=0.001, patience=5),
    #            ModelCheckpoint(filepath=SAVE_WEIGHTS_DIR+'GreedyWeights-ep{epoch:02d}-val{val_loss:.2f}.h5',
    #                            save_best_only=True, save_weights_only=True)])

    model.save_weights(SAVE_WEIGHTS_DIR, G_MODEL_PREFIX, version=version)
    utils.stress_message(
        'GreedyModel Weights version {} saved to {}'.format(version, SAVE_WEIGHTS_DIR), True)
def evaluate_in_parallel(model1, model2, num_games, num_workers):
    if model2 is not None:
        utils.stress_message(
            'Evaluating model "{}" against model "{}" on {} games'.format(
                model1, model2, num_games), True)

    # Process pool for parallelism
    process_pool = mp.Pool(processes=num_workers)
    work_share = num_games // num_workers
    worker_results = []

    # Dispatch workers to play evaluation games
    for i in range(num_workers):
        if i == num_workers - 1:
            work_share += (num_games % num_workers)

        result_async = process_pool.apply_async(
            evaluate, args=(i + 1, model1, model2, work_share))
        worker_results.append(result_async)

    try:
        # Join processes and count games
        model1_wincount = model2_wincount = draw_count = 0
        for result in worker_results:
            game_stats = result.get()
            model1_wincount += game_stats[0]
            model2_wincount += game_stats[1]
            draw_count += game_stats[2]
        process_pool.close()

    # Exit early if needed
    except KeyboardInterrupt:
        utils.stress_message('SIGINT caught, exiting')
        process_pool.terminate()
        process_pool.join()
        exit()

    process_pool.join()

    utils.stress_message(
        'Overall, model1 "{}" wins {}/{} against model2 "{}"'.format(
            model1, model1_wincount, num_games, model2), True)

    return model1_wincount, model2_wincount, draw_count
def evaluate_in_parallel(best_model, cur_model, num_games, num_workers):
    utils.stress_message(
        'Evaluating model "{}" against current best model "{}" on {} games'.format(
            cur_model, best_model, num_games), True)

    # Process pool for parallelism
    process_pool = mp.Pool(processes=num_workers)
    work_share = num_games // num_workers
    worker_results = []

    # Dispatch workers to play evaluation games
    for i in range(num_workers):
        if i == num_workers - 1:
            work_share += (num_games % num_workers)

        result_async = process_pool.apply_async(
            evaluate, args=(i + 1, best_model, cur_model, work_share))
        worker_results.append(result_async)

    try:
        # Join processes and count games
        cur_model_wincount = 0
        for result in worker_results:
            cur_model_wincount += result.get()
        process_pool.close()

    # Exit early if needed
    except KeyboardInterrupt:
        utils.stress_message('SIGINT caught, exiting')
        process_pool.terminate()
        process_pool.join()
        exit()

    process_pool.join()

    utils.stress_message(
        'Overall, cur_model "{}" wins {}/{} against best_model "{}"'.format(
            cur_model, cur_model_wincount, num_games, best_model), True)

    return cur_model_wincount
import utils
import numpy as np

from game import Game
from config import *

"""
Run this file directly from the terminal if you want to play greedy-vs-greedy games
"""

if __name__ == '__main__':
    count = {PLAYER_ONE: 0, PLAYER_TWO: 0}
    num_games = 50
    end_states = []

    for i in range(num_games):
        utils.stress_message('Game {}'.format(i + 1))
        game = Game(p1_type='greedy', p2_type='greedy', verbose=False)
        winner = game.start()
        if winner is not None:
            count[winner] += 1
        end_states.append(game.board.board[..., 0])

    unique_states = np.unique(np.array(end_states), axis=0)
    print('\n{} end game states, {} of them are unique\n'.format(num_games, len(unique_states)))
    print('Player {} wins {} matches'.format(PLAYER_ONE, count[PLAYER_ONE]))
    print('Player {} wins {} matches'.format(PLAYER_TWO, count[PLAYER_TWO]))
def evolve(cur_model_path, other_opponent_for_selfplay, iteration_count, best_model):
    while True:
        # Print some useful information
        message = 'At {}, Starting to generate self-plays for Version {}'.format(
            utils.cur_time(), iteration_count)
        utils.stress_message(message, True)

        ##########################
        ##### GENERATE PLAYS #####
        ##########################

        if other_opponent_for_selfplay is not None:
            print('(Generating games using current model {} and other model {})'.format(
                cur_model_path, other_opponent_for_selfplay))
            games = generate_self_play_in_parallel(cur_model_path, NUM_SELF_PLAY, NUM_WORKERS,
                                                   model2_path=other_opponent_for_selfplay)
        elif best_model is not None:
            print('(Generating games using given best model: {})'.format(best_model))
            games = generate_self_play_in_parallel(best_model, NUM_SELF_PLAY, NUM_WORKERS)
        else:
            # # Use a previous version to generate self-play if necessary
            # model2_path = None
            # if SELF_PLAY_DIFF_MODEL and ITERATION_COUNT > 1:
            #     model2_version = get_rand_prev_version(ITERATION_COUNT)
            #     model2_path = get_weights_path_from_version(model2_version)
            #     utils.stress_message('.. and vs. Version {}'.format(model2_version))
            #
            # games = generate_self_play_in_parallel(cur_model_path, NUM_SELF_PLAY, NUM_WORKERS, model2_path)
            games = generate_self_play_in_parallel(cur_model_path, NUM_SELF_PLAY, NUM_WORKERS)

        ##########################
        ##### PREPARING DATA #####
        ##########################

        # Convert self-play games to training data
        board_x, pi_y, v_y = utils.convert_to_train_data(games)
        board_x, pi_y, v_y = utils.augment_train_data(board_x, pi_y, v_y)

        # Numpyify and save for later iterations
        board_x, pi_y, v_y = np.array(board_x), np.array(pi_y), np.array(v_y)
        if len(board_x) > 0 and len(pi_y) > 0 and len(v_y) > 0:
            utils.save_train_data(board_x, pi_y, v_y, version=iteration_count)

        # Pool in training data from previous iterations
        board_x, pi_y, v_y, data_iters_used = combine_prev_iters_train_data(
            board_x, pi_y, v_y, iteration_count)
        assert len(board_x) == len(pi_y) == len(v_y)

        # Train only if there is data
        if data_iters_used == 0:
            utils.stress_message('No training data for iteration {}! Re-iterating...'.format(iteration_count))
            continue

        # Calculate the training set retention rate including the current iteration; use the default if too high
        data_retention_rate = min(1. / data_iters_used, DEF_DATA_RETENTION_RATE)

        #################
        ##### TRAIN #####
        #################

        # Use a *new process* to train since we DON'T want to load TF in the parent process
        training_process = mp.Process(target=train,
                                      args=(cur_model_path, board_x, pi_y, v_y,
                                            data_retention_rate, iteration_count))
        training_process.start()
        training_process.join()

        # Update the path variable since we made a new version
        # cur_model_path = get_model_path_from_version(ITERATION_COUNT)
        cur_model_path = get_weights_path_from_version(iteration_count)

        ####################
        ##### EVALUATE #####
        ####################

        if best_model is not None:
            cur_model_wincount = evaluate_in_parallel(best_model, cur_model_path,
                                                      EVAL_GAMES, NUM_WORKERS)
            if cur_model_wincount > int(0.55 * EVAL_GAMES):
                best_model = cur_model_path
                utils.stress_message('Now using {} as the best model'.format(best_model))
            else:
                utils.stress_message(
                    'Output model of this iteration is not better; retaining {} as the best model'
                    .format(best_model), True)

        # Update version number
        iteration_count += 1
        dest='other_opponent_for_selfplay',
        help='whether self-play is generated by both the current model and the best model; '
             'if not given, generate with only the best model')
    return parser


if __name__ == '__main__':
    parser = build_parser()
    args = parser.parse_args()

    model_path = args.model_path
    best_model = args.best_model_path
    other_opponent_for_selfplay = args.other_opponent_for_selfplay

    try:
        # Read the iteration count from the file name
        iteration_count = utils.find_version_given_filename(model_path) + 1
    except:
        iteration_count = 0

    if best_model is not None:
        print('\nBest model {} specified!\n'.format(best_model))

    if other_opponent_for_selfplay is not None:
        print('\nOpponent {} for selfplay specified\n'.format(other_opponent_for_selfplay))

    utils.stress_message('Starting training from version: {}'.format(iteration_count), True)

    evolve(model_path, other_opponent_for_selfplay, iteration_count, best_model)
def train():
    # Get data
    images, labels = load_all_images_and_labels()

    # Shuffle
    utils.shuffle_data(images, labels)

    print()
    print('Total number of examples:', len(images))

    # Split off a validation set
    train_images, val_images = np.split(images, [-NUM_VAL_DATA])
    train_labels, val_labels = np.split(labels, [-NUM_VAL_DATA])
    val_images, val_labels = np.copy(val_images), np.copy(val_labels)
    val_images, val_labels = utils.preprocess(val_images, val_labels)

    print('Number of training examples:', len(train_images))
    print('Number of validation examples:', len(val_images))
    print()

    # Grid search over learning rates and batch sizes
    learning_rates = sorted(set([LEARNING_RATE, *LR_SEARCH]))
    batch_sizes = [BATCH_SIZE, BATCH_SIZE // 2, BATCH_SIZE // 4, BATCH_SIZE * 2, BATCH_SIZE * 4]

    for lr in learning_rates:
        for bs in batch_sizes:
            message = 'At {}, Training model with LR {} and Batch Size {}'.format(utils.cur_time(), lr, bs)
            utils.stress_message(message, extra_newline=True)

            # Create data generator: preprocessing is done in the generator
            datagen = train_data_generator(train_images, train_labels, bs)

            # Build model and train
            model = build_resnet_model()
            model.compile(optimizer=SGD(lr=lr, momentum=0.9, nesterov=True),
                          loss='categorical_crossentropy',
                          metrics=['accuracy'])

            model.fit_generator(
                datagen,
                steps_per_epoch=len(train_images) // bs,
                epochs=NUM_EPOCHS,
                validation_data=(val_images, val_labels),
                callbacks=[
                    ReduceLROnPlateau(monitor='val_loss', patience=4, verbose=1),
                    EarlyStopping(monitor='val_loss', min_delta=1e-5, patience=8, verbose=1),
                    ModelCheckpoint(
                        'checkpoints/resnet-lr{}-bs{}-epoch{{epoch:02d}}-val_loss{{val_loss:.3f}}-val_acc{{val_acc:.2f}}.h5'
                        .format(lr, bs),
                        save_best_only=True, verbose=1)
                ],
                shuffle=True)

            model.save('../saved_models/resnet_v4_lr{}_bs{}.h5'.format(lr, bs))

            # Load test data and evaluate
            X_test, y_test = utils.preprocess(*load_val_data())
            test_loss, test_acc = model.evaluate(X_test, y_test)
            utils.stress_message('Test Loss: {}, Test Accuracy: {}'.format(test_loss, test_acc), True)
    if verbose:
        print('\nLoading model from path {}'.format(model_path))

    model.load_weights(model_path)

    if verbose:
        print('Model is loaded successfully\n')

    return model


def human_agent_match(model_path, verbose=False, tree_tau=DET_TREE_TAU):
    model = load_agent(model_path)
    game = Game(p1_type='ai', p2_type='human', verbose=verbose, model1=model)
    winner = game.start()
    return winner


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('\nUsage: python3 human_vs_ai.py <Model Path> [<tree tau>]\n')
        exit()

    model_path = sys.argv[1]

    tt = DET_TREE_TAU
    if len(sys.argv) == 3:
        tt = float(sys.argv[2])
        utils.stress_message('Using tree_tau {} initially'.format(tt))

    human_agent_match(model_path, verbose=True, tree_tau=tt)