import gc
from itertools import product

import numpy as np
from joblib import Parallel, delayed

# Assumed module-level names: exp_name, num_cores, generate_init_population,
# play_atari, compute_novelty, mutate, save_population, save_archive.


# Variant 1: truncate by novelty, then by game score, and pick the elite by
# cross-validating the top two candidates on held-out validation episodes.
def evolve_solution(game_name, action_space_size, pop_size=1000 + 1, archive_p=0.01,
                    archive_dump=None, n_generations=1000, training_frames=20000,
                    training_episodes=(0,), starting_generation=0, T=40, archive=None,
                    population=None, validation_frames=10000, validation_episodes=(1,)):
    in_shape = (84, 84, 4)
    out_shape = (action_space_size,)
    init_connect_rate = 1.0
    num_episodes = len(training_episodes)
    if population is None:
        population = generate_init_population(in_shape, out_shape, pop_size,
                                              init_connect_rate)
    if archive is None:
        archive = [[] for _ in range(num_episodes)]
    rnd_int = np.random.RandomState(seed=1).randint
    mean_game_scores = []
    best_game_scores = []
    mean_validation_scores = []
    for gen in range(starting_generation, n_generations):
        print('\nGeneration', gen)
        save_population(population, gen, exp_name)

        # Clear the archive every archive_dump generations, if requested
        if archive_dump is not None and gen % archive_dump == 0:
            archive = [[] for _ in range(num_episodes)]

        # Evaluate the whole population in parallel
        results = Parallel(n_jobs=num_cores)(
            delayed(play_atari)(ind, game_name, training_frames, training_episodes)
            for ind in population
        )

        # Randomly select archive_p * pop_size agent behaviours for archiving
        archive_indices = np.random.choice(len(results), size=int(archive_p * pop_size))
        for i in archive_indices:
            _, _, actions_performed = results[i]
            for episode in range(num_episodes):
                episode_actions = actions_performed[episode]
                archive[episode].append(''.join(str(a) for a in episode_actions))
        save_archive(archive, gen, exp_name)

        # Compute novelty scores
        print('Computing novelty scores.')
        results_novelty = []
        for ind, game_score, actions_performed in results:
            results_novelty.append(
                compute_novelty(ind, game_score, actions_performed, archive))

        # Retrieve game scores and compute the mean
        game_scores = [x[1] for x in results_novelty]
        mean_game_score = np.mean(game_scores)
        mean_game_scores.append(mean_game_score)

        # Find the most novel individual
        results_novelty = sorted(results_novelty, key=lambda x: x[3])
        (n_elite_ind, n_elite_game_score, n_elite_actions_performed,
         n_elite_novelty_score) = results_novelty[-1]

        # Find the individual with the best game score
        (g_elite_ind, g_elite_game_score, g_elite_actions_performed,
         g_elite_novelty_score) = sorted(results_novelty, key=lambda x: x[1])[-1]
        best_game_scores.append(g_elite_game_score)

        with open('./{}/{}-elite_game-{}-score-{}.txt'.format(
                exp_name, game_name, gen, g_elite_game_score), 'w') as file:
            file.write(g_elite_ind.to_JSON())
        with open('./{}/{}-elite_novelty-{}-score-{}.txt'.format(
                exp_name, game_name, gen, n_elite_game_score), 'w') as file:
            file.write(n_elite_ind.to_JSON())

        print('Best Score (Novelty): {} {:.2f} {:.2f}'.format(
            n_elite_ind, n_elite_game_score, n_elite_novelty_score))
        print('Best Score (Game Score): {} {:.2f} {:.2f}'.format(
            g_elite_ind, g_elite_game_score, g_elite_novelty_score))
        print('Previous Mean Game Scores: {}'.format(mean_game_scores))
        print('Current Mean Game Score: {:.2f}'.format(mean_game_score))
        print('Previous Best Game Scores: {}'.format(best_game_scores))
        print('Current Best Game Score: {}'.format(best_game_scores[-1]))

        # Truncate based on novelty score
        pop_trunc = results_novelty[-T:]
        # Then select the half with the highest game score (not in the main experiment)
        pop_trunc = sorted(pop_trunc, key=lambda x: x[1])[-T // 2:]

        # Use held-out validation episodes to select the elite
        validation_pop = [result[0] for result in
                          sorted(pop_trunc, key=lambda x: x[1])[-2:]]
        print([(x.init_seed, ns) for x, _, _, ns in pop_trunc[-2:]])
        validation_runs = product(validation_pop, validation_episodes)
        print('Running Validation Episodes.')
        validation_results = Parallel(n_jobs=num_cores)(
            delayed(play_atari)(ind, game_name, validation_frames, [episode])
            for ind, episode in validation_runs
        )

        # Collect mean validation scores by individual
        validation_scores = []
        for ind in validation_pop:
            validation_scores.append((ind, np.mean(
                [v_score for v_ind, v_score, _ in validation_results if v_ind == ind])))
        validation_scores = sorted(validation_scores, key=lambda x: x[1])
        print([(x.init_seed, y) for x, y in validation_scores])

        # Select the top individual by validation score
        top_validation_ind, top_validation_score = validation_scores[-1]
        # Add best-generalising individual to reproduction population (disabled)
        # pop_trunc.append((top_validation_ind, None, None, None))
        mean_validation_scores.append(top_validation_score)
        print('Previous Mean Validation Scores: {}'.format(mean_validation_scores))
        print('Current Mean Validation Score Over {} Episodes: {:.2f}'.format(
            len(validation_episodes), mean_validation_scores[-1]))
        with open('./{}/{}-elite_validation-{}-mean_score-{}.txt'.format(
                exp_name, game_name, gen, mean_validation_scores[-1]), 'w') as file:
            file.write(top_validation_ind.to_JSON())

        with open('./{}/run_info.txt'.format(exp_name), 'w') as run_file:
            run_file.write('Mean Game Scores\n{}\n'.format(mean_game_scores))
            run_file.write('Best Game Scores\n{}\n'.format(best_game_scores))
            run_file.write('Mean Validation Scores Over {} Episodes\n{}\n'.format(
                len(validation_episodes), mean_validation_scores))

        if gen == n_generations - 1:
            return g_elite_game_score

        # Seed the next population with the best-generalising individual, then
        # fill it with mutated offspring of the reproduction candidates
        new_pop = [top_validation_ind]
        for _ in range(pop_size - 1):
            offspring, _, _, _ = pop_trunc[rnd_int(low=0, high=len(pop_trunc))]
            offspring = offspring.copy()
            mutate(offspring)
            new_pop.append(offspring)
        population = new_pop
        gc.collect()
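
# compute_novelty is called above but not defined in this listing. The sketch
# below is a hypothetical stand-in, not the original implementation: it
# assumes novelty is the mean distance between an individual's per-episode
# action string and the archived behaviour strings, using a simple
# Hamming-style distance. Only the return shape is taken from the source:
# the loops above unpack (ind, game_score, actions_performed, novelty_score).
def compute_novelty(ind, game_score, actions_performed, archive):
    episode_novelties = []
    for episode, episode_archive in enumerate(archive):
        behaviour = ''.join(str(a) for a in actions_performed[episode])
        if not episode_archive:
            episode_novelties.append(0.0)
            continue
        # Mean distance to all archived behaviours; a k-nearest-neighbour
        # mean is another common choice in novelty search.
        distances = [_behaviour_distance(behaviour, other)
                     for other in episode_archive]
        episode_novelties.append(sum(distances) / len(distances))
    novelty_score = float(np.mean(episode_novelties))
    return ind, game_score, actions_performed, novelty_score


def _behaviour_distance(a, b):
    # Mismatched positions, with any length difference counted as mismatches.
    return sum(x != y for x, y in zip(a, b)) + abs(len(a) - len(b))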
# Variant 2: truncate by game score first, compute novelty only for the
# reproduction candidates, and validate the whole candidate pool plus the
# training elites on held-out episodes.
def evolve_solution(game_name, action_space_size, pop_size=1000 + 1, archive_p=0.01,
                    archive_dump=None, n_generations=1000, training_frames=20000,
                    training_episodes=(0,), starting_generation=0, T=40, archive=None,
                    population=None, validation_frames=10000, validation_episodes=(1,),
                    validation_archive=None, validation_archive_p=1.0):
    in_shape = (84, 84, 4)
    out_shape = (action_space_size,)
    init_connect_rate = 1.0
    num_episodes = len(training_episodes)
    num_validation_episodes = len(validation_episodes)
    if population is None:
        population = generate_init_population(in_shape, out_shape, pop_size,
                                              init_connect_rate)
    if archive is None:
        archive = [[] for _ in range(num_episodes)]
    rnd_int = np.random.RandomState(seed=1).randint
    mean_training_game_scores = []
    best_training_game_scores = []
    best_validation_game_scores = []
    for gen in range(starting_generation, n_generations):
        print('\nGeneration', gen)
        save_population(population, gen, exp_name)

        # Clear the archive every archive_dump generations, if requested
        if archive_dump is not None and gen % archive_dump == 0:
            archive = [[] for _ in range(num_episodes)]

        # Evaluate the whole population in parallel
        results = Parallel(n_jobs=num_cores)(
            delayed(play_atari)(ind, game_name, training_frames, training_episodes)
            for ind in population
        )

        # Retrieve game scores and compute the mean
        game_scores = [x[1] for x in results]
        mean_game_score = np.mean(game_scores)
        mean_training_game_scores.append(mean_game_score)

        # Randomly select archive_p * pop_size agent behaviours for archiving
        archive_indices = np.random.choice(len(results), size=int(archive_p * pop_size))
        for i in archive_indices:
            _, _, actions_performed = results[i]
            for episode in range(num_episodes):
                episode_actions = actions_performed[episode]
                archive[episode].append(''.join(str(a) for a in episode_actions))
        save_archive(archive, gen, exp_name)

        # Find the highest-scoring individuals and truncate
        results_sorted = sorted(results, key=lambda x: x[1])
        results_trunc = results_sorted[-T:]
        best_training_ind_gs = results_sorted[-1][0]

        # Store the top training game score
        best_training_game_score = results_sorted[-1][1]
        best_training_game_scores.append(best_training_game_score)

        # Compute novelty scores for the reproduction candidates
        print('Computing novelty scores for reproduction candidates.')
        results_novelty = []
        for ind, game_score, actions_performed in results_trunc:
            results_novelty.append(
                compute_novelty(ind, game_score, actions_performed, archive))

        # Find the most novel reproduction candidates and truncate
        results_novelty_sorted = sorted(results_novelty, key=lambda x: x[3])
        results_novelty_trunc = results_novelty_sorted[-T // 2:]
        best_training_ind_novelty = results_novelty_sorted[-1][0]
        best_training_novelty_game_score = results_novelty_sorted[-1][1]
        best_training_novelty_novelty_score = results_novelty_sorted[-1][3]

        # Take the reproduction candidates and include the training elites
        validation_pop = ([ind for ind, _, _, _ in results_novelty_trunc]
                          + [best_training_ind_gs] + [best_training_ind_novelty])
        validation_runs = product(validation_pop, validation_episodes)
        print('Running Validation Episodes.')
        validation_results = Parallel(n_jobs=num_cores)(
            delayed(play_atari)(ind, game_name, validation_frames, [episode])
            for ind, episode in validation_runs
        )

        # Recombine validation results into (ind, mean score) pairs
        validation_pairs = []
        for ind in validation_pop:
            validation_pairs.append((ind, np.mean(
                [v_score for v_ind, v_score, _ in validation_results if v_ind == ind])))
        validation_pairs_sorted = sorted(validation_pairs, key=lambda x: x[1])

        # Select the top-scoring individual from validation and save its score
        best_validation_ind, best_validation_score = validation_pairs_sorted[-1]
        best_validation_game_scores.append(best_validation_score)

        # Save run info
        with open('./{}/{}-elite_game-{}-score-{}.txt'.format(
                exp_name, game_name, gen, best_training_game_score), 'w') as file:
            file.write(best_training_ind_gs.to_JSON())
        with open('./{}/{}-elite_novelty-{}-score-{}.txt'.format(
                exp_name, game_name, gen, best_training_novelty_game_score), 'w') as file:
            file.write(best_training_ind_novelty.to_JSON())
        with open('./{}/{}-elite_validation-{}-game_score-{}.txt'.format(
                exp_name, game_name, gen, best_validation_score), 'w') as file:
            file.write(best_validation_ind.to_JSON())

        print('Best Score (Game Score, Novelty): {} {:.2f}, {:.2f}'.format(
            best_training_ind_novelty, best_training_novelty_game_score,
            best_training_novelty_novelty_score))
        print('Best Score (Game Score): {} {:.2f}'.format(
            best_training_ind_gs, best_training_game_score))
        print('Previous Mean Game Scores: {}'.format(mean_training_game_scores))
        print('Current Mean Game Score: {:.2f}'.format(mean_game_score))
        print('Previous Best Game Scores: {}'.format(best_training_game_scores))
        print('Current Best Game Score: {}'.format(best_training_game_scores[-1]))
        print('Previous Best Validation Game Scores: {}'.format(
            best_validation_game_scores))
        print('Current Best Validation Game Score Over {} Episodes: {:.2f}'.format(
            len(validation_episodes), best_validation_score))

        with open('./{}/run_info.txt'.format(exp_name), 'w') as run_file:
            run_file.write('Mean Game Scores\n{}\n'.format(mean_training_game_scores))
            run_file.write('Best Game Scores\n{}\n'.format(best_training_game_scores))
            run_file.write('Best Validation Game Scores Over {} Episodes\n{}\n'.format(
                len(validation_episodes), best_validation_game_scores))

        if gen == n_generations - 1:
            return best_training_ind_gs

        # Seed the next population with the best-generalising individual, then
        # fill it with mutated offspring of the validation pool
        new_pop = [best_validation_ind]
        for _ in range(pop_size - 1):
            offspring = validation_pop[rnd_int(low=0, high=len(validation_pop))]
            offspring = offspring.copy()
            mutate(offspring)
            new_pop.append(offspring)
        population = new_pop
        gc.collect()
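
# play_atari is likewise assumed rather than shown. A minimal sketch against
# the Gymnasium API follows; the original presumably also performs the
# 84x84x4 frame preprocessing implied by in_shape, which is omitted here,
# and ind.act(obs) is a hypothetical interface for querying the evolved
# network for an action.
import gymnasium as gym

def play_atari(ind, game_name, max_frames, episode_seeds):
    total_score = 0.0
    actions_performed = []
    for seed in episode_seeds:
        env = gym.make(game_name)
        obs, info = env.reset(seed=seed)
        episode_actions = []
        for _ in range(max_frames):
            action = ind.act(obs)  # hypothetical policy interface
            obs, reward, terminated, truncated, info = env.step(action)
            episode_actions.append(action)
            total_score += reward
            if terminated or truncated:
                break
        env.close()
        actions_performed.append(episode_actions)
    # Matches the (ind, game_score, actions_performed) tuples unpacked above.
    return ind, total_score, actions_performed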
# Variant 3: flattened-input configuration with hard-coded hyperparameters;
# the behaviour archive is rebuilt every 100 generations and the most novel
# individual is carried over as the elite.
def evolve_solution(game_name, action_space_size, archive_p=0.1):
    in_shape = (28224,)
    out_shape = (action_space_size,)
    init_connect_rate = 1.0
    pop_size = 500 + 1
    generations = 1000
    game_iterations = 5000
    population = generate_init_population(in_shape, out_shape, pop_size,
                                          init_connect_rate)
    # population, starting_generation = load_population('./{}/hybrid_exp-499.pkl'.format(exp_name))
    starting_generation = 0
    env_seeds = [0, 1]
    num_episodes = len(env_seeds)
    rnd_int = np.random.randint
    T = 100
    elite = None
    archive = [[] for _ in range(num_episodes)]
    # archive = load_archive('./{}/archive-hybrid_exp-499.pkl'.format(exp_name))
    mean_game_scores = []
    best_game_scores = []
    rand = np.random.RandomState(seed=1).uniform
    for gen in range(starting_generation, generations):
        print('\nGeneration', gen)
        save_population(population, gen, exp_name)

        # Clear the archive every 100 generations
        if gen % 100 == 0:
            archive = [[] for _ in range(num_episodes)]

        # Evaluate the whole population in parallel
        results = Parallel(n_jobs=num_cores)(
            delayed(play_atari)(ind, game_name, game_iterations, env_seeds)
            for ind in population
        )

        # Archive each behaviour with probability archive_p
        for _, _, actions_performed in results:
            if rand() < archive_p:
                for episode in range(num_episodes):
                    episode_actions = actions_performed[episode]
                    archive[episode].append(''.join(str(a) for a in episode_actions))
        save_archive(archive, gen, exp_name)

        # Compute novelty scores
        print('Computing novelty scores.')
        results_novelty = []
        for ind, game_score, actions_performed in results:
            results_novelty.append(
                compute_novelty(ind, game_score, actions_performed, archive))

        # Compute the mean game score
        game_scores = [x[1] for x in results_novelty]
        mean_game_score = np.mean(game_scores)
        mean_game_scores.append(mean_game_score)

        # Find the most novel individual
        results_novelty = sorted(results_novelty, key=lambda x: x[3])
        (n_elite_ind, n_elite_game_score, n_elite_actions_performed,
         n_elite_novelty_score) = results_novelty[-1]

        # Find the best-scoring individual
        (g_elite_ind, g_elite_game_score, g_elite_actions_performed,
         g_elite_novelty_score) = sorted(results_novelty, key=lambda x: x[1])[-1]
        best_game_scores.append(g_elite_game_score)
        elite = n_elite_ind.copy()

        with open('./{}/{}-elite_game-{}-score-{}.txt'.format(
                exp_name, game_name, gen, g_elite_game_score), 'w') as file:
            file.write(g_elite_ind.to_JSON())
        with open('./{}/{}-elite_novelty-{}-score-{}.txt'.format(
                exp_name, game_name, gen, n_elite_game_score), 'w') as file:
            file.write(n_elite_ind.to_JSON())

        print('Best Score (Novelty): {} {} {}'.format(
            n_elite_ind, n_elite_game_score, n_elite_novelty_score))
        print('Best Score (Game Score): {} {} {}'.format(
            g_elite_ind, g_elite_game_score, g_elite_novelty_score))
        print('Previous Mean Game Scores: {}'.format(mean_game_scores))
        print('Current Mean Game Score: {}'.format(mean_game_score))
        print('Previous Best Game Scores: {}'.format(best_game_scores))
        print('Current Best Game Score: {}'.format(best_game_scores[-1]))

        with open('./{}/run_info.txt'.format(exp_name), 'w') as run_file:
            run_file.write('Mean Game Scores\n{}\n'.format(mean_game_scores))
            run_file.write('Best Game Scores\n{}\n'.format(best_game_scores))

        # Truncate based on novelty score
        pop_trunc = results_novelty[-T:]
        # Then select the half with the highest game score
        pop_trunc = sorted(pop_trunc, key=lambda x: x[1])[-T // 2:]

        if gen == generations - 1:
            return elite

        # Seed the next population with the elite, then fill it with mutated
        # offspring of the reproduction candidates
        new_pop = [elite]
        for _ in range(pop_size - 1):
            offspring, _, _, _ = pop_trunc[rnd_int(low=0, high=len(pop_trunc))]
            offspring = offspring.copy()
            mutate(offspring)
            new_pop.append(offspring)
        population = new_pop
        gc.collect()
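
# mutate is the remaining undefined helper. The sketch below is a guess at a
# typical deep-neuroevolution operator, assuming each individual exposes its
# parameters as a list of numpy arrays via a hypothetical `weights`
# attribute; the real operator may differ (e.g. seed-chain mutation or
# topology changes).
def mutate(ind, sigma=0.002):
    # Perturb every weight array in place with Gaussian noise.
    for w in ind.weights:
        w += np.random.randn(*w.shape) * sigma

# Example entry point (game name and action-space size are illustrative):
# elite = evolve_solution('PongNoFrameskip-v4', action_space_size=6)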