import gc
from itertools import product

import numpy as np
from joblib import Parallel, delayed

# Assumed module-level configuration and helpers, defined elsewhere in the project:
# exp_name, num_cores, generate_init_population, play_atari, compute_novelty,
# mutate, save_population, save_archive, load_population, load_archive.

def evolve_solution(game_name, action_space_size, pop_size=1000+1, archive_p=0.01, archive_dump=None, n_generations=1000,
                    training_frames=20000, training_episodes=(0,), starting_generation=0, T=40, archive=None, population=None,
                    validation_frames=10000, validation_episodes=(1,)):

    in_shape = (84,84,4)
    out_shape = (action_space_size,)
    init_connect_rate = 1.0

    num_episodes = len(training_episodes)

    if population is None:
        population = generate_init_population(in_shape, out_shape, pop_size, init_connect_rate)


    if archive is None:
        archive = [[] for _ in range(num_episodes)]

    rnd_int = np.random.RandomState(seed=1).randint
    rnd_uniform = np.random.RandomState(seed=1).uniform

    mean_game_scores = []
    best_game_scores = []
    mean_validation_scores = []

    for gen in range(starting_generation, n_generations):

        print('\nGeneration', gen)
        save_population(population, gen, exp_name)

        # Clear the archive every archive_dump generations, if an interval is set
        if archive_dump is not None and gen % archive_dump == 0:
            archive = [[] for _ in range(num_episodes)]

        # Evaluate the population in parallel
        results = Parallel(n_jobs=num_cores)(
            delayed(play_atari)(
                ind, game_name, training_frames, training_episodes) for ind in population
        )

        # Randomly select archive_p * pop_size agent behaviours for archiving.
        # Sampling without replacement avoids duplicate archive entries.
        archive_indices = np.random.choice(len(results), size=int(archive_p * pop_size), replace=False)
        for i in archive_indices:
            _, _, actions_performed = results[i]
            for episode in range(num_episodes):
                episode_actions_performed = actions_performed[episode]
                # Behaviours are stored as strings of action indices
                archive[episode].append(''.join(str(a) for a in episode_actions_performed))

        save_archive(archive, gen, exp_name)

        # Computing novelty scores
        print('Computing novelty scores.')
        results_novelty = []
        for ind, game_score, actions_performed in results:
            results_novelty.append(compute_novelty(ind, game_score, actions_performed, archive))

        # Retrieve game scores
        game_scores = [x[1] for x in results_novelty]

        # Compute mean game score
        mean_game_score = np.mean(game_scores)
        mean_game_scores.append(mean_game_score)

        # Find most novel individual
        results_novelty = sorted(results_novelty, key=lambda x: x[3])
        n_elite_ind, n_elite_game_score, n_elite_actions_performed, n_elite_novelty_score = results_novelty[-1]

        # Find individual with best game score
        g_elite_ind, g_elite_game_score, g_elite_actions_performed, g_elite_novelty_score = \
            sorted(results_novelty, key=lambda x: x[1])[-1]

        best_game_scores.append(g_elite_game_score)

        with open('./{}/{}-elite_game-{}-score-{}.txt'.format(exp_name, game_name, gen, g_elite_game_score), 'w') as f:
            f.write(g_elite_ind.to_JSON())
        with open('./{}/{}-elite_novelty-{}-score-{}.txt'.format(exp_name, game_name, gen, n_elite_game_score), 'w') as f:
            f.write(n_elite_ind.to_JSON())

        print('Best Score (Novelty): {} {:.2f} {:.2f}'.format(n_elite_ind, n_elite_game_score, n_elite_novelty_score))
        print('Best Score (Game Score): {} {:.2f} {:.2f}'.format(g_elite_ind, g_elite_game_score, g_elite_novelty_score))
        print('Previous Mean Game Scores: {}'.format(mean_game_scores))
        print('Current Mean Game Score: {:.2f}'.format(mean_game_score))
        print('Previous Best Game Scores: {}'.format(best_game_scores))
        print('Current Best Game Score: {}'.format(best_game_scores[-1]))

        # Truncate based on novelty score
        pop_trunc = results_novelty[-T:]

        # Then select half with highest game score (Not in main experiment)
        pop_trunc = sorted(pop_trunc, key=lambda x: x[1])[-T // 2:]

        # Use cross-validation to select elite
        # pop_trunc is already sorted by game score, so the top two are at the end
        validation_pop = [result[0] for result in pop_trunc[-2:]]
        print([(x.init_seed, ns) for x, _, _, ns in pop_trunc[-2:]])
        # product() yields a one-shot iterator; it is consumed by Parallel below
        validation_runs = product(validation_pop, validation_episodes)

        print('Running Validation Episodes.')
        validation_results = Parallel(n_jobs=num_cores)(
            delayed(play_atari)(
                ind, game_name, validation_frames, [episode]) for ind, episode in validation_runs
        )

        # Collect scores by individual. Note: matching by `v_ind == ind` assumes
        # individual equality survives joblib's pickling round-trip (e.g. the
        # class defines __eq__); otherwise, match on a stable key such as init_seed.
        validation_scores = []
        for ind in validation_pop:
            validation_scores.append((ind, np.mean([v_score for v_ind, v_score, _ in validation_results if v_ind == ind])))
        validation_scores = sorted(validation_scores, key=lambda x: x[1])

        print([(x.init_seed, y) for x,y in validation_scores])

        # validation_scores is sorted by score; select the top individual
        top_validation_ind, top_validation_score = validation_scores[-1]

        # Optionally add the best-generalizing individual to the reproduction
        # population (disabled here):
        # pop_trunc.append((top_validation_ind, None, None, None))

        mean_validation_scores.append(top_validation_score)
        print('Previous Mean Validation Scores: {}'.format(mean_validation_scores))
        print('Current Mean Validation Score Over {} Episodes: {:.2f}'.format(len(validation_episodes), mean_validation_scores[-1]))

        with open('./{}/{}-elite_validation-{}-mean_score-{}.txt'.format(exp_name, game_name, gen, mean_validation_scores[-1]), 'w') as f:
            f.write(top_validation_ind.to_JSON())

        with open('./{}/run_info.txt'.format(exp_name), 'w') as run_file:
            run_file.write('Mean Game Scores\n{}\n'.format(mean_game_scores))
            run_file.write('Best Game Scores\n{}\n'.format(best_game_scores))
            run_file.write('Mean Validation Scores Over {} Episodes\n{}\n'.format(len(validation_episodes), mean_validation_scores))

        if gen == n_generations - 1:
            return g_elite_game_score
        # Seed the next population with the validation elite
        new_pop = [top_validation_ind]
        for _ in range(pop_size - 1):
            offspring, _, _, _ = pop_trunc[rnd_int(low=0, high=len(pop_trunc))]
            offspring = offspring.copy()
            mutate(offspring)
            new_pop.append(offspring)

        population = new_pop
        gc.collect()
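
The helpers used above (generate_init_population, play_atari, compute_novelty, mutate, and the save/load utilities) are defined elsewhere in the project. As a rough illustration of the contract this loop relies on, below is a minimal sketch of compute_novelty, assuming the behaviour characterisation is the per-episode action string archived above and novelty is the mean distance to the k nearest archived behaviours. The distance metric, the value of k, and the internal helper are assumptions, not the original implementation; only the (ind, game_score, actions_performed, novelty_score) return shape is inferred from how the tuples are unpacked in the loop.

def compute_novelty(ind, game_score, actions_performed, archive, k=25):
    # Hypothetical sketch: novelty = mean distance to the k nearest archived
    # behaviours, averaged over episodes.
    def behaviour_distance(a, b):
        # Hamming distance over the aligned prefix plus the length difference,
        # so action strings of different lengths remain comparable.
        n = min(len(a), len(b))
        return sum(x != y for x, y in zip(a[:n], b[:n])) + abs(len(a) - len(b))

    novelty_per_episode = []
    for episode, episode_archive in enumerate(archive):
        behaviour = ''.join(str(a) for a in actions_performed[episode])
        if not episode_archive:
            novelty_per_episode.append(0.0)
            continue
        distances = sorted(behaviour_distance(behaviour, other) for other in episode_archive)
        novelty_per_episode.append(float(np.mean(distances[:k])))

    return ind, game_score, actions_performed, float(np.mean(novelty_per_episode))
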
Example #2

def evolve_solution(game_name, action_space_size, pop_size=1000+1, archive_p=0.01, archive_dump=None, n_generations=1000,
                    training_frames=20000, training_episodes=(0,), starting_generation=0, T=40, archive=None, population=None,
                    validation_frames=10000, validation_episodes=(1,), validation_archive=None, validation_archive_p=1.0):

    in_shape = (84,84,4)
    out_shape = (action_space_size,)
    init_connect_rate = 1.0

    num_episodes = len(training_episodes)
    num_validation_episodes = len(validation_episodes)

    if population is None:
        population = generate_init_population(in_shape, out_shape, pop_size, init_connect_rate)

    if archive is None:
        archive = [[] for _ in range(num_episodes)]

    rnd_int = np.random.RandomState(seed=1).randint
    rnd_uniform = np.random.RandomState(seed=1).uniform

    mean_training_game_scores = []
    best_training_game_scores = []
    best_validation_game_scores = []

    for gen in range(starting_generation, n_generations):

        print('\nGeneration', gen)
        save_population(population, gen, exp_name)

        # Clear the archive every archive_dump generations, if an interval is set
        if archive_dump is not None and gen % archive_dump == 0:
            archive = [[] for _ in range(num_episodes)]

        # Evaluate the population in parallel
        results = Parallel(n_jobs=num_cores)(
            delayed(play_atari)(
                ind, game_name, training_frames, training_episodes) for ind in population
        )

        # Retrieve game scores
        game_scores = [x[1] for x in results]

        # Compute mean game score
        mean_game_score = np.mean(game_scores)
        mean_training_game_scores.append(mean_game_score)

        # Randomly select archive_p * pop_size agent behaviours for archiving.
        # Sampling without replacement avoids duplicate archive entries.
        archive_indices = np.random.choice(len(results), size=int(archive_p * pop_size), replace=False)
        for i in archive_indices:
            _, _, actions_performed = results[i]
            for episode in range(num_episodes):
                episode_actions_performed = actions_performed[episode]
                archive[episode].append(''.join(str(a) for a in episode_actions_performed))

        save_archive(archive, gen, exp_name)

        # Find highest scoring individuals and truncate
        results_sorted = sorted(results, key=lambda x: x[1])
        results_trunc = results_sorted[-T:]
        best_training_ind_gs = results_sorted[-1][0]

        # Store top training game score
        best_training_game_score = results_sorted[-1][1]
        best_training_game_scores.append(best_training_game_score)

        # Compute novelty scores for reproduction candidates
        print('Computing novelty scores for reproduction candidates.')
        results_novelty = []
        for ind, game_score, actions_performed in results_trunc:
            results_novelty.append(compute_novelty(ind, game_score, actions_performed, archive))

        # Find most novel reproduction candidates and truncate
        results_novelty_sorted = sorted(results_novelty, key=lambda x: x[3])
        results_novelty_trunc = results_novelty_sorted[-T // 2:]
        best_training_ind_novelty = results_novelty_sorted[-1][0]
        best_training_novelty_game_score = results_novelty_sorted[-1][1]
        best_training_novelty_novelty_score = results_novelty_sorted[-1][3]

        # Get reproduction candidates from results and include training elites
        validation_pop = [ind for ind, _, _, _ in results_novelty_trunc] + [best_training_ind_gs] + [best_training_ind_novelty]
        validation_runs = product(validation_pop, validation_episodes)

        print('Running Validation Episodes.')
        validation_results = Parallel(n_jobs=num_cores)(
            delayed(play_atari)(
                ind, game_name, validation_frames, [episode]) for ind, episode in validation_runs
        )

        # Recombine validation results into (ind, score) pairs. Matching by
        # `v_ind == ind` assumes equality survives joblib's pickling round-trip;
        # otherwise, match on a stable key such as init_seed.
        validation_pairs = []
        for ind in validation_pop:
            validation_pairs.append(
                (ind, np.mean([v_score for v_ind, v_score, _ in validation_results if v_ind == ind])))
        validation_pairs_sorted = sorted(validation_pairs, key=lambda x: x[1])

        # Select top-scoring individual from validation and save score
        best_validation_ind, best_validation_score = validation_pairs_sorted[-1]
        best_validation_game_scores.append(best_validation_score)

        # Save elite individuals
        with open('./{}/{}-elite_game-{}-score-{}.txt'.format(exp_name, game_name, gen, best_training_game_score), 'w') as f:
            f.write(best_training_ind_gs.to_JSON())

        with open('./{}/{}-elite_novelty-{}-score-{}.txt'.format(exp_name, game_name, gen, best_training_novelty_game_score), 'w') as f:
            f.write(best_training_ind_novelty.to_JSON())

        with open('./{}/{}-elite_validation-{}-game_score-{}.txt'.format(exp_name, game_name, gen, best_validation_score), 'w') as f:
            f.write(best_validation_ind.to_JSON())

        print('Best Score (Game Score, Novelty): {} {:.2f}, {:.2f}'.format(best_training_ind_novelty, best_training_novelty_game_score, best_training_novelty_novelty_score))
        print('Best Score (Game Score): {} {:.2f}'.format(best_training_ind_gs, best_training_game_score))
        print('Previous Mean Game Scores: {}'.format(mean_training_game_scores))
        print('Current Mean Game Score: {:.2f}'.format(mean_game_score))
        print('Previous Best Game Scores: {}'.format(best_training_game_scores))
        print('Current Best Game Score: {}'.format(best_training_game_scores[-1]))
        print('Previous Best Validation Game Scores: {}'.format(best_validation_game_scores))
        print('Current Best Validation Game Score Over {} Episodes: {:.2f}'.format(len(validation_episodes), best_validation_score))

        with open('./{}/run_info.txt'.format(exp_name), 'w') as run_file:
            run_file.write('Mean Game Scores\n{}\n'.format(mean_training_game_scores))
            run_file.write('Best Game Scores\n{}\n'.format(best_training_game_scores))
            run_file.write('Best Validation Game Scores Over {} Episodes\n{}\n'.format(len(validation_episodes), best_validation_game_scores))

        if gen == n_generations - 1:
            return best_training_ind_gs
        # Seed the next population with the validation elite
        new_pop = [best_validation_ind]
        for _ in range(pop_size - 1):
            offspring = validation_pop[rnd_int(low=0, high=len(validation_pop))]
            offspring = offspring.copy()
            mutate(offspring)
            new_pop.append(offspring)

        population = new_pop
        gc.collect()
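
Both variants above assume a play_atari helper with the signature play_atari(ind, game_name, max_frames, episode_seeds), returning (individual, game score, per-episode action lists); each episode value acts as an environment seed (the third variant below names them env_seeds outright). A minimal sketch under those assumptions, using the classic gym API (env.seed plus a 4-tuple step); the policy interface ind.act, the absence of frame preprocessing, and summing scores across episodes are guesses, not the original code.

import gym

def play_atari(ind, game_name, max_frames, episode_seeds):
    total_score = 0.0
    actions_per_episode = []
    for seed in episode_seeds:
        env = gym.make(game_name)
        env.seed(seed)                      # classic gym API; gymnasium uses reset(seed=...)
        obs = env.reset()
        actions, done, frame = [], False, 0
        while not done and frame < max_frames:
            action = ind.act(obs)           # assumed policy interface
            obs, reward, done, _ = env.step(action)
            total_score += reward
            actions.append(action)
            frame += 1
        actions_per_episode.append(actions)
        env.close()
    return ind, total_score, actions_per_episode
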
Example #3

def evolve_solution(game_name, action_space_size, archive_p=0.1):
    in_shape = (28224,)
    out_shape = (action_space_size,)
    init_connect_rate = 1.0
    pop_size = 500 + 1
    generations = 1000
    starting_generation = 0
    game_iterations = 5000
    population = generate_init_population(in_shape, out_shape, pop_size, init_connect_rate)
    # To resume a previous run instead:
    # population, starting_generation = load_population('./{}/hybrid_exp-499.pkl'.format(exp_name))

    env_seeds = [0, 1]
    num_episodes = len(env_seeds)
    rnd_int = np.random.randint
    T = 100
    elite = None
    archive = [[] for _ in range(num_episodes)]
    # archive = load_archive('./{}/archive-hybrid_exp-499.pkl'.format(exp_name))
    mean_game_scores = []
    best_game_scores = []
    rand = np.random.RandomState(seed=1).uniform

    for gen in range(starting_generation, generations):

        print('\nGeneration', gen)
        save_population(population, gen, exp_name)

        # Clear archive after every 100 generations
        if gen % 100 == 0:
            archive = [[] for _ in range(num_episodes)]

        # Evaluate the population in parallel
        results = Parallel(n_jobs=num_cores)(
            delayed(play_atari)(
                ind, game_name, game_iterations, env_seeds) for ind in population
        )

        for _, _, actions_performed in results:
            if rand() < archive_p:
                for episode in range(num_episodes):
                    episode_actions_performed = actions_performed[episode]
                    archive[episode].append(''.join(str(i) for i in episode_actions_performed))

        save_archive(archive, gen, exp_name)

        # Compute novelty scores
        print('Computing novelty scores.')
        results_novelty = []
        for ind, game_score, actions_performed in results:
            results_novelty.append(compute_novelty(ind, game_score, actions_performed, archive))

        game_scores = [x[1] for x in results_novelty]

        # Compute mean game score
        mean_game_score = np.mean(game_scores)
        mean_game_scores.append(mean_game_score)

        # Find Most Novel Individual
        results_novelty = sorted(results_novelty, key=lambda x: x[3])
        n_elite_ind, n_elite_game_score, n_elite_actions_performed, n_elite_novelty_score = results_novelty[-1]

        # Find Best Scoring Individual
        g_elite_ind, g_elite_game_score, g_elite_actions_performed, g_elite_novelty_score = \
            sorted(results_novelty, key=lambda x: x[1])[-1]

        best_game_scores.append(g_elite_game_score)

        elite = n_elite_ind.copy()

        with open('./{}/{}-elite_game-{}-score-{}.txt'.format(exp_name, game_name, gen, g_elite_game_score), 'w') as f:
            f.write(g_elite_ind.to_JSON())
        with open('./{}/{}-elite_novelty-{}-score-{}.txt'.format(exp_name, game_name, gen, n_elite_game_score), 'w') as f:
            f.write(n_elite_ind.to_JSON())

        print('Best Score (Novelty): {} {} {}'.format(n_elite_ind, n_elite_game_score, n_elite_novelty_score))
        print('Best Score (Game Score): {} {} {}'.format(g_elite_ind, g_elite_game_score, g_elite_novelty_score))
        print('Previous Mean Game Scores: {}'.format(mean_game_scores))
        print('Current Mean Game Score: {}'.format(mean_game_score))
        print('Previous Best Game Scores: {}'.format(best_game_scores))
        print('Current Best Game Score: {}'.format(best_game_scores[-1]))

        with open('./{}/run_info.txt'.format(exp_name), 'w') as run_file:
            run_file.write('Mean Game Scores\n{}\n'.format(mean_game_scores))
            run_file.write('Best Game Scores\n{}\n'.format(best_game_scores))

        # Truncate based on novelty score
        pop_trunc = results_novelty[-T:]
        # Then select half with highest game score
        pop_trunc = sorted(pop_trunc, key=lambda x: x[1])[-T // 2:]

        if gen == generations - 1:
            return elite
        new_pop = [elite]
        for _ in range(pop_size - 1):
            offspring, _, _, _ = pop_trunc[rnd_int(low=0, high=len(pop_trunc))]
            offspring = offspring.copy()
            mutate(offspring)
            new_pop.append(offspring)

        population = new_pop

        gc.collect()
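
All three variants mutate offspring in place via mutate. The individual class is not shown in this listing, so the sketch below assumes the simplest encoding consistent with generate_init_population's init_connect_rate parameter: a flat numpy weight vector perturbed by Gaussian noise at a fixed per-weight rate. The attribute name weights, the rate, and the noise scale are all illustrative.

def mutate(ind, mutation_rate=0.01, sigma=0.1):
    # Perturb a random subset of weights in place with Gaussian noise.
    # `ind.weights` is a hypothetical flat weight vector, not a confirmed API.
    mask = np.random.uniform(size=ind.weights.shape) < mutation_rate
    ind.weights = ind.weights + mask * np.random.normal(0.0, sigma, size=ind.weights.shape)

And an illustrative invocation of the third variant, whose hyperparameters are fixed inside the function (the game name and action count are placeholders):

if __name__ == '__main__':
    elite = evolve_solution('Breakout-v4', action_space_size=4)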