import itertools
import numpy as np

# Assumed imports: the eonn neuro-evolution package is used throughout;
# the exact module layout below is a guess and may differ.
from eonn import eonn
from eonn.genome import Genome, BasicGenome
from eonn.organism import Organism, Pool

# NN_STRUCTURE_FILE, max_wind and the cliff() evaluation function are
# assumed to be defined elsewhere in this module.


def coevo():
    # Create a pool for the policies
    pi_pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), 20, std=8)
    # Create a pool of z's spread over [0, 0.5]; this range is a first
    # guess and probably worth tuning
    z_list = [[x] for x in np.linspace(0, 0.5, 5)]
    genomes = BasicGenome.from_list(z_list, 5)
    org_list = [Organism(genome) for genome in genomes]
    z_pool = Pool(org_list)
    avg_fitness = []
    champ_fitness = []
    for i in xrange(150):
        pi_pool = eonn.epoch(pi_pool, len(pi_pool))
        z_pool = eonn.epoch(z_pool, len(z_pool))
        # Evaluate every policy against every wind setting z
        for pi_org, z_org in itertools.product(pi_pool, z_pool):
            reward = cliff(pi_org.genome, z=[z_org.weights[0]], verbose=False)
            pi_org.evals.append(reward)
            z_org.evals.append(reward)
        # A z's fitness is the variance of the rewards it induced, so
        # z's that discriminate between policies are favoured
        for org in z_pool:
            org.evals = [np.var(org.evals)]
        avg_fitness.append(pi_pool.fitness)
        champion = max(pi_pool)
        champ_fitness.append(champion.fitness)
    return avg_fitness, champ_fitness
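
# Usage sketch for coevo(): a hypothetical driver, not part of the original
# code, that runs the coevolution loop and plots average versus champion
# fitness per epoch (assumes matplotlib is installed).
def plot_coevo():
    import matplotlib.pyplot as plt
    avg_fitness, champ_fitness = coevo()
    plt.plot(avg_fitness, label='average fitness')
    plt.plot(champ_fitness, label='champion fitness')
    plt.xlabel('epoch')
    plt.ylabel('fitness')
    plt.legend(loc='lower right')
    plt.show()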
def do_evolution(pi_pool, z_pool, GP):
    """ Evolve the organisms and predict their values according to the GP """
    # Evolve pools
    pi_pool = eonn.epoch(pi_pool, len(pi_pool))
    z_pool = eonn.epoch(z_pool, len(z_pool))
    # Create prediction matrix for the GP: one row per (policy, z) pair
    x_predict = [np.append(pi_org.weights, z_org.weights)
                 for pi_org in pi_pool for z_org in z_pool]
    # Get predicted rewards and their MSE
    reward_predict, MSE = GP.predict(x_predict, eval_MSE=True)
    return pi_pool, z_pool, x_predict, reward_predict, MSE
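
# do_evolution() expects a GP with the legacy scikit-learn (<= 0.17)
# GaussianProcess interface, which is where the eval_MSE keyword comes from.
# A minimal fitting sketch under that assumption, where x_train holds rows
# of [policy weights, z] and y_train the observed rewards (both names are
# hypothetical):
def fit_gp(x_train, y_train):
    from sklearn.gaussian_process import GaussianProcess
    GP = GaussianProcess(nugget=1e-8)  # small nugget for numerical stability
    GP.fit(np.array(x_train), np.array(y_train))
    return GP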
def find_best(GP, epochs=100):
    """ Find the policy with the highest mean predicted reward under the GP """
    pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), 50, std=8)
    all_z = list(np.linspace(0, max_wind, 10))
    for n in xrange(epochs):
        # Evaluate the initial random pool first, evolve on later epochs
        if n != 0:
            pool = eonn.epoch(pool, len(pool))
        weights = [np.append(org.weights, z) for org in pool for z in all_z]
        reward = GP.predict(weights)
        # Predictions for organism i occupy a contiguous slice of len(all_z)
        for i in xrange(len(pool)):
            pool[i].evals = list(reward[i * len(all_z):(i + 1) * len(all_z)])
    champion = max(pool)
    return champion
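
# Sanity-check sketch (hypothetical helper): score the champion returned by
# find_best() on the actual cliff domain rather than the GP surrogate,
# averaged over the same grid of wind values.
def evaluate_champion(champion):
    all_z = np.linspace(0, max_wind, 10)
    rewards = [cliff(champion.genome, z=[z], verbose=False) for z in all_z]
    return np.mean(rewards)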
def find_best_upper(GP, epochs=100):
    """ Find the policy with the highest upper confidence bound """
    pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), 50, std=8)
    all_z = list(np.linspace(0, max_wind, 10))
    for n in xrange(epochs):
        # Evaluate the initial random pool first, evolve on later epochs
        if n != 0:
            pool = eonn.epoch(pool, len(pool))
        weights = [np.append(org.weights, z) for org in pool for z in all_z]
        reward, MSE = GP.predict(weights, eval_MSE=True)
        # 95% Gaussian upper confidence bound: mean + 1.96 * std
        reward += 1.96 * np.sqrt(MSE)
        for i in xrange(len(pool)):
            pool[i].evals = list(reward[i * len(all_z):(i + 1) * len(all_z)])
    champion = max(pool)
    return champion
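
# The 1.96 factor above is the two-sided 95% z-value of the standard normal.
# A hypothetical generalisation that makes the confidence level explicit:
def upper_bound(reward, MSE, z_value=1.96):
    """ Gaussian upper confidence bound for GP predictions """
    return reward + z_value * np.sqrt(MSE)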