Exemplo n.º 1
0
def coevo():
    """Co-evolve a pool of policies against a pool of z values.

    Runs 150 generations. In each generation both pools are advanced with
    ``eonn.epoch`` and every (policy, z) pairing is scored with ``cliff``.
    The reward is appended to the evals of both organisms; afterwards each
    z organism's evals are collapsed to a single value, the variance of
    the rewards it collected.

    Returns:
        A tuple ``(avg_fitness, champ_fitness)`` of two lists with one
        entry per generation: ``pi_pool.fitness`` and the fitness of
        ``max(pi_pool)``.
    """
    # Policy population, spawned from the network structure file.
    policy_pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), 20, std=8)

    # z population: five single-element weight vectors evenly spaced over
    # [0, 0.5].
    # NOTE(review): the meaning of the second argument (5) to
    # BasicGenome.from_list is not visible here -- confirm.
    z_seeds = [[value] for value in np.linspace(0, 0.5, 5)]
    z_population = Pool(
        [Organism(genome) for genome in BasicGenome.from_list(z_seeds, 5)]
    )

    avg_fitness, champ_fitness = [], []
    for _ in xrange(150):
        policy_pool = eonn.epoch(policy_pool, len(policy_pool))
        z_population = eonn.epoch(z_population, len(z_population))

        # Score every policy against every z, policies in the outer loop.
        for policy in policy_pool:
            for z_member in z_population:
                outcome = cliff(
                    policy.genome, z=[z_member.weights[0]], verbose=False
                )
                policy.evals.append(outcome)
                z_member.evals.append(outcome)

        # A z organism is rated by how much the rewards it saw vary.
        for z_member in z_population:
            z_member.evals = [np.var(z_member.evals)]

        avg_fitness.append(policy_pool.fitness)
        champ_fitness.append(max(policy_pool).fitness)

    return avg_fitness, champ_fitness
Exemplo n.º 2
0
def do_evolution(pi_pool, z_pool, GP):
    """Advance both pools one epoch and query the GP for every pairing.

    Args:
        pi_pool: Pool of policy organisms; evolved with ``eonn.epoch``.
        z_pool: Pool of z organisms; evolved with ``eonn.epoch``.
        GP: Model exposing ``predict(X, eval_MSE=True)`` that returns a
            ``(prediction, MSE)`` pair.
            NOTE(review): presumably a legacy scikit-learn
            ``GaussianProcess`` -- confirm.

    Returns:
        ``(pi_pool, z_pool, x_predict, reward_predict, MSE)`` where the
        pools are the evolved ones and ``x_predict`` is the list of inputs
        handed to the GP, ordered with policies in the outer loop.
    """
    # Step each population forward, keeping its size unchanged.
    pi_pool = eonn.epoch(pi_pool, len(pi_pool))
    z_pool = eonn.epoch(z_pool, len(z_pool))

    # One GP input per (policy, z) pairing: policy weights followed by the
    # z weights.
    x_predict = []
    for pi_org in pi_pool:
        for z_org in z_pool:
            x_predict.append(np.append(pi_org.weights, z_org.weights))

    # Predicted reward and the accompanying MSE for every pairing.
    reward_predict, MSE = GP.predict(x_predict, eval_MSE=True)

    return pi_pool, z_pool, x_predict, reward_predict, MSE
Exemplo n.º 3
0
def find_best(GP, epochs=100):
    """Evolve a policy pool against the GP's predictions; return the champion.

    A fresh pool of 50 policies is spawned and scored on 10 evenly spaced
    z values in ``[0, max_wind]`` using ``GP.predict`` as the reward
    source. The pool is evolved with ``eonn.epoch`` before every scoring
    round except the first.

    Args:
        GP: Model whose ``predict(X)`` returns one reward per input row.
        epochs: Number of scoring rounds to run.

    Returns:
        ``max(pool)`` after the last round.
    """
    pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), 50, std=8)
    all_z = list(np.linspace(0, max_wind, 10))
    z_count = len(all_z)

    for generation in xrange(epochs):
        # The freshly spawned pool is scored as-is in the first round.
        if generation != 0:
            pool = eonn.epoch(pool, len(pool))

        # Inputs are grouped per organism: z_count consecutive rows each.
        gp_inputs = [
            np.append(member.weights, wind)
            for member in pool
            for wind in all_z
        ]
        predicted = GP.predict(gp_inputs)

        # Give every organism the slice of predictions that belongs to it.
        for position, member in enumerate(pool):
            first = position * z_count
            member.evals = list(predicted[first:first + z_count])

    return max(pool)
Exemplo n.º 4
0
def find_best_upper(GP, epochs=100):
    """Evolve a policy pool on the GP's upper bound; return the champion.

    A fresh pool of 50 policies is spawned and scored on 10 evenly spaced
    z values in ``[0, max_wind]``. The score is the GP prediction plus
    ``1.96 * sqrt(MSE)``. The pool is evolved with ``eonn.epoch`` before
    every scoring round except the first.

    Args:
        GP: Model whose ``predict(X, eval_MSE=True)`` returns a
            ``(prediction, MSE)`` pair.
        epochs: Number of scoring rounds to run.

    Returns:
        ``max(pool)`` after the last round.
    """
    pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), 50, std=8)
    all_z = list(np.linspace(0, max_wind, 10))
    z_count = len(all_z)

    for generation in xrange(epochs):
        # The freshly spawned pool is scored as-is in the first round.
        if generation != 0:
            pool = eonn.epoch(pool, len(pool))

        # Inputs are grouped per organism: z_count consecutive rows each.
        gp_inputs = [
            np.append(member.weights, wind)
            for member in pool
            for wind in all_z
        ]
        bound, mse = GP.predict(gp_inputs, eval_MSE=True)
        # Shift the prediction up, in place, by 1.96 * sqrt(MSE).
        # NOTE(review): 1.96 is presumably the 95% normal quantile -- confirm.
        bound += 1.96 * np.sqrt(mse)

        # Give every organism the slice of bounds that belongs to it.
        for position, member in enumerate(pool):
            first = position * z_count
            member.evals = list(bound[first:first + z_count])

    return max(pool)