Example #1
 def evolve_policy(self):
     """ Evolve a specialized policy using learned model. """
     pool = spawn(Genome.open(PREFIX + "model.net"), 50)
     feval = functions.Evaluator(self.model)
     self.org = max(eonn.optimize(pool, feval.call, 2500, verbose=False))
     self.org.evals = []
     self.pool.append(self.org)
Example #2
 def evolve_policy(self):
   """ Evolve a specialized policy using learned model. """
   pool = spawn(Genome.open(PREFIX + 'model.net'), 50)
   feval = functions.Evaluator(self.model)
   self.org = max(eonn.optimize(pool, feval.call, 2500, verbose=False))
   self.org.evals = []
   self.pool.append(self.org)
Example #3
 def evolve_policy(self):
     """ Learn a model from flightdata and evolve specialized policies. """
     params = model.estimate_params(self.log.name)
     noise_std = model.estimate_std(self.log.name, params)
     heli = ghh.Helicopter(params, noise_std, 0.1)
     genome = Genome.open(PREFIX + 'baseline.net')
     self.org = functions.evolve(heli, genome, epochs=500)
Example #4
def main():
    """ Main function. """
    pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), 20, std=1)

    # Set evolutionary parameters
    eonn.samplesize = 5  # Sample size used for tournament selection
    eonn.keep = 5  # Nr. of organisms copied to the next generation (elitism)
    eonn.mutate_prob = 0.75  # Probability that offspring is being mutated
    eonn.mutate_frac = 0.2  # Fraction of genes that get mutated
    eonn.mutate_std = 0.1  # Std. dev. of mutation distribution (gaussian)
    eonn.mutate_repl = 0.25  # Probability that a gene gets replaced

    directory = "pics/" + ''.join(rand.sample(letters + digits, 5))
    os.makedirs(directory)
    # Evolve population
    for j in xrange(1, ROUNDS + 1):
        pool = eonn.optimize(pool, cliff, epochs=EPOCHS, evals=EVALS)
        print "AFTER EPOCH", j * EPOCHS
        print "average fitness %.1f" % pool.fitness
        champion = max(pool)
        print "champion fitness %.1f" % champion.fitness
        for i in xrange(10):
            cliff(champion.policy, verbose=True)
        plt.savefig(directory + "/" + str(j * EPOCHS) + ".png")
        plt.clf()
    with open(directory + '/best.net', 'w') as f:
        f.write('%s' % champion.genome)
    print "Done, everything saved in ", directory
Example #5
  def __init__(self):
    """ Initialize agent. """
    self.episode = 0
#    self.org = Organism(Genome.open(PREFIX + 'generic.net'))
    self.org = Organism(Genome.open(random.choice(POLICIES)))
    self.pool = [self.org]
    self.log = open('%s.dat' % ''.join(random.sample(letters + digits, 10)), 'w')
Example #6
def initGP():
    """Do simulations with random pi,z and create GP, X, y"""
    poolsize = 68
    pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), poolsize, std=10)
    X = []
    for i, org in enumerate(pool):
        org.mutate()
        genome = org.genome
        w = genome.weights
        z = [np.random.uniform(0, 0.3)]
        reward = cliff(genome, z)

        while reward <= 0 and len(X) < poolsize / 2:
            #Train input policies to reach the goal.
            org.mutate()
            genome = org.genome
            w = genome.weights
            reward = cliff(genome, z)

        if not len(X):
            X = np.atleast_2d(w + z)
            y = np.atleast_2d([reward])
        else:
            X = np.append(X, [w + z], axis=0)
            y = np.append(y, [reward])

    # Initialize GP with kernel parameters.
    GP = GaussianProcess(theta0=0.1, thetaL=.001, thetaU=1.)

    GP.fit(X, y)

    return GP, X, y
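A minimal sketch of how the GP returned by initGP might be queried for one new (policy weights, wind) input, mirroring the GP.predict calls in the find_best examples below; the single-organism pool and the 0.2 wind value are illustrative assumptions, not part of the original code.

GP, X, y = initGP()
org = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), 1, std=10)[0]
x_new = np.atleast_2d(list(org.weights) + [0.2])      # policy weights followed by wind strength z
reward_pred, mse = GP.predict(x_new, eval_MSE=True)   # predicted reward and its variance
upper_bound = reward_pred + 1.96 * np.sqrt(mse)       # optimistic bound, as used in find_best_upper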
Example #7
def coevo():
    # Create a pool for the policies
    pi_pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), 20, std=8)

    # Create a pool of z's spread over [0, 0.5]; this initialization could probably be improved
    z_list = [[x] for x in np.linspace(0, 0.5, 5)]

    genomes = BasicGenome.from_list(z_list, 5)
    org_list = [Organism(genome) for genome in genomes]
    z_pool = Pool(org_list)
    avg_fitness = []
    champ_fitness = []
    for i in xrange(150):
        pi_pool = eonn.epoch(pi_pool, len(pi_pool))
        z_pool = eonn.epoch(z_pool, len(z_pool))
        for pi_org, z_org in itertools.product(pi_pool, z_pool):
            reward = cliff(pi_org.genome, z=[z_org.weights[0]], verbose=False)
            pi_org.evals.append(reward)
            z_org.evals.append(reward)
        for org in z_pool:
            org.evals = [np.var(org.evals)]

        avg_fitness.append(pi_pool.fitness)
        champion = max(pi_pool)
        champ_fitness.append(champion.fitness)
    return avg_fitness, champ_fitness
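The two fitness traces returned by coevo lend themselves to a quick plot; a small illustrative sketch (the matplotlib import and the output file name are assumptions, not taken from the original project):

import matplotlib.pyplot as plt

avg_fitness, champ_fitness = coevo()
plt.plot(avg_fitness, label='average fitness')       # mean fitness of pi_pool per generation
plt.plot(champ_fitness, label='champion fitness')    # best organism per generation
plt.legend()
plt.savefig('coevo_fitness.png')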
Example #8
 def __init__(self):
     """ Initialize agent. """
     self.episode = 0
     #    self.org = Organism(Genome.open(PREFIX + 'generic.net'))
     self.org = Organism(Genome.open(random.choice(POLICIES)))
     self.pool = [self.org]
     self.log = open("%s.dat" % "".join(random.sample(letters + digits, 10)), "w")
Example #9
 def evolve_policy(self, n=1):
   """ Learn a model from flightdata and evolve specialized policies. """
   params = model.estimate_params(self.log.name)
   noise_std = model.estimate_std(self.log.name, params)
   heli = ghh.Helicopter(params, noise_std, 0.1)
   genome = Genome.open(PREFIX + 'baseline.net')
   for i in range(n):
     champion = functions.evolve(heli, genome, epochs=500)
     champion.evals = list()
     self.pool.append(champion)
Example #10
 def evolve_policy(self, n=1):
     """ Learn a model from flightdata and evolve specialized policies. """
     params = model.estimate_params(self.log.name)
     noise_std = model.estimate_std(self.log.name, params)
     heli = ghh.Helicopter(params, noise_std, 0.1)
     genome = Genome.open(PREFIX + 'baseline.net')
     for i in range(n):
         champion = functions.evolve(heli, genome, epochs=500)
         champion.evals = list()
         self.pool.append(champion)
Example #11
 def learn_genomeIRL(self): #input the reward function
   pool = Pool.spawn(Genome.open('policies/generic.net'), 20)
   # Set evolutionary parameters
   eonnIRL.keep = 15
   eonnIRL.mutate_prob = 0.4
   eonnIRL.mutate_frac = 0.1
   eonnIRL.mutate_std = 0.8
   eonnIRL.mutate_repl = 0.15
   # Evolve population
   pool = eonnIRL.optimize(pool, self.percieved_eval, 400)  # These are imported functions from eonnIRL
   champion = max(pool)
   # Print results
   print '\nerror:', math.exp(1 / self.percieved_eval(champion.policy))
   #print '\ngenome:\n%s' % champion.genome
   return champion.policy
Example #12
 def learnGenomeIRL(theta):  #input the reward function
     pool = Pool.spawn(Genome.open('policies/generic.net'), 20)
     # Set evolutionary parameters
     eonnIRL.keep = 15
     eonnIRL.mutate_prob = 0.4
     eonnIRL.mutate_frac = 0.1
     eonnIRL.mutate_std = 0.8
     eonnIRL.mutate_repl = 0.15
     # Evolve population
     pool = eonnIRL.optimize(pool, hoverIRL, theta)
     champion = max(pool)
     # Print results
     print '\nerror:', math.exp(1 / hover(champion.policy))
     print '\nerror:', math.exp(1 / hoverIRL(champion.policy, theta))
     print '\ngenome:\n%s' % champion.genome
Example #13
 def learnGenomeIRL(theta): #input the reward function
   pool = Pool.spawn(Genome.open('policies/generic.net'), 20)
   # Set evolutionary parameters
   eonnIRL.keep = 15
   eonnIRL.mutate_prob = 0.4
   eonnIRL.mutate_frac = 0.1
   eonnIRL.mutate_std = 0.8
   eonnIRL.mutate_repl = 0.15
   # Evolve population
   pool = eonnIRL.optimize(pool, hoverIRL, theta)
   champion = max(pool)
   # Print results
   print '\nerror:', math.exp(1 / hover(champion.policy))
   print '\nerror:', math.exp(1 / hoverIRL(champion.policy, theta))
   print '\ngenome:\n%s' % champion.genome
Example #14
def main():
  """ Main function. """
  pool = Pool.spawn(Genome.open('mc.net'), 20, std=5.0)
  # Set evolutionary parameters
  eonn.keep = 5
  eonn.mutate_prob = 0.9
  eonn.mutate_frac = 0.2
  eonn.mutate_std = 8.0
  eonn.mutate_repl = 0.1
  # Evolve population
  pool = eonn.optimize(pool, mc)
  champion = max(pool)
  # Print results
  print '\ntrace:'
  mc(champion.policy, verbose=True)
  print '\ngenome:\n%s' % champion.genome
Example #15
def find_best(GP, epochs=100):
    """ Find the best policy in the GP """

    pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), 50, std=8)
    all_z = list(np.linspace(0, max_wind, 10))
    for n in xrange(epochs):
        if n != 0:
            pool = eonn.epoch(pool, len(pool))
        weights = [np.append(org.weights, z) for org in pool for z in all_z]
        reward = GP.predict(weights)
        for i in xrange(len(pool)):
            pool[i].evals = list(reward[i * len(all_z):(i + 1) * len(all_z)])

    champion = max(pool)

    return champion
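Assuming initGP (Example #6) and find_best are defined in the same module, a typical call chain could look like the following; the epoch count is an arbitrary illustrative value:

GP, X, y = initGP()                      # fit the GP on random rollouts
champion = find_best(GP, epochs=50)      # evolve a policy against the GP's predicted reward
print 'expected champion fitness %.1f' % champion.fitness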
Example #16
def main():
	""" Main function. """
	pool = Pool.spawn(Genome.open('mc.net'), 20, std=5.0)
	# Set evolutionary parameters
	eonn.KEEP = 5
	eonn.MUTATE_PROB = 0.9
	eonn.MUTATE_FRAC = 0.2
	eonn.MUTATE_STD = 8.0
	eonn.MUTATE_REPL = 0.1
	# Evolve population
	pool = eonn.optimize(pool, mc)
	champion = max(pool)
	# Print results
	print '\ntrace:'
	mc(champion.policy, verbose=True)
	print '\ngenome:\n%s' % champion.genome
Example #17
def find_best_upper(GP, epochs=100):
    """ Find policy with highest upperbound """

    pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), 50, std=8)
    all_z = list(np.linspace(0, max_wind, 10))
    for n in xrange(epochs):
        if n != 0:
            pool = eonn.epoch(pool, len(pool))
        weights = [np.append(org.weights, z) for org in pool for z in all_z]
        reward, MSE = GP.predict(weights, eval_MSE=True)
        reward += 1.96 * np.sqrt(MSE)
        for i in xrange(len(pool)):
            pool[i].evals = list(reward[i * len(all_z):(i + 1) * len(all_z)])

    champion = max(pool)

    return champion
Example #18
def readPolicies():
    """
		Reads in policies from relative
		path littleWindPath and muchWindPath
	"""
    policies = []
    for ptype in [
            regularPolicies, rarePolicies, GPSmartPolicies, GPExtremePolicies
    ]:

        typePolicies = []
        for policy in os.walk(ptype).next()[1]:
            org = Network(Genome.open(ptype + "/" + policy + "/best.net"))

            typePolicies.append(org)
        policies.append(typePolicies)

    return policies
Example #19
def acquisition(GP, epochs):
    """
		Select the best (pi,z)-pair to evaluate using GP and GA
	"""

    # Create a pool for the policies
    pi_pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), 20, std=8)

    # Create a pool of z's spread over [0, max_wind); this initialization could probably be improved
    z_list = list(itertools.product(np.arange(0, max_wind, 1. / 20)))

    genomes = BasicGenome.from_list(z_list, 20)
    org_list = [Organism(genome) for genome in genomes]
    z_pool = Pool(org_list)

    for _ in xrange(epochs):
        pi_pool, z_pool, x_predict, reward_predict, MSE = do_evolution(
            pi_pool, z_pool, GP)

        # get scores
        reward_predictGrid = np.reshape(reward_predict,
                                        (len(pi_pool), len(z_pool)))

        ub = 1.96 * np.sqrt(MSE)

        ub_predictGrid = np.reshape(ub, (len(pi_pool), len(z_pool)))

        pi_score = score_pi(reward_predictGrid, ub_predictGrid)
        z_score = score_z(reward_predictGrid, ub_predictGrid)

        # add scores to organisms

        add_pi_scores(pi_pool, x_predict, pi_score)
        add_z_scores(z_pool, x_predict, z_score)

    # return current best pi and z
    pi_org = max(pi_pool)
    z_org = max(z_pool)

    return pi_org, z_org
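A hedged sketch of how the (pi, z) pair returned by acquisition might be evaluated on the real environment and folded back into the GP's training data, following the cliff(genome, z) call pattern of Example #7; the variable names and the refit step are assumptions, not part of the original code:

pi_org, z_org = acquisition(GP, epochs=10)
z = [z_org.weights[0]]
reward = cliff(pi_org.genome, z)                       # evaluate the chosen pair for real
X = np.append(X, [list(pi_org.weights) + z], axis=0)   # append the new observation
y = np.append(y, [reward])
GP.fit(X, y)                                           # refit the GP with the extra data point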
Example #20
from eonn.organism import Pool


def xor(policy, verbose=False):
  """ XOR evaluation function. """
  err = 0.0
  input = [(i, j) for i in range(2) for j in range(2)]
  for i in input:
    output = policy.propagate(i, 1)
    err += (output[0] - (i[0] ^ i[1]))**2
    if verbose:
      print i, '-> %.4f' % output[0]
  return 1.0 / err


if __name__ == '__main__':
  pool = Pool.spawn(Genome.open('xor.net'), 30)
  # Set evolutionary parameters
  eonn.keep = 1
  eonn.mutate_prob = 0.9
  eonn.mutate_frac = 0.25
  eonn.mutate_std = 0.8
  eonn.mutate_repl = 0.2
  # Evolve population
  pool = eonn.optimize(pool, xor)
  champion = max(pool)
  # Print results
  print '\noutput:'
  xor(champion.policy, True)
  print '\ngenome:\n%s' % champion.genome
Example #21
File: heli.py Project: afcarl/ATAA
from eonn.genome import Genome
from eonn.organism import Pool
from helicopter.helicopter import Helicopter, XcellTempest


def hover(policy):
    """ Helicopter evaluation function. """
    state, sum_error = heli.reset()
    while not heli.terminal:
        action = policy.propagate(state, 1)
        state, error = heli.update(action)
        sum_error += error
    return 1 / math.log(sum_error)


if __name__ == '__main__':
    heli = Helicopter(XcellTempest.params, XcellTempest.noise_std)
    pool = Pool.spawn(Genome.open('baseline.net'), 20)
    # Set evolutionary parameters
    eonn.keep = 15
    eonn.mutate_prob = 0.9
    eonn.mutate_frac = 0.1
    eonn.mutate_std = 0.8
    eonn.mutate_repl = 0.15
    # Evolve population
    pool = eonn.optimize(pool, hover)
    champion = max(pool)
    # Print results
    print '\nerror:', math.exp(1 / hover(champion.policy))
    print '\ngenome:\n%s' % champion.genome
Example #22
import sys
import random

sys.path.append('/home/koppejan/projects/helicopter')

import functions
from string import letters, digits
from eonn.genome import Genome
from eonn.organism import Organism
from helicopter import ghh, model
from helicopter.quaternion import quaternion_from_orientation

TOKENS = letters + digits
PREFIX = '/home/koppejan/helicopter/helicopter/ghh09/policies/'
POLICIES = [Genome.open(PREFIX + 'mdp%i.net' % i) for i in (3, 6)]


class Agent:
    """ Hybrid agent, a combination of model-free and model-based learning. """
    def __init__(self):
        """ Initialize agent. """
        self.episode = 0
        self.pool = [Organism(genome) for genome in POLICIES]
        self.backup = Organism(Genome.open(PREFIX + 'generic.net'))
        self.log = open('log_%s.txt' % ''.join(random.sample(TOKENS, 10)), 'w')

    def start(self):
        """ Start a new episode """
        self.set_policy()
        self.episode += 1
        self.reward = 0
Example #23
 def __init__(self):
     """ Initialize agent. """
     self.episode = 0
     self.org = Organism(Genome.open(PREFIX + 'generic.net'))
     self.log = open('log_%s.txt' % ''.join(random.sample(TOKENS, 10)), 'w')
Example #24
from eonn.organism import Pool


def xor(policy, verbose=False):
	""" XOR evaluation function. """
	err = 0.0
	input = [(i, j) for i in range(2) for j in range(2)]
	for i in input:
		output = policy.propagate(i, 1)
		err += (output[0] - (i[0] ^ i[1]))**2
		if verbose:
			print i, '-> %.4f (%d)' % (output[0], round(output[0]))
	return 1.0 / err


if __name__ == '__main__':
	pool = Pool.spawn(Genome.open('xor.net'), 30)
	# Set evolutionary parameters
	eonn.KEEP = 1
	eonn.MUTATE_PROB = 0.9
	eonn.MUTATE_FRAC = 0.25
	eonn.MUTATE_STD = 0.8
	eonn.MUTATE_REPL = 0.2
	# Evolve population
	pool = eonn.optimize(pool, xor)
	champion = max(pool)
	# Print results
	print '\noutput:'
	xor(champion.policy, True)
	print '\ngenome:\n%s' % champion.genome
Example #25
 def __init__(self):
   """ Initialize agent. """
   self.episode = 0
   self.pool = [Organism(genome) for genome in POLICIES]
   self.backup = Organism(Genome.open(PREFIX + 'generic.net'))
   self.log = open('log_%s.txt' % ''.join(random.sample(TOKENS, 10)), 'w')
Example #26
from eonn.genome import Genome
from eonn.organism import Organism


POLICIES = [Genome.open('policies/mdp%i.net' % i) for i in range(10)]



class Expert:
  """ Model-Free agent, chooses best policy from a set of pre-trained policies. """
  def __init__(self, policyNumber):
    """ Initialize expert. """
    self.episode = 0
    #self.org = [Genome.open('policies/mdp%i.net' %i)]
    self.pool = POLICIES[policyNumber]
    self.org = Organism(POLICIES[policyNumber])

  def start(self):
    """ Start a new episode """
    self.reward = 0
    self.steps = 0

  def step(self, state, reward):
    """ Choose an action based on the current state. """
    self.steps += 1
    self.reward += reward
    # DEBUG
    action = self.org.policy.propagate(state, 1)
    return action

  def end(self, reward):
Example #27
from eonn.genome import Genome
from eonn.organism import Pool
from helicopter.helicopter import Helicopter, XcellTempest


def hover(policy):
  """ Helicopter evaluation function. """
  state, sum_error = heli.reset()
  while not heli.terminal:
    action = policy.propagate(state, 1)
    state, error = heli.update(action)
    sum_error += error
  return 1 / math.log(sum_error)


if __name__ == '__main__':
  heli = Helicopter(XcellTempest.params, XcellTempest.noise_std)
  pool = Pool.spawn(Genome.open('baseline.net'), 20)
  # Set evolutionary parameters
  eonn.keep = 15
  eonn.mutate_prob = 0.9
  eonn.mutate_frac = 0.1
  eonn.mutate_std = 0.8
  eonn.mutate_repl = 0.15
  # Evolve population
  pool = eonn.optimize(pool, hover)
  champion = max(pool)
  # Print results
  print '\nerror:', math.exp(1 / hover(champion.policy))
  print '\ngenome:\n%s' % champion.genome
Example #28
 def crash_control(self, state):
     if sum([normalize(v, SAFE_LIMITS[i]) for i, v in enumerate(state)]) > 0.15:
         self.crashed = True
         self.org.evals.append(100000)
         self.org = Organism(Genome.open(PREFIX + "baseline.net"))
Example #29
 def crash_control(self, state):
   if sum([normalize(v, SAFE_LIMITS[i]) for i, v in enumerate(state)]) > 0.15:
     self.crashed = True
     self.org.evals.append(100000)
     self.org = Organism(Genome.open(PREFIX + 'baseline.net'))
Example #30
import sys, os
# relative paths required
dir = os.path.dirname(__file__)
prev = os.path.dirname(dir)

two = prev + '/site-packages/'
sys.path.append(prev)
sys.path.append(dir)
sys.path.append(two)
from eonn.genome import Genome
from eonn.organism import Organism
from apprentice import *
import numpy as np
import math

POLICIES = [Genome.open('policies/mdp%i.net' % i) for i in range(10)]
pol = Organism(POLICIES[2])


def normalise_vector(vector):
    return np.divide(vector, np.linalg.norm(vector))


def modify_mu(mu_apprentice, mu_modified, mu_expert):
    factor = np.dot(mu_apprentice[-1] - mu_modified[-1],
                    mu_expert - mu_apprentice[-1]) / np.dot(
                        mu_apprentice[-1] - mu_modified[-1],
                        mu_apprentice[-1] - mu_modified[-1])
    mu_new = mu_modified[-1] + factor * (mu_apprentice[-1] - mu_modified[-1])
    mu_modified.append(mu_new)
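A toy numerical check of modify_mu with made-up two-dimensional feature-expectation vectors (the values are purely illustrative); the call appends the newly computed point to mu_modified:

mu_expert = np.array([1.0, 0.5])
mu_apprentice = [np.array([0.5, 0.5])]
mu_modified = [np.array([0.0, 0.0])]
modify_mu(mu_apprentice, mu_modified, mu_expert)
print mu_modified[-1]    # -> [ 0.25  0.25]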
Example #31
from eonn.genome import Genome
from eonn.organism import Organism


#POLICIES = [Genome.open('policies/mdp%i.net' % i) for i in range(10)]
POLICIES = [Genome.open('policies/mdp0.net')]


class Agent:
  """ Model-Free agent, chooses best policy from a set of pre-trained policies. """
  def __init__(self):
    """ Initialize agent. """
    self.episode = 0
    self.pool = [Organism(genome) for genome in POLICIES]

  def start(self):
    """ Start a new episode """
    self.set_policy()
    self.episode += 1
    self.reward = 0
    self.steps = 0

  def step(self, state, reward):
    """ Choose an action based on the current state. """
    self.steps += 1
    self.reward += reward
    # DEBUG
    action = self.org.policy.propagate(state, 1)
    if self.steps == 1:
      print '%+.10f ' * 12 % tuple(state),
      print '%+.10f ' * 4 % tuple(action)
Example #32
import sys
import random

sys.path.append('/home/koppejan/projects/helicopter')

import functions
from string import letters, digits
from eonn.genome import Genome
from eonn.organism import Organism
from helicopter import ghh, model
from helicopter.quaternion import quaternion_from_orientation

TOKENS = letters + digits
PREFIX = '/home/koppejan/helicopter/helicopter/ghh09/policies/'
POLICIES = [Genome.open(PREFIX + 'mdp%i.net' % i) for i in (3, 6)]


class Agent:
  """ Hybrid agent, a combination of model-free and model-based learning. """
  def __init__(self):
    """ Initialize agent. """
    self.episode = 0
    self.pool = [Organism(genome) for genome in POLICIES]
    self.backup = Organism(Genome.open(PREFIX + 'generic.net'))
    self.log = open('log_%s.txt' % ''.join(random.sample(TOKENS, 10)), 'w')

  def start(self):
    """ Start a new episode """
    self.set_policy()
    self.episode += 1
    self.reward = 0
Example #33
 def __init__(self):
     """ Initialize agent. """
     self.episode = 0
     self.pool = [Organism(genome) for genome in POLICIES]
     self.backup = Organism(Genome.open(PREFIX + 'generic.net'))
     self.log = open('log_%s.txt' % ''.join(random.sample(TOKENS, 10)), 'w')