class HillClimbingOptimizer():

    def learn_nnet(self, num_restarts):
        self.learner = NeuralNetLearner()
        self.neural_net = self.learner.network
        self.dataset = self.learner.ds
        self.training_set, self.testing_set = self.learner.get_datasets()

        # The first optimizer pass takes 2000 steps; each restart below takes
        # 1000 steps, keeping the best model found across all restarts
        self.optimizer = HillClimber(self.training_set.evaluateModuleMSE, self.neural_net, minimize=True,
                verbose=True, numParameters=661, maxLearningSteps=2000, storeAllEvaluations=True)

        # Save best model and lowest MSE for random restarting
        best_model = self.neural_net
        min_MSE = float('inf')

        for i in range(num_restarts):
            temp, best_estimate = self.optimizer.learn()

            with open('out/nnet_hc_evaluations.csv', 'a') as nnet_hc_evaluations_file:
                for item in self.optimizer._allEvaluations:
                    nnet_hc_evaluations_file.write("%s\n" % item)
                nnet_hc_evaluations_file.write("Restart %d\n" % i)

            self.optimizer = HillClimber(self.training_set.evaluateModuleMSE, self.neural_net, minimize=True,
                    verbose=True, numParameters=661, maxLearningSteps=1000, storeAllEvaluations=True)
            if best_estimate <= min_MSE:
                best_model = temp
                min_MSE = best_estimate

        self.neural_net = best_model

        return best_model

    def learn_optimizationproblem(self, num_restarts, problem, fitness_function, minimize=False):
        # Optimizer will take 250 steps and restart, saving the best model from the restarts
        self.optimizer = HillClimber(fitness_function, problem, verbose=True,
                maxLearningSteps=250, minimize=minimize, storeAllEvaluations=True)
        best_model = problem
        max_fitness = float('-inf')

        for i in range(num_restarts):
            print("Restart", i)
            temp, best_estimate = self.optimizer.learn()

            out_name = 'out/opt_hc_evaluations_' + problem.__class__.__name__ + '.csv'
            with open(out_name, 'a') as opt_hc_evaluations_file:
                for item in self.optimizer._allEvaluations:
                    opt_hc_evaluations_file.write("%s\n" % item)
                opt_hc_evaluations_file.write("Restart %d\n" % i)

            self.optimizer = HillClimber(fitness_function, problem, verbose=True,
                    maxLearningSteps=250, minimize=minimize, storeAllEvaluations=True)
            if best_estimate >= max_fitness:
                best_model = temp
                max_fitness = best_estimate

        return best_model
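A minimal driver sketch for the class above. All names below are illustrative rather than part of the original code; the Evolvable subclass mirrors the SimpleEvo examples further down, HillClimber and HillClimbingOptimizer are assumed importable in scope, and the out/ directory must exist for the CSV logging.

from random import random
from pybrain.structure.evolvables.evolvable import Evolvable

# Hypothetical one-dimensional problem: hill-climb x toward larger values.
class Point(Evolvable):
    def __init__(self, x):
        self.x = x
    def mutate(self):
        self.x += random() - 0.5
    def copy(self):
        return Point(self.x)
    def randomize(self):
        self.x = 20 * random()

opt = HillClimbingOptimizer()
best = opt.learn_optimizationproblem(num_restarts=3, problem=Point(0.0),
                                     fitness_function=lambda p: p.x)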
Example No. 3
import sys

def generate(start):
    u = Universe()

    def fitness(f):
        return u.do_simulation(f, False)

    l = HillClimber(fitness, start, maxEvaluations=int(sys.argv[1]))
    best, fitn = l.learn()
    print(f"fitness: {fitn}")
    #u.do_simulation(u.bestfaller, noStop=False)
    u.record.finish()
Example No. 5
# from pybrain.rl.learners.continuous.policygradients import ENAC
# from pybrain.rl.agents.learning import LearningAgent
from pybrain.rl.environments.cartpole.balancetask import BalanceTask
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.agents import OptimizationAgent
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.optimization import HillClimber


# any episodic task
task = BalanceTask()

# any neural network controller
net = buildNetwork(task.outdim, 1, task.indim)

# any optimization algorithm to be plugged in, for example:
# learner = CMAES(storeAllEvaluations = True)
# or:
learner = HillClimber(storeAllEvaluations = True)

# in a non-optimization case the agent would be a LearningAgent:
# agent = LearningAgent(net, ENAC())
# here it is an OptimizationAgent:
agent = OptimizationAgent(net, learner)

# the agent and task are linked in an Experiment
# and everything else happens under the hood.
exp = EpisodicExperiment(task, agent)
exp.doEpisodes(100)

print('Episodes learned from:', len(learner._allEvaluations))
n, fit = learner._bestFound()
print('Best fitness found:', fit)
print('with this network:')
print(n)
Example No. 6
from random import random
from pybrain.structure.evolvables.evolvable import Evolvable
from pybrain.optimization import HillClimber


class SimpleEvo(Evolvable):
    def __init__(self, x):
        self.x = max(0, min(x, 20))

    def mutate(self):
        self.x = max(0, min(self.x + random() - 0.3, 20))

    def copy(self):
        return SimpleEvo(self.x)

    def randomize(self):
        self.x = 20 * random()

    def __repr__(self):
        return '<-%.2f->' % (self.x)


if __name__ == "__main__":
    x0 = SimpleEvo(1.2)
    l = HillClimber(lambda x: x.x, x0, maxEvaluations=500)
    result = l.learn()
    print(result)
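Because HillClimber maximizes by default, the run above pushes x toward its upper bound of 20. A minimal variation (same SimpleEvo class as above; the minimize flag is the one already used in the optimizer class earlier on this page) that minimizes instead:

x0 = SimpleEvo(15.0)
l = HillClimber(lambda x: x.x, x0, minimize=True, maxEvaluations=500)
best, best_fitness = l.learn()
print(best, best_fitness)  # x should approach the lower bound of 0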
Example No. 7
from pybrain.optimization import HillClimber, CMAES  #@UnusedImport
# from pybrain.rl.learners.continuous.policygradients import ENAC
# from pybrain.rl.agents.learning import LearningAgent
from pybrain.rl.environments.cartpole.balancetask import BalanceTask
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.agents import OptimizationAgent
from pybrain.rl.experiments import EpisodicExperiment

# any episodic task
task = BalanceTask()

# any neural network controller
net = buildNetwork(task.outdim, 1, task.indim)

# any optimization algorithm to be plugged in, for example:
# learner = CMAES(storeAllEvaluations = True)
# or:
learner = HillClimber(storeAllEvaluations=True)

# in a non-optimization case the agent would be a LearningAgent:
# agent = LearningAgent(net, ENAC())
# here it is an OptimizationAgent:
agent = OptimizationAgent(net, learner)

# the agent and task are linked in an Experiment
# and everything else happens under the hood.
exp = EpisodicExperiment(task, agent)
exp.doEpisodes(100)

print('Episodes learned from:', len(learner._allEvaluations))
n, fit = learner._bestFound()
print('Best fitness found:', fit)
print('with this network:')
print(n)
Example No. 8
		error = np.log2(2048 - game.max_block)

		print('score', game.score)
		print('max block', game.max_block)
		print('error', error)
		print()

		return error

	hc_params = []
	for m in nnet.connections.values():
		for c in m:
			hc_params.extend(c.params) #extend concatenates 2 arrays

	opt = HillClimber(checknn, hc_params)
	opt.minimize = True
	opt.maxEvaluations = 100000
	opt.learn()

NetworkWriter.writeToFile(nnet, filename)

# r = 15
# xvalues = np.arange(-r, r, 0.1)
# yvalues = [nnet.activate([x]) for x in xvalues]
#
# plot.figure(0)
# plot.plot(xvalues, yvalues)
# xvalues = np.arange(-r, r, 0.1)
# yvalues = [f(x) for x in xvalues]
# plot.plot(xvalues, yvalues)
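A side note on the parameter flattening above: PyBrain parameter containers expose their concatenated weights directly, so under a standard buildNetwork setup the loop over nnet.connections is roughly equivalent to the sketch below (the ordering of the flattened vector may differ, which does not matter for a hill-climbing starting point):

# Equivalent starting candidate for the hill climber, assuming nnet is a
# standard PyBrain network: .params is the flat array of all its weights.
hc_params = nnet.params.copy()
opt = HillClimber(checknn, hc_params)
opt.minimize = True
opt.maxEvaluations = 100000
opt.learn()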
Example No. 9
from pybrain.rl.environments.cartpole.balancetask import BalanceTask
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.agents import OptimizationAgent
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.optimization import HillClimber

task = BalanceTask()
net = buildNetwork(task.outdim, 3, task.indim)
HillClimber(task, net, maxEvaluations=100).learn()
agent = OptimizationAgent(net, HillClimber())
exp = EpisodicExperiment(task, agent)
print(exp.doEpisodes(100))
Example No. 10
from random import random
from pybrain.structure.evolvables.evolvable import Evolvable
from pybrain.optimization import HillClimber


class SimpleEvo(Evolvable):
    def __init__(self, x):
        self.x = max(0, min(x, 10))

    def mutate(self):
        self.x = max(0, min(self.x + random() - 0.3, 10))

    def copy(self):
        return SimpleEvo(self.x)

    def randomize(self):
        self.x = 10 * random()

    def __repr__(self):
        return "<-%.2f->" + str(self.x)


x0 = SimpleEvo(5)
l = HillClimber(lambda x: x.x, x0, maxEvaluations=50)
print(l.learn())
Example No. 11
    import random
    from operator import itemgetter

    target = PolyEvolve()
    target.randomize(n=nCoeffs)

    # Target polynomial: 5x^3 - 3x^2 + x + 1
    func = lambda x: 5 * (x ** 3) - 3 * (x ** 2) + 1 * (x ** 1) + 1 * (x ** 0)
    point = lambda x: (x, func(x))
    data = sorted([point(random.random() * 200 - 100) for _ in range(25)], key=itemgetter(0))


    from pybrain.optimization import HillClimber

    seed = PolyEvolve()
    seed.randomize(n=nCoeffs)

    maxIters = 10000
    L = HillClimber(lambda x: x.fitness(data), seed, maxEvaluations=maxIters)

    result, fitness = L.learn()

    fmt = '{:>12}{:>24}{:>24}'
    s = fmt.format('X', 'Y', 'V')
    fmt = '{:>12.2f}{:>24.2f}{:>24.2f}'
    print('-' * len(s))
    print(s)
    print('-' * len(s))

    for x, y in data:
        v = result.eval(x)
        print(fmt.format(x, y, v))

    print('-' * len(s))
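PolyEvolve itself is not shown in this snippet. A plausible minimal implementation, assuming coefficients stored highest degree first, mutation by small Gaussian perturbation, and fitness as the negated sum of squared errors over the sampled points (negated because HillClimber maximizes by default here); this is a sketch, not the original class:

import random

from pybrain.structure.evolvables.evolvable import Evolvable

class PolyEvolve(Evolvable):
    def __init__(self, coeffs=None):
        self.coeffs = list(coeffs) if coeffs else []

    def randomize(self, n=None):
        n = len(self.coeffs) if n is None else n
        self.coeffs = [random.uniform(-10, 10) for _ in range(n)]

    def mutate(self):
        i = random.randrange(len(self.coeffs))
        self.coeffs[i] += random.gauss(0, 1)

    def copy(self):
        return PolyEvolve(self.coeffs)

    def eval(self, x):
        # Horner's rule, highest-degree coefficient first
        y = 0.0
        for c in self.coeffs:
            y = y * x + c
        return y

    def fitness(self, data):
        return -sum((y - self.eval(x)) ** 2 for x, y in data)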
Example No. 12
def run_experiment():
    # Create the controller network
    HIDDEN_NODES = 4

    RUNS = 2
    BATCHES = 1
    PRINTS = 1
    EPISODES = 500

    env = None
    start_state_net = None

    run_results = []

    # Set up plotting tools for the experiments
    tools = ExTools(BATCHES, PRINTS)

    # Run the experiment
    for run in range(RUNS):
        if run == 0:
            continue

        # If an environment already exists, shut it down
        if env:
            env.closeSocket()

        # Create the environment
        env = create_environment()

        # Create the task
        task = Pa10MovementTask(env)

        # Create the neural network. Only create the network once so it retains
        # the same starting values for each run.
        if start_state_net:
            net = start_state_net.copy()
        else:
            # Create the initial neural network
            net = create_network(
                    in_nodes=env.obsLen,
                    hidden_nodes=HIDDEN_NODES,
                    out_nodes=env.actLen
            )
            start_state_net = net.copy()

        # Create the learning agent
        learner = HillClimber(storeAllEvaluations=True)
        agent = OptimizationAgent(net, learner)
        tools.agent = agent

        # Create the experiment
        experiment = EpisodicExperiment(task, agent)

        # Perform all episodes in the run
        for episode in range(EPISODES):
            experiment.doEpisodes(BATCHES)

        # Calculate results
        all_results = agent.learner._allEvaluations
        max_result = np.max(all_results)
        min_result = np.min(all_results)
        avg_result = np.sum(all_results) / len(all_results)
        run_results.append((run, max_result, min_result, avg_result))

        # Make the results directory if it does not exist
        if not os.path.exists(G_RESULTS_DIR):
            os.mkdir(G_RESULTS_DIR)

        # Write all results to the results file
        with open(os.path.join(G_RESULTS_DIR, 'run_%d.txt' % run), 'w+') as f:
            # Store the calculated max, min, avg
            f.write('RUN, MAX, MIN, AVG\n')
            f.write('%d, %f, %f, %f\n' % (run, max_result, min_result, avg_result))

            # Store all results from this run
            f.write('EPISODE, REWARD\n')
            for episode, result in enumerate(all_results):
                f.write('%d, %f\n' % (episode, result))

    return
Example No. 13
MAX_STEPS = 1200

#all_evals = []
#reward_avgs = [] # keep track of the average fitness per trial
#reward_maxes = [] # keep track of the maximum fitness per trial
#movement = [] # keep track of the seeker's movement
#food_loc = [] # keep track of the location of the food

# pybrain initialization
task = ChemotaxisTask(ChemotaxisEnv(), MAX_STEPS)
module = buildNetwork(2,2,2) # create a feed-forward neural network with 3 layers: 2 input neurons, 2 hidden neurons, and 2 output neurons
#learner = HillClimber(minimize=True, storeAllEvaluations=True, verbose=False)
#agent = OptimizationAgent(module, learner)
#exp = EpisodicExperiment(task, agent)
#exp.doEpisodes(MAX_TRIALS)
learner = HillClimber(task, module, maxEvaluations=MAX_TRIALS, mustMinimize=True,
                      storeAllEvaluations=True, storeAllEvaluated=True, verbose=False)
learner.learn()
# _allEvaluations is a list of the sum of rewards for each trial, i.e. the fitness of each trial's network
# _allEvaluated is a list of the networks for each trial
#for network in learner._allEvaluated:
#    print network.params
reward_avgs = [e/MAX_STEPS for e in learner._allEvaluations]
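Since _bestFound() is available on PyBrain optimizers (the BalanceTask examples above use it), the best controller found can also be pulled back out after learning; a short follow-up sketch:

best_net, best_fitness = learner._bestFound()
print('best average reward per step:', best_fitness / MAX_STEPS)
print('best parameters:', best_net.params)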

"""
for i in range(0, MAX_TRIALS):
    exp.doInteractions(MAX_STEPS)
    agent.learn()
    
    print exp.agent.learner.module.params
    
    total_reward = agent.history.getSumOverSequences("reward")[0][0]