def train(self, episodes, maxSteps):
    avgReward = 0

    # set up environment and task
    self.env = InfoMaxEnv(self.objectNames, self.actionNames, self.numCategories)
    self.task = InfoMaxTask(self.env, maxSteps=maxSteps,
                            do_decay_beliefs=True, uniformInitialBeliefs=True)

    # create neural net and learning agent
    self.params = buildNetwork(self.task.outdim, self.task.indim,
                               bias=True, outclass=SoftmaxLayer)

    if self._PGPE:
        self.agent = OptimizationAgent(self.params, PGPE(minimize=False, verbose=False))
    elif self._CMAES:
        self.agent = OptimizationAgent(self.params, CMAES(minimize=False, verbose=False))

    # init and perform experiment
    exp = EpisodicExperiment(self.task, self.agent)
    for i in range(episodes):
        exp.doEpisodes(1)
        avgReward += self.task.getTotalReward()
        print "reward episode ", i, self.task.getTotalReward()

    # print the average reward over all training episodes
    print "\naverage reward over training = ", avgReward / episodes

    # save trained network
    self._saveWeights()

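The _saveWeights helper called at the end of train is not shown in this excerpt. As a hypothetical sketch only (the method body, argument, and file name are assumptions, not the original implementation), it could pickle the trained controller network, much like the autosave loop in the model-directory script later in this section:

import pickle

def _saveWeights(self, filename='trained_policy.pkl'):
    # Hypothetical sketch: persist the trained controller network
    # (self.params holds the buildNetwork result) so it can be reloaded later.
    with open(filename, 'wb') as f:
        pickle.dump(self.params, f)
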
env = None
for runs in range(numbExp):
    # create environment
    # Options: Bool(OpenGL), Bool(Realtime simu. while client is connected),
    #          ServerIP (default: localhost), Port (default: 21560)
    if env != None:
        env.closeSocket()
    env = ShipSteeringEnvironment()
    # create task
    task = GoNorthwardTask(env, maxsteps=500)
    # create controller network
    net = buildNetwork(task.outdim, task.indim, outclass=TanhLayer)
    # create agent with controller and learner (and its options)
    agent = OptimizationAgent(net, PGPE(learningRate=0.3,
                                        sigmaLearningRate=0.15,
                                        momentum=0.0,
                                        epsilon=2.0,
                                        rprop=False,
                                        storeAllEvaluations=True))
    et.agent = agent
    # create experiment
    experiment = EpisodicExperiment(task, agent)
    # Do the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
    et.addExps()
et.showExps()

# To view what the simulation is doing at the moment, create the environment with
# OpenGL enabled (True), go to pybrain/rl/environments/ode/ and start viewer.py
# (python-openGL must be installed, see the PyBrain documentation).

from pybrain.optimization import ExactNES
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.rl.environments.cartpole.balancetask import BalanceTask, CartPoleEnvironment
from pybrain.rl.agents import OptimizationAgent
from pybrain.tools.shortcuts import buildNetwork
from pybrain.tools.example_tools import ExTools

batch = 2     # number of samples per learning step
prnts = 100   # number of learning steps after results are printed
epis = 4000 / batch / prnts   # number of rollouts
numbExp = 10  # number of experiments
et = ExTools(batch, prnts)    # tool for printing and plotting

for runs in range(numbExp):
    # create environment
    env = CartPoleEnvironment()
    # create task
    task = BalanceTask(env, 200, desiredValue=None)
    # create controller network
    net = buildNetwork(4, 1, bias=False)
    # create agent with controller and learner (and its options)
    agent = OptimizationAgent(net, ExactNES(storeAllEvaluations=True))
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)
    # Do the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        print "Epsilon : ", agent.learner.sigma
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
    et.addExps()
et.showExps()

hiddenUnits = 4
batch = 2     # number of samples per learning step
prnts = 1     # number of learning steps after results are printed
epis = 5000000 / batch / prnts   # number of rollouts
numbExp = 10  # number of experiments
et = ExTools(batch, prnts)       # tool for printing and plotting

for runs in range(numbExp):
    # create environment
    # Options: Bool(OpenGL), Bool(Realtime simu. while client is connected),
    #          ServerIP (default: localhost), Port (default: 21560)
    env = FlexCubeEnvironment()
    # create task
    task = WalkTask(env)
    # create controller network
    net = buildNetwork(len(task.getObservation()), hiddenUnits, env.actLen, outclass=TanhLayer)
    # create agent with controller and learner (and its options)
    agent = OptimizationAgent(net, SimpleSPSA(storeAllEvaluations=True))
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)
    # Do the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
    et.addExps()
et.showExps()

# To view what the simulation is doing at the moment, go to
# pybrain/rl/environments/flexcube/ and start renderer.py
# (python-openGL must be installed).

# -*- coding: utf-8 -*-
"""
Created on Thu Mar 7 20:41:24 2013

Just to try and get things working with a GA

@author: david
"""
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.parametercontainer import ParameterContainer
from pybrain.rl.environments.functions.unimodal import TabletFunction
from pybrain.rl.environments.cartpole.balancetask import BalanceTask, CartPoleEnvironment
from pybrain.optimization import GA
from pybrain.rl.agents import LearningAgent, OptimizationAgent

environment = CartPoleEnvironment()
task = BalanceTask()
nn = buildNetwork(task.outdim, 6, task.indim)
learning_agent = OptimizationAgent(nn, GA())

def main():
    if len(sys.argv) != 2:
        print 'Please provide a path to a model data directory.'
        print ('The script will load the newest model data from the directory, '
               'then continue to improve that model')
        sys.exit(0)

    model_directory = sys.argv[1]
    existing_models = sorted(glob(os.path.join(model_directory, '*.rlmdl')))

    if existing_models:
        newest_model_name = existing_models[-1]
        iteration_count = int(newest_model_name[-12:-6]) + 1
        print 'Loading model {}'.format(newest_model_name)
        newest_model = open(newest_model_name, 'r')
        agent = pickle.load(newest_model)
    else:
        net = buildNetwork(Environment.outdim,
                           Environment.outdim + Environment.indim,
                           Environment.indim)
        agent = OptimizationAgent(net, PGPE())
        iteration_count = 1

    environment = Environment(LOCAL_HOST, PORT, PATH_TO_SCENE)
    task = Task(environment)
    experiment = EpisodicExperiment(task, agent)

    def signal_handler(signal, frame):
        print 'Exiting gracefully'
        environment.teardown()
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)

    while True:
        time.sleep(1)
        print '>>>>> Running iteration {}'.format(iteration_count)

        # NOTE: this is hacky, but it is needed to hook in the autosave logic below.
        # Raising maxEvaluations by one batch makes each doEpisodes() call run exactly
        # one optimization step, so control returns here between steps.
        experiment.optimizer.maxEvaluations = \
            experiment.optimizer.numEvaluations + experiment.optimizer.batchSize

        try:
            experiment.doEpisodes()
        except Exception as e:
            print 'ERROR RUNNING SIMULATION: \n{}'.format(e)
            environment.teardown()
        else:
            if iteration_count % AUTOSAVE_INTERVAL == 0:
                filename = str(iteration_count).zfill(6) + '.rlmdl'
                filename = os.path.join(model_directory, filename)
                f = open(filename, 'w+')
                print 'Saving model to {}'.format(filename)
                pickle.dump(agent, f)

            iteration_count += 1
            print 'Iteration finished <<<<<'

from pybrain.rl.environments.cartpole.balancetask import BalanceTask
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.agents import OptimizationAgent
from pybrain.optimization import HillClimber, CMAES
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.utilities import fListToString

# any episodic task
task = BalanceTask()
# any neural network controller
net = buildNetwork(task.outdim, 1, task.indim)
# any optimization algorithm to be plugged in, for example:
# learner = CMAES(storeAllEvaluations = True)
# or:
learner = HillClimber(storeAllEvaluations=True)

# in a non-optimization case the agent would be a LearningAgent:
# agent = LearningAgent(net, ENAC())
# here it is an OptimizationAgent:
agent = OptimizationAgent(net, learner)

# the agent and task are linked in an Experiment,
# and everything else happens under the hood.
exp = EpisodicExperiment(task, agent)
exp.doEpisodes(100)

print('Episodes learned from:', len(learner._allEvaluations))
n, fit = learner._bestFound()
print('Best fitness found:', fit)
print('with this network:')
print(n)
print('containing these parameters:')
print(fListToString(n.params, 4))

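The comments in the snippet above mention the non-optimization alternative, where the controller is wrapped in a LearningAgent with a policy-gradient learner such as ENAC. A minimal sketch of that variant, assuming the same BalanceTask and network as above (the explicit learn/reset loop is the usual PyBrain pattern for a LearningAgent and is not part of the original snippet):

from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import ENAC

# same task and controller network as above
agent = LearningAgent(net, ENAC())
exp = EpisodicExperiment(task, agent)
for _ in range(100):
    exp.doEpisodes(1)   # collect one episode of experience
    agent.learn()       # update the policy from the collected episode
    agent.reset()       # clear the stored episode history
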
numbExp = 10                # number of experiments
et = ExTools(batch, prnts)  # tool for printing and plotting

env = None
for runs in range(numbExp):
    # create environment
    # Options: Bool(OpenGL), Bool(Realtime simu. while client is connected),
    #          ServerIP (default: localhost), Port (default: 21560)
    if env != None:
        env.closeSocket()
    env = JohnnieEnvironment()
    # create task
    task = StandingTask(env)
    # create controller network
    net = buildNetwork(len(task.getObservation()), hiddenUnits, env.actLen, outclass=TanhLayer)
    # create agent with controller and learner (and its options)
    agent = OptimizationAgent(net, PGPE(storeAllEvaluations=True))
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)
    # Do the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
    et.addExps()
et.showExps()

# To view what the simulation is doing at the moment, go to
# pybrain/rl/environments/ode/ and start viewer.py
# (python-openGL must be installed, see the PyBrain documentation).

from pybrain.rl.experiments import EpisodicExperiment
from pybrain.rl.environments.cartpole.balancetask import BalanceTask, CartPoleEnvironment
from pybrain.rl.agents import OptimizationAgent
from pybrain.optimization import PGPE, ExactNES, FEM, CMAES
from pybrain.tools.shortcuts import buildNetwork
from pybrain.tools.example_tools import ExTools

batch = 2                         # number of samples per learning step
prnts = 100                       # number of learning steps after results are printed
epis = int(4000 / batch / prnts)  # number of rollouts
numbExp = 40                      # number of experiments
et = ExTools(batch, prnts)        # tool for printing and plotting

expList = ["PGPE(storeAllEvaluations = True)",
           "ExactNES(storeAllEvaluations = True)",
           "FEM(storeAllEvaluations = True)",
           "CMAES(storeAllEvaluations = True)"]

for e in expList:
    for runs in range(numbExp):
        # create environment
        env = CartPoleEnvironment()
        # create task
        task = BalanceTask(env, 200, desiredValue=None)
        # create controller network
        net = buildNetwork(4, 1, bias=False)
        # create agent with controller and learner (and its options)
        agent = OptimizationAgent(net, eval(e))
        et.agent = agent
        # create the experiment
        experiment = EpisodicExperiment(task, agent)
        # Do the experiment
        for updates in range(epis):
            for i in range(prnts):
                experiment.doEpisodes(batch)
            et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
        et.addExps()
    et.nextExps()
et.showExps()

from pybrain.rl.environments.cartpole.balancetask import BalanceTask
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.agents import OptimizationAgent
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.optimization import HillClimber

task = BalanceTask()
net = buildNetwork(task.outdim, 3, task.indim)

# optimize the network parameters directly on the task
HillClimber(task, net, maxEvaluations=100).learn()

# the same optimizer can also be wrapped in an agent and run as an episodic experiment
agent = OptimizationAgent(net, HillClimber())
exp = EpisodicExperiment(task, agent)
print(exp.doEpisodes(100))

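Because the HillClimber in the last three lines is created inline, its results cannot be inspected once the episodes have run. A small variation on the same setup (using the storeAllEvaluations option and the _bestFound accessor that appear in the documentation snippet earlier in this section) keeps a reference to the learner so the best parameters found can be read back afterwards:

learner = HillClimber(storeAllEvaluations=True)
agent = OptimizationAgent(net, learner)
exp = EpisodicExperiment(task, agent)
exp.doEpisodes(100)

# inspect the optimization results afterwards
best_net, best_fitness = learner._bestFound()
print(best_fitness)
print(len(learner._allEvaluations))
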
from pybrain.rl.agents import OptimizationAgent
from pybrain.optimization import CMAES
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.rl.environments.cartpole.balancetask import BalanceTask, CartPoleEnvironment
from pybrain.tools.shortcuts import buildNetwork
from pybrain.tools.example_tools import ExTools

batch = 2                         # number of samples per learning step
prnts = 100                       # number of learning steps after results are printed
epis = int(4000 / batch / prnts)  # number of rollouts
numbExp = 10                      # number of experiments
et = ExTools(batch, prnts)        # tool for printing and plotting

for runs in range(numbExp):
    # create environment
    env = CartPoleEnvironment()
    # create task
    task = BalanceTask(env, 200, desiredValue=None)
    # create controller network
    net = buildNetwork(4, 1, bias=False)
    # create agent with controller and learner (and its options)
    agent = OptimizationAgent(net, CMAES(storeAllEvaluations=True))
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)
    # Do the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
    et.addExps()
et.showExps()

def run_experiment():
    # Create the controller network
    HIDDEN_NODES = 4

    RUNS = 2
    BATCHES = 1
    PRINTS = 1
    EPISODES = 500

    env = None
    start_state_net = None
    run_results = []

    # Set up plotting tools for the experiments
    tools = ExTools(BATCHES, PRINTS)

    # Run the experiment
    for run in range(RUNS):
        if run == 0:
            continue

        # If an environment already exists, shut it down
        if env:
            env.closeSocket()

        # Create the environment
        env = create_environment()

        # Create the task
        task = Pa10MovementTask(env)

        # Create the neural network. Only create the network once so it retains
        # the same starting values for each run.
        if start_state_net:
            net = start_state_net.copy()
        else:
            # Create the initial neural network
            net = create_network(
                in_nodes=env.obsLen,
                hidden_nodes=HIDDEN_NODES,
                out_nodes=env.actLen
            )
            start_state_net = net.copy()

        # Create the learning agent
        learner = HillClimber(storeAllEvaluations=True)
        agent = OptimizationAgent(net, learner)
        tools.agent = agent

        # Create the experiment
        experiment = EpisodicExperiment(task, agent)

        # Perform all episodes in the run
        for episode in range(EPISODES):
            experiment.doEpisodes(BATCHES)

        # Calculate results
        all_results = agent.learner._allEvaluations
        max_result = np.max(all_results)
        min_result = np.min(all_results)
        avg_result = np.sum(all_results) / len(all_results)
        run_results.append((run, max_result, min_result, avg_result))

        # Make the results directory if it does not exist
        if not os.path.exists(G_RESULTS_DIR):
            os.mkdir(G_RESULTS_DIR)

        # Write all results to the results file
        with open(os.path.join(G_RESULTS_DIR, 'run_%d.txt' % run), 'w+') as f:
            # Store the calculated max, min, avg
            f.write('RUN, MAX, MIN, AVG\n')
            f.write('%d, %f, %f, %f\n' % (run, max_result, min_result, avg_result))

            # Store all results from this run
            f.write('EPISODE, REWARD\n')
            for episode, result in enumerate(all_results):
                f.write('%d, %f\n' % (episode, result))

    return

batch = 2     # number of samples per learning step
prnts = 1     # number of learning steps after results are printed
epis = 4000 / batch / prnts   # number of rollouts
numbExp = 10  # number of experiments
et = ExTools(batch, prnts)    # tool for printing and plotting

for runs in range(numbExp):
    # create environment
    # Options: Bool(OpenGL), Bool(Realtime simu. while client is connected),
    #          ServerIP (default: localhost), Port (default: 21560)
    env = AcrobotEnvironment()
    # create task
    task = GradualRewardTask(env)
    # create controller network
    net = buildNetwork(len(task.getObservation()), env.actLen, outclass=TanhLayer)
    # create agent with controller and learner (and its options)
    agent = OptimizationAgent(net, FiniteDifferences(storeAllEvaluations=True))
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)
    # Do the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
    et.addExps()
et.showExps()

# To view what the simulation is doing at the moment, go to
# pybrain/rl/environments/ode/ and start viewer.py
# (python-openGL must be installed, see the PyBrain documentation).

best_params = []
best_reward = -1000

num_cpus = cpu_count()
rospy.loginfo('Using %d CPUs for experiments', num_cpus)

exp_desciptions = []
for i in range(num_experiments):
    # set up environment, task, neural net, agent, and experiment
    env = InfoMaxEnv(object_names, action_names, num_objects, False)
    task = InfoMaxTask(env, max_steps=max_steps)
    net = buildNetwork(task.outdim, task.indim, bias=True, outclass=SoftmaxLayer)

    if algorithm == 'pgpe':
        agent = OptimizationAgent(net, PGPE(storeAllEvaluations=True,
                                            minimize=False,
                                            verbose=False))
    elif algorithm == 'cmaes':
        agent = OptimizationAgent(net, CMAES(minimize=False, verbose=False))

    experiment = EpisodicExperiment(task, agent)
    exp_desciptions.append([i, agent, experiment, task])

# run the experiments: in parallel for PGPE, sequentially for CMA-ES
pool = Pool(processes=num_cpus)
res = []
if algorithm == 'pgpe':
    res = pool.map(run_experiment, exp_desciptions)
elif algorithm == 'cmaes':
    for desc in exp_desciptions:
        res.append(run_experiment(desc))