agent.learner.gd.alpha = 0.3        #step size of \mu adaption
agent.learner.gdSig.alpha = 0.15    #step size of \sigma adaption
agent.learner.gd.momentum = 0.0

batch = 2           #number of samples per gradient estimate (was: 2; more here due to stochastic setting)

#create experiment
experiment = EpisodicExperiment(task, agent)
prnts = 1           #frequency of console output
epis = 2000 / batch / prnts     #actual roll outs

filename = "dataSPLA08NoRew" + repr(int(random.random() * 1000000.0)) + ".dat"
wf = open(filename, 'wb')

for updates in range(epis):
    for i in range(prnts):
        experiment.doEpisodes(batch)    #execute batch episodes
        agent.learn()                   #learn from the gathered experience
        agent.reset()                   #reset agent and environment

    #print out related data
    stp = (updates + 1) * batch * prnts
    print "Step: ", runs, "/", stp, "Best: ", agent.learner.best, "Base: ", agent.learner.baseline, "Reward: ", agent.learner.reward
    wf.write(repr(stp) + "\n")
    wf.write(repr(agent.learner.baseline[0]) + "\n")
    if useGraphics:
        pl.addData(0, float(stp), agent.learner.baseline)
        pl.addData(1, float(stp), agent.learner.best)
        pl.update()
    #if updates / 100 == float(updates) / 100.0:
    #    saveWeights("walk.wgt", agent.learner.original)

wf.close()
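Each update appends two lines to the .dat log: the cumulative episode count and the current baseline reward. The following is a minimal sketch of how such a log could be read back and plotted after a run; the two-lines-per-update layout comes from the listing above, while readLog, the example file name, and the matplotlib usage are assumptions for illustration.

#Hypothetical post-processing of a log written by the listing above:
#lines alternate between the episode count and the baseline reward.
import matplotlib.pyplot as plt

def readLog(fname):
    steps, baselines = [], []
    f = open(fname)
    lines = [l.strip() for l in f if l.strip()]
    f.close()
    for stp, base in zip(lines[0::2], lines[1::2]):
        steps.append(float(stp))
        baselines.append(float(base))
    return steps, baselines

steps, baselines = readLog("dataSPLA08NoRew123456.dat")  #example file name
plt.plot(steps, baselines)
plt.xlabel("episodes")
plt.ylabel("baseline reward")
plt.show()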
agent.learner.gdSig.alpha = 0.1
agent.learner.gd.momentum = 0.9
agent.learner.epsilon = 6.0
agent.learner.initSigmas()
# agent.learner.rprop = True

experiment = EpisodicExperiment(task, agent)
batch = 16
prnts = 10
epis = 50000 / batch / prnts
save = False
rl = []

for updates in range(epis):
    for i in range(prnts):
        experiment.doEpisodes(batch)
        agent.learn()
        agent.reset()

    print "Parameters: ", agent.learner.original
    print "Epsilon   : ", agent.learner.sigList
    print "Step: ", runs, "/", (updates + 1) * batch * prnts, "Best: ", agent.learner.best, "Base: ", agent.learner.baseline, "Reward: ", agent.learner.reward
    print ""
    rl.append(float(agent.learner.baseline))

if save:
    fnStart = "dataCartMom"
    fnExp = repr(int(agent.learner.gd.alpha * 100)) + "m" + repr(int(agent.learner.gdSig.alpha * 100)) + "s" + repr(batch / 2) + "b" + repr(int(agent.learner.epsilon * 10)) + "e"
    fnIdent = "SPLA" + repr(int(random.random() * 1000000.0))
    filename = fnStart + fnExp + fnIdent + ".dat"
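The listing stops right after assembling filename, so the original continuation of the if save: branch is not shown. As a hedged sketch only, and not the original code, the collected baseline curve rl could be pickled to that file so runs with different hyper-parameters can be compared later:

#Hedged sketch, not the original continuation; assumes rl and filename
#from the if save: branch above are in scope.
import pickle

outfile = open(filename, 'w')
pickle.dump(rl, outfile)
outfile.close()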
agent.learner.gdSig.alpha = 0.085   #step size of \sigma adaption
agent.learner.gd.momentum = 0.0

#Loading weights
if loadNet:
    agent.learner.original = loadWeights("grasp.wgt")
    agent.learner.gd.init(agent.learner.original)
    agent.learner.epsilon = 0.2
    agent.learner.initSigmas()

batch = 2           #number of samples per gradient estimate

#create experiment
experiment = EpisodicExperiment(task, agent)
prnts = 1           #frequency of console output
epis = 5000000 / batch / prnts      #actual roll outs

for updates in range(epis):
    for i in range(prnts):
        experiment.doEpisodes(batch)    #execute batch episodes
        agent.learn()                   #learn from the gathered experience
        agent.reset()                   #reset agent and environment

    #print out related data
    print "Step: ", runs, "/", (updates + 1) * batch * prnts, "Best: ", agent.learner.best,
    print "Base: ", agent.learner.baseline, "Reward: ", agent.learner.reward

    #Saving weights every 100 updates
    if saveNet:
        if updates % 100 == 0:
            saveWeights(saveName, agent.learner.original)
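The grasping script checkpoints its flat parameter vector via loadWeights and saveWeights, whose implementation is not part of this listing. Below is a minimal pickle-based sketch of what such helpers could look like; it is an assumption for illustration, not the original implementation.

#Hedged sketch of pickle-based checkpoint helpers (assumed, not original code).
import pickle
from numpy import array

def saveWeights(fname, weights):
    #store the current flat parameter vector
    f = open(fname, 'w')
    pickle.dump(list(weights), f)
    f.close()

def loadWeights(fname):
    #restore a previously stored flat parameter vector
    f = open(fname, 'r')
    weights = array(pickle.load(f))
    f.close()
    return weights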