        obs[-2 + self.extraRandoms] = 0.1 * sin(angle1) + cartpos
        if self.numPoles == 2:
            # the second pole's angle sits in a different observation slot
            # depending on whether the task is Markovian
            if self.markov:
                angle2 = obs[3]
            else:
                angle2 = obs[1]
            obs[-3 + self.extraRandoms] = 0.05 * cos(angle2) + cartpos
            obs[-4 + self.extraRandoms] = 0.05 * sin(angle2) + cartpos
        if self.extraRandoms > 0:
            # fill the trailing slots with Gaussian noise distractors
            obs[-self.extraRandoms:] = randn(self.extraRandoms)
        if self.verbose:
            print('obs', obs)
        return obs

    def performAction(self, action):
        if self.verbose:
            print('act', action)
        # delegate the scalar action to the underlying fast implementation
        impl.performAction(action[0])
        self.addReward()


if __name__ == '__main__':
    # quick smoke test: run two episodes with a default agent
    from pybrain.rl import EpisodicExperiment
    from pybrain.rl.agents import FlatNetworkAgent
    x = FastCartPoleTask()
    a = FlatNetworkAgent(x.outdim, x.indim)
    e = EpisodicExperiment(x, a)
    e.doEpisodes(2)
#net.initParams(0.0)

# create agent
agent = StateDependentAgent(net, ENAC())
agent.learner.gd.rprop = True
# only relevant for RP
agent.learner.gd.deltamin = 0.0001
#agent.learner.gd.deltanull = 0.05
# only relevant for BP
agent.learner.gd.alpha = 0.01
agent.learner.gd.momentum = 0.9
agent.actaspg = False

# create experiment
experiment = EpisodicExperiment(task, agent)

# print weights at beginning
print(agent.module.params)

rewards = []
if useGraphics:
    # set up an interactive plot that can be updated as rewards come in
    figure()
    ion()
    pl = MultilinePlotter(autoscale=1.2, xlim=[0, 50], ylim=[0, 1])
    pl.setLineStyle(linewidth=2)

# queued version
# experiment._fillQueue(30)
# while True:
#     experiment._stepQueueLoop()
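# --- illustrative continuation (not part of the original script) ---
# A minimal sketch of the training loop such a setup is typically followed
# by, using only standard PyBrain calls (EpisodicExperiment.doEpisodes,
# agent.learn, agent.reset); the batch size and run count are assumptions
# made for illustration.
for run in range(50):
    r = experiment.doEpisodes(10)               # one reward list per episode
    rewards.append(sum(map(sum, r)) / len(r))   # average return of the batch
    agent.learn()                               # ENAC update from the logged episodes
    agent.reset()                               # clear history before the next batch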