# Replay a fixed cart-pole controller whose four weights are taken from the
# command line, then print the summed reward of every recorded episode.
# `episodes` and `epilen` are expected to be defined earlier in the file.

# set parameters from command line
# Parsed up front so that a missing or non-numeric argument fails before the
# renderer has been started.
weights = array([float(sys.argv[i]) for i in range(1, 5)])

# create environment
env = CartPoleEnvironment()
env.setRenderer(CartPoleRenderer())
env.getRenderer().start()
try:
    # NOTE(review): presumably slows the simulation down for viewing when
    # only a single episode is played -- confirm against the environment.
    env.delay = (episodes == 1)

    # create task
    task = BalanceTask(env, epilen)

    # create controller network (layer sizes 4 and 1, no bias unit)
    net = buildNetwork(4, 1, bias=False)

    # create agent (no learner is attached; the weights are fixed)
    agent = LearningAgent(net, None)
    agent.module._setParameters(weights)
    agent.disableLearning()

    # create experiment
    experiment = EpisodicExperiment(task, agent)
    experiment.doEpisodes(episodes)

    # Sum element 2 of each recorded sequence (named `reward` in the
    # original code) to get one return per episode.
    ret = []
    for n in range(agent.history.getNumSequences()):
        returns = agent.history.getSequence(n)
        reward = returns[2]
        ret.append(sum(reward, 0).item())

    # Single-argument print call: identical output to the former
    # `print ret, "mean:", mean(ret)` and valid on both Python 2 and 3.
    print("%s mean: %s" % (ret, mean(ret)))
finally:
    # Always shut the renderer down, even if the experiment raised.
    env.getRenderer().stop()
# Replay a fixed cart-pole controller whose four weights are taken from the
# command line, then print the summed reward of every recorded episode.
# `episodes` and `epilen` are expected to be defined earlier in the file.

if len(sys.argv) < 5:
    sys.exit('please give 4 parameters. run: "python play.py <p1> <p2> <p3> <p4>"\n')

# set parameters from command line
# Parsed up front so that a non-numeric argument fails before the renderer
# has been started.
weights = array([float(sys.argv[i]) for i in range(1, 5)])

# create environment
env = CartPoleEnvironment()
env.setRenderer(CartPoleRenderer())
env.getRenderer().start()
try:
    # NOTE(review): presumably slows the simulation down for viewing when
    # only a single episode is played -- confirm against the environment.
    env.delay = (episodes == 1)

    # create task
    task = BalanceTask(env, epilen)

    # create controller network (layer sizes 4 and 1, no bias unit)
    net = buildNetwork(4, 1, bias=False)

    # create agent (no learner is attached; the weights are fixed)
    agent = LearningAgent(net, None)
    agent.module._setParameters(weights)
    agent.disableLearning()

    # create experiment
    experiment = EpisodicExperiment(task, agent)
    experiment.doEpisodes(episodes)

    # Sum element 2 of each recorded sequence (named `reward` in the
    # original code) to get one return per episode.
    ret = []
    for n in range(agent.history.getNumSequences()):
        returns = agent.history.getSequence(n)
        reward = returns[2]
        ret.append(sum(reward, 0).item())

    # Single-argument print call: identical output to the former
    # `print ret, "mean:", mean(ret)` and valid on both Python 2 and 3.
    print("%s mean: %s" % (ret, mean(ret)))
finally:
    # Always shut the renderer down, even if the experiment raised.
    env.getRenderer().stop()