# Example 1
env = CartPoleEnvironment()
env.setRenderer(CartPoleRenderer())
env.getRenderer().start()
env.delay = (episodes == 1)

# create task
task = BalanceTask(env, epilen)
# create controller network
net = buildNetwork(4, 1, bias=False)
# set parameters from command line
# create agent
agent = LearningAgent(net, None)
agent.module._setParameters(
    array([
        float(sys.argv[1]),
        float(sys.argv[2]),
        float(sys.argv[3]),
        float(sys.argv[4])
    ]))
agent.disableLearning()
# create experiment
experiment = EpisodicExperiment(task, agent)
experiment.doEpisodes(episodes)
ret = []
for n in range(agent.history.getNumSequences()):
    returns = agent.history.getSequence(n)
    reward = returns[2]
    ret.append(sum(reward, 0).item())
print ret, "mean:", mean(ret)
env.getRenderer().stop()
# Example 2
if len(sys.argv) < 5:
    sys.exit('please give 4 parameters. run: "python play.py <p1> <p2> <p3> <p4>"\n')
     
# create environment
env = CartPoleEnvironment()    
env.setRenderer(CartPoleRenderer())
env.getRenderer().start()
env.delay = (episodes == 1)

# create task
task = BalanceTask(env, epilen)
# create controller network
net = buildNetwork(4, 1, bias=False)
# set parameters from command line
# create agent
agent = LearningAgent(net, None)
agent.module._setParameters(array([float(sys.argv[1]), float(sys.argv[2]), float(sys.argv[3]), float(sys.argv[4])])) 
agent.disableLearning()
# create experiment
experiment = EpisodicExperiment(task, agent)
experiment.doEpisodes(episodes)
ret = []
for n in range(agent.history.getNumSequences()):
    returns = agent.history.getSequence(n)
    reward = returns[2]
    ret.append( sum(reward, 0).item() )
print ret, "mean:",mean(ret)
env.getRenderer().stop()