Example #1
# imports follow the module layout shown in Example #4; the construction of
# `agent` is omitted from this excerpt and assumed to happen beforehand
from dopamine.environments import CartPoleEnvironment
from dopamine.adapters import IndexingAdapter, NormalizingAdapter
from dopamine.experiments import Experiment

environment = CartPoleEnvironment()
experiment = Experiment(environment, agent)

# cut off last two state dimensions
indexer = IndexingAdapter([0, 1], None)
experiment.addAdapter(indexer)

# add normalization adapter
normalizer = NormalizingAdapter(scaleActions=[(-50, 50)])
experiment.addAdapter(normalizer)

# # add e-greedy exploration
# explorer = EpsilonGreedyExplorer(0.4, episodeCount=500)
# experiment.addAdapter(explorer)

# collect a few initial episodes, then clear the agent's history again
experiment.runEpisodes(10)
agent.forget()

# explorer.decay = 0.999
# renderer = CartPoleRenderer()
# environment.renderer = renderer
# renderer.start()

# run experiment
for i in range(100):
    experiment.runEpisodes(5)
    agent.learn()

    # agent.forget()
    
    valdata = experiment.evaluateEpisodes(10, visualize=True)
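Example #1 collects evaluation episodes but never reports a score. A minimal follow-up sketch, reusing the mean-return computation that Examples #2 and #4 use (the `mean_return` name is only illustrative):

from numpy import mean

# average the summed rewards over the evaluation episodes
mean_return = mean([sum(v.rewards) for v in valdata])
print "mean return", mean_return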
Example #2
    print "mean return", mean_return
    raise SystemExit

# add e-greedy exploration
explorer = EpsilonGreedyExplorer(0.3, episodeCount=1000)
experiment.addAdapter(explorer)
    
# remove a stale save file from a previous run
if os.path.exists('cart_play.saved'):
    os.remove('cart_play.saved')
      
# run experiment
for i in range(1000):
    valdata = experiment.evaluateEpisodes(20, visualize=True)
    mean_return = mean([sum(v.rewards) for v in valdata])

    experiment.runEpisodes(1)
    agent.learn()
    # agent.history.truncate(20)
    # agent.forget()
    
    # save file after each learning step
    f = open('cart_play.saved', 'w')
    cPickle.dump(agent, f)
    f.close()
    
    # per-iteration diagnostics
    print normalizer.minStates, normalizer.maxStates
    print "params", agent.estimator.models[0].W
    print "exploration", explorer.epsilon
    print "mean return", mean_return
    print "num episodes", len(agent.history)
    print "num total samples", agent.history.numTotalSamples()
Example #3
# module paths are assumed to follow the layout shown in Example #4; the module
# providing LWPRFA is not shown in these excerpts
from dopamine.environments import MirrorEnvironment
from dopamine.agents import APIAgent
from dopamine.adapters import BinaryActionSearchAdapter
from dopamine.experiments import Experiment
import numpy as np

# create agent, environment, experiment
agent = APIAgent(faClass=LWPRFA, resetFA=True, vectorblock=False)
agent.gamma = 2.
agent.alpha = 1.0
agent.iterations = 1
agent.presentations = 1

environment = MirrorEnvironment()
experiment = Experiment(environment, agent)

# add binary action search (BAS) adapter
bas = BinaryActionSearchAdapter(3., 4., 10)
experiment.addAdapter(bas)

# add e-greedy exploration
# explorer = EpsilonGreedyExplorer(0.5, episodeCount=10000)
# experiment.addAdapter(explorer)

# evaluate the untrained agent first
valdata = experiment.evaluateEpisodes(1000)
print "mean rewards:", np.mean([sum(e.rewards) for e in valdata]) #, "exploration:", explorer.epsilon
# print "exploration:", explorer.epsilon

# collect training episodes, then run one learning step on the whole batch
experiment.runEpisodes(10000)
agent.learn()

# evaluate again after learning
valdata = experiment.evaluateEpisodes(1000)
print "mean rewards:", np.mean([sum(e.rewards) for e in valdata]) #, "exploration:", explorer.epsilon
Example #4
from dopamine.environments import CartPoleEnvironment, CartPoleRenderer
from dopamine.agents import FiniteDifferenceAgent, NNController
from dopamine.adapters import IndexingAdapter, NormalizingAdapter
from dopamine.experiments import Experiment
from numpy import mean

environment = CartPoleEnvironment()
environment.centerCart = False
agent = FiniteDifferenceAgent(controllerClass=NNController)
experiment = Experiment(environment, agent)

# cut off last two state dimensions
indexer = IndexingAdapter([0, 1], None)
experiment.addAdapter(indexer)

# add normalization adapter
normalizer = NormalizingAdapter(scaleActions=[(-50, 50)])
experiment.addAdapter(normalizer)

# run experiment
for i in range(1000):
    experiment.runEpisodes(50)
    agent.learn()
    agent.forget()   # clear the collected episodes before the next batch

    valdata = experiment.evaluateEpisodes(10)
    print "mean return", mean([sum(v.rewards) for v in valdata])