import os
import cPickle

from numpy import mean

from dopamine.environments import CartPoleEnvironment, CartPoleRenderer
# EpsilonGreedyExplorer assumed to live with the other adapters (it is added via addAdapter below)
from dopamine.adapters import IndexingAdapter, NormalizingAdapter, EpsilonGreedyExplorer
from dopamine.experiments import Experiment

# NOTE: `agent` is assumed to be created before this point (the original script
# omits its construction); it needs learn(), forget(), an estimator and a
# sample history, as used below.

environment = CartPoleEnvironment()
experiment = Experiment(environment, agent)

# cut off last two state dimensions
indexer = IndexingAdapter([0, 1], None)
experiment.addAdapter(indexer)

# add normalization adapter
normalizer = NormalizingAdapter(scaleActions=[(-50, 50)])
experiment.addAdapter(normalizer)

# # add e-greedy exploration
# explorer = EpsilonGreedyExplorer(0.4, episodeCount=500)
# experiment.addAdapter(explorer)

# collect a few episodes to warm up the adapters, then discard the samples
experiment.runEpisodes(10)
agent.forget()

# explorer.decay = 0.999

# renderer = CartPoleRenderer()
# environment.renderer = renderer
# renderer.start()

# run experiment: alternate between collecting episodes, learning and evaluating
for i in range(100):
    experiment.runEpisodes(5)
    agent.learn()
    # agent.forget()
    valdata = experiment.evaluateEpisodes(10, visualize=True)
    mean_return = mean([sum(v.rewards) for v in valdata])
    print "mean return", mean_return

# stop here; remove this line to run the exploration/checkpointing loop below
raise SystemExit

# add e-greedy exploration
explorer = EpsilonGreedyExplorer(0.3, episodeCount=1000)
experiment.addAdapter(explorer)

# start from a fresh checkpoint file
if os.path.exists('cart_play.saved'):
    os.remove('cart_play.saved')

# run experiment
for i in range(1000):
    valdata = experiment.evaluateEpisodes(20, visualize=True)
    mean_return = mean([sum(v.rewards) for v in valdata])

    experiment.runEpisodes(1)
    agent.learn()
    # agent.history.truncate(20)
    # agent.forget()

    # save agent after each learning step
    f = open('cart_play.saved', 'w')
    cPickle.dump(agent, f)
    f.close()

    print normalizer.minStates, normalizer.maxStates
    print "params", agent.estimator.models[0].W
    print "exploration", explorer.epsilon
    print "mean return", mean_return
    print "num episodes", len(agent.history)
    print "num total samples", agent.history.numTotalSamples()
import numpy as np

# import paths for APIAgent, LWPRFA and the adapters assumed to follow the
# package layout used in the other examples; adjust to your checkout
from dopamine.environments import MirrorEnvironment
from dopamine.agents import APIAgent
from dopamine.adapters import BinaryActionSearchAdapter
from dopamine.experiments import Experiment
from dopamine.fapprox import LWPRFA

# create agent, environment, renderer, experiment
agent = APIAgent(faClass=LWPRFA, resetFA=True, vectorblock=False)
agent.gamma = 2.
agent.alpha = 1.0
agent.iterations = 1
agent.presentations = 1

environment = MirrorEnvironment()
experiment = Experiment(environment, agent)

# add binary action search (BAS) adapter
bas = BinaryActionSearchAdapter(3., 4., 10)
experiment.addAdapter(bas)

# add e-greedy exploration
# explorer = EpsilonGreedyExplorer(0.5, episodeCount=10000)
# experiment.addAdapter(explorer)

# evaluate the untrained agent
valdata = experiment.evaluateEpisodes(1000)
print "mean rewards:", np.mean([sum(e.rewards) for e in valdata])
# print "exploration:", explorer.epsilon

# train, then evaluate again
experiment.runEpisodes(10000)
agent.learn()

valdata = experiment.evaluateEpisodes(1000)
print "mean rewards:", np.mean([sum(e.rewards) for e in valdata])
from dopamine.environments import CartPoleEnvironment, CartPoleRenderer
from dopamine.agents import FiniteDifferenceAgent, NNController
from dopamine.adapters import IndexingAdapter, NormalizingAdapter
from dopamine.experiments import Experiment

from numpy import *

environment = CartPoleEnvironment()
environment.centerCart = False
agent = FiniteDifferenceAgent(controllerClass=NNController)
experiment = Experiment(environment, agent)

# cut off last two state dimensions
indexer = IndexingAdapter([0, 1], None)
experiment.addAdapter(indexer)

# add normalization adapter
normalizer = NormalizingAdapter(scaleActions=[(-50, 50)])
experiment.addAdapter(normalizer)

# run experiment: collect episodes, update the policy, then evaluate
for i in range(1000):
    experiment.runEpisodes(50)
    agent.learn()
    agent.forget()

    valdata = experiment.evaluateEpisodes(10)
    print "mean return", mean([sum(v.rewards) for v in valdata])
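
# CartPoleRenderer is imported above but never attached. A minimal sketch for
# watching the learned policy, following the commented-out renderer block in
# the value-based cart-pole script (attach the renderer, then evaluate with
# visualize=True):
renderer = CartPoleRenderer()
environment.renderer = renderer
renderer.start()
experiment.evaluateEpisodes(5, visualize=True)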