# Experiment setup: Episodic Natural Actor-Critic (ENAC) with a linear
# function approximator on the continuous cart-pole balancing task.
# NOTE(review): this script was collapsed onto a single line in the source;
# the line breaks below reconstruct the intended statement sequence.
from dopamine.environments import CartPoleEnvironment, CartPoleRenderer
from dopamine.agents import ReinforceAgent, ENACAgent
from dopamine.adapters import IndexingAdapter, NormalizingAdapter, GaussianExplorer, LinearSDExplorer, StateEnhancingAdapter
from dopamine.experiments import Experiment
from dopamine.fapprox import *
from numpy import *
from dopamine.tools import Episode

# Episodes are truncated after this many steps.
maxSteps = 400
environment = CartPoleEnvironment(maxSteps=maxSteps)
# Reward the agent for keeping the cart centered, not just the pole upright.
environment.centerCart = True
# Renderer is created here but not attached to the environment in this script.
renderer = CartPoleRenderer()

agent = ENACAgent(faClass=Linear)
experiment = Experiment(environment, agent)

# cut off last two state dimensions
# indexer = IndexingAdapter([0, 1], None)
# experiment.addAdapter(indexer)

# enhance state
# def enhancef(state):
#     return r_[state, state[0]**2, abs(state[2]), sin(state[1]), cos(state[1]), 1]
# enhancer = StateEnhancingAdapter(enhancef)
# experiment.addAdapter(enhancer)

# add normalization adapter: scale the agent's actions into the [-50, 50]
# force range expected by the cart-pole environment
normalizer = NormalizingAdapter(scaleActions=[(-50, 50)])
experiment.addAdapter(normalizer)
agent.iterations = 1 environment = DiscreteCartPoleEnvironment() environment.conditions['actionNum'] = 2 environment.centerCart = False experiment = Experiment(environment, agent) # cut off last two state dimensions indexer = IndexingAdapter([0, 1], None) experiment.addAdapter(indexer) # add normalization adapter normalizer = NormalizingAdapter(normalizeStates=[(-0.8, 0.8), (-12., 12.)]) experiment.addAdapter(normalizer) if options.play: renderer = CartPoleRenderer() environment.renderer = renderer renderer.start() print "10 evaluation trials:" valdata = experiment.evaluateEpisodes(10, visualize=False) mean_return = mean([sum(v.rewards) for v in valdata]) print "mean return", mean_return raise SystemExit # add e-greedy exploration explorer = EpsilonGreedyExplorer(0.3, episodeCount=1000) experiment.addAdapter(explorer) if os.path.exists('cart_play.saved'): os.remove('cart_play.saved')
# Experiment setup: REINFORCE with a linear function approximator on the
# continuous cart-pole task, exploring via a linearly decaying-sigma
# Gaussian perturbation.
# NOTE(review): this script was collapsed onto a single line in the source;
# the line breaks below reconstruct the intended statement sequence.
from dopamine.environments import CartPoleEnvironment, CartPoleRenderer
from dopamine.agents import ReinforceAgent
from dopamine.adapters import IndexingAdapter, NormalizingAdapter, GaussianExplorer, LinearSDExplorer
from dopamine.experiments import Experiment
from dopamine.fapprox import *
from numpy import *
from dopamine.tools import Episode

environment = CartPoleEnvironment(maxSteps=100)
environment.centerCart = False
# Renderer is created here but not attached to the environment in this script.
renderer = CartPoleRenderer()

agent = ReinforceAgent(faClass=Linear)
experiment = Experiment(environment, agent)

# cut off last two state dimensions
indexer = IndexingAdapter([0, 1], None)
experiment.addAdapter(indexer)

# add normalization adapter: scale actions into the [-50, 50] force range
normalizer = NormalizingAdapter(scaleActions=[(-50, 50)])
experiment.addAdapter(normalizer)

# add gaussian explorer (linearly decaying standard deviation) — this is
# the explorer actually attached to the experiment
explorer = LinearSDExplorer(sigma=2.)
explorer.sigmaAdaptation = False
experiment.addAdapter(explorer)

# alternative explorer configuration; rebinds the name but is never added
# to the experiment in this script
explorer = GaussianExplorer(sigma=0.)
explorer.sigmaAdaptation = True