# Cart-pole experiment script: builds an FQIAgent backed by the RBF
# approximator class, a two-action DiscreteCartPoleEnvironment, and an
# Experiment with indexing, normalizing and e-greedy adapters attached.
from dopamine.environments import DiscreteCartPoleEnvironment, CartPoleRenderer
from dopamine.agents import FQIAgent
from dopamine.fapprox import *
from dopamine.experiments import Experiment
from dopamine.adapters import (
    EpsilonGreedyExplorer,
    NormalizingAdapter,
    IndexingAdapter,
)

from matplotlib import pyplot as plt
from numpy import *

# --- agent ---------------------------------------------------------------
# RBF is expected to be supplied by one of the star imports above
# (presumably dopamine.fapprox -- confirm).
agent = FQIAgent(faClass=RBF)
agent.iterations = 1
agent.presentations = 1

# --- environment ---------------------------------------------------------
environment = DiscreteCartPoleEnvironment()
environment.conditions['actionNum'] = 2
environment.centerCart = False

# --- experiment ----------------------------------------------------------
experiment = Experiment(environment, agent)

# Adapters are attached in this order: indexing, normalization, exploration.

# cut off last two state dimensions (only indices 0 and 1 are passed on)
indexer = IndexingAdapter([0, 1], None)
experiment.addAdapter(indexer)

# add normalization adapter (constructed with its defaults)
normalizer = NormalizingAdapter()
experiment.addAdapter(normalizer)

# add e-greedy exploration
explorer = EpsilonGreedyExplorer(0.3, episodeCount=1000)
experiment.addAdapter(explorer)

# finalize the experiment once every adapter has been added
experiment.setup()
# Parse command-line options, then either restore a previously saved agent
# (play mode) or create a fresh FQIAgent, and wire up the cart-pole
# environment, experiment and adapters.
options, args = parse_opt()

# create agent, environment, renderer, experiment
if options.play and os.path.exists('cart_play.saved'):
    print('loading agent...')
    # SECURITY NOTE: unpickling executes arbitrary code embedded in the file;
    # only load 'cart_play.saved' if it was written by a trusted run.
    # NOTE(review): mode 'r' is kept to match however the file is written
    # elsewhere; binary mode ('rb'/'wb') on both sides would be more portable.
    # The `with` block guarantees the handle is closed even if loading fails.
    with open('cart_play.saved', 'r') as f:
        agent = cPickle.load(f)
else:
    print("no saved agent found. start from scratch.")
    agent = FQIAgent(estimatorClass=RBFEstimator)
    # without a saved agent there is nothing to replay, so leave play mode
    options.play = False

# NOTE(review): the source formatting was lost; this assignment is assumed to
# apply to both the loaded and the freshly created agent -- confirm.
agent.iterations = 1

environment = DiscreteCartPoleEnvironment()
environment.conditions['actionNum'] = 2
environment.centerCart = False

experiment = Experiment(environment, agent)

# cut off last two state dimensions (only indices 0 and 1 are passed on)
indexer = IndexingAdapter([0, 1], None)
experiment.addAdapter(indexer)

# add normalization adapter with explicit ranges for the two kept dimensions
normalizer = NormalizingAdapter(normalizeStates=[(-0.8, 0.8), (-12., 12.)])
experiment.addAdapter(normalizer)

# only attach a renderer when replaying a saved agent
if options.play:
    renderer = CartPoleRenderer()
    environment.renderer = renderer
# Cart-pole experiment script: default FQIAgent on a
# DiscreteCartPoleEnvironment, with indexing, normalizing and e-greedy
# adapters; runs a short batch of episodes before resetting the agent.
from dopamine.environments import DiscreteCartPoleEnvironment, CartPoleRenderer
from dopamine.agents import FQIAgent
from dopamine.experiments import Experiment
from dopamine.adapters import (
    EpsilonGreedyExplorer,
    NormalizingAdapter,
    IndexingAdapter,
)

from matplotlib import pyplot as plt
from numpy import *

# --- agent, environment, experiment (all with default construction) -------
agent = FQIAgent()
environment = DiscreteCartPoleEnvironment()
experiment = Experiment(environment, agent)

# Adapters are attached in this order: indexing, normalization, exploration.

# cut off last two state dimensions (only indices 0 and 1 are passed on)
indexer = IndexingAdapter([0, 1], None)
experiment.addAdapter(indexer)

# add normalization adapter (constructed with its defaults)
normalizer = NormalizingAdapter()
experiment.addAdapter(normalizer)

# add e-greedy exploration
explorer = EpsilonGreedyExplorer(0.2, episodeCount=100)
experiment.addAdapter(explorer)

# Run 10 episodes, then call agent.forget().
# NOTE(review): presumably a warm-up whose collected data is discarded
# afterwards -- confirm against the agent implementation.
experiment.runEpisodes(10)
agent.forget()

# adjust the explorer's decay attribute for the runs that follow
explorer.decay = 0.999

# the renderer is created here; it is not attached to the environment within
# the visible part of the script
renderer = CartPoleRenderer()