from dopamine.environments import CartPoleEnvironment, CartPoleRenderer
from dopamine.agents import BASAgent, RBFEstimator, NNEstimator
from dopamine.experiments import Experiment
from dopamine.adapters import EpsilonGreedyExplorer, NormalizingAdapter, IndexingAdapter
from matplotlib import pyplot as plt
from numpy import *

# Binary-action-search agent on the cart-pole task with a neural-network
# value estimator.
agent = BASAgent(estimatorClass=NNEstimator)
environment = CartPoleEnvironment()
experiment = Experiment(environment, agent)

# Hide the last two state dimensions from the agent.
stateIndexer = IndexingAdapter([0, 1], None)
experiment.addAdapter(stateIndexer)

# Scale the agent's actions into the [-50, 50] force range.
actionNormalizer = NormalizingAdapter(scaleActions=[(-50, 50)])
experiment.addAdapter(actionNormalizer)

# # add e-greedy exploration
# explorer = EpsilonGreedyExplorer(0.4, episodeCount=500)
# experiment.addAdapter(explorer)

experiment.runEpisodes(10)
agent.forget()

# explorer.decay = 0.999
# renderer = CartPoleRenderer()
from dopamine.environments import MirrorEnvironment from dopamine.agents import APIAgent, FQIAgent from dopamine.experiments import Experiment from dopamine.adapters import EpsilonGreedyExplorer, BinaryActionSearchAdapter from dopamine.fapprox import * import numpy as np # create agent, environment, renderer, experiment agent = APIAgent(faClass=LWPRFA, resetFA=True, vectorblock=False) agent.gamma = 2. agent.alpha = 1.0 agent.iterations = 1 agent.presentations = 1 environment = MirrorEnvironment() experiment = Experiment(environment, agent) # add bas adapter bas = BinaryActionSearchAdapter(3., 4., 10) experiment.addAdapter(bas) # add e-greedy exploration # explorer = EpsilonGreedyExplorer(0.5, episodeCount=10000) # experiment.addAdapter(explorer) # run experiment valdata = experiment.evaluateEpisodes(1000) print "mean rewards:", np.mean([sum(e.rewards) for e in valdata]) #, "exploration:", explorer.epsilon # print "exploration:", explorer.epsilon experiment.runEpisodes(10000)
from dopamine.environments import DiscreteCartPoleEnvironment, CartPoleRenderer
from dopamine.agents import QAgent, SARSAAgent
from dopamine.experiments import Experiment
from dopamine.adapters import EpsilonGreedyExplorer, VQStateDiscretizationAdapter
from matplotlib import pyplot as plt
from numpy import *
import time

# SARSA on the discrete cart-pole task with a vector-quantization
# state discretizer.
agent = SARSAAgent()
environment = DiscreteCartPoleEnvironment()
experiment = Experiment(environment, agent)

# Map the continuous state onto 30 prototype vectors.
discretizer = VQStateDiscretizationAdapter(30)
experiment.addAdapter(discretizer)

# Epsilon-greedy exploration, annealed over 1000 episodes.
explorer = EpsilonGreedyExplorer(0.3, episodeCount=1000)
experiment.addAdapter(explorer)

# force experiment setup now
experiment.setup()

# Label each prototype state vector in the plot with its cluster index.
for i, sv in enumerate(discretizer.stateVectors):
    plt.text(sv[0], sv[1], "%i" % i, bbox=dict(facecolor='green', alpha=0.5))
# create agent, environment, renderer, experiment if options.play and os.path.exists('cart_play.saved'): print 'loading agent...' f = open('cart_play.saved', 'r') agent = cPickle.load(f) else: print "no saved agent found. start from scratch." agent = FQIAgent(estimatorClass=RBFEstimator) options.play = False agent.iterations = 1 environment = DiscreteCartPoleEnvironment() environment.conditions['actionNum'] = 2 environment.centerCart = False experiment = Experiment(environment, agent) # cut off last two state dimensions indexer = IndexingAdapter([0, 1], None) experiment.addAdapter(indexer) # add normalization adapter normalizer = NormalizingAdapter(normalizeStates=[(-0.8, 0.8), (-12., 12.)]) experiment.addAdapter(normalizer) if options.play: renderer = CartPoleRenderer() environment.renderer = renderer renderer.start() print "10 evaluation trials:" valdata = experiment.evaluateEpisodes(10, visualize=False)
from dopamine.environments import CartPoleEnvironment, CartPoleRenderer from dopamine.agents import FiniteDifferenceAgent, NNController from dopamine.adapters import IndexingAdapter, NormalizingAdapter from dopamine.experiments import Experiment from numpy import * environment = CartPoleEnvironment() environment.centerCart = False agent = FiniteDifferenceAgent(controllerClass=NNController) experiment = Experiment(environment, agent) # cut off last two state dimensions indexer = IndexingAdapter([0, 1], None) experiment.addAdapter(indexer) # add normalization adapter normalizer = NormalizingAdapter(scaleActions=[(-50, 50)]) experiment.addAdapter(normalizer) # run experiment for i in range(1000): experiment.runEpisodes(50) agent.learn() agent.forget() valdata = experiment.evaluateEpisodes(10) print "mean return", mean([sum(v.rewards) for v in valdata])
# BUGFIX: CartPoleEnvironment and CartPoleRenderer are used below but were
# never imported -- the script raised NameError at the environment line.
from dopamine.environments import CartPoleEnvironment, CartPoleRenderer
from dopamine.agents import ReinforceAgent, ENACAgent
from dopamine.adapters import IndexingAdapter, NormalizingAdapter, GaussianExplorer, LinearSDExplorer, StateEnhancingAdapter
from dopamine.experiments import Experiment
from dopamine.fapprox import *
from numpy import *
from dopamine.tools import Episode

# Episodic natural actor-critic (eNAC) with a linear function approximator
# on the centered cart-pole task.
maxSteps = 400
environment = CartPoleEnvironment(maxSteps=maxSteps)
environment.centerCart = True
renderer = CartPoleRenderer()

agent = ENACAgent(faClass=Linear)
experiment = Experiment(environment, agent)

# cut off last two state dimensions
# indexer = IndexingAdapter([0, 1], None)
# experiment.addAdapter(indexer)

# enhance state
# def enhancef(state):
#     return r_[state, state[0]**2, abs(state[2]), sin(state[1]), cos(state[1]), 1]
# enhancer = StateEnhancingAdapter(enhancef)
# experiment.addAdapter(enhancer)

# add normalization adapter
normalizer = NormalizingAdapter(scaleActions=[(-50, 50)])
experiment.addAdapter(normalizer)
from dopamine.environments import MDPMaze
from dopamine.agents import QAgent, SARSAAgent, QLambdaAgent
from dopamine.experiments import Experiment
from dopamine.adapters import MakeEpisodicAdapter, EpsilonGreedyExplorer, BoltzmannExplorer
from matplotlib import pyplot as plt
from numpy import *
import time

# Tabular SARSA on the MDP maze with Boltzmann exploration.
# BUGFIX: the original instantiated QLambdaAgent() and immediately overwrote
# it with SARSAAgent(); the dead construction is kept only as a comment.
# agent = QLambdaAgent()
agent = SARSAAgent()

environment = MDPMaze()
experiment = Experiment(environment, agent)

# Chop the continuing maze task into episodes of at most 1000 steps.
experiment.addAdapter(MakeEpisodicAdapter(1000))

# Boltzmann (softmax) exploration, temperature annealed over 200 episodes.
explorer = BoltzmannExplorer(5, episodeCount=200)
experiment.addAdapter(explorer)

plt.ion()

for i in range(1000):
    # run one episode and learn
    experiment.runEpisode(reset=True)
    agent.learn()
    agent.forget()

    shape = environment.mazeTable.shape
# inps = agent.estimator.dataset['input']
# tgts = agent.estimator.dataset['target'].flatten()
#
# red = where(inps[:,1])[0]
# blue = where(inps[:,2])[0]
#
# plt.plot(inps[red,0].flatten(), tgts[red], 'sr', alpha=0.5)
# plt.plot(inps[blue,0].flatten(), tgts[blue], 'sb', alpha=0.5)

# Flush any pending plot updates to the screen.
plt.gcf().canvas.draw()

# Fitted Q-iteration on the test environment.
agent = FQIAgent()
environment = TestEnvironment()
experiment = Experiment(environment, agent)

# add normalization adapter
# normalizer = NormalizingAdapter()
# experiment.addAdapter(normalizer)

# Epsilon-greedy exploration, annealed over 1000 episodes.
# explorer = BoltzmannExplorer(2.0, episodeCount=1000)
explorer = EpsilonGreedyExplorer(0.5, episodeCount=1000)
experiment.addAdapter(explorer)

# run 10 episodes to initialize the normalizing adapter
for episodeIndex in range(10):
    experiment.runEpisode(reset=True)
    # print "normalizing:", normalizer.minStates, normalizer.maxStates