Example #1
from dopamine.environments import CartPoleEnvironment, CartPoleRenderer
from dopamine.agents import BASAgent, RBFEstimator, NNEstimator
from dopamine.experiments import Experiment
from dopamine.adapters import EpsilonGreedyExplorer, NormalizingAdapter, IndexingAdapter

from matplotlib import pyplot as plt
from numpy import *


# create agent, environment, renderer, experiment
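# BASAgent presumably performs a binary search over the continuous action range,
# here with a neural-network estimator for the value function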
agent = BASAgent(estimatorClass=NNEstimator)
environment = CartPoleEnvironment()
experiment = Experiment(environment, agent)

# cut off last two state dimensions
indexer = IndexingAdapter([0, 1], None)
experiment.addAdapter(indexer)

# add normalization adapter
normalizer = NormalizingAdapter(scaleActions=[(-50, 50)])
experiment.addAdapter(normalizer)

# # add e-greedy exploration
# explorer = EpsilonGreedyExplorer(0.4, episodeCount=500)
# experiment.addAdapter(explorer)

experiment.runEpisodes(10)
agent.forget()

# explorer.decay = 0.999
# renderer = CartPoleRenderer()
Example #2
from dopamine.environments import MirrorEnvironment
from dopamine.agents import APIAgent, FQIAgent
from dopamine.experiments import Experiment
from dopamine.adapters import EpsilonGreedyExplorer, BinaryActionSearchAdapter
from dopamine.fapprox import *
import numpy as np

# create agent, environment, renderer, experiment
agent = APIAgent(faClass=LWPRFA, resetFA=True, vectorblock=False)
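# learning settings as given in the original example; gamma and alpha are presumably
# the discount factor and learning rate (note that a discount factor above 1 is unusual)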
agent.gamma = 2.
agent.alpha = 1.0
agent.iterations = 1
agent.presentations = 1

environment = MirrorEnvironment()
experiment = Experiment(environment, agent)

# add bas adapter
bas = BinaryActionSearchAdapter(3., 4., 10)
experiment.addAdapter(bas)

# add e-greedy exploration
# explorer = EpsilonGreedyExplorer(0.5, episodeCount=10000)
# experiment.addAdapter(explorer)

# run experiment
valdata = experiment.evaluateEpisodes(1000)
print "mean rewards:", np.mean([sum(e.rewards) for e in valdata]) #, "exploration:", explorer.epsilon
# print "exploration:", explorer.epsilon

experiment.runEpisodes(10000)
Example #3
from dopamine.environments import DiscreteCartPoleEnvironment, CartPoleRenderer

from dopamine.agents import QAgent, SARSAAgent
from dopamine.experiments import Experiment
from dopamine.adapters import EpsilonGreedyExplorer, VQStateDiscretizationAdapter

from matplotlib import pyplot as plt
from numpy import *
import time


# create agent, environment, renderer, experiment
agent = SARSAAgent()
environment = DiscreteCartPoleEnvironment()

experiment = Experiment(environment, agent)

# add discretization adapter
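# (the constructor argument is presumably the number of prototype state vectors)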
discretizer = VQStateDiscretizationAdapter(30)
experiment.addAdapter(discretizer)

# add e-greedy exploration
explorer = EpsilonGreedyExplorer(0.3, episodeCount=1000)
experiment.addAdapter(explorer)

# force experiment setup now
experiment.setup()

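# label each prototype state vector with its index in the plane of the first
# two state dimensions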
for i in range(len(discretizer.stateVectors)):
    plt.text(discretizer.stateVectors[i,0], discretizer.stateVectors[i,1], "%i"%i, bbox=dict(facecolor='green', alpha=0.5))
Example #4
# imports for this snippet (module paths follow the other examples in this file);
# `options` is assumed to come from an option parser set up earlier in the full script
import os
import cPickle

from dopamine.environments import DiscreteCartPoleEnvironment, CartPoleRenderer
from dopamine.agents import FQIAgent, RBFEstimator
from dopamine.experiments import Experiment
from dopamine.adapters import IndexingAdapter, NormalizingAdapter

# create agent, environment, renderer, experiment
if options.play and os.path.exists('cart_play.saved'):
    print 'loading agent...'
    f = open('cart_play.saved', 'r')
    agent = cPickle.load(f)
else:
    print "no saved agent found. start from scratch."
    agent = FQIAgent(estimatorClass=RBFEstimator)
    options.play = False

agent.iterations = 1
environment = DiscreteCartPoleEnvironment()
environment.conditions['actionNum'] = 2
environment.centerCart = False
experiment = Experiment(environment, agent)

# cut off last two state dimensions
indexer = IndexingAdapter([0, 1], None)
experiment.addAdapter(indexer)

# add normalization adapter
normalizer = NormalizingAdapter(normalizeStates=[(-0.8, 0.8), (-12., 12.)])
experiment.addAdapter(normalizer)

if options.play:
    renderer = CartPoleRenderer()
    environment.renderer = renderer
    renderer.start()
    print "10 evaluation trials:"
    valdata = experiment.evaluateEpisodes(10, visualize=False)
Example #5
from dopamine.environments import CartPoleEnvironment, CartPoleRenderer
from dopamine.agents import FiniteDifferenceAgent, NNController
from dopamine.adapters import IndexingAdapter, NormalizingAdapter
from dopamine.experiments import Experiment
from numpy import *

environment = CartPoleEnvironment()
environment.centerCart = False
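# finite-difference policy-gradient agent with a neural-network controller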
agent = FiniteDifferenceAgent(controllerClass=NNController)
experiment = Experiment(environment, agent)

# cut off last two state dimensions
indexer = IndexingAdapter([0, 1], None)
experiment.addAdapter(indexer)

# add normalization adapter
normalizer = NormalizingAdapter(scaleActions=[(-50, 50)])
experiment.addAdapter(normalizer)

# run experiment
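# each iteration: collect 50 episodes, update the policy, clear the collected
# data (forget), then evaluate the current policy over 10 episodes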
for i in range(1000):
    experiment.runEpisodes(50)
    agent.learn()
    agent.forget()

    valdata = experiment.evaluateEpisodes(10)
    print "mean return", mean([sum(v.rewards) for v in valdata])

Example #6
from dopamine.environments import CartPoleEnvironment, CartPoleRenderer
from dopamine.agents import ReinforceAgent, ENACAgent
from dopamine.adapters import IndexingAdapter, NormalizingAdapter, GaussianExplorer, LinearSDExplorer, StateEnhancingAdapter
from dopamine.experiments import Experiment
from dopamine.fapprox import *
from numpy import *

from dopamine.tools import Episode

maxSteps = 400
environment = CartPoleEnvironment(maxSteps=maxSteps)
environment.centerCart = True

renderer = CartPoleRenderer()

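# episodic Natural Actor-Critic (ENAC) agent with a linear function approximator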
agent = ENACAgent(faClass=Linear)
experiment = Experiment(environment, agent)

# cut off last two state dimensions
# indexer = IndexingAdapter([0, 1], None)
# experiment.addAdapter(indexer)

# enhance state
# def enhancef(state):
#     return r_[state, state[0]**2, abs(state[2]), sin(state[1]), cos(state[1]), 1]
# enhancer = StateEnhancingAdapter(enhancef)
# experiment.addAdapter(enhancer)

# add normalization adapter
normalizer = NormalizingAdapter(scaleActions=[(-50, 50)])
experiment.addAdapter(normalizer)
Example #7
from dopamine.environments import MDPMaze
from dopamine.agents import QAgent, SARSAAgent, QLambdaAgent
from dopamine.experiments import Experiment
from dopamine.adapters import MakeEpisodicAdapter, EpsilonGreedyExplorer, BoltzmannExplorer

from matplotlib import pyplot as plt
from numpy import *
import time


# agent = QLambdaAgent()  # alternative agent
agent = SARSAAgent()

environment = MDPMaze()
experiment = Experiment(environment, agent)
experiment.addAdapter(MakeEpisodicAdapter(1000))


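# Boltzmann (softmax) exploration; the first argument is presumably the initial
# temperature, annealed over 200 episodes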
explorer = BoltzmannExplorer(5, episodeCount=200)
experiment.addAdapter(explorer)

plt.ion()

for i in range(1000):
    # run one episode and learn
    experiment.runEpisode(reset=True)
    agent.learn()
    agent.forget()

    shape = environment.mazeTable.shape
    
Example #8
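# (fragment: tail of a plotting block that redraws the current figure; the
# enclosing loop and this script's imports are not shown)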
    
    # inps = agent.estimator.dataset['input']
    # tgts = agent.estimator.dataset['target'].flatten()
    # 
    # red = where(inps[:,1])[0]
    # blue = where(inps[:,2])[0]
    # 
    # plt.plot(inps[red,0].flatten(), tgts[red], 'sr', alpha=0.5)
    # plt.plot(inps[blue,0].flatten(), tgts[blue], 'sb', alpha=0.5)
    plt.gcf().canvas.draw()

# create agent, environment, renderer, experiment
agent = FQIAgent()
environment = TestEnvironment()
experiment = Experiment(environment, agent)

# add normalization adapter
# normalizer = NormalizingAdapter()
# experiment.addAdapter(normalizer)

# add e-greedy exploration
# explorer = BoltzmannExplorer(2.0, episodeCount=1000)
explorer = EpsilonGreedyExplorer(0.5, episodeCount=1000)
experiment.addAdapter(explorer)

# run 10 episodes to initialize the normalizing adapter
for i in range(10):
    experiment.runEpisode(reset=True)

# print "normalizing:", normalizer.minStates, normalizer.maxStates