示例#1
0
from dopamine.environments import DiscreteCartPoleEnvironment, CartPoleRenderer
from dopamine.agents import FQIAgent
from dopamine.fapprox import *
from dopamine.experiments import Experiment
from dopamine.adapters import EpsilonGreedyExplorer, NormalizingAdapter, IndexingAdapter

from matplotlib import pyplot as plt
from numpy import *

# create agent, environment, renderer, experiment
agent = FQIAgent(faClass=RBF)
agent.iterations = 1
agent.presentations = 1
environment = DiscreteCartPoleEnvironment()
environment.conditions['actionNum'] = 2
environment.centerCart = False
experiment = Experiment(environment, agent)

# cut off last two state dimensions
indexer = IndexingAdapter([0, 1], None)
experiment.addAdapter(indexer)

# add normalization adapter
normalizer = NormalizingAdapter()
experiment.addAdapter(normalizer)

# add e-greedy exploration
explorer = EpsilonGreedyExplorer(0.3, episodeCount=1000)
experiment.addAdapter(explorer)

experiment.setup()
示例#2
0

options, args = parse_opt()

# create agent, environment, renderer, experiment
if options.play and os.path.exists('cart_play.saved'):
    print 'loading agent...'
    f = open('cart_play.saved', 'r')
    agent = cPickle.load(f)
else:
    print "no saved agent found. start from scratch."
    agent = FQIAgent(estimatorClass=RBFEstimator)
    options.play = False

agent.iterations = 1
environment = DiscreteCartPoleEnvironment()
environment.conditions['actionNum'] = 2
environment.centerCart = False
experiment = Experiment(environment, agent)

# cut off last two state dimensions
indexer = IndexingAdapter([0, 1], None)
experiment.addAdapter(indexer)

# add normalization adapter
normalizer = NormalizingAdapter(normalizeStates=[(-0.8, 0.8), (-12., 12.)])
experiment.addAdapter(normalizer)

if options.play:
    renderer = CartPoleRenderer()
    environment.renderer = renderer
示例#3
0
from dopamine.environments import DiscreteCartPoleEnvironment, CartPoleRenderer
from dopamine.agents import FQIAgent
from dopamine.experiments import Experiment
from dopamine.adapters import EpsilonGreedyExplorer, NormalizingAdapter, IndexingAdapter

from matplotlib import pyplot as plt
from numpy import *


# create agent, environment, renderer, experiment
agent = FQIAgent()
environment = DiscreteCartPoleEnvironment()
experiment = Experiment(environment, agent)

# cut off last two state dimensions
indexer = IndexingAdapter([0, 1], None)
experiment.addAdapter(indexer)

# add normalization adapter
normalizer = NormalizingAdapter()
experiment.addAdapter(normalizer)

# add e-greedy exploration
explorer = EpsilonGreedyExplorer(0.2, episodeCount=100)
experiment.addAdapter(explorer)

experiment.runEpisodes(10)
agent.forget()

explorer.decay = 0.999
renderer = CartPoleRenderer()