Example #1
def testAugmented():
    from core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from mdpmap import MDPconverter
    from agents import PolicyDrivenAgent
    from interfaces import GameEnvironment, GameTask

    zelda_level2 = """
wwwwwwwwwwwww
wA wwk1ww   w
ww  ww    1 w
ww     wwww+w
wwwww1ww  www
wwwww  0  Gww
wwwwwwwwwwwww
"""

    from examples.gridphysics.mazes.rigidzelda import rigidzelda_game
    g = VGDLParser().parseGame(rigidzelda_game)
    g.buildLevel(zelda_level2)
    env = GameEnvironment(g,
                          visualize=False,
                          recordingEnabled=True,
                          actionDelay=150)
    C = MDPconverter(g, env=env, verbose=True)
    Ts, R, _ = C.convert()
    print C.states
    print Ts[0]
    print R
    env.reset()
    agent = PolicyDrivenAgent.buildOptimal(env)
    env.visualize = True
    env.reset()
    task = GameTask(env)
    exper = EpisodicExperiment(task, agent)
    exper.doEpisodes(1)
Example #2
def main():
    # NOTE: assumes SimpleMLPMarioAgent and MarioTask (Mario AI benchmark code),
    # NetworkWriter and EpisodicExperiment are imported at module level
    agent = SimpleMLPMarioAgent(
        10,
        inGridSize=3,
    )
    print agent.name
    NetworkWriter.writeToFile(agent.module,
                              "../temp/MarioNetwork-" + agent.name + ".xml")

    task = MarioTask(agent.name, timeLimit=200)
    exp = EpisodicExperiment(task, agent)
    res = 0
    cumul = 0
    for seed in [0]:
        for difficulty in [0, 3, 5, 10]:
            task.env.levelSeed = seed
            task.env.levelDifficulty = difficulty

            exp.doEpisodes(1)
            print 'Difficulty: %d, Seed: %d, Fitness: %.2f' % (
                difficulty, seed, task.reward)
            cumul += task.reward
            if task.reward < 4000:
                break
            res += 1
    print res
    print agent.module.inputbuffer * 1.
Example #3
def testRecordingToGif(human=False):
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from core import VGDLParser
    from interfaces import GameEnvironment, GameTask
    from examples.gridphysics.mazes import polarmaze_game, maze_level_2
    from agents import PolicyDrivenAgent, InteractiveAgent
    from tools import makeGifVideo

    game_str, map_str = polarmaze_game, maze_level_2
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    env = GameEnvironment(g,
                          visualize=human,
                          recordingEnabled=True,
                          actionDelay=200)
    task = GameTask(env)
    if human:
        agent = InteractiveAgent()
    else:
        agent = PolicyDrivenAgent.buildOptimal(env)
    exper = EpisodicExperiment(task, agent)
    res = exper.doEpisodes(1)
    print res

    actions = [a for _, a, _ in env._allEvents]
    print actions
    makeGifVideo(env, actions, initstate=env._initstate)
Example #4
def test4():
    """ Same thing, but animated. """
    from examples.gridphysics.mazes.windy import windy_stoch_game, windy_level
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.core import VGDLParser
    from vgdl.interfaces import GameEnvironment, GameTask
    from vgdl.agents import PolicyDrivenAgent
    g = VGDLParser().parseGame(windy_stoch_game)
    g.buildLevel(windy_level)
    env = GameEnvironment(g, visualize=True, actionDelay=100)
    task = GameTask(env)
    agent = PolicyDrivenAgent.buildOptimal(env)
    exper = EpisodicExperiment(task, agent)
    res = exper.doEpisodes(5)
    print res
Example #5
def playSubjectiveGame(game_str, map_str):
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from core import VGDLParser
    from interfaces import GameTask
    from subjective import SubjectiveGame
    from agents import InteractiveAgent, UserTiredException
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    senv = SubjectiveGame(g, actionDelay=100, recordingEnabled=True)
    task = GameTask(senv)
    iagent = InteractiveAgent()
    exper = EpisodicExperiment(task, iagent)
    try:
        exper.doEpisodes(1)
    except UserTiredException:
        pass
Example #6
def f(self, x):
    """ An episodic task can be used as an evaluation function of a module that produces actions
    from observations, or as an evaluator of an agent. """
    r = 0.
    for _ in range(self.batchSize):
        if isinstance(x, Module):
            x.reset()
            self.reset()
            while not self.isFinished():
                self.performAction(x.activate(self.getObservation()))
        elif isinstance(x, Agent):
            EpisodicExperiment(self, x).doEpisodes()
        else:
            raise ValueError(self.__class__.__name__ + ' cannot evaluate the fitness of ' + str(type(x)))
        r += self.getTotalReward()
    return r / float(self.batchSize)
Example #7
def __call__(self, module):
    """ An episodic task can be used as an evaluation function of a module that produces actions
    from observations, or as an evaluator of an agent. """
    if isinstance(module, Module):
        module.reset()
        self.reset()
        while not self.isFinished():
            self.performAction(module.activate(self.getObservation()))
        return self.getTotalReward()
    elif isinstance(module, Agent):
        EpisodicExperiment(self, module).doEpisodes(self.batchSize)
        return self.getTotalReward() / float(self.batchSize)
    else:
        raise NotImplementedError('Missing implementation for ' +
                                  module.__class__.__name__ +
                                  ' evaluation')
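The docstrings in Examples #6 and #7 describe how an episodic task doubles as a fitness function for a module. A minimal usage sketch of that evaluator, assuming PyBrain's cart-pole BalanceTask and buildNetwork (neither appears in the original examples):

from pybrain.rl.environments.cartpole.balancetask import BalanceTask
from pybrain.tools.shortcuts import buildNetwork

task = BalanceTask(maxsteps=200)                # an EpisodicTask: balance a cart-pole
net = buildNetwork(task.outdim, 3, task.indim)  # maps observations to an action
# calling the task runs one episode and returns the total reward, as in Example #7
print task(net)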
Example #8
def combinedScore(agent, task=None):
    """ Let the agent act on a number of levels of increasing difficulty.
    Return the combined score. """
    # assumes MarioTask and EpisodicExperiment are imported at module level
    if task is None:
        task = MarioTask(agentName=agent.name)
    exp = EpisodicExperiment(task, agent)
    res = 0
    for difficulty in range(1):
        for seed in range(1):
            task.env.levelSeed = seed
            task.env.levelDifficulty = difficulty
            exp.doEpisodes(1)
            print 'Difficulty: %d, Seed: %d, Fitness: %.2f' % (
                difficulty, seed, task.reward)
            res += task.reward
    return res
Example #9
def main():
    # assumes ForwardAgent, MarioTask and EpisodicExperiment are imported at module level
    agent = ForwardAgent()
    task = MarioTask(agent.name, initMarioMode=1)
    exp = EpisodicExperiment(task, agent)
    print 'Task Ready'
    exp.doEpisodes(3)
    print 'mm 1:', task.reward

    task.env.initMarioMode = 2
    exp.doEpisodes(1)
    print 'mm 2:', task.reward

    task.env.initMarioMode = 0
    exp.doEpisodes(1)
    print 'mm 0:', task.reward

    print "finished"
Example #10
def testPolicyAgent():
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from core import VGDLParser
    from interfaces import GameEnvironment, GameTask
    from examples.gridphysics.mazes import polarmaze_game, maze_level_2
    from agents import PolicyDrivenAgent
    game_str, map_str = polarmaze_game, maze_level_2
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)

    env = GameEnvironment(g, visualize=False, actionDelay=100)
    task = GameTask(env)
    agent = PolicyDrivenAgent.buildOptimal(env)
    env.visualize = True
    env.reset()
    exper = EpisodicExperiment(task, agent)
    res = exper.doEpisodes(2)
    print res
Example #11
def test3():
    from examples.gridphysics.mazes import polarmaze_game
    from examples.gridphysics.mazes.simple import maze_level_1b
    from core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from interfaces import GameTask
    from subjective import SubjectiveGame
    from agents import InteractiveAgent, UserTiredException
    game_str, map_str = polarmaze_game, maze_level_1b
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    senv = SubjectiveGame(g, actionDelay=100, recordingEnabled=True)
    #senv = GameEnvironment(g, actionDelay=100, recordingEnabled=True, visualize=True)
    task = GameTask(senv)
    iagent = InteractiveAgent()
    exper = EpisodicExperiment(task, iagent)
    try:
        exper.doEpisodes(1)
    except UserTiredException:
        pass
    print senv._allEvents
Example #12
def mlDriver(cv, stateTransfer, actionTransfer):
    # parameter setup
    # dimensionality of state argument (could be less than stateTransfer)
    stateDim = 352
    # Number of moves possible
    numMoves = 361
    env = SettleEnv(cv, stateTransfer, actionTransfer)
    task = SettleTask(env)
    controller = RestrictedActionValueNetwork(stateDim, numMoves, env)
    learner = NFQ()
    learner.explorer = EpsilonHackedExplorer(env)
    agent = LearningAgent(controller, learner)
    experiment = EpisodicExperiment(task, agent)
    while True:
        experiment.doEpisodes(10)
        print "Done with experiments"
        agent.learn()
        print "Learned"
        agent.reset()
        print "Cycled"
Example #13
def testInteractions():
    from random import randint
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from core import VGDLParser
    from interfaces import GameEnvironment, GameTask
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    from pybrain.rl.agents.agent import Agent

    class DummyAgent(Agent):
        total = 4

        def getAction(self):
            res = randint(0, self.total - 1)
            return res

    game_str, map_str = polarmaze_game, maze_level_1
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)

    env = GameEnvironment(g, visualize=True, actionDelay=100)
    task = GameTask(env)
    agent = DummyAgent()
    exper = EpisodicExperiment(task, agent)
    res = exper.doEpisodes(2)
    print res
Example #14
# create agent (this fragment assumes net, task, useGraphics and the plotting
# helpers figure/ion/MultilinePlotter are defined earlier in the script)
learner = ENAC()
learner.gd.rprop = True
# only relevant for RP
learner.gd.deltamin = 0.0001
#agent.learner.gd.deltanull = 0.05
# only relevant for BP
learner.gd.alpha = 0.01
learner.gd.momentum = 0.9

agent = LearningAgent(net, learner)
agent.actaspg = False

# create experiment
experiment = EpisodicExperiment(task, agent)

# print weights at beginning
print(agent.module.params)

rewards = []
if useGraphics:
    figure()
    ion()
    pl = MultilinePlotter(autoscale=1.2, xlim=[0, 50], ylim=[0, 1])
    pl.setLineStyle(linewidth=2)

# queued version
# experiment._fillQueue(30)
# while True:
#     experiment._stepQueueLoop()
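Example #14 stops right after the experiment is created. A minimal continuation sketch, following the same doEpisodes / learn / reset cycle that Example #12 uses (illustrative, not taken from the original script):

# collect a batch of episodes, then let ENAC update the policy
for update in range(50):
    experiment.doEpisodes(10)   # roll out 10 episodes with the current policy
    agent.learn()               # gradient update from the collected episodes
    agent.reset()               # clear the agent's episode history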
Example #15
def main():
    """ Main program for automatic asset allocation problem.
    """
    # Directories
    input_data_dir = '../../Data/Input/'
    output_data_dir = '../../Data/Output/'

    # Experiment parameters
    batch = 1                      # Number of samples per learning step
    prnts = 100                    # Learning steps before printing results
    nEpisodes = 100/batch/prnts   # Number of rollouts
    nExperiments = 1               # Number of experiments
    et = ExTools(batch, prnts)     # Tool for printing and plotting

    # Parameters
    X = 0.0 / 252    # Daily risk-free rate
    deltaP = 0.00    # Proportional transaction costs
    deltaF = 0.0      # Fixed transaction costs
    deltaS = 0.00    # Short-selling borrowing costs
    P = 5             # Number of past days the agent considers
    discount = 0.95   # Discount factor

    # Evaluation interval sizes
    start = P + 1
    trainingIntervalLength = 70
    testIntervalLength = 30

    # Initialize the market environment
    market = MarketEnvironment(input_data_dir + 'daily_returns.csv', X, P)
    nSamples = len(market.data)
    nPeriods = (nSamples - start + 1) / (trainingIntervalLength + testIntervalLength)

    # Initialize the asset allocation tasks
    task = AssetAllocationTask(market, deltaP, deltaF, deltaS, discount)

    # Initialize controller module
    module = buildNetwork(market.outdim,  # Input layer
                          market.indim,   # Output layer
                          outclass=SoftmaxLayer)  # Output activation function

    # Initialize learner module
    learner = PGPE(storeAllEvaluations=True,
                   learningRate=0.01,
                   sigmaLearningRate=0.01,
                   batchSize=batch,
                   # momentum=0.05,
                   # epsilon=6.0,
                   rprop=False)

    # Initialize learning agent
    agent = OptimizationAgent(module, learner)
    et.agent = agent

    for period in xrange(5):  # or: for period in xrange(nPeriods)

        # Set initial and final time steps for training
        initialTimeStep = start
        finalTimeStep = start + trainingIntervalLength
        task.setEvaluationInterval(initialTimeStep, finalTimeStep)
        task.trainingMode()

        # Initialize experiment
        experiment = EpisodicExperiment(task, agent)

        # Train the agent
        for episode in xrange(nEpisodes):
            for i in xrange(prnts):
                experiment.doEpisodes(batch)
            et.printResults((agent.learner._allEvaluations)[-50:-1],
                            1, episode)

        # Set initial and final time steps for testing
        initialTimeStep = start + trainingIntervalLength
        finalTimeStep = initialTimeStep + testIntervalLength
        task.setEvaluationInterval(initialTimeStep, finalTimeStep)
        task.backtestMode()

        # Initialize experiment
        experiment = EpisodicExperiment(task, agent)

        # Test the agent
        experiment.doEpisodes(batch)

        # Slide evaluation window
        start += testIntervalLength

    # Plot allocations
    task.report.iloc[:, :-1].plot.area(title='Portfolio Allocation - PGPE')
    plt.ylim(0.0, 1.0)
    plt.xlabel('Date')
    plt.ylabel('Portfolio Allocation')
    plt.show()

    # Plot cumulative log-returns
    buyHold = market.data.ix[task.report.index, 'SPY']
    buyHoldCumLogReturns = np.log(buyHold + 1.0).cumsum(axis=0)
    ptfCumLogReturns = task.report['ptfLogReturn'].cumsum(axis=0)
    cumLogReturns = pd.DataFrame(index=task.report.index)
    cumLogReturns['Buy & Hold'] = buyHoldCumLogReturns
    cumLogReturns['PGPE'] = ptfCumLogReturns
    cumLogReturns.plot(title='Cumulative Log-Returns - PGPE',
                       lw=2, grid=True)
    plt.xlabel('Date')
    plt.ylabel('Cumulative Log-Returns')
    plt.show()