def testAugmented():
    """Convert a small rigid-Zelda level with MDPconverter, then replay it.

    The game is parsed and the level built, the environment is handed to
    ``MDPconverter`` and the converter's states, the first entry of the
    returned transitions and the returned rewards are printed.  Afterwards
    an agent obtained from ``PolicyDrivenAgent.buildOptimal`` is run for a
    single episode with visualisation switched on.
    """
    from core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from mdpmap import MDPconverter
    from agents import PolicyDrivenAgent
    from examples.gridphysics.mazes.rigidzelda import rigidzelda_game

    # NOTE(review): this literal looks like a 7-row level map whose line
    # breaks and runs of spaces were collapsed; it is kept exactly as found.
    # Confirm the intended layout before relying on this test.
    zelda_level2 = """ wwwwwwwwwwwww wA wwk1ww w ww ww 1 w ww wwww+w wwwww1ww www wwwww 0 Gww wwwwwwwwwwwww """

    game = VGDLParser().parseGame(rigidzelda_game)
    game.buildLevel(zelda_level2)
    env = GameEnvironment(game, visualize=False, recordingEnabled=True,
                          actionDelay=150)

    # Conversion runs with visualisation off; its results are dumped to stdout.
    converter = MDPconverter(game, env=env, verbose=True)
    transitions, rewards, _ = converter.convert()
    print(converter.states)
    print(transitions[0])
    print(rewards)

    env.reset()
    agent = PolicyDrivenAgent.buildOptimal(env)

    # Turn rendering on only for the replay episode.
    env.visualize = True
    env.reset()
    EpisodicExperiment(GameTask(env), agent).doEpisodes(1)
def main():
    """Run a SimpleMLPMarioAgent on a fixed list of difficulties.

    The agent's network is first written to
    ``../temp/MarioNetwork-<agent name>.xml``.  For seed 0 the difficulties
    0, 3, 5 and 10 are played in order, one episode each, and the fitness is
    printed.  The run over difficulties stops at the first episode whose
    reward is below 4000; ``res`` counts the episodes that reached the
    threshold and is printed at the end together with a copy of the
    network's input buffer.
    """
    agent = SimpleMLPMarioAgent(10, inGridSize=3)
    print(agent.name)
    NetworkWriter.writeToFile(agent.module,
                              "../temp/MarioNetwork-" + agent.name + ".xml")

    task = MarioTask(agent.name, timeLimit=200)
    exp = EpisodicExperiment(task, agent)

    res = 0
    for seed in [0]:
        for difficulty in [0, 3, 5, 10]:
            task.env.levelSeed = seed
            task.env.levelDifficulty = difficulty
            exp.doEpisodes(1)
            print('Difficulty: %d, Seed: %d, Fitness: %.2f' % (
                difficulty, seed, task.reward))
            # Stop climbing difficulties once the agent falls below threshold.
            if task.reward < 4000:
                break
            res += 1

    print(res)
    # Multiplying by 1. yields a new value rather than the buffer itself.
    print(agent.module.inputbuffer * 1.)
def testRecordingToGif(human=False):
    """Record one episode on the polar maze and pass it to makeGifVideo.

    human -- when true the environment is visualised and an
             ``InteractiveAgent`` is used; otherwise the agent comes from
             ``PolicyDrivenAgent.buildOptimal``.

    The episode result and the list of recorded actions are printed before
    ``makeGifVideo`` is called with the environment, the actions and the
    environment's initial state.
    """
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_2
    from agents import PolicyDrivenAgent, InteractiveAgent
    from tools import makeGifVideo

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(maze_level_2)
    env = GameEnvironment(game, visualize=human, recordingEnabled=True,
                          actionDelay=200)
    task = GameTask(env)

    agent = InteractiveAgent() if human else PolicyDrivenAgent.buildOptimal(env)

    res = EpisodicExperiment(task, agent).doEpisodes(1)
    print(res)

    # Each recorded event is a 3-tuple; the action is its middle element.
    actions = []
    for _, action, _ in env._allEvents:
        actions.append(action)
    print(actions)

    makeGifVideo(env, actions, initstate=env._initstate)
def test4():
    """Run five visualised episodes on the stochastic windy maze.

    The agent is built with ``PolicyDrivenAgent.buildOptimal``; the value
    returned by ``doEpisodes`` is printed.
    """
    from examples.gridphysics.mazes.windy import windy_stoch_game, windy_level
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.interfaces import GameEnvironment, GameTask
    from vgdl.agents import PolicyDrivenAgent

    game = VGDLParser().parseGame(windy_stoch_game)
    game.buildLevel(windy_level)

    env = GameEnvironment(game, visualize=True, actionDelay=100)
    task = GameTask(env)
    agent = PolicyDrivenAgent.buildOptimal(env)

    res = EpisodicExperiment(task, agent).doEpisodes(5)
    print(res)
def playSubjectiveGame(game_str, map_str):
    """Play one interactive episode of the given game in a SubjectiveGame.

    game_str -- game description handed to ``VGDLParser().parseGame``
    map_str  -- level description handed to ``buildLevel``

    A ``UserTiredException`` raised during the episode ends it quietly.
    """
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from interfaces import GameTask
    from subjective import SubjectiveGame
    from agents import InteractiveAgent, UserTiredException

    game = VGDLParser().parseGame(game_str)
    game.buildLevel(map_str)

    senv = SubjectiveGame(game, actionDelay=100, recordingEnabled=True)
    experiment = EpisodicExperiment(GameTask(senv), InteractiveAgent())

    try:
        experiment.doEpisodes(1)
    except UserTiredException:
        # Deliberately ignored: the exception only signals the user quitting.
        pass
def f(self, x):
    """Evaluate ``x`` on this task and return the mean total reward.

    ``x`` may be a Module (it is reset, then fed observations until the task
    reports it is finished) or an Agent (one episode is run through an
    ``EpisodicExperiment``).  The evaluation is repeated ``self.batchSize``
    times and the total rewards are averaged.

    Raises ValueError for any other type of ``x``.
    """
    reward_sum = 0.
    for _ in range(self.batchSize):
        # Reject unsupported evaluables before touching the task.
        if not isinstance(x, (Module, Agent)):
            raise ValueError(self.__class__.__name__+' cannot evaluate the fitness of '+str(type(x)))

        if isinstance(x, Module):
            x.reset()
            self.reset()
            while not self.isFinished():
                observation = self.getObservation()
                self.performAction(x.activate(observation))
        else:
            EpisodicExperiment(self, x).doEpisodes()

        reward_sum += self.getTotalReward()

    return reward_sum / float(self.batchSize)
def __call__(self, module):
    """Evaluate a Module or an Agent on this task.

    For a Module, the module and the task are reset, the module is fed
    observations until the task is finished, and the task's total reward is
    returned.  For an Agent, ``self.batchSize`` episodes are run through an
    ``EpisodicExperiment`` and the total reward read afterwards is divided
    by ``self.batchSize``.

    Raises NotImplementedError for any other argument type.

    NOTE(review): the Agent branch is only a true mean if
    ``getTotalReward()`` accumulates over all of the episodes just run --
    confirm against the task's reset behaviour.
    """
    if isinstance(module, Module):
        module.reset()
        self.reset()
        while not self.isFinished():
            observation = self.getObservation()
            self.performAction(module.activate(observation))
        return self.getTotalReward()

    if isinstance(module, Agent):
        experiment = EpisodicExperiment(self, module)
        experiment.doEpisodes(self.batchSize)
        return self.getTotalReward() / float(self.batchSize)

    raise NotImplementedError('Missing implementation for ' + module.__class__.__name__ + ' evaluation')
def combinedScore(agent, task=None):
    """Let the agent act on a number of levels and return the combined score.

    agent -- the agent to evaluate; its ``name`` is used when a task has to
             be created.
    task  -- task to run on; when omitted a ``MarioTask`` is created for the
             agent.

    The loops currently cover a single difficulty (0) and a single seed (0).
    One episode is played per combination, its fitness is printed, and the
    rewards are summed into the return value.
    """
    # Identity test: `== None` would invoke a task's custom __eq__.
    if task is None:
        task = MarioTask(agentName=agent.name)
    exp = EpisodicExperiment(task, agent)

    res = 0
    for difficulty in range(1):
        for seed in range(1):
            task.env.levelSeed = seed
            task.env.levelDifficulty = difficulty
            exp.doEpisodes(1)
            print('Difficulty: %d, Seed: %d, Fitness: %.2f' % (
                difficulty, seed, task.reward))
            res += task.reward
    return res
def main():
    """Run a ForwardAgent with initMarioMode 1, 2 and 0 and print each reward.

    Three episodes are played with the initial mode 1, then one episode each
    after setting the environment's ``initMarioMode`` to 2 and to 0.
    """
    agent = ForwardAgent()
    task = MarioTask(agent.name, initMarioMode=1)
    exp = EpisodicExperiment(task, agent)

    def run_and_report(episodes, label):
        # Prints "<label> <reward>", i.e. the two values separated by a space.
        exp.doEpisodes(episodes)
        print('%s %s' % (label, task.reward))

    print('Task Ready')
    run_and_report(3, 'mm 1:')

    task.env.initMarioMode = 2
    run_and_report(1, 'mm 2:')

    task.env.initMarioMode = 0
    run_and_report(1, 'mm 0:')

    print("finished")
def testPolicyAgent():
    """Run two visualised episodes of a buildOptimal agent on the polar maze.

    The environment is created with visualisation off, the agent is built
    from it, and rendering is enabled (followed by a reset) before the
    episodes are run.  The value returned by ``doEpisodes`` is printed.
    """
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_2
    from agents import PolicyDrivenAgent

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(maze_level_2)

    env = GameEnvironment(game, visualize=False, actionDelay=100)
    task = GameTask(env)
    agent = PolicyDrivenAgent.buildOptimal(env)

    # Rendering is switched on only after the agent has been built.
    env.visualize = True
    env.reset()

    res = EpisodicExperiment(task, agent).doEpisodes(2)
    print(res)
def test3():
    """Play one interactive episode on maze_level_1b in a SubjectiveGame.

    A ``UserTiredException`` ends the episode quietly; the environment's
    recorded events are printed afterwards either way.
    """
    from examples.gridphysics.mazes import polarmaze_game
    from examples.gridphysics.mazes.simple import maze_level_1b
    from core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from interfaces import GameTask
    from agents import InteractiveAgent, UserTiredException

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(maze_level_1b)

    senv = SubjectiveGame(game, actionDelay=100, recordingEnabled=True)
    # Alternative environment:
    # senv = GameEnvironment(game, actionDelay=100, recordingEnabled=True, visualize=True)

    experiment = EpisodicExperiment(GameTask(senv), InteractiveAgent())
    try:
        experiment.doEpisodes(1)
    except UserTiredException:
        # Deliberately ignored: the exception only signals the user quitting.
        pass

    print(senv._allEvents)
def mlDriver(cv, stateTransfer, actionTransfer):
    """Train an NFQ agent on the Settle environment in an endless loop.

    cv, stateTransfer, actionTransfer -- passed straight to ``SettleEnv``.

    Each cycle runs ten episodes, lets the agent learn from them and resets
    the agent, printing a progress line after every step.  The function
    never returns.
    """
    # Dimensionality of the state argument (could be less than stateTransfer).
    state_dim = 352
    # Number of moves possible.
    num_moves = 361

    env = SettleEnv(cv, stateTransfer, actionTransfer)
    task = SettleTask(env)

    controller = RestrictedActionValueNetwork(state_dim, num_moves, env)
    learner = NFQ()
    learner.explorer = EpsilonHackedExplorer(env)

    agent = LearningAgent(controller, learner)
    experiment = EpisodicExperiment(task, agent)

    while True:
        experiment.doEpisodes(10)
        print("Done with experiments")
        agent.learn()
        print("Learned")
        agent.reset()
        print("Cycled")
def testInteractions():
    """Run two visualised episodes on maze_level_1 with a random agent.

    The agent picks an integer action uniformly from ``0 .. total - 1`` on
    every call.  The value returned by ``doEpisodes`` is printed.
    """
    from random import randint
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    from pybrain.rl.agents.agent import Agent

    class DummyAgent(Agent):
        # Number of distinct actions to choose from.
        total = 4

        def getAction(self):
            return randint(0, self.total - 1)

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(maze_level_1)

    env = GameEnvironment(game, visualize=True, actionDelay=100)
    experiment = EpisodicExperiment(GameTask(env), DummyAgent())

    res = experiment.doEpisodes(2)
    print(res)
# create agent learner = ENAC() learner.gd.rprop = True # only relevant for RP learner.gd.deltamin = 0.0001 #agent.learner.gd.deltanull = 0.05 # only relevant for BP learner.gd.alpha = 0.01 learner.gd.momentum = 0.9 agent = LearningAgent(net, learner) agent.actaspg = False # create experiment experiment = EpisodicExperiment(task, agent) # print weights at beginning print(agent.module.params) rewards = [] if useGraphics: figure() ion() pl = MultilinePlotter(autoscale=1.2, xlim=[0, 50], ylim=[0, 1]) pl.setLineStyle(linewidth=2) # queued version # experiment._fillQueue(30) # while True: # experiment._stepQueueLoop()
def main():
    """Main program for the automatic asset allocation problem.

    A PGPE-driven agent is trained on a window of daily returns and then
    back-tested on the window that follows; the window pair is slid forward
    by the test length and the procedure is repeated for a fixed number of
    periods.  Finally the portfolio allocation and the cumulative
    log-returns (against buy & hold of the 'SPY' column) are plotted.
    """
    # Directories
    input_data_dir = '../../Data/Input/'
    output_data_dir = '../../Data/Output/'

    # Experiment parameters
    batch = 1                           # Number of samples per learning step
    prnts = 100                         # Learning steps before printing results
    # Floor division keeps the count an integer, as required by range().
    nEpisodes = 100 // batch // prnts   # Number of rollouts
    nExperiments = 1                    # Number of experiments
    et = ExTools(batch, prnts)          # Tool for printing and plotting

    # Parameters
    X = 0.0 / 252    # Daily risk-free rate
    deltaP = 0.00    # Proportional transaction costs
    deltaF = 0.0     # Fixed transaction costs
    deltaS = 0.00    # Short-selling borrowing costs
    P = 5            # Number of past days the agent considers
    discount = 0.95  # Discount factor

    # Evaluation interval sizes
    start = P + 1
    trainingIntervalLength = 70
    testIntervalLength = 30

    # Initialize the market environment
    market = MarketEnvironment(input_data_dir + 'daily_returns.csv', X, P)
    nSamples = len(market.data)
    # Number of whole train+test windows that fit in the data (floor division).
    nPeriods = (nSamples - start + 1) // (trainingIntervalLength + testIntervalLength)

    # Initialize the asset allocation tasks
    task = AssetAllocationTask(market, deltaP, deltaF, deltaS, discount)

    # Initialize controller module
    module = buildNetwork(market.outdim,           # Input layer
                          market.indim,            # Output layer
                          outclass=SoftmaxLayer)   # Output activation function

    # Initialize learner module
    learner = PGPE(storeAllEvaluations=True,
                   learningRate=0.01,
                   sigmaLearningRate=0.01,
                   batchSize=batch,
                   # momentum=0.05,
                   # epsilon=6.0,
                   rprop=False)

    # Initialize learning agent
    agent = OptimizationAgent(module, learner)
    et.agent = agent

    for period in range(5):  # nPeriods):
        # Set initial and final time steps for training
        initialTimeStep = start
        finalTimeStep = start + trainingIntervalLength
        task.setEvaluationInterval(initialTimeStep, finalTimeStep)
        task.trainingMode()

        # Initialize experiment
        experiment = EpisodicExperiment(task, agent)

        # Train the agent
        for episode in range(nEpisodes):
            for i in range(prnts):
                experiment.doEpisodes(batch)
            # NOTE(review): the slice [-50:-1] leaves out the most recent
            # evaluation -- confirm this is intended.
            et.printResults((agent.learner._allEvaluations)[-50:-1], 1, episode)

        # Set initial and final time steps for testing (the window that
        # immediately follows the training window)
        initialTimeStep = start + trainingIntervalLength
        finalTimeStep = initialTimeStep + testIntervalLength
        task.setEvaluationInterval(initialTimeStep, finalTimeStep)
        task.backtestMode()

        # Initialize experiment
        experiment = EpisodicExperiment(task, agent)

        # Test the agent
        experiment.doEpisodes(batch)

        # Slide evaluation window
        start += testIntervalLength

    # Print allocations
    task.report.iloc[:, :-1].plot.area(title='Portfolio Allocation - PGPE')
    plt.ylim(0.0, 1.0)
    plt.xlabel('Date')
    plt.ylabel('Portfolio Allocation')
    plt.show()

    # Print cumulative log-returns
    # .loc is the label-based replacement for the removed DataFrame.ix.
    buyHold = market.data.loc[task.report.index, 'SPY']
    buyHoldCumLogReturns = np.log(buyHold + 1.0).cumsum(axis=0)
    ptfCumLogReturns = task.report['ptfLogReturn'].cumsum(axis=0)
    cumLogReturns = pd.DataFrame(index=task.report.index)
    cumLogReturns['Buy & Hold'] = buyHoldCumLogReturns
    cumLogReturns['PGPE'] = ptfCumLogReturns
    cumLogReturns.plot(title='Cumulative Log-Returns - PGPE', lw=2, grid=True)
    plt.xlabel('Date')
    plt.ylabel('Cumulative Log-Returns')
    plt.show()