# XORTask, Q_LinFA, LinFA_QAgent, and Experiment are expected to be defined or
# imported elsewhere in this example.
def runExp(gamma=0, epsilon=0.1, xor=False, lr=0.02):
    if xor:
        print "Attempting the XOR task"
    else:
        print "Attempting the AND task"
    task = XORTask()
    task.and_task = not xor
    l = Q_LinFA(task.nactions, task.nsenses)
    l.rewardDiscount = gamma
    l.learningRate = lr
    agent = LinFA_QAgent(l)
    agent.epsilon = epsilon
    exp = Experiment(task, agent)
    sofar = 0
    for i in range(30):
        exp.doInteractions(100)
        # Print the reward gained over the last 100 interactions, 10 per row.
        print exp.task.cumreward - sofar,
        if i % 10 == 9:
            print
        sofar = exp.task.cumreward
        l._decayLearningRate()
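# A hypothetical way to invoke the experiment above (not part of the original
# example): run both tasks with the default hyperparameters.
if __name__ == '__main__':
    runExp(xor=False)  # the AND task
    runExp(xor=True)   # the XOR task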
"""Using the agent found in the xor example, rather than in linearfa.py. """ from pybrain.rl.learners.valuebased.linearfa import Q_LinFA from pybrain.rl.experiments import EpisodicExperiment from environment import Environment from tasks import LinearFATileCoding3456BalanceTask from training import LinearFATraining from agents import LinFA_QAgent task = LinearFATileCoding3456BalanceTask() learner = Q_LinFA(task.nactions, task.outdim) task.discount = learner.rewardDiscount agent = LinFA_QAgent(learner) # The state has a huge number of dimensions, and the logging causes me to run # out of memory. We needn't log, since learning is done online. agent.logging = False agent.learning = True performance_agent = LinFA_QAgent(learner) performance_agent.logging = False performance_agent.greedy = True performance_agent.epsilon = 0.0 performance_agent.learning = False experiment = EpisodicExperiment(task, agent) # TODO PyBrain says that the learning rate needs to decay, but I don't see that # described in Randlov's paper. # A higher number here means the learning rate decays slower. learner.learningRateDecay = 100000 # NOTE increasing this number above from the default of 100 is what got the