def __init__(self, module, learner = None): LearningAgent.__init__(self, module, learner) # exploration module (linear flat network) self.explorationmodule = buildNetwork(self.indim, self.outdim, bias=False) # state dependent exploration layer self.explorationlayer = StateDependentLayer(self.outdim, self.explorationmodule, 'explore') # add exploration layer to top of network through identity connection out = self.module.outmodules.pop() self.module.addOutputModule(self.explorationlayer) self.module.addConnection(IdentityConnection(out, self.module['explore'], self.module)) self.module.sortModules() # tell learner the new module self.learner.setModule(self.module) # add the log likelihood (loglh) to the dataset and link it to the others self.history.addField('loglh', self.module.paramdim) self.history.link.append('loglh') self.loglh = None # if this flag is set to True, random weights are drawn after each reward, # effectively acting like the vanilla policy gradient alg. self.actaspg = False
def newEpisode(self):
    """Start a new episode: pick new exploration parameters via the GP model
    (when learning) or draw random exploration weights (when not learning).
    """
    if self.learning:
        # current exploration-module parameters become the GP input sample
        params = ravel(self.explorationlayer.module.params)
        # summed reward of the most recent sequence, scaled by a fixed
        # constant — presumably a normalization factor; TODO confirm why 500
        target = ravel(sum(self.history.getSequence(self.history.getNumSequences()-1)[2]) / 500)
        # NOTE(review): `target` comes from ravel(), so this comparison is on
        # an array-like value; relies on size-1 truthiness — verify against
        # the numpy/scipy version in use.
        if target != 0.0:
            self.gp.addSample(params, target)
            # keep only the 20 most recent samples in the GP training set
            if len(self.gp.trainx) > 20:
                self.gp.trainx = self.gp.trainx[-20:, :]
                self.gp.trainy = self.gp.trainy[-20:]
                self.gp.noise = self.gp.noise[-20:]
            self.gp._calculate()
            # get new parameters where mean was highest
            # (actually: where predictive covariance is maximal, i.e. the
            # most uncertain candidate; ties broken uniformly at random)
            max_cov = diag(self.gp.pred_cov).max()
            indices = where(diag(self.gp.pred_cov) == max_cov)[0]
            pick = indices[random.randint(len(indices))]
            new_param = self.gp.testx[pick]
            # check if that one exists already in gp training set
            if len(where(self.gp.trainx == new_param)[0]) > 0:
                # add some normal noise to it to avoid a duplicate sample
                new_param += random.normal(0, 1, len(new_param))
            self.explorationlayer.module._setParameters(new_param)
    else:
        # not learning: fall back to purely random exploration weights
        self.explorationlayer.drawRandomWeights()
    # don't call StateDependentAgent.newEpisode() because it randomizes the params
    LearningAgent.newEpisode(self)
def getAction(self):
    """Return the module's action for the last observation, replaced by a
    uniformly random discrete action with probability ``self.epsilon``.

    The exploration rate is multiplied by ``self.epsilondecay`` on every
    call, whether or not a random action was taken.
    """
    # start from the greedy choice produced by the base class
    chosen = LearningAgent.getAction(self)
    # with probability epsilon, explore instead
    if random.random() < self.epsilon:
        chosen = array([random.randint(self.module.numActions)])
    # anneal the exploration rate
    self.epsilon = self.epsilon * self.epsilondecay
    return chosen
def __init__(self, module, learner = None):
    """Wrap a single-output feed-forward network with a gaussian exploration
    layer and register the rewired module with the learner.

    :param module: a ``FeedForwardNetwork`` with exactly one output module.
    :param learner: optional learner; it is told about the rewired module.
    """
    assert isinstance(module, FeedForwardNetwork)
    assert len(module.outmodules) == 1
    LearningAgent.__init__(self, module, learner)
    # build the gaussian noise layer with a fixed initial (log-)sigma
    self.explorationlayer = GaussianLayer(self.outdim, name='gauss')
    self.explorationlayer.setSigma([-2] * self.outdim)
    # rewire the net: the former output now feeds the gaussian layer 1:1
    former_out = self.module.outmodules.pop()
    self.module.addOutputModule(self.explorationlayer)
    self.module.addConnection(IdentityConnection(former_out, self.module['gauss']))
    self.module.sortModules()
    # the learner must be told about the rewired module
    self.learner.setModule(self.module)
    # record per-sample log likelihood alongside the other history fields
    self.history.addField('loglh', self.module.paramdim)
    self.history.link.append('loglh')
    self.loglh = None
def __init__(self, module, learner=None):
    """Attach a gaussian exploration layer on top of *module*'s single
    output and extend the history dataset with a log-likelihood field.

    :param module: a ``FeedForwardNetwork`` with exactly one output module.
    :param learner: optional learner, informed of the extended module.
    """
    assert isinstance(module, FeedForwardNetwork)
    assert len(module.outmodules) == 1
    LearningAgent.__init__(self, module, learner)
    # gaussian exploration layer, initial sigma fixed to -2 per dimension
    gauss = GaussianLayer(self.outdim, name='gauss')
    gauss.setSigma([-2] * self.outdim)
    self.explorationlayer = gauss
    # detach the current output module and route it through the new layer
    previous_output = self.module.outmodules.pop()
    self.module.addOutputModule(gauss)
    self.module.addConnection(IdentityConnection(previous_output, self.module['gauss']))
    self.module.sortModules()
    # hand the extended module to the learner
    self.learner.setModule(self.module)
    # log-likelihood field travels with the rest of the history dataset
    self.history.addField('loglh', self.module.paramdim)
    self.history.link.append('loglh')
    self.loglh = None
def disableLearning(self):
    """Turn learning off; exploration noise is switched off with it."""
    # base-class bookkeeping first, then silence the exploration layer
    LearningAgent.disableLearning(self)
    self.explorationlayer.enabled = False
def enableLearning(self):
    """Turn learning back on; exploration noise is re-enabled with it."""
    # base-class bookkeeping first, then reactivate the exploration layer
    LearningAgent.enableLearning(self)
    self.explorationlayer.enabled = True
def newEpisode(self):
    """Signal a new episode and let the learner perturb its parameters."""
    # standard episode bookkeeping, then one perturbation step
    LearningAgent.newEpisode(self)
    self.learner.perturbate()
def __init__(self, indim, outdim):
    """Create an agent around a freshly built network mapping *indim*
    inputs to *outdim* outputs; no learner is attached."""
    network = buildNetwork(indim, outdim)
    LearningAgent.__init__(self, network)
def newEpisode(self):
    """Begin a new episode and re-randomize the exploration weights."""
    # episode bookkeeping, then fresh random weights for exploration
    LearningAgent.newEpisode(self)
    self.explorationlayer.drawRandomWeights()
def __init__(self, module, learner=None):
    """Epsilon-greedy agent: wraps *module* and initializes the exploration
    schedule used by ``getAction``.

    :param module: the action-producing module handed to ``LearningAgent``.
    :param learner: optional learner. Defaults to ``None`` for consistency
        with the other agent constructors in this file (previously it was
        a required positional argument; existing callers are unaffected).
    """
    LearningAgent.__init__(self, module, learner)
    # explore half the time initially; decayed multiplicatively per action
    self.epsilon = 0.5
    self.epsilondecay = 0.9999