Пример #1
0
    def newEpisode(self):
        if self.learning:
            params = ravel(self.explorationlayer.module.params)
            target = ravel(sum(self.history.getSequence(self.history.getNumSequences()-1)[2]) / 500)
        
            if target != 0.0:
                self.gp.addSample(params, target)
                if len(self.gp.trainx) > 20:
                    self.gp.trainx = self.gp.trainx[-20:, :]
                    self.gp.trainy = self.gp.trainy[-20:]
                    self.gp.noise = self.gp.noise[-20:]
                    
                self.gp._calculate()
                        
                # get new parameters where mean was highest
                max_cov = diag(self.gp.pred_cov).max()
                indices = where(diag(self.gp.pred_cov) == max_cov)[0]
                pick = indices[random.randint(len(indices))]
                new_param = self.gp.testx[pick]
            
                # check if that one exists already in gp training set
                if len(where(self.gp.trainx == new_param)[0]) > 0:
                    # add some normal noise to it
                    new_param += random.normal(0, 1, len(new_param))

                self.explorationlayer.module._setParameters(new_param)

            else:
                self.explorationlayer.drawRandomWeights()
        
        # don't call StateDependentAgent.newEpisode() because it randomizes the params
        LearningAgent.newEpisode(self)
Пример #2
0
 def newEpisode(self):
     """ indicates a new episode in the training cycle. """
     LearningAgent.newEpisode(self)
     self.learner.perturbate()        
Пример #3
0
 def newEpisode(self):
     """ indicates a new episode in the training cycle. """
     LearningAgent.newEpisode(self)
     self.learner.perturbate()
Пример #4
0
 def newEpisode(self):
     LearningAgent.newEpisode(self)
     self.explorationlayer.drawRandomWeights()