Example #1
0
 def __init__(self, module, learner = None):
     """ Wrap *module* with a state-dependent exploration layer and
     re-register the rebuilt network with the learner.

     :param module: the network producing greedy actions (self.indim /
         self.outdim are presumably set by LearningAgent.__init__ -- confirm)
     :param learner: optional learner; it is told about the rebuilt module
     """
     LearningAgent.__init__(self, module, learner)
     
     # exploration module: linear flat network (no bias) mapping the
     # input to action-sized exploration offsets
     self.explorationmodule = buildNetwork(self.indim, self.outdim, bias=False)
     
     # state dependent exploration layer wrapping the exploration module
     self.explorationlayer = StateDependentLayer(self.outdim, self.explorationmodule, 'explore')
             
     # splice the exploration layer on top of the network: detach the
     # current output module and feed it into the new output layer
     # through an identity connection, then re-sort the topology
     out = self.module.outmodules.pop()
     self.module.addOutputModule(self.explorationlayer)
     self.module.addConnection(IdentityConnection(out, self.module['explore'], self.module))
     self.module.sortModules()
     
     # the module changed structurally -- re-register it with the learner
     self.learner.setModule(self.module)
     
     # add a log-likelihood (loglh) field to the dataset and link it so
     # it is recorded alongside the other per-step fields
     self.history.addField('loglh', self.module.paramdim)
     self.history.link.append('loglh')
     self.loglh = None
     
     # if this flag is set to True, random weights are drawn after each reward,
     # effectively acting like the vanilla policy gradient algorithm
     self.actaspg = False
Example #2
0
    def newEpisode(self):
        """ Start a new episode. When learning, feed the Gaussian process a
        (exploration-parameters -> scaled episode return) sample and move
        the exploration parameters to the test point with the largest
        predictive uncertainty; otherwise fall back to random weights. """
        if self.learning:
            # current exploration weights and the summed return of the most
            # recent sequence (field index 2 is presumably the reward column;
            # /500 scales it into the GP's working range -- TODO confirm)
            params = ravel(self.explorationlayer.module.params)
            target = ravel(sum(self.history.getSequence(self.history.getNumSequences()-1)[2]) / 500)
        
            if target != 0.0:
                self.gp.addSample(params, target)
                # keep only a sliding window of the 20 most recent samples
                if len(self.gp.trainx) > 20:
                    self.gp.trainx = self.gp.trainx[-20:, :]
                    self.gp.trainy = self.gp.trainy[-20:]
                    self.gp.noise = self.gp.noise[-20:]
                    
                self.gp._calculate()
                        
                # pick the test point with the highest predictive variance
                # (NOTE(review): earlier comment said "mean was highest", but
                # the code maximizes the diagonal of the predictive covariance)
                max_cov = diag(self.gp.pred_cov).max()
                indices = where(diag(self.gp.pred_cov) == max_cov)[0]
                pick = indices[random.randint(len(indices))]
                new_param = self.gp.testx[pick]
            
                # check if that one exists already in gp training set
                if len(where(self.gp.trainx == new_param)[0]) > 0:
                    # add some normal noise to it to avoid a duplicate sample
                    new_param += random.normal(0, 1, len(new_param))

                self.explorationlayer.module._setParameters(new_param)

            else:
                # no informative return yet -- draw random exploration weights
                self.explorationlayer.drawRandomWeights()
        
        # don't call StateDependentAgent.newEpisode() because it randomizes the params
        LearningAgent.newEpisode(self)
Example #3
0
 def getAction(self):
     """ Activate the module with the last observation and store the result
     as last action; with probability ``epsilon`` the greedy choice is
     replaced by a uniformly random action. Epsilon is multiplied by the
     decay factor once per call. """
     # greedy action from the underlying module
     chosen = LearningAgent.getAction(self)
     
     # decide whether to explore, then decay epsilon
     explore = random.random() < self.epsilon
     self.epsilon *= self.epsilondecay
     
     if explore:
         chosen = array([random.randint(self.module.numActions)])
     return chosen
Example #4
0
 def __init__(self, module, learner = None):
     """ Wrap a single-output feed-forward *module* with a Gaussian output
     layer so actions are drawn from a distribution around the network's
     output, and re-register the rebuilt network with the learner.

     :param module: a FeedForwardNetwork with exactly one output module
     :param learner: optional learner; it is told about the rebuilt module
     """
     assert isinstance(module, FeedForwardNetwork)
     assert len(module.outmodules) == 1
     
     LearningAgent.__init__(self, module, learner)
     
     # create gaussian layer (-2 presumably sets a small initial sigma in
     # log space -- TODO confirm against GaussianLayer.setSigma)
     self.explorationlayer = GaussianLayer(self.outdim, name='gauss')
     self.explorationlayer.setSigma([-2] * self.outdim)
     
     # splice the gaussian layer on top of the network: the previous
     # output module now feeds it through an identity connection
     out = self.module.outmodules.pop()
     self.module.addOutputModule(self.explorationlayer)
     self.module.addConnection(IdentityConnection(out, self.module['gauss']))
     self.module.sortModules()
     
     # the module changed structurally -- re-register it with the learner
     self.learner.setModule(self.module)
     
     # add the log likelihood (loglh) to the dataset and link it to the others
     self.history.addField('loglh', self.module.paramdim)
     self.history.link.append('loglh')
     self.loglh = None
Example #5
0
    def __init__(self, module, learner=None):
        """ Wrap a single-output feed-forward *module* with a Gaussian
        output layer so actions are drawn from a distribution around the
        network's output, and re-register the rebuilt network with the
        learner.

        :param module: a FeedForwardNetwork with exactly one output module
        :param learner: optional learner; it is told about the rebuilt module
        """
        assert isinstance(module, FeedForwardNetwork)
        assert len(module.outmodules) == 1

        LearningAgent.__init__(self, module, learner)

        # create gaussian layer (-2 presumably sets a small initial sigma
        # in log space -- TODO confirm against GaussianLayer.setSigma)
        self.explorationlayer = GaussianLayer(self.outdim, name='gauss')
        self.explorationlayer.setSigma([-2] * self.outdim)

        # splice the gaussian layer on top of the network: the previous
        # output module now feeds it through an identity connection
        out = self.module.outmodules.pop()
        self.module.addOutputModule(self.explorationlayer)
        self.module.addConnection(IdentityConnection(out,
                                                     self.module['gauss']))
        self.module.sortModules()

        # the module changed structurally -- re-register it with the learner
        self.learner.setModule(self.module)

        # add the log likelihood (loglh) to the dataset and link it to the others
        self.history.addField('loglh', self.module.paramdim)
        self.history.link.append('loglh')
        self.loglh = None
Example #6
0
 def disableLearning(self):
     """ Deactivate learning, and additionally switch off the exploration
     layer (presumably making actions deterministic -- confirm). """
     LearningAgent.disableLearning(self)
     self.explorationlayer.enabled = False
Example #7
0
 def enableLearning(self):
     """ Activate learning, and additionally re-enable the exploration
     layer (the counterpart of disableLearning). """
     LearningAgent.enableLearning(self)
     self.explorationlayer.enabled = True
Example #8
0
 def newEpisode(self):
     """ Indicates a new episode in the training cycle; additionally asks
     the learner to perturb its parameters for the next rollout. """
     LearningAgent.newEpisode(self)
     self.learner.perturbate()
Example #9
0
 def __init__(self, indim, outdim):
     """ Build a default network with *indim* inputs and *outdim* outputs
     via buildNetwork and hand it to the LearningAgent base class. """
     LearningAgent.__init__(self, buildNetwork(indim, outdim))
Example #10
0
 def newEpisode(self):
     """ Indicates a new episode in the training cycle; additionally asks
     the learner to perturb its parameters for the next rollout. """
     LearningAgent.newEpisode(self)
     self.learner.perturbate()
Example #11
0
 def disableLearning(self):
     """ Deactivate learning, and additionally switch off the exploration
     layer (presumably making actions deterministic -- confirm). """
     LearningAgent.disableLearning(self)
     self.explorationlayer.enabled = False
Example #12
0
 def enableLearning(self):
     """ Activate learning, and additionally re-enable the exploration
     layer (the counterpart of disableLearning). """
     LearningAgent.enableLearning(self)
     self.explorationlayer.enabled = True
Example #13
0
 def __init__(self, indim, outdim):
     """ Build a default network with *indim* inputs and *outdim* outputs
     via buildNetwork and hand it to the LearningAgent base class. """
     LearningAgent.__init__(self, buildNetwork(indim, outdim))
Example #14
0
 def newEpisode(self):
     """ Indicates a new episode in the training cycle; additionally draws
     fresh random weights for the exploration layer. """
     LearningAgent.newEpisode(self)
     self.explorationlayer.drawRandomWeights()
Example #15
0
 def __init__(self, module, learner, epsilon = 0.5, epsilondecay = 0.9999):
     """ Epsilon-greedy learning agent.

     The previously hard-coded exploration constants are now keyword
     parameters with the same defaults, so existing two-argument callers
     are unaffected.

     :param module: module handed through to LearningAgent
     :param learner: the learner updating the module
     :param epsilon: initial exploration probability (default 0.5)
     :param epsilondecay: multiplicative per-action decay applied to
         epsilon (default 0.9999)
     """
     LearningAgent.__init__(self, module, learner)
     
     self.epsilon = epsilon
     self.epsilondecay = epsilondecay