# NOTE: import paths follow the PyBrain package layout this agent was written
# against (the older learner API with learner.setModule()); adjust if your
# PyBrain version differs.
from pybrain.rl.agents.learning import LearningAgent
from pybrain.rl.agents.policygradient import PolicyGradientAgent
from pybrain.structure.connections.identity import IdentityConnection
from pybrain.structure.modules.statedependentlayer import StateDependentLayer
from pybrain.tools.shortcuts import buildNetwork


class StateDependentAgent(PolicyGradientAgent):
    """ StateDependentAgent is a learning agent that adds a StateDependentLayer
        to its module and stores the log likelihoods (loglh) in the dataset.
        It keeps the weights of the exploration network constant for a whole
        episode, which creates smooth trajectories rather than independent
        random perturbations at each timestep.

        See "State-Dependent Exploration for Policy Gradient Methods",
        ECML PKDD 2008.
    """

    def __init__(self, module, learner=None):
        LearningAgent.__init__(self, module, learner)

        # exploration module (linear flat network)
        self.explorationmodule = buildNetwork(self.indim, self.outdim, bias=False)

        # state dependent exploration layer
        self.explorationlayer = StateDependentLayer(self.outdim, self.explorationmodule, 'explore')

        # add exploration layer on top of the network through an identity connection
        out = self.module.outmodules.pop()
        self.module.addOutputModule(self.explorationlayer)
        self.module.addConnection(IdentityConnection(out, self.module['explore'], self.module))
        self.module.sortModules()

        # tell the learner about the new module
        self.learner.setModule(self.module)

        # add the log likelihood (loglh) to the dataset and link it to the others
        self.history.addField('loglh', self.module.paramdim)
        self.history.link.append('loglh')
        self.loglh = None

        # if this flag is set to True, random exploration weights are drawn after
        # each reward, effectively acting like the vanilla policy gradient algorithm
        self.actaspg = False

    def newEpisode(self):
        # draw one set of exploration weights and keep it for the whole episode
        LearningAgent.newEpisode(self)
        self.explorationlayer.drawRandomWeights()

    def getAction(self):
        # make the exploration perturbation a function of the current state
        self.explorationlayer.setState(self.lastobs)
        action = PolicyGradientAgent.getAction(self)
        return action

    def giveReward(self, r):
        PolicyGradientAgent.giveReward(self, r)
        if self.actaspg:
            # redraw exploration weights every step (vanilla policy gradient behavior)
            self.explorationlayer.drawRandomWeights()
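# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original module). It wires the
# agent into the standard observation/action/reward loop to show how the
# episode-constant exploration weights are used. Assumptions are flagged: the
# ENAC import path and its setModule() support follow the PyBrain version this
# agent was written against, and the hard-coded observations and rewards stand
# in for a real environment/task.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    from pybrain.rl.learners import ENAC  # assumed import path for an episodic policy gradient learner

    # deterministic controller: 2 state inputs -> 1 action output
    controller = buildNetwork(2, 1, bias=False)
    agent = StateDependentAgent(controller, ENAC())

    for episode in range(3):
        # one set of exploration weights is drawn here and held fixed, so the
        # perturbations stay correlated across the whole episode
        agent.newEpisode()
        for step in range(10):
            agent.integrateObservation([0.5, -0.3])  # placeholder observation
            agent.getAction()
            agent.giveReward(0.0)                    # placeholder reward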