Example #1
    def __init__(self, net, task, valueNetwork=None, **args):
        self.net = net
        self.task = task
        self.setArgs(**args)
        if self.valueLearningRate is None:
            self.valueLearningRate = self.learningRate
        if self.valueMomentum is None:
            self.valueMomentum = self.momentum
        if self.supervisedPlotting:
            from pylab import ion
            ion()

        # adaptive temperature:
        self.tau = 1.

        # prepare the datasets to be used
        self.weightedDs = ImportanceDataSet(self.task.outdim, self.task.indim)
        self.rawDs = ReinforcementDataSet(self.task.outdim, self.task.indim)
        self.valueDs = SequentialDataSet(self.task.outdim, 1)

        # prepare the supervised trainers; momentum is passed by keyword so it
        # is not mistaken for BackpropTrainer's positional lrdecay argument
        self.bp = BackpropTrainer(self.net, self.weightedDs, self.learningRate,
                                  momentum=self.momentum, verbose=False,
                                  batchlearning=True)

        # CHECKME: outsource
        self.vnet = valueNetwork
        if valueNetwork is not None:
            self.vbp = BackpropTrainer(self.vnet, self.valueDs, self.valueLearningRate,
                                       momentum=self.valueMomentum, verbose=self.verbose)

        # keep information:
        self.totalSteps = 0
        self.totalEpisodes = 0
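
The constructor above only wires the datasets up; for context, here is a minimal sketch of how a raw ReinforcementDataSet is typically filled during one episode. The dimensions and the placeholder state, action, and reward values are illustrative assumptions, not taken from the example.

from pybrain.datasets import ReinforcementDataSet

# state dimension 2, action dimension 1 (illustrative values)
ds = ReinforcementDataSet(2, 1)
ds.newSequence()                       # one sequence per episode
for step in range(3):
    state = [0.0, float(step)]         # placeholder observation
    action = [0]                       # placeholder action
    reward = 1.0                       # placeholder reward
    ds.addSample(state, action, reward)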
Example #2
    def __init__(self, indim, outdim):
        # store input and output dimension
        self.indim = indim
        self.outdim = outdim

        # create the history dataset
        self.history = ReinforcementDataSet(indim, outdim)
Example #3
    def __init__(self, indim, outdim):
        # store input and output dimension
        self.indim = indim
        self.outdim = outdim

        # create history dataset
        self.remember = True
        self.history = ReinforcementDataSet(indim, outdim)

        # initialize temporary variables
        self.lastobs = None
        self.lastaction = None
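
Once an agent like this has logged a few episodes, the stored (state, action, reward) triples can be read back sequence by sequence. A hedged sketch, using only the SequentialDataSet accessors; the dimensions and the hand-added sample stand in for data the agent would normally record itself.

from pybrain.datasets import ReinforcementDataSet

history = ReinforcementDataSet(2, 1)       # illustrative dimensions
history.addSample([0.0, 0.0], [1], 0.5)    # normally filled by the agent

# getSequence returns one array per linked field: state, action, reward
for i in range(history.getNumSequences()):
    states, actions, rewards = history.getSequence(i)
    print(states.shape, actions.shape, rewards.shape)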
Example #4
File: q.py  Project: jaegs/AI_Practicum
    def __init__(self):
        # initialize the underlying Q learner with its learning rate and
        # discount factor, then attach an explorer and a logging dataset
        Q.__init__(self, const.ALPHA, const.GAMMA)
        self.explorer = FeasibleEpsilonGreedyExplorer(const.EPSILON, const.DECAY)
        self.dataset2 = ReinforcementDataSet(1, 1)
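
For a tabular learner like this one, state and action are single indices, so the dataset is built with dimensions (1, 1). A minimal sketch of reading such triples back into an update loop follows; q_update is a hypothetical helper for illustration, not part of the project above.

from pybrain.datasets import ReinforcementDataSet

ds = ReinforcementDataSet(1, 1)     # scalar state index, scalar action index
ds.addSample([0], [1], -1.0)        # one (state, action, reward) triple
ds.addSample([1], [0], 1.0)

states, actions, rewards = ds.getSequence(0)
for s, a, r in zip(states, actions, rewards):
    pass  # e.g. q_update(int(s), int(a), float(r)) -- hypothetical update step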