def __init__(self, net, task, valueNetwork=None, **args):
    self.net = net
    self.task = task
    self.setArgs(**args)
    if self.valueLearningRate is None:
        self.valueLearningRate = self.learningRate
    if self.valueMomentum is None:
        self.valueMomentum = self.momentum
    if self.supervisedPlotting:
        from pylab import ion
        ion()

    # adaptive temperature:
    self.tau = 1.

    # prepare the datasets to be used
    self.weightedDs = ImportanceDataSet(self.task.outdim, self.task.indim)
    self.rawDs = ReinforcementDataSet(self.task.outdim, self.task.indim)
    self.valueDs = SequentialDataSet(self.task.outdim, 1)

    # prepare the supervised trainers
    self.bp = BackpropTrainer(self.net, self.weightedDs, self.learningRate,
                              momentum=self.momentum, verbose=False,
                              batchlearning=True)

    # CHECKME: outsource
    self.vnet = valueNetwork
    if valueNetwork is not None:
        self.vbp = BackpropTrainer(self.vnet, self.valueDs,
                                   self.valueLearningRate,
                                   momentum=self.valueMomentum,
                                   verbose=self.verbose)

    # keep information:
    self.totalSteps = 0
    self.totalEpisodes = 0
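
# Usage sketch for a constructor like the one above. The enclosing class is
# not named in the snippet, so `RWRLearner` below is hypothetical; the task
# and networks use standard PyBrain helpers. Note the RL convention here:
# task.outdim is the observation size and task.indim the action size.
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.environments.cartpole import BalanceTask

task = BalanceTask()                               # pole-balancing task
policy = buildNetwork(task.outdim, 8, task.indim)  # observation -> action
critic = buildNetwork(task.outdim, 8, 1)           # observation -> state value
# learner = RWRLearner(policy, task, valueNetwork=critic, learningRate=0.01)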

def __init__(self, indim, outdim):
    # store input and output dimension
    self.indim = indim
    self.outdim = outdim
    # create the history dataset
    self.history = ReinforcementDataSet(indim, outdim)

def __init__(self, indim, outdim):
    # store input and output dimension
    self.indim = indim
    self.outdim = outdim
    # create history dataset
    self.remember = True
    self.history = ReinforcementDataSet(indim, outdim)
    # initialize temporary variables
    self.lastobs = None
    self.lastaction = None
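
# Sketch of how the temporaries above are typically consumed, modeled on
# PyBrain's logging-agent convention; this method does not appear in the
# snippet itself and is an illustration only.
def giveReward(self, r):
    # the observation/action pair for this step must already be recorded
    assert self.lastobs is not None and self.lastaction is not None
    if self.remember:
        # complete the (observation, action, reward) triple in the history
        self.history.addSample(self.lastobs, self.lastaction, r)
    self.lastobs = None
    self.lastaction = None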

def __init__(self):
    Q.__init__(self, const.ALPHA, const.GAMMA)
    self.explorer = FeasibleEpsilonGreedyExplorer(const.EPSILON, const.DECAY)
    self.dataset2 = ReinforcementDataSet(1, 1)
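
# The constructor above expects a `const` module supplying the learning
# hyperparameters. A minimal sketch of such a module follows; the names come
# from the snippet, the values are illustrative only.
# const.py
ALPHA = 0.5     # Q-learning step size
GAMMA = 0.99    # discount factor
EPSILON = 0.3   # initial exploration rate for the epsilon-greedy explorer
DECAY = 0.9999  # multiplicative epsilon decay applied per step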