示例#1
0
 def __init__(self):
     self.logfile = TimedFileLogger('/tmp/log.crtrlog')
     self.environment = CreateRobot()
     self.environment.fullMode()
     self.latencyTimer = Chrono()
     self.clock = Clock("CreateNexting")
     self.rewards = self.createRewardFunction()
     self.actions = [CreateAction(-200, +200)]
     self.behaviourPolicy = RandomPolicy(Random(0), self.actions)
     self.obsHistory = ObsHistory(10, self.environment.legend())
     self.representation = TileCodersNoHashing(
         self.obsHistory.historyVectorSize(), 0, 4096)
     self.representation.includeActiveFeature()
     for name in self.sensorsOfInterest:
         for timeShift in range(self.HistoryLength):
             indexes = self.obsHistory.selectIndexes(timeShift, name)
             self.representation.addTileCoder(indexes, 64, 8)
     self.demons = DemonScheduler()
     self.verifiers = []
     for rewardFunction in self.rewards:
         for gamma in [0, 0.5, 0.75, 7 / 8., 15 / 16.]:
             demon = self.createOnPolicyPredictionDemon(
                 rewardFunction, gamma)
             verifier = PredictionDemonVerifier(demon)
             self.verifiers.append(verifier)
             self.demons.add(demon)
             self.demonToData[demon] = (verifier,
                                        rewardFunction.label() + str(gamma))
     self.x_t = None
 def __init__(self):
     self.environment = CritterbotSimulator()
     self.latencyTimer = Chrono()
     self.rewards = self.createRewardFunction()
     self.actions = XYThetaAction.sevenActions()
     self.behaviourPolicy = RandomPolicy(Random(0), self.actions)
     self.representation = TileCodersNoHashing(self.environment.legend().nbLabels(), -2000, 2000)
     self.representation.includeActiveFeature()
     self.demons = DemonScheduler()
     for rewardFunction in self.rewards:
         self.demons.add(self.createOffPolicyControlDemon(rewardFunction))
     self.x_t = None