def __init__(self):
    """Set up the robot, the tile-coded representation and the demons.

    Opens a timed log file, connects to a ``CreateRobot`` and switches it
    to full mode, builds a random behaviour policy over a single
    ``CreateAction``, tile-codes an observation history, and registers one
    on-policy prediction demon (with a matching verifier) for every
    (reward function, gamma) pair.
    """
    self.logfile = TimedFileLogger('/tmp/log.crtrlog')

    # The robot is put in full mode right after it is created, before its
    # legend is read further down.
    self.environment = CreateRobot()
    self.environment.fullMode()

    self.latencyTimer = Chrono()
    self.clock = Clock("CreateNexting")

    self.rewards = self.createRewardFunction()
    self.actions = [CreateAction(-200, +200)]
    # Random is constructed with 0 -- presumably a fixed seed; confirm.
    self.behaviourPolicy = RandomPolicy(Random(0), self.actions)

    # NOTE(review): the history depth is hard-coded to 10 here, while the
    # loop below iterates over range(self.HistoryLength) -- confirm that
    # the two are meant to agree.
    history = ObsHistory(10, self.environment.legend())
    self.obsHistory = history

    coders = TileCodersNoHashing(history.historyVectorSize(), 0, 4096)
    self.representation = coders
    coders.includeActiveFeature()

    # One tile coder per (sensor, time shift) pair.
    for sensorName in self.sensorsOfInterest:
        for shift in range(self.HistoryLength):
            selected = history.selectIndexes(shift, sensorName)
            coders.addTileCoder(selected, 64, 8)

    scheduler = DemonScheduler()
    self.demons = scheduler
    self.verifiers = []

    # Same values as 0, 0.5, 0.75, 7/8. and 15/16.; the leading 0 stays an
    # int so that str(gamma) still yields "0" in the label.
    discounts = (0, 0.5, 0.75, 0.875, 0.9375)

    # NOTE(review): self.demonToData is not initialised in this method --
    # presumably it is defined at class level; confirm.
    for reward in self.rewards:
        for gamma in discounts:
            demon = self.createOnPolicyPredictionDemon(reward, gamma)
            verifier = PredictionDemonVerifier(demon)
            self.verifiers.append(verifier)
            scheduler.add(demon)
            label = reward.label() + str(gamma)
            self.demonToData[demon] = (verifier, label)

    self.x_t = None
def __init__(self):
    """Set up the simulator, the representation and the control demons.

    Creates a ``CritterbotSimulator``, a random behaviour policy over the
    actions returned by ``XYThetaAction.sevenActions()``, a tile-coded
    representation sized from the environment legend, and one off-policy
    control demon per reward function.
    """
    simulator = CritterbotSimulator()
    self.environment = simulator
    self.latencyTimer = Chrono()

    self.rewards = self.createRewardFunction()
    self.actions = XYThetaAction.sevenActions()
    # Random is constructed with 0 -- presumably a fixed seed; confirm.
    self.behaviourPolicy = RandomPolicy(Random(0), self.actions)

    # The representation has one input per label in the legend.
    labelCount = simulator.legend().nbLabels()
    coders = TileCodersNoHashing(labelCount, -2000, 2000)
    self.representation = coders
    coders.includeActiveFeature()

    scheduler = DemonScheduler()
    self.demons = scheduler
    for reward in self.rewards:
        controlDemon = self.createOffPolicyControlDemon(reward)
        scheduler.add(controlDemon)

    self.x_t = None