def reward(self, job, task, device):
    # default reward behaviour: +1 per finished job
    jobReward = 1. * job.finished

    # penalise energy spent on this device, scaled by its capacity
    if job.totalEnergyCost != 0 and device in job.devicesEnergyCost:
        energyReward = -job.devicesEnergyCost[device] / device.maxEnergyLevel * 1e2
    else:
        energyReward = 0

    # penalise a device that had to fail gracefully
    deathReward = -10. if device.gracefulFailure else 0

    latestAction = "None" if job.latestAction is None else self.possibleActions[job.latestAction]
    debug.learnOut(debug.formatLearn(
        "Reward: %s (%s) j: %.2f e: %.2f d: %.2f",
        (self.__name__, latestAction, jobReward, energyReward, deathReward)))

    reward = jobReward + energyReward + deathReward
    return reward
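# Worked example (not part of the original module): how the three reward terms
# combine for a hypothetical finished job; all values below are assumptions.
#
#   jobReward    = 1.0                    # job.finished == 1
#   energyReward = -(0.002 / 0.5) * 1e2   # = -0.4, scaled per-device energy penalty
#   deathReward  = 0.0                    # no graceful failure on this device
#   reward       = 1.0 - 0.4 + 0.0       # = 0.6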
def updateState(self, task, job, device):
    self.updateSystem()
    debug.learnOut("updating systemState with [{}] [{}] [{}]".format(task, job, device), 'c')
    debug.out("update {}".format(self.__class__))
    self.updateTask(task)
    self.updateJob(job)
    self.updateDevice(device)
def _setDecisions(self, devices):
    # give each possible action its index so model outputs can be mapped back
    for i in range(len(self.possibleActions)):
        self.possibleActions[i].index = i
    debug.learnOut('actions %s' % self.possibleActions)
    self.numActions = len(self.possibleActions)
    # the model must be (re)created after this point because it uses numActions
    self.devices = devices
def finishTask(self):
    debug.out("adding processing task 1")

    # move the job from its creator to the processing node
    debug.out("moving job to processingNode")
    newOwner = self.job.processingNode
    # TODO: rx job in tdsimulation likely broken because not adding received
    # job to backlog (assuming subtask is created)
    self.job.moveTo(newOwner)

    # reevaluate the offloading decision now that the job has a new owner
    debug.out("updating decision upon reception")
    debug.out("owner: {}".format(self.job.owner))
    choice = self.job.owner.agent.redecideDestination(
        self.job.currentTask, self.job, self.job.owner)
    debug.learnOut("redeciding choice %s" % choice)
    self.job.setDecisionTarget(choice)
    affected = self.job.activate()

    return rxMessage.finishTask(self, [affected])
def combineJobs(self, otherJob):
    # the decision was made on the other job when batches are not reconsidered
    if not self.owner.agent.reconsiderBatches:
        self.beforeState = np.array(otherJob.beforeState)

    self.finished += otherJob.finished
    self.latestAction = otherJob.latestAction

    # combine per-device energy costs
    learnOut("combining %s with %s (%.2f %.2f)" % (
        otherJob, self, otherJob.totalEnergyCost * 1e3, self.totalEnergyCost * 1e3))
    for dev in otherJob.devicesEnergyCost:
        self.addEnergyCost(otherJob.devicesEnergyCost[dev], dev)
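# Minimal sketch of the per-device energy merge above, using plain dicts in
# place of Job objects (hypothetical; assumes addEnergyCost accumulates):
#
#   mine   = {"dev1": 0.2}
#   theirs = {"dev1": 0.1, "dev2": 0.3}
#   for dev, cost in theirs.items():
#       mine[dev] = mine.get(dev, 0.) + cost  # mine -> {"dev1": 0.3, "dev2": 0.3}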
def continueBatch(self, previousJob):
    assert self.currentBatch is not None
    if self.batchLength(self.currentBatch) == 0:
        debug.learnOut("no more in batch %s for %s" % (self.currentBatch, self))
        self.currentJob = None
        return None

    debug.learnOut("continue batch for %s (%d)" % (
        self.currentBatch, self.batchLength(self.currentBatch)))

    # decide whether to continue with the batch or not
    possibleNextJob = self.batch[self.currentBatch][0]
    if self.agent.reconsiderBatches:
        newChoice = self.agent.redecideDestination(possibleNextJob.currentTask, possibleNextJob, self)
        debug.learnOut("decided to continue batch at %s?: %s" % (possibleNextJob, newChoice))
        proceed = newChoice != BATCH
    else:
        # always continue with the batch
        newChoice = self.agent.getAction(LOCAL)
        debug.learnOut("default to continue batch: %s" % newChoice)
        proceed = True

    # if decided to continue with this batch
    if proceed:
        possibleNextJob.setDecisionTarget(newChoice)
        if self.batchLength(self.currentBatch) > 0:
            self.currentJob = self.batch[self.currentBatch][0]
            # previousJob is destroyed if offloaded due to graceful failure
            if not self.gracefulFailure:
                self.currentJob.combineJobs(previousJob)
            self.removeJobFromBatch(self.currentJob)
            return self.currentJob.activate()
        else:
            raise Exception("wanted to continue with batch but nothing available")
    else:
        self.currentJob = None
        return None
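# Standalone sketch of the continue-or-stop decision above (hypothetical
# stand-ins for the agent and the BATCH action; illustration only):
def _should_continue(reconsiderBatches, redecide, BATCH):
    if reconsiderBatches:
        return redecide() != BATCH  # the agent may opt out of the batch
    return True                     # default: always drain the batch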
def finishTask(self):
    newSubtask = None

    # start the first job in the queue
    self.job.processingNode.currentBatch = self.job.currentTask
    learnOut("processing batch %d (%s)" % (
        self.job.processingNode.batchLength(self.job.currentTask),
        self.job.processingNode.fpga.isConfigured(self.job.currentTask)))

    # consider graceful failure
    if enableGracefulFailure and not self.owner.gracefulFailure:
        self.owner.checkGracefulFailure()

    # either fail or start processing a new job
    if self.owner.gracefulFailure:
        learnOut("GRACEFUL FAILURE: %s" % self.owner)
        debug.out("GRACEFUL FAILURE on %s %s %s" % (
            self.owner, self.owner.offloadingOptions, self.owner.batch))

        if not self.owner.hasOffloadingOptions():
            # cannot offload to anything and dying
            return None
        else:
            self.job.beforeState = self.owner.agent.systemState.getCurrentState(
                self.job.currentTask, self.job, self.owner)
            choice = self.owner.agent.getAction(OFFLOADING)
            self.job.latestAction = self.owner.agent.getActionIndex(choice)
            debug.out("choice %s %s" % (choice, self.owner.agent.latestAction))
            choice.updateTargetDevice(self.owner, self.owner.offloadingOptions)
            debug.out("%s %s %s %s" % (
                choice.local, self.owner, self.owner.offloadingOptions, choice.targetDevice))
            affectedDevice = self.owner
            self.job.processingNode = choice.targetDevice
            newSubtask = createMessage(self.job)
            debug.out("spraying %s" % self.job)
            # TODO: train based on failed jobs here
    else:
        affectedDevice = self.job.processingNode
        if self.job.hardwareAccelerated:
            if self.job.processingNode.fpga.isConfigured(self.job.currentTask):
                newSubtask = mcuFpgaOffload(self.job)
            else:
                newSubtask = reconfigureFPGA(self.job)
        else:
            newSubtask = processing(self.job)

    assert newSubtask is not None
    return subtask.finishTask(self, [(affectedDevice, newSubtask)])
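# Summary of the subtask selection above (the mcuFpgaOffload, reconfigureFPGA
# and processing subtasks are defined elsewhere in this repo):
#   hardwareAccelerated, FPGA already configured  -> mcuFpgaOffload
#   hardwareAccelerated, FPGA not yet configured  -> reconfigureFPGA
#   software-only                                 -> processing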
def createModel(self):
    # create the Q table: one row per unique system state, one column per action
    debug.learnOut("qtable: (%d, %d)" % (self.systemState.getUniqueStates(), self.numActions))
    self.model = qTable(self.systemState.getUniqueStates(), self.numActions, "Model")
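# Minimal standalone Q-table sketch (assumes numpy; the real qTable class is
# defined elsewhere in this repo):
#
#   q = np.zeros((numStates, numActions))  # one row of action values per discrete state
#   best = int(np.argmax(q[stateIndex]))   # greedy action for a given state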
def setProductionMode(self, value=True):
    debug.learnOut("switching dqn to production mode!", 'y')
    self.productionMode = value
    # disable exploration entirely
    self.policy.eps = 0
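# With eps = 0 the epsilon-greedy policy never explores, so production mode is
# purely greedy. A minimal epsilon-greedy sketch for reference (assumes numpy;
# hypothetical, not the actual policy class used above):
#
#   if np.random.random() < eps:
#       action = np.random.randint(numActions)  # explore
#   else:
#       action = int(np.argmax(qValues))        # exploit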
def forward(self, task, job, device):
    counters.NUM_FORWARD += 1
    job.beforeState = self.systemState.getCurrentState(task, job, device)
    job.reset()
    sim.debug.out(debug.formatDebug("beforestate {}", job.beforeState))

    # special case if the job queue is full: force the final action
    if device.isQueueFull(task):
        actionIndex = self.numActions - 1
        debug.learnOut(debug.formatLearn(
            "\nSpecial case! %s queue is full %s %d %s %s",
            (job, device.batchLengths(), actionIndex,
             self.possibleActions[actionIndex], job.beforeState)), 'r')
    # check if no offloading is available
    elif not device.hasOffloadingOptions() and OFFLOADING in self.possibleActions:
        assert self.possibleActions[0] is OFFLOADING
        # pick a random action other than OFFLOADING (index 0) or the final,
        # queue-full action (randint's upper bound is exclusive)
        actionIndex = np.random.randint(1, self.numActions - 1)
        debug.out("no offloading available")
    else:
        debug.out("getting action %s %s" % (device, device.batchLengths()))
        # choose the best action based on the current state
        actionIndex = self.selectAction(job.beforeState)
        debug.learnOut(debug.formatLearn(
            "\nChoose %s for %s: %d %s %s",
            (device, job, actionIndex, self.possibleActions[actionIndex],
             job.beforeState)), 'g')

    job.latestAction = actionIndex
    job.history.add("action", actionIndex)

    assert self.possibleActions is not None
    choice = self.possibleActions[actionIndex]
    choice.updateTargetDevice(owner=device, offloadingDevices=device.offloadingOptions)
    self.incrementChosenAction(choice)
    return choice
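# Standalone sketch of the three branches above (hypothetical stand-ins,
# illustration only). The real method also logs, records history, and
# resolves the target device.
def _pick_action_index(queueFull, canOffload, numActions, select):
    if queueFull:
        return numActions - 1                        # forced final action
    if not canOffload:
        return np.random.randint(1, numActions - 1)  # skip OFFLOADING at index 0
    return select()                                  # learned policy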
def removeOffloadingOption(self, device):
    if device in self.offloadingOptions:
        self.offloadingOptions.remove(device)
        debug.learnOut("removed offloading option %s %s" % (device, self.offloadingOptions))