    def reward(self, job, task, device):
        # job reward: 1 for each finished job, 0 otherwise
        jobReward = 1. * job.finished

        # energy reward: penalise this device's share of the job's energy
        # cost, scaled relative to the device's battery capacity
        if job.totalEnergyCost != 0 and device in job.devicesEnergyCost:
            energyReward = -job.devicesEnergyCost[
                device] / device.maxEnergyLevel * 1e2
        else:
            energyReward = 0

        # death reward: heavy penalty if the device had to fail gracefully
        deathReward = -10. if device.gracefulFailure else 0

        latestAction = "None" if job.latestAction is None else self.possibleActions[
            job.latestAction]
        debug.learnOut(
            debug.formatLearn("Reward: %s (%s) j: %.2f e: %.2f d: %.2f",
                              (self.__name__, latestAction, jobReward,
                               energyReward, deathReward)))

        reward = jobReward + energyReward + deathReward
        return reward
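How the three reward terms combine, with hypothetical numbers (a finished job on a device that used 5 of 1000 energy units, with no graceful failure):

# hypothetical values, not taken from the simulator
jobReward = 1. * True              # job.finished           ->  1.0
energyReward = -5. / 1000. * 1e2   # energy share, scaled   -> -0.5
deathReward = 0                    # device did not fail
print(jobReward + energyReward + deathReward)  # 0.5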
	def updateState(self, task, job, device):
		# refresh the global system state, then the task-, job- and device-specific parts
		self.updateSystem()
		debug.learnOut("updating systemState with [{}] [{}] [{}]".format(task, job, device), 'c')
		self.updateTask(task)
		self.updateJob(job)
		self.updateDevice(device)
Example #3
    def _setDecisions(self, devices):
        # tag each possible action with its index so chosen actions can be
        # stored and restored compactly
        for i in range(len(self.possibleActions)):
            self.possibleActions[i].index = i
        debug.learnOut('actions %s' % self.possibleActions)

        self.numActions = len(self.possibleActions)

        self.devices = devices
Example #4
    def finishTask(self):
        debug.out("adding processing task 1")

        # move the job from its creator to the node that will process it
        debug.out("moving job to processingNode")
        newOwner = self.job.processingNode

        # TODO: rx job in tdsimulation likely broken because not adding received job to backlog (assuming subtask is created)
        self.job.moveTo(newOwner)

        # upon reception the new owner re-evaluates the offloading decision
        debug.out("updating decision upon reception")
        debug.out("owner: {}".format(self.job.owner))

        choice = self.job.owner.agent.redecideDestination(
            self.job.currentTask, self.job, self.job.owner)
        debug.learnOut("redeciding choice %s" % choice)
        self.job.setDecisionTarget(choice)
        affected = self.job.activate()

        return rxMessage.finishTask(self, [affected])
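redecideDestination is not shown in these examples; given how it is used here and in Example #6, a plausible sketch is that it simply re-runs the agent's forward pass from the new owner's perspective:

def redecideDestination(self, task, job, device):
    # assumption: re-evaluating a decision is just a fresh forward pass
    return self.forward(task, job, device)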
Example #5
    def combineJobs(self, otherJob):
        # if batches are not reconsidered, the decision was made for the other
        # job, so carry its state, progress and action over to this one
        if not self.owner.agent.reconsiderBatches:
            self.beforeState = np.array(otherJob.beforeState)
            self.finished += otherJob.finished
            self.latestAction = otherJob.latestAction

        # combine energy costs per device
        learnOut("combining %s with %s (%.2f %.2f)" %
                 (otherJob, self, otherJob.totalEnergyCost * 1e3,
                  self.totalEnergyCost * 1e3))
        for dev in otherJob.devicesEnergyCost:
            self.addEnergyCost(otherJob.devicesEnergyCost[dev], dev)
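addEnergyCost is not shown in these examples; a minimal sketch of what it plausibly does, assuming devicesEnergyCost is a per-device dict and totalEnergyCost tracks the running sum:

def addEnergyCost(self, cost, device):
    # assumption: accumulate this device's share as well as the job's total
    if device not in self.devicesEnergyCost:
        self.devicesEnergyCost[device] = 0.
    self.devicesEnergyCost[device] += cost
    self.totalEnergyCost += cost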
Example #6
	def continueBatch(self, previousJob):
		assert self.currentBatch is not None

		if self.batchLength(self.currentBatch) == 0:
			debug.learnOut("no more in batch %s for %s" % (self.currentBatch, self))
			self.currentJob = None
			return None
		debug.learnOut("continue batch for %s (%d)" % (self.currentBatch, self.batchLength(self.currentBatch)))

		# decide whether to continue with the batch or not
		possibleNextJob = self.batch[self.currentBatch][0]
		if self.agent.reconsiderBatches:
			newChoice = self.agent.redecideDestination(possibleNextJob.currentTask, possibleNextJob, self)
			debug.learnOut("decided to continue batch at %s?: %s" % (possibleNextJob, newChoice))

			proceed = newChoice != BATCH
		else:
			# always continue with the batch
			newChoice = self.agent.getAction(LOCAL)
			debug.learnOut("default to continue batch: %s" % newChoice)
			proceed = True

		# if decided to continue with this batch
		if proceed:
			possibleNextJob.setDecisionTarget(newChoice)

			if self.batchLength(self.currentBatch) > 0:
				self.currentJob = self.batch[self.currentBatch][0]
				# previousJob is destroyed if offloaded due to graceful failure
				if not self.gracefulFailure:
					self.currentJob.combineJobs(previousJob)
				self.removeJobFromBatch(self.currentJob)

				return self.currentJob.activate()
			else:
				raise Exception("wanted to continue with batch but nothing available")

		self.currentJob = None
		return None
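batchLength is not shown in these examples; a minimal sketch, assuming self.batch maps each task to a list of queued jobs:

def batchLength(self, task):
    # assumption: self.batch is a dict of task -> list of queued jobs
    if task not in self.batch:
        return 0
    return len(self.batch[task])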
Example #7
    def finishTask(self):
        newSubtask = None
        # start the first job in the queue
        self.job.processingNode.currentBatch = self.job.currentTask
        learnOut(
            "processing batch %d (%s)" %
            (self.job.processingNode.batchLength(self.job.currentTask),
             self.job.processingNode.fpga.isConfigured(self.job.currentTask)))

        # consider graceful failure
        if enableGracefulFailure and not self.owner.gracefulFailure:
            self.owner.checkGracefulFailure()

        # either fail gracefully or start processing a new job
        if self.owner.gracefulFailure:
            learnOut("GRACEFUL FAILURE: %s" % self.owner)
            debug.out(
                "GRACEFUL FAILURE on %s %s %s" %
                (self.owner, self.owner.offloadingOptions, self.owner.batch))

            if not self.owner.hasOffloadingOptions():
                # cannot offload to anything while dying
                return None
            else:
                # offload the job elsewhere before this device fails
                self.job.beforeState = self.owner.agent.systemState.getCurrentState(
                    self.job.currentTask, self.job, self.owner)
                choice = self.owner.agent.getAction(OFFLOADING)
                self.job.latestAction = self.owner.agent.getActionIndex(choice)

                debug.out("choice %s %s" %
                          (choice, self.owner.agent.latestAction))
                choice.updateTargetDevice(self.owner,
                                          self.owner.offloadingOptions)
                debug.out("%s %s %s %s" %
                          (choice.local, self.owner,
                           self.owner.offloadingOptions, choice.targetDevice))

                affectedDevice = self.owner
                self.job.processingNode = choice.targetDevice
                newSubtask = createMessage(self.job)
                debug.out("spraying %s" % self.job)

            # TODO: train based on failed jobs here
        else:
            affectedDevice = self.job.processingNode

            # pick the right subtask: FPGA offload, reconfiguration or plain processing
            if self.job.hardwareAccelerated:
                if self.job.processingNode.fpga.isConfigured(
                        self.job.currentTask):
                    newSubtask = mcuFpgaOffload(self.job)
                else:
                    newSubtask = reconfigureFPGA(self.job)
            else:
                newSubtask = processing(self.job)

        assert newSubtask is not None

        return subtask.finishTask(self, [(affectedDevice, newSubtask)])
Example #8
    def createModel(self):
        # create a Q table sized by the number of unique states and actions
        debug.learnOut("qtable: (%d, %d)" %
                       (self.systemState.getUniqueStates(), self.numActions))
        self.model = qTable(self.systemState.getUniqueStates(),
                            self.numActions, "Model")
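qTable is not defined in these examples; a minimal sketch, assuming it simply wraps a (numStates, numActions) array of Q-values initialised to zero:

import numpy as np

class qTable:
    # assumption: a plain tabular Q-function, one row of Q-values per state
    def __init__(self, numStates, numActions, name):
        self.name = name
        self.table = np.zeros((numStates, numActions))

    def predict(self, stateIndex):
        # Q-values for every action in this state
        return self.table[stateIndex]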
Example #9
	def setProductionMode(self, value=True):
		debug.learnOut("switching dqn to production mode!", 'y')
		self.productionMode = value
		if value:
			# stop exploring: epsilon-greedy with eps = 0 is purely greedy
			self.policy.eps = 0
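A usage sketch (agent here is a hypothetical trained instance): forcing eps to 0 makes an epsilon-greedy policy purely greedy, so the agent only exploits what it has learned:

agent.setProductionMode()  # hypothetical instance; eps = 0, no more exploration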
Example #10
    def forward(self, task, job, device):
        counters.NUM_FORWARD += 1

        # snapshot the state before acting so it can be used for training later
        job.beforeState = self.systemState.getCurrentState(task, job, device)
        job.reset()

        sim.debug.out(debug.formatDebug("beforestate {}", job.beforeState))

        # special case: the job queue is full, so force the last action
        if device.isQueueFull(task):
            actionIndex = self.numActions - 1
            debug.learnOut(
                debug.formatLearn(
                    "\nSpecial case! %s queue is full %s %d %s %s",
                    (job, device.batchLengths(), actionIndex,
                     self.possibleActions[actionIndex], job.beforeState)), 'r')
        # special case: no offloading target is available
        elif not device.hasOffloadingOptions(
        ) and OFFLOADING in self.possibleActions:
            assert self.possibleActions[0] is OFFLOADING
            # pick a random action other than OFFLOADING (index 0); note that
            # randint's upper bound is exclusive, so the last action is skipped too
            actionIndex = np.random.randint(1, self.numActions - 1)
            debug.out("no offloading available")
        else:
            debug.out("getting action %s %s" % (device, device.batchLengths()))
            # choose the best action for the current state
            actionIndex = self.selectAction(job.beforeState)
            debug.learnOut(
                debug.formatLearn(
                    "\nChoose %s for %s: %d %s %s",
                    (device, job, actionIndex,
                     self.possibleActions[actionIndex], job.beforeState)), 'g')

        job.latestAction = actionIndex
        job.history.add("action", actionIndex)

        assert self.possibleActions is not None
        choice = self.possibleActions[actionIndex]

        choice.updateTargetDevice(owner=device,
                                  offloadingDevices=device.offloadingOptions)

        self.incrementChosenAction(choice)

        return choice
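selectAction is not shown above; a plausible epsilon-greedy sketch, assuming self.model.predict(state) returns one Q-value per action and self.policy.eps is the exploration rate:

def selectAction(self, state):
    # assumption: standard epsilon-greedy over the model's predicted Q-values
    if np.random.random() < self.policy.eps:
        # explore: uniformly random action
        return np.random.randint(0, self.numActions)
    # exploit: action with the highest predicted Q-value
    return int(np.argmax(self.model.predict(state)))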
Example #11
	def removeOffloadingOption(self, device):
		if device in self.offloadingOptions:
			self.offloadingOptions.remove(device)
			debug.learnOut("removed offloading option %s %s" % (device, self.offloadingOptions))
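hasOffloadingOptions, used in Examples #7 and #10, is presumably the matching query on this same list; a one-line sketch under that assumption:

def hasOffloadingOptions(self):
    # assumption: offloadingOptions is the list maintained above
    return len(self.offloadingOptions) > 0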