def start_execution_process(self, driver):
    """Build the initial HEFT schedule and start executing it.

    Runs HEFT over ``self.workflow`` with ``self.rm`` and ``self.estimator``,
    validates the resulting static schedule, logs its makespan, records the
    wall-clock start time of the execution process, stores the schedule as
    ``self.current_schedule`` and finally calls ``self.run_next_tasks``.

    :param driver: passed through unchanged to ``self.run_next_tasks``.
    """
    logger.info("Building Schedule")
    initial_schedule = run_heft(self.workflow, self.rm, self.estimator)

    utility = Utility.Utility
    utility.validate_static_schedule(self.workflow, initial_schedule)
    makespan = utility.makespan(initial_schedule)
    logger.info("HEFT makespan: " + str(makespan))

    # The start timestamp is taken only after scheduling has finished, so
    # the time spent building the schedule is not counted as execution time.
    self.execution_process_start_time = time.time()
    self.current_schedule = initial_schedule
    self.run_next_tasks(driver)
def check_for_nodes_fault(self, fail_after_seconds=10, resources_to_kill=3):
    """Inject a one-off resource failure and reschedule the remaining work.

    Does nothing until at least ``fail_after_seconds`` have passed since
    ``self.execution_process_start_time``, and does nothing at all once
    ``self.fail_has_been_generated`` is set. When it fires, it:

    1. kills the first ``resources_to_kill`` executors of
       ``self.active_resources`` (ordered by executor id) and marks them
       ``ResourceInfo.DEAD``;
    2. rebuilds the scheduling tools via ``self.construct_scheduling_tools``;
    3. builds a "clean" schedule holding only items that are not
       ``UNSTARTED``, marking items that were ``EXECUTING`` on a node that is
       now ``Node.Down`` as ``FAILED``;
    4. calls ``self.wf_struct_opt`` and reruns HEFT with the clean schedule
       as the fixed part;
    5. places the task ids returned by ``wf_struct_opt`` into the new
       schedule as ``FINISHED``, stores the schedule as
       ``self.current_schedule`` and resumes through ``self.run_next_tasks``.

    Plan left by the original author (steps 3-6 are not all visible in this
    method): update rm; update the schedule and mark failed tasks; remake
    the schedule; check whether the new solution is within the deadline; if
    not, retry with a reduced task count until a solution is found or no
    option is left; otherwise raise a deadline-violation error.

    :param fail_after_seconds: minimum elapsed execution time, in seconds,
        before the failure is generated. Defaults to the previously
        hard-coded value of 10.
    :param resources_to_kill: how many executors to kill. Defaults to the
        previously hard-coded value of 3.
    """
    if self.fail_has_been_generated:
        return

    # Subtract the raw timestamps instead of going through
    # ``datetime`` and ``timedelta.seconds``: ``.seconds`` is only the
    # seconds *component* of the delta (it wraps after one day), and naive
    # local datetimes are sensitive to DST changes.
    #
    # Experiment notes kept from the original author:
    #   bsm20 - 9 res - 1.42 min - deadline: 1.5 min - diff = [10, 25, 50, 75]
    #   bsm10 - 9 res - 1.14 min - deadline: 1.2 min - diff = [8, 20, 40, 60]
    elapsed = time.time() - self.execution_process_start_time
    if elapsed < fail_after_seconds:
        return

    logger.info("Tring to kill")
    self.fail_has_been_generated = True

    # Everything below runs only once, right after the failure is injected.
    doomed_ids = sorted(self.active_resources.keys())[:resources_to_kill]
    for executor_id in doomed_ids:
        rinfo = self.active_resources[executor_id]
        # This method receives no driver argument; use the stored one, the
        # same object that is handed to run_next_tasks at the end.
        rinfo.killExecutor(self._driver)
        rinfo.change_state(ResourceInfo.DEAD)

    # Original note: "update rm and estimator".
    self.construct_scheduling_tools()

    # Drop every planned (UNSTARTED) item from the current schedule and mark
    # the items that were running on a dead node as failed.
    new_mapping = {}
    for node, items in self.current_schedule.mapping.items():
        new_node = self.rm.get_node_by_name(node.name)
        kept_items = []
        for item in items:
            if new_node.state == Node.Down and item.state == ScheduleItem.EXECUTING:
                # NOTE: the item object is shared, so the old schedule
                # (self.current_schedule) sees it as FAILED as well.
                item.state = ScheduleItem.FAILED
            if item.state != ScheduleItem.UNSTARTED:
                kept_items.append(item)
        new_mapping[new_node] = kept_items
    clean_schedule = Schedule(new_mapping)

    logger.info("===RESCHEDULING PHASE===")
    logger.info("Current schedule: %s" % self.current_schedule)
    logger.info("Clean schedule: %s" % clean_schedule)

    adapt_started = time.time()
    # NOTE(review): wf_struct_opt presumably adapts self.workflow and returns
    # the ids of tasks that will not be calculated -- confirm against its
    # definition.
    not_calculated_bsm, not_calculated_swan = self.wf_struct_opt(
        schedule=self.current_schedule, level=-14)
    logger.info("TIME to adapt executio process: %s" % (time.time() - adapt_started))

    # Resume the execution process: items already in clean_schedule are fixed.
    heft_schedule = run_heft(self.workflow, self.rm, self.estimator,
                             fixed_schedule=clean_schedule)

    # Tasks reported by wf_struct_opt are placed and immediately marked as
    # finished so they are never launched.
    for skipped_ids in (not_calculated_bsm, not_calculated_swan):
        for task_id in skipped_ids:
            _node, item = heft_schedule.place_non_failed(task_id)
            item.state = ScheduleItem.FINISHED

    logger.info("New Heft Schedule: %s" % heft_schedule)
    logger.info("New HEFT makespan: " + str(Utility.Utility.makespan(heft_schedule)))
    logger.info("===END RESCHEDULING PHASE===")

    self.current_schedule = heft_schedule
    self.run_next_tasks(self._driver)