def mapping(self, sorted_jobs, existing_plan, live_nodes, commcost, compcost):
    """
    Allocate each job to the machine with the earliest finish time.
    Operates in place.
    """
    ## TODO: add finished tasks
    jobson = dict()
    for (node, items) in existing_plan.items():
        for item in items:
            if item.state == ScheduleItem.FINISHED or item.state == ScheduleItem.EXECUTING:
                jobson[item.job] = node

    new_plan = existing_plan

    def ft(machine):
        # closes over the current task and st bound in the loop below
        runtime = compcost(task, machine)
        cost = st(machine, runtime) + runtime
        return cost

    ## if there are no live nodes, the cleaned schedule is returned unchanged
    if len(live_nodes) != 0:
        for wf, tasks in sorted_jobs:
            wf_dag = HeftHelper.convert_to_parent_children_map(wf)
            prec = reverse_dict(wf_dag)
            for task in tasks:
                st = partial(self.start_time, wf, task, new_plan, jobson, prec, commcost)
                agent = min(live_nodes, key=ft)
                runtime = compcost(task, agent)
                start = st(agent, runtime)
                end = ft(agent)
                Schedule.insert_item(new_plan, agent, ScheduleItem(task, start, end))
                jobson[task] = agent

    new_sched = Schedule(new_plan)
    return new_sched
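
# A minimal, self-contained sketch of the earliest-finish-time rule applied in
# mapping() above. Node names and cost numbers are hypothetical; only the
# min(live_nodes, key=ft) selection mirrors the real code.
def _eft_demo():
    live_nodes = ["n1", "n2"]
    node_free_at = {"n1": 10.0, "n2": 4.0}           # when each node becomes idle
    compcost = lambda task, node: 3.0 if node == "n1" else 6.0

    def ft(node, task="t1"):
        # start no earlier than the node is free, then add the estimated runtime
        return node_free_at[node] + compcost(task, node)

    return min(live_nodes, key=ft)                   # "n2": 4 + 6 = 10 beats 10 + 3 = 13

print(_eft_demo())
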
def build_schedule(workflow, estimator, resource_manager, solution):
    """
    The solution contains all parts necessary to build the whole schedule.
    For the moment, it is assumed that all species taking part in the
    algorithm are required to build the complete solution:
    solution = {
        s1.name: val1,
        s2.name: val2,
        ....
    }
    """
    ms = solution[MAPPING_SPECIE]
    os = solution[ORDERING_SPECIE]

    assert check_precedence(workflow, os), "Precedence is violated"

    ms = {t: resource_manager.byName(n) for t, n in ms}

    schedule_mapping = {n: [] for n in set(ms.values())}
    task_to_node = {}

    for t in os:
        node = ms[t]
        t = workflow.byId(t)
        (start_time, end_time) = place_task_to_schedule(workflow, estimator,
                                                        schedule_mapping,
                                                        task_to_node, ms, t,
                                                        node, 0)
        task_to_node[t.id] = (node, start_time, end_time)

    schedule = Schedule(schedule_mapping)
    return schedule
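
# Hypothetical shape of the solution dict consumed by build_schedule(); the
# task and node names are illustrative only. MAPPING_SPECIE pairs each task id
# with a node name, ORDERING_SPECIE is a precedence-respecting task order.
#
#   solution = {
#       MAPPING_SPECIE: [("task_1", "node_a"), ("task_2", "node_b")],
#       ORDERING_SPECIE: ["task_1", "task_2"],
#   }
#   schedule = build_schedule(workflow, estimator, resource_manager, solution)
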
def schedule(self):
    """
    Create a schedule with simple inter-workflow priority sorting.
    """
    def byPriority(wf):
        return 0 if wf.priority is None else wf.priority

    sorted_wfs = sorted(self.workflows, key=byPriority)
    resources = self.resource_manager.get_resources()
    nodes = HeftHelper.to_nodes(resources)
    wf_jobs = {wf: self.make_ranking(wf, nodes) for wf in sorted_wfs}

    new_schedule = Schedule({node: [] for node in nodes})
    new_plan = new_schedule.mapping
    for (wf, jobs) in wf_jobs.items():
        new_schedule = self.mapping([(wf, jobs)], new_plan, nodes, self.commcost, self.compcost)
        new_plan = new_schedule.mapping

    return new_schedule
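
# Quick illustration of the byPriority key above, with toy stand-in objects:
# a missing priority sorts as 0, so such workflows come first in ascending order.
from collections import namedtuple

_WF = namedtuple("WF", "name priority")
_wfs = [_WF("a", 2), _WF("b", None), _WF("c", 1)]
print(sorted(_wfs, key=lambda wf: 0 if wf.priority is None else wf.priority))
# -> [WF(name='b', priority=None), WF(name='c', priority=1), WF(name='a', priority=2)]
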
def default_fixed_schedule_part(resource_manager):
    fix_schedule_part = Schedule({node: []
                                  for node in HeftHelper.to_nodes(resource_manager.get_resources())})
    return fix_schedule_part
def run_heft(workflow, resource_manager, estimator):
    """
    Simply runs HEFT with an empty initial schedule
    and returns the complete schedule.
    """
    heft = DynamicHeft(workflow, resource_manager, estimator)
    nodes = resource_manager.get_nodes()
    init_schedule = Schedule({node: [] for node in nodes})
    return heft.run(init_schedule)
def run_peft(workflow, resource_manager, estimator):
    """
    Simply runs PEFT with an empty initial schedule
    and returns the complete schedule.
    """
    # OCT: the optimistic cost table used by PEFT (note: name shadows the builtin oct)
    oct = PeftHelper.get_OCT(workflow, resource_manager, estimator)
    peft = DynamicPeft(workflow, resource_manager, estimator, oct)
    nodes = resource_manager.get_nodes()
    init_schedule = Schedule({node: [] for node in nodes})
    return peft.run(init_schedule)
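
# Hedged usage sketch: both entry points share one signature and return a
# complete Schedule; makespans can then be compared via Utility.makespan
# (the argument objects are built as in the experiment code further below).
#
#   heft_schedule = run_heft(_wf, rm, estimator)
#   peft_schedule = run_peft(_wf, rm, estimator)
#   print(Utility.makespan(heft_schedule), Utility.makespan(peft_schedule))
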
def __init__(self, workflow, resource_manager, estimator, ranking=None):
    self.current_schedule = Schedule(dict())
    self.workflow = workflow
    self.resource_manager = resource_manager
    self.estimator = estimator
    self.ranking = ranking
    self.current_time = 0
    nodes = self.get_nodes()
def clean_unfinished(schedule):
    def clean(items):
        return [item for item in items
                if item.state == ScheduleItem.FINISHED or item.state == ScheduleItem.EXECUTING]

    new_mapping = {node: clean(items) for (node, items) in schedule.mapping.items()}
    return Schedule(new_mapping)
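
# Self-contained illustration of the cleaning rule in clean_unfinished(), using
# toy stand-ins for ScheduleItem (only the .state attribute matters here).
from collections import namedtuple

_Item = namedtuple("Item", "job state")
_FINISHED, _EXECUTING, _UNSTARTED = "finished", "executing", "unstarted"
_mapping = {"n1": [_Item("t1", _FINISHED), _Item("t2", _UNSTARTED)],
            "n2": [_Item("t3", _EXECUTING)]}
print({node: [it for it in items if it.state in (_FINISHED, _EXECUTING)]
       for node, items in _mapping.items()})
# t2 is dropped; finished and executing items survive
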
def _run_heft():
    dynamic_planner = DynamicHeft(wf, resource_manager, estimator)
    nodes = HeftHelper.to_nodes(resource_manager.resources)
    current_cleaned_schedule = Schedule({node: [] for node in nodes})
    schedule_dynamic_heft = dynamic_planner.run(current_cleaned_schedule)
    self._validate(wf, estimator, schedule_dynamic_heft)

    if is_visualized:
        viz.visualize_task_node_mapping(wf, schedule_dynamic_heft)
        # Utility.create_jedule_visualization(schedule_dynamic_heft, wf_name + '_heft')

    return schedule_dynamic_heft
def __call__(self):
    _wf = wf(self.wf_name)
    rm = ExperimentResourceManager(rg.r([10, 15, 25, 30]))
    estimator = ModelTimeEstimator(bandwidth=10)

    empty_fixed_schedule_part = Schedule({node: [] for node in rm.get_nodes()})

    heft_schedule = run_heft(_wf, rm, estimator)

    fixed_schedule = empty_fixed_schedule_part

    ga_functions = GAFunctions2(_wf, rm, estimator)

    generate = partial(ga_generate, ga_functions=ga_functions,
                       fixed_schedule_part=fixed_schedule,
                       current_time=0.0, init_sched_percent=0.05,
                       initial_schedule=heft_schedule)

    stats = tools.Statistics(lambda ind: ind.fitness.values[0])
    stats.register("avg", numpy.mean)
    stats.register("std", numpy.std)
    stats.register("min", numpy.min)
    stats.register("max", numpy.max)

    logbook = tools.Logbook()
    logbook.header = ["gen", "evals"] + stats.fields

    toolbox = Toolbox()
    toolbox.register("generate", generate)
    toolbox.register("evaluate", fit_converter(ga_functions.build_fitness(empty_fixed_schedule_part, 0.0)))
    toolbox.register("clone", deepcopy)
    toolbox.register("mate", ga_functions.crossover)
    toolbox.register("sweep_mutation", ga_functions.sweep_mutation)
    toolbox.register("mutate", ga_functions.mutation)
    # toolbox.register("select", tools.selTournament, tournsize=4)
    toolbox.register("select", tools.selRoulette)

    pop, logbook, best = run_ga(toolbox=toolbox,
                                logbook=logbook,
                                stats=stats,
                                **self.GA_PARAMS)

    resulted_schedule = ga_functions.build_schedule(best, empty_fixed_schedule_part, 0.0)

    ga_makespan = Utility.makespan(resulted_schedule)
    return (ga_makespan, resulted_schedule, logbook)
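
# Minimal, self-contained demonstration (assuming only the DEAP library and
# numpy) of how the Statistics/Logbook pair used above aggregates fitness
# values per generation; the toy individuals are illustrative.
import numpy
from deap import base, creator, tools

creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
creator.create("Individual", list, fitness=creator.FitnessMin)

pop = [creator.Individual([i]) for i in range(5)]
for ind in pop:
    ind.fitness.values = (float(ind[0]),)

stats = tools.Statistics(lambda ind: ind.fitness.values[0])
stats.register("avg", numpy.mean)
stats.register("min", numpy.min)

logbook = tools.Logbook()
logbook.header = ["gen", "evals"] + stats.fields
logbook.record(gen=0, evals=len(pop), **stats.compile(pop))
print(logbook)
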
def generate(wf, rm, estimator, schedule=None, fixed_schedule_part=None, current_time=0.0):
    sched = schedule if schedule is not None \
        else SimpleRandomizedHeuristic(wf, rm.get_nodes(), estimator).schedule(fixed_schedule_part, current_time)

    if fixed_schedule_part is not None:
        un_tasks = unmoveable_tasks(fixed_schedule_part)
        clean_sched = Schedule({node: [item for item in items
                                       if item.job.id not in un_tasks
                                       and item.state != ScheduleItem.FAILED]
                                for node, items in sched.mapping.items()})
    else:
        clean_sched = sched

    mapping, ordering = ord_and_map(clean_sched)
    ordering_numseq = ordering_to_numseq(ordering)
    ordering_map = {task_id: val for task_id, val in zip(ordering, ordering_numseq)}

    ord_p, map_p = OrderingParticle(ordering_map), MappingParticle(mapping)
    ord_p.velocity = OrderingParticle.Velocity({})
    map_p.velocity = MappingParticle.Velocity({})

    result = CompoundParticle(map_p, ord_p)
    if schedule is None and not validate_mapping_with_alive_nodes(result.mapping.entity, rm):
        raise Exception("Found an invalid solution in the generated array")
    return result
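
# Illustrative sketch (toy data, assumed semantics) of the schedule ->
# (mapping, ordering) split that ord_and_map() appears to perform: mapping
# pairs each task with its node, ordering sorts tasks globally by start time.
_sched = {"n1": [("t1", 0.0), ("t3", 5.0)], "n2": [("t2", 1.0)]}
_map = {task: node for node, items in _sched.items() for task, _ in items}
_order = [task for task, _ in sorted((it for items in _sched.values() for it in items),
                                     key=lambda x: x[1])]
print(_map, _order)  # {'t1': 'n1', 't3': 'n1', 't2': 'n2'} ['t1', 't2', 't3']
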
def __init__(self, workflow, resource_manager, estimator, ranking=None):
    self.current_schedule = Schedule(dict())
    self.workflow = workflow
    self.resource_manager = resource_manager
    self.estimator = estimator
    self.ranking = ranking
    self.current_time = 0
    nodes = self.get_nodes()
def __call__(self, chromo, current_time):
    (schedule_mapping, finished_tasks, ready_tasks,
     chrmo_mapping, task_to_node) = self._create_helping_structures(chromo)

    chromo_copy = dict()
    for (nd_name, items) in chromo.items():
        chromo_copy[nd_name] = []
        for item in items:
            chromo_copy[nd_name].append(item)

    alive_nodes = [node for node in self.nodes if node.state != Node.Down]
    if len(alive_nodes) == 0:
        raise Exception("There are no alive nodes")

    while len(ready_tasks) > 0:
        for node in alive_nodes:
            if len(chromo_copy[node.name]) == 0:
                continue
            if node.state == Node.Down:
                continue

            ## TODO: Urgent! Completely rethink this procedure.
            # take the first task in this node's queue that is already ready
            tsk_id = None
            for i in range(len(chromo_copy[node.name])):
                if chromo_copy[node.name][i] in ready_tasks:
                    tsk_id = chromo_copy[node.name][i]
                    break

            if tsk_id is not None:
                task = self.task_map[tsk_id]
                chromo_copy[node.name].remove(tsk_id)
                ready_tasks.remove(tsk_id)

                time_slots, runtime = self._get_possible_execution_times(
                    schedule_mapping, task_to_node, chrmo_mapping,
                    task, node, current_time)

                time_slot = next(time_slots)
                start_time = time_slot[0]
                end_time = start_time + runtime

                # TODO: need to account for the current time
                item = ScheduleItem(task, start_time, end_time)
                Schedule.insert_item(schedule_mapping, node, item)

                task_to_node[task.id] = (node, start_time, end_time)
                finished_tasks.add(task.id)

                ready_children = self._get_ready_tasks(task.children, finished_tasks)
                for child in ready_children:
                    ready_tasks.append(child.id)

    schedule = Schedule(schedule_mapping)
    return schedule
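
# Toy illustration of the per-node selection step above: each node scans its
# chromosome queue and takes the first task that is currently ready.
_chromo_copy = {"n1": ["t2", "t1"], "n2": ["t3"]}
_ready_tasks = ["t1", "t3"]
for _node in ("n1", "n2"):
    _tsk_id = next((t for t in _chromo_copy[_node] if t in _ready_tasks), None)
    print(_node, "->", _tsk_id)   # n1 -> t1 (t2 is not ready yet), n2 -> t3
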
def schedule(self, fixed_schedule_part=None, current_time=0.0):
    estimate = self.estimator.estimate_transfer_time

    # TODO: make a common utility function with ScheduleBuilder
    def is_last_version_of_task_executing(item):
        return item.state == ScheduleItem.EXECUTING \
               or item.state == ScheduleItem.FINISHED \
               or item.state == ScheduleItem.UNSTARTED

    def _get_ready_tasks(children, finished_tasks):
        def _is_child_ready(child):
            # a child is ready when all of its parents have finished
            ids = set(p.id for p in child.parents)
            return all(id in finished_tasks for id in ids)
        return [child for child in children if _is_child_ready(child)]

    if fixed_schedule_part is None:
        schedule_mapping = {node: [] for node in self.nodes}
        ready_tasks = [child.id for child in self.workflow.head_task.children]
        task_to_node = dict()
        finished_tasks = set()
    else:
        schedule_mapping = {node: [item for item in items]
                            for (node, items) in fixed_schedule_part.mapping.items()}
        finished_tasks = [item.job.id
                          for (node, items) in fixed_schedule_part.mapping.items()
                          for item in items
                          if is_last_version_of_task_executing(item)]
        finished_tasks = set([self.workflow.head_task.id] + finished_tasks)
        unfinished = [task for task in self.workflow.get_all_unique_tasks()
                      if task.id not in finished_tasks]
        ready_tasks = [task.id for task in _get_ready_tasks(unfinished, finished_tasks)]
        task_to_node = {item.job.id: (node, item.start_time, item.end_time)
                        for (node, items) in fixed_schedule_part.mapping.items()
                        for item in items
                        if is_last_version_of_task_executing(item)}

    def is_child_ready(child):
        ids = set(p.id for p in child.parents)
        return all(id in finished_tasks for id in ids)

    def find_slots(node, comm_ready, runtime):
        node_schedule = schedule_mapping.get(node, list())
        free_time = 0 if len(node_schedule) == 0 else node_schedule[-1].end_time
        ## TODO: refactor this later
        f_time = max(free_time, comm_ready)
        f_time = max(f_time, current_time)
        base_variant = [(f_time, f_time + runtime + 1)]
        zero_interval = [] if len(node_schedule) == 0 else [(0, node_schedule[0].start_time)]
        middle_intervals = [(node_schedule[i].end_time, node_schedule[i + 1].start_time)
                            for i in range(len(node_schedule) - 1)]
        intervals = zero_interval + middle_intervals + base_variant

        ## TODO: rethink the rounding tolerance
        result = [(st, end) for (st, end) in intervals
                  if (current_time < st or abs(current_time - st) < 0.01)
                  and st >= comm_ready
                  and (runtime < (end - st) or abs((end - st) - runtime) < 0.01)]
        return result

    def comm_ready_func(task, node):
        ## TODO: remake this stub later
        if len(task.parents) == 1 and self.workflow.head_task.id == list(task.parents)[0].id:
            return 0
        return max(task_to_node[p.id][2] + estimate(node, task_to_node[p.id][0], task, p)
                   for p in task.parents)

    def get_possible_execution_times(task, node):
        ## Pay attention to the last element in the resulting sequence:
        ## it represents all available time of the node after it completes
        ## all of its work (if such an interval can exist).
        ## time_slots = [(st1, end1), (st2, end2), ..., (st_last, st_last + runtime)]
        runtime = self.estimator.estimate_runtime(task, node)
        comm_ready = comm_ready_func(task, node)
        time_slots = find_slots(node, comm_ready, runtime)
        return time_slots, runtime

    while len(ready_tasks) > 0:
        chosen_index = random.randint(0, len(ready_tasks) - 1)
        task = self.task_map[ready_tasks[chosen_index]]

        # TODO: add a check for the case when all nodes are dead
        # (a very rare situation, so it is not considered for now)
        alive_nodes = [node for node in self.nodes if node.state != Node.Down]
        chosen_node_index = random.randint(0, len(alive_nodes) - 1)
        node = alive_nodes[chosen_node_index]

        time_slots, runtime = get_possible_execution_times(task, node)
        chosen_time_index = 0 if len(time_slots) == 1 else random.randint(0, len(time_slots) - 1)

        time_slot = time_slots[chosen_time_index]
        start_time = time_slot[0]
        end_time = start_time + runtime

        item = ScheduleItem(task, start_time, end_time)
        Schedule.insert_item(schedule_mapping, node, item)
        task_to_node[task.id] = (node, start_time, end_time)

        ready_tasks.remove(task.id)
        finished_tasks.add(task.id)

        ready_children = [child for child in task.children if is_child_ready(child)]
        for child in ready_children:
            ready_tasks.append(child.id)

    schedule = Schedule(schedule_mapping)
    return schedule
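
# Stand-alone sketch of the gap enumeration inside find_slots(): candidate
# intervals are the span before the first busy item, the gaps between items,
# and an open-ended tail after the last one. The (start, end) pairs below are
# toy stand-ins for ScheduleItem objects.
_busy = [(2.0, 4.0), (6.0, 9.0)]              # sorted, non-overlapping busy intervals
_runtime, _comm_ready = 1.5, 0.0
_tail = _busy[-1][1]
_intervals = ([(0.0, _busy[0][0])]
              + [(_busy[i][1], _busy[i + 1][0]) for i in range(len(_busy) - 1)]
              + [(_tail, _tail + _runtime)])
print([(st, end) for st, end in _intervals
       if st >= _comm_ready and end - st >= _runtime])
# -> [(0.0, 2.0), (4.0, 6.0), (9.0, 10.5)]; a slot is then picked from these
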
def __call__(self, chromo, current_time):
    count_of_tasks = lambda mapping: reduce(operator.add, (len(tasks) for node, tasks in mapping.items()), 0)

    alive_nodes = [node for node in self.nodes if node.state != Node.Down]
    alive_nodes_names = [node.name for node in alive_nodes]
    for node_name, tasks in chromo.items():
        if node_name not in alive_nodes_names and len(tasks) > 0:
            raise ValueError("Chromo is invalid. There is a task assigned to a dead node")

    if count_of_tasks(chromo) + len(self.fixed_schedule_part.get_unfailed_tasks_ids()) != len(self.workflow.get_all_unique_tasks()):
        print("==Chromosome==================================")
        print(chromo)
        print("=fixed_schedule_part===================================")
        print(self.fixed_schedule_part)
        raise Exception("The chromosome is not full. Chromo length: {0}, Fixed part length: {1}, workflow size: {2}"
                        .format(count_of_tasks(chromo),
                                len(self.fixed_schedule_part.get_unfailed_tasks_ids()),
                                len(self.workflow.get_all_unique_tasks())))

    (schedule_mapping, finished_tasks, ready_tasks,
     chrmo_mapping, task_to_node) = self._create_helping_structures(chromo)

    chromo_copy = deepcopy(chromo)

    if len(alive_nodes) == 0:
        raise Exception("There are no alive nodes")

    while len(ready_tasks) > 0:
        count_before = count_of_tasks(chromo_copy)
        if len(alive_nodes) == 0:
            raise ValueError("Count of alive_nodes is zero")

        for node in alive_nodes:
            if len(chromo_copy[node.name]) == 0:
                continue

            ## TODO: Urgent! Completely rethink this procedure.
            # take the first task in this node's queue that is already ready
            tsk_id = None
            for i in range(len(chromo_copy[node.name])):
                if chromo_copy[node.name][i] in ready_tasks:
                    tsk_id = chromo_copy[node.name][i]
                    break

            if tsk_id is not None:
                task = self.task_map[tsk_id]
                chromo_copy[node.name].remove(tsk_id)
                ready_tasks.remove(tsk_id)

                (start_time, end_time) = place_task_to_schedule(self.workflow,
                                                                self.estimator,
                                                                schedule_mapping,
                                                                task_to_node,
                                                                chrmo_mapping,
                                                                task, node,
                                                                current_time)

                task_to_node[task.id] = (node, start_time, end_time)
                finished_tasks.add(task.id)

                ready_children = self._get_ready_tasks(task.children, finished_tasks)
                for child in ready_children:
                    ready_tasks.append(child.id)

        count_after = count_of_tasks(chromo_copy)
        if count_before == count_after:
            raise Exception("Unable to properly process a chromosome."
                            " Perhaps due to an invalid fixed_schedule_part or chromosome.")

    schedule = Schedule(schedule_mapping)
    return schedule
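
# Minimal check of the count_of_tasks reduction used above: it sums queue
# lengths across all nodes of a chromosome.
import operator
from functools import reduce

_count_of_tasks = lambda mapping: reduce(operator.add,
                                         (len(tasks) for node, tasks in mapping.items()), 0)
print(_count_of_tasks({"n1": ["t1", "t2"], "n2": ["t3"]}))  # 3
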