def __init__(self, workflow, resource_manager, estimator, flops=20):
    """Prepare ranking data and lookup tables for one workflow.

    The nodes are taken from ``resource_manager.get_resources()``, the
    workflow tasks are ordered with the ranking function produced by
    ``HeftHelper.build_ranking_func`` and id/name lookup maps are cached.

    :param workflow: workflow whose tasks are ranked
    :param resource_manager: object exposing ``get_resources()``
    :param estimator: Estimator interface implementation; its
        ``estimate_runtime`` and ``estimate_transfer_time`` are used as costs
    :param flops: stored as ``self.flops``; not used further in this method
    """
    def runtime_cost(job, agent):
        return estimator.estimate_runtime(job, agent)

    def transfer_cost(ni, nj, A, B):
        # The estimator takes the agents first, then the tasks.
        return estimator.estimate_transfer_time(A, B, ni, nj)

    self.counter = 0
    self.workflow = workflow
    self.flops = flops
    ## interface Estimator
    self.estimator = estimator
    self.resource_manager = resource_manager

    available_nodes = list(HeftHelper.to_nodes(resource_manager.get_resources()))
    rank_tasks = HeftHelper.build_ranking_func(available_nodes, runtime_cost, transfer_cost)
    ranked_tasks = rank_tasks(self.workflow)

    self.nodes = available_nodes
    self.sorted_tasks = ranked_tasks
    self.workflow_size = len(ranked_tasks)

    self.task_map = {}
    for task in ranked_tasks:
        self.task_map[task.id] = task
    self.node_map = {}
    for node in available_nodes:
        self.node_map[node.name] = node

    self.initializing_alg = SimpleRandomizedHeuristic(self.workflow, self.nodes, self.estimator)
    # No initial chromosome yet (was: GAFunctions.schedule_to_chromosome(initial_schedule)).
    self.initial_chromosome = None
def make_ranking(self, wf, nodes):
    """Apply the ranking function built for ``nodes`` to workflow ``wf``.

    The ranking function comes from ``HeftHelper.build_ranking_func`` and is
    parameterized with this object's ``compcost`` and ``commcost``.

    :param wf: workflow to rank
    :param nodes: nodes handed to the ranking-function builder
    :return: whatever the ranking function returns for ``wf``
    """
    rank = HeftHelper.build_ranking_func(nodes, self.compcost, self.commcost)
    return rank(wf)
def run(self, current_cleaned_schedule):
    """Plan all still-unscheduled tasks on top of ``current_cleaned_schedule``.

    The incoming schedule is expected to hold only finished and executing
    tasks (unfinished and failed ones already removed) and to already
    reflect down and newly added nodes; this algorithm does NOT detect new
    nodes by itself.

    Steps:
      1. collect the tasks that still have to be planned,
      2. keep them in the order of ``self.wf_jobs``,
      3. map them onto the nodes returned by ``self.get_nodes()``.

    :param current_cleaned_schedule: schedule whose ``mapping`` is extended
    :return: the schedule produced by ``self.mapping``
    """
    available_nodes = self.get_nodes()
    pending = HeftHelper.get_tasks_for_planning(self.workflow, current_cleaned_schedule)
    ## TODO: check if it sorted properly
    pending_ids = {job.id for job in pending}

    ranked_pending = []
    for job in self.wf_jobs:
        if job.id in pending_ids:
            ranked_pending.append(job)

    return self.mapping([(self.workflow, ranked_pending)],
                        current_cleaned_schedule.mapping,
                        available_nodes,
                        self.commcost,
                        self.compcost)
def schedule(self):
    """Build one schedule covering all workflows in inter-workflow priority order.

    Workflows are sorted ascending by ``priority`` (``None`` counts as 0).
    Each workflow is ranked with ``make_ranking`` and then mapped, one after
    another, onto the plan accumulated so far.

    :return: the Schedule returned by the last ``self.mapping`` call, or an
        empty per-node Schedule when there are no workflows
    """
    def by_priority(wf):
        return 0 if wf.priority is None else wf.priority

    ## simple inter priority sorting
    sorted_wfs = sorted(self.workflows, key=by_priority)

    resources = self.resource_manager.get_resources()
    # Materialize once: the nodes are iterated for every workflow ranking and
    # again for the empty plan below, which would silently yield nothing if
    # to_nodes handed back a one-shot iterable.
    nodes = list(HeftHelper.to_nodes(resources))

    wf_jobs = {wf: self.make_ranking(wf, nodes) for wf in sorted_wfs}

    new_schedule = Schedule({node: [] for node in nodes})
    new_plan = new_schedule.mapping

    for wf, jobs in wf_jobs.items():
        new_schedule = self.mapping([(wf, jobs)], new_plan, nodes, self.commcost, self.compcost)
        new_plan = new_schedule.mapping

    return new_schedule
def __init__(self, workflow, resource_manager, estimator):
    """Prepare ranking data and lookup tables for one workflow.

    Nodes come from ``resource_manager.get_nodes()``; tasks are ordered with
    the ranking function produced by ``HeftHelper.build_ranking_func``.

    :param workflow: workflow whose tasks are ranked
    :param resource_manager: object exposing ``get_nodes()``
    :param estimator: Estimator interface implementation; its
        ``estimate_runtime`` and ``estimate_transfer_time`` are used as costs
    """
    def runtime_cost(job, agent):
        return estimator.estimate_runtime(job, agent)

    def transfer_cost(ni, nj, A, B):
        # The estimator takes the agents first, then the tasks.
        return estimator.estimate_transfer_time(A, B, ni, nj)

    self.counter = 0
    self.workflow = workflow
    ## interface Estimator
    self.estimator = estimator
    self.resource_manager = resource_manager

    # Previously: list(HeftHelper.to_nodes(resource_manager.get_resources()))
    available_nodes = resource_manager.get_nodes()
    rank_tasks = HeftHelper.build_ranking_func(available_nodes, runtime_cost, transfer_cost)
    ranked_tasks = rank_tasks(self.workflow)

    self.nodes = available_nodes
    self.sorted_tasks = ranked_tasks
    self.workflow_size = len(ranked_tasks)
    self.task_map = dict((task.id, task) for task in ranked_tasks)
    self.node_map = dict((node.name, node) for node in available_nodes)

    self.initializing_alg = SimpleRandomizedHeuristic(self.workflow, self.nodes, self.estimator)
    # No initial chromosome yet (was: GAFunctions.schedule_to_chromosome(initial_schedule)).
    self.initial_chromosome = None
def default_fixed_schedule_part(resource_manager):
    """Return a Schedule in which every node of ``resource_manager`` has no items.

    :param resource_manager: object exposing ``get_resources()``
    :return: ``Schedule`` built from a ``{node: []}`` mapping
    """
    empty_plan = {}
    for node in HeftHelper.to_nodes(resource_manager.get_resources()):
        empty_plan[node] = []
    return Schedule(empty_plan)
def get_infrastructure(bundle, reliability, with_ga_initial, nodes_conf=None):
    """Assemble the estimator, resource manager and optional initial schedule.

    :param bundle: source of ``dedicated_resources``, ``ga_schedule``,
        ``transfer_mx`` and ``ideal_flops``
    :param reliability: value assigned to every node in the reliability map
    :param with_ga_initial: only the literal ``True`` selects
        ``bundle.ga_schedule`` as the initial schedule
    :param nodes_conf: when given, resources are generated from it with
        ``ResourceGenerator.r`` instead of using ``bundle.dedicated_resources``
    :return: tuple ``(estimator, resource_manager, initial_schedule)``
    """
    if nodes_conf is None:
        resources = bundle.dedicated_resources
    else:
        resources = ResourceGenerator.r(nodes_conf)

    reliability_by_node = {}
    for node in HeftHelper.to_nodes(resources):
        reliability_by_node[node.name] = reliability

    # Identity check kept on purpose: truthy non-bool values do not enable it.
    initial_schedule = bundle.ga_schedule if with_ga_initial is True else None

    estimator = ExperimentEstimator(bundle.transfer_mx, bundle.ideal_flops, reliability_by_node)
    resource_manager = ExperimentResourceManager(resources)
    return estimator, resource_manager, initial_schedule
def test_fixed_ordering(self):
    """Run GSA on Montage_25 with a fixed task ordering and validate the result.

    The ordering is the one returned by ``HeftHelper.heft_rank``. The initial
    population mixes the HEFT-derived position (when ``random.random()``
    exceeds 0.95) with freshly generated ones. The best final individual is
    turned into a schedule, validated, and its makespan is printed.
    """
    _wf = wf("Montage_25")
    rm = ExperimentResourceManager(rg.r([10, 15, 25, 30]))
    estimator = SimpleTimeCostEstimator(comp_time_cost=0,
                                        transf_time_cost=0,
                                        transferMx=None,
                                        ideal_flops=20,
                                        transfer_time=100)
    sorted_tasks = HeftHelper.heft_rank(_wf, rm, estimator)

    heft_schedule = run_heft(_wf, rm, estimator)
    heft_mapping = schedule_to_position(heft_schedule)

    def heft_gen():
        if random.random() > 0.95:
            return heft_mapping
        return generate(_wf, rm, estimator)

    def mofits(pop):
        return [p.fitness.mofit for p in pop]

    toolbox = Toolbox()
    # toolbox.register("generate", generate, _wf, rm, estimator)
    toolbox.register("generate", heft_gen)
    toolbox.register("fitness", fitness, _wf, rm, estimator, sorted_tasks)
    toolbox.register("force_vector_matrix", force_vector_matrix, rm)
    toolbox.register("velocity_and_position", velocity_and_position, _wf, rm, estimator)
    toolbox.register("G", G)
    toolbox.register("kbest", Kbest)

    statistics = Statistics()
    statistics.register("min", lambda pop: numpy.min(mofits(pop)))
    statistics.register("avr", lambda pop: numpy.average(mofits(pop)))
    statistics.register("max", lambda pop: numpy.max(mofits(pop)))
    statistics.register("std", lambda pop: numpy.std(mofits(pop)))

    logbook = Logbook()
    logbook.header = ("gen", "G", "kbest", "min", "avr", "max", "std")

    pop_size = 100
    iter_number = 100
    kbest = pop_size
    ginit = 5

    final_pop = run_gsa(toolbox, statistics, logbook, pop_size, iter_number, kbest, ginit)

    def mofit_of(individual):
        return toolbox.fitness(individual).mofit

    best = min(final_pop, key=mofit_of)
    solution = {
        MAPPING_SPECIE: list(zip(sorted_tasks, best)),
        ORDERING_SPECIE: sorted_tasks,
    }
    schedule = build_schedule(_wf, estimator, rm, solution)
    Utility.validate_static_schedule(_wf, schedule)
    makespan = Utility.makespan(schedule)
    print("Final makespan: {0}".format(makespan))
def ordering_default_initialize(ctx, size):
    """Create ``size`` ordering individuals derived from the HEFT rank order.

    Every individual is an independently mutated deep copy of the ranked
    task list wrapped in ``ListBasedIndividual``.

    :param ctx: context dict; ``ctx['env']`` supplies ``wf``, ``rm`` and
        ``estimator``
    :param size: number of individuals to create
    :return: list of ``ListBasedIndividual``
    :raises AssertionError: if the ranked order fails ``_check_precedence``
    """
    env = ctx['env']
    ranked = HeftHelper.heft_rank(env.wf, env.rm, env.estimator)
    assert _check_precedence(env.wf, ranked), "Check precedence failed"

    population = []
    for _ in range(size):
        mutated = ordering_default_mutate(ctx, deepcopy(ranked))
        population.append(ListBasedIndividual(mutated))
    return population
def init(self):
    """Set up ``self.current_schedule`` and post the resulting events.

    With an initial schedule, its items are rebuilt as new ``ScheduleItem``
    objects bound (by job id) to the task objects of the planner's workflow.
    Without one, the planner is run on an empty per-node schedule.
    """
    if self.initial_schedule is not None:
        id_to_task = {}
        for tsk in HeftHelper.get_all_tasks(self.heft_planner.workflow):
            id_to_task[tsk.id] = tsk

        rebuilt = {}
        for node, items in self.initial_schedule.mapping.items():
            rebuilt[node] = [
                ScheduleItem(id_to_task[item.job.id], item.start_time, item.end_time)
                for item in items
            ]
        self.current_schedule = Schedule(rebuilt)
    else:
        empty_plan = {node: [] for node in self.heft_planner.get_nodes()}
        self.current_schedule = Schedule(empty_plan)
        self.current_schedule = self.heft_planner.run(self.current_schedule)

    self._post_new_events()
def checkDown(self, node_name, is_down):
    """Update the state of every public node called ``node_name``.

    :param node_name: name to match against ``node.name``
    :param is_down: truthy -> ``Node.Down``, falsy -> ``Node.Unknown``
    """
    for candidate in HeftHelper.to_nodes(self.public_resources):
        if candidate.name != node_name:
            continue
        candidate.state = Node.Down if is_down else Node.Unknown
def mapping(self, sorted_jobs, existing_plan, nodes, commcost, compcost):
    """Allocate each task to the machine with the earliest finish time.

    Operates in place: ``existing_plan`` itself receives the new items and
    the returned Schedule wraps that same mapping.

    :param sorted_jobs: iterable of ``(workflow, tasks)`` pairs; tasks are
        placed in the order given
    :param existing_plan: dict ``node -> list of ScheduleItem``; its keys
        are the candidate machines
    :param nodes: not used by this method (candidates come from
        ``existing_plan``)
    :param commcost: communication cost callable, forwarded to
        ``self.start_time``
    :param compcost: callable ``(task, machine) -> runtime``
    :return: ``Schedule(existing_plan)``
    :raises ValueError: from ``min`` when a task must be placed but
        ``existing_plan`` has no machines
    """
    ## TODO: add finished tasks
    # Remember where already finished / executing jobs ran.
    jobson = dict()
    for (node, items) in existing_plan.items():
        for item in items:
            if item.state == ScheduleItem.FINISHED or item.state == ScheduleItem.EXECUTING:
                jobson[item.job] = node

    new_plan = existing_plan

    for wf, tasks in sorted_jobs:
        wf_dag = HeftHelper.convert_to_parent_children_map(wf)
        prec = reverse_dict(wf_dag)
        for task in tasks:
            st = partial(self.start_time, wf, task, new_plan, jobson, prec, commcost)

            # task and st are bound as defaults so the closure does not
            # depend on late binding of the loop variables.
            def finish_time(machine, task=task, st=st):
                runtime = compcost(task, machine)
                return st(machine, runtime) + runtime

            agent = min(new_plan, key=finish_time)
            runtime = compcost(task, agent)
            start = st(agent, runtime)
            # Derive the end from the start and runtime just computed rather
            # than re-evaluating both, so the item is always self-consistent.
            end = start + runtime

            Schedule.insert_item(new_plan, agent, ScheduleItem(task, start, end))
            jobson[task] = agent

    return Schedule(new_plan)
def ordering_default_initialize(ctx, size):
    """Build the initial ordering population from the HEFT rank order.

    :param ctx: context dict; ``ctx['env']`` supplies ``wf``, ``rm`` and
        ``estimator``
    :param size: number of individuals to produce
    :return: list of ``size`` ``ListBasedIndividual`` objects, each wrapping
        a mutated deep copy of the ranked task list
    :raises AssertionError: if the ranked order fails ``check_precedence``
    """
    env = ctx['env']
    ranked_tasks = HeftHelper.heft_rank(env.wf, env.rm, env.estimator)
    assert check_precedence(env.wf, ranked_tasks), "Check precedence failed"

    def make_individual():
        # Each individual mutates its own copy of the ranked list.
        return ListBasedIndividual(ordering_default_mutate(ctx, deepcopy(ranked_tasks)))

    return [make_individual() for _ in range(size)]
def _run_heft():
    """Run DynamicHeft from an empty schedule, validate it, optionally visualize it.

    Uses ``wf``, ``resource_manager``, ``estimator``, ``is_visualized`` and
    ``self`` from the enclosing scope.

    :return: the schedule produced by ``DynamicHeft.run``
    """
    planner = DynamicHeft(wf, resource_manager, estimator)
    empty_plan = {}
    for node in HeftHelper.to_nodes(resource_manager.resources):
        empty_plan[node] = []

    heft_schedule = planner.run(Schedule(empty_plan))
    self._validate(wf, estimator, heft_schedule)

    if is_visualized:
        viz.visualize_task_node_mapping(wf, heft_schedule)
        # Utility.create_jedule_visualization(schedule_dynamic_heft, wf_name+'_heft')
    return heft_schedule
def __init__(self, workflow, nodes, estimator):
    """Store the planning inputs and build id/name lookup tables.

    :param workflow: workflow; must provide ``get_task_count()``
    :param nodes: iterable of nodes, each with a ``name``
    :param estimator: stored as ``self.estimator``
    """
    self.workflow = workflow
    self.nodes = nodes
    self.workflow_size = workflow.get_task_count()
    self.estimator = estimator

    tasks_by_id = {}
    for task in HeftHelper.get_all_tasks(self.workflow):
        tasks_by_id[task.id] = task
    self.task_map = tasks_by_id

    nodes_by_name = {}
    for node in nodes:
        nodes_by_name[node.name] = node
    self.node_map = nodes_by_name

    self.initial_chromosome = None
def __init__(self, workflow, nodes, estimator):
    """Remember workflow, nodes and estimator; index tasks by id and nodes by name.

    :param workflow: workflow; must provide ``get_task_count()``
    :param nodes: iterable of nodes, each with a ``name``
    :param estimator: stored as ``self.estimator``
    """
    self.workflow = workflow
    self.nodes = nodes
    self.workflow_size = workflow.get_task_count()
    self.estimator = estimator
    self.task_map = dict(
        (task.id, task) for task in HeftHelper.get_all_tasks(self.workflow))
    self.node_map = dict((node.name, node) for node in nodes)
    self.initial_chromosome = None
def _run_heft():
    """Produce, validate and optionally visualize a DynamicHeft schedule.

    Relies on ``wf``, ``resource_manager``, ``estimator``, ``is_visualized``
    and ``self`` from the enclosing scope. Planning starts from a schedule
    in which every node has an empty item list.

    :return: the schedule returned by ``DynamicHeft.run``
    """
    dynamic_planner = DynamicHeft(wf, resource_manager, estimator)
    all_nodes = HeftHelper.to_nodes(resource_manager.resources)
    blank_schedule = Schedule(dict((node, []) for node in all_nodes))

    result = dynamic_planner.run(blank_schedule)
    self._validate(wf, estimator, result)

    if not is_visualized:
        return result

    viz.visualize_task_node_mapping(wf, result)
    # Utility.create_jedule_visualization(schedule_dynamic_heft, wf_name+'_heft')
    return result
def test_fixed_ordering(self):
    """GSA smoke test on Montage_25 with the task ordering fixed to the HEFT rank.

    Registers the GSA operators and min/avr/max/std statistics over
    ``fitness.mofit``, runs 100 iterations with a population of 100, then
    builds, validates and reports the schedule of the best individual.
    """
    _wf = wf("Montage_25")
    rm = ExperimentResourceManager(rg.r([10, 15, 25, 30]))
    estimator = SimpleTimeCostEstimator(
        comp_time_cost=0, transf_time_cost=0, transferMx=None,
        ideal_flops=20, transfer_time=100)
    sorted_tasks = HeftHelper.heft_rank(_wf, rm, estimator)

    heft_schedule = run_heft(_wf, rm, estimator)
    heft_mapping = schedule_to_position(heft_schedule)

    # Mostly random positions; the HEFT position is used when the draw exceeds 0.95.
    heft_gen = lambda: heft_mapping if random.random() > 0.95 else generate(_wf, rm, estimator)

    def over_mofit(reducer):
        # Wrap a numpy reducer so it is applied to the population's mofit values.
        return lambda pop: reducer([p.fitness.mofit for p in pop])

    toolbox = Toolbox()
    # toolbox.register("generate", generate, _wf, rm, estimator)
    toolbox.register("generate", heft_gen)
    toolbox.register("fitness", fitness, _wf, rm, estimator, sorted_tasks)
    toolbox.register("force_vector_matrix", force_vector_matrix, rm)
    toolbox.register("velocity_and_position", velocity_and_position, _wf, rm, estimator)
    toolbox.register("G", G)
    toolbox.register("kbest", Kbest)

    statistics = Statistics()
    for label, reducer in (("min", numpy.min),
                           ("avr", numpy.average),
                           ("max", numpy.max),
                           ("std", numpy.std)):
        statistics.register(label, over_mofit(reducer))

    logbook = Logbook()
    logbook.header = ("gen", "G", "kbest", "min", "avr", "max", "std")

    pop_size, iter_number, ginit = 100, 100, 5
    kbest = pop_size

    final_pop = run_gsa(toolbox, statistics, logbook, pop_size, iter_number, kbest, ginit)
    best = min(final_pop, key=lambda x: toolbox.fitness(x).mofit)

    solution = {}
    solution[MAPPING_SPECIE] = list(zip(sorted_tasks, best))
    solution[ORDERING_SPECIE] = sorted_tasks

    schedule = build_schedule(_wf, estimator, rm, solution)
    Utility.validate_static_schedule(_wf, schedule)
    print("Final makespan: {0}".format(Utility.makespan(schedule)))
def init(self):
    """Initialize ``self.current_schedule`` and then post new events.

    * No initial schedule: run the HEFT planner on a schedule where every
      planner node has an empty item list.
    * Initial schedule given: copy it item by item, replacing each item's
      job with the planner workflow's task that has the same id.
    """
    def rebind(item, tasks_by_id):
        return ScheduleItem(tasks_by_id[item.job.id], item.start_time, item.end_time)

    if self.initial_schedule is None:
        self.current_schedule = Schedule(
            dict((node, []) for node in self.heft_planner.get_nodes()))
        self.current_schedule = self.heft_planner.run(self.current_schedule)
    else:
        all_tasks = HeftHelper.get_all_tasks(self.heft_planner.workflow)
        id_to_task = dict((tsk.id, tsk) for tsk in all_tasks)
        mapping = dict(
            (node, [rebind(item, id_to_task) for item in items])
            for node, items in self.initial_schedule.mapping.items())
        self.current_schedule = Schedule(mapping)

    self._post_new_events()
def __init__(self, workflow, resource_manager, estimator, task_map, node_map, fixed_schedule_part):
    """Store the inputs needed to build a schedule around a fixed part.

    :param workflow: workflow to schedule
    :param resource_manager: object exposing ``get_resources()``; its nodes
        are stored as ``self.nodes``
    :param estimator: stored as ``self.estimator``
    :param task_map: stored as ``self.task_map``
    :param node_map: stored as ``self.node_map``
    :param fixed_schedule_part: fixed part of the schedule. It has to be
        taken into account when a new schedule is built, but nothing inside
        it may be changed.
    """
    self.fixed_schedule_part = fixed_schedule_part
    self.estimator = estimator
    self.workflow = workflow
    ## TODO: Build it
    self.task_map = task_map
    ## TODO: Build it
    self.node_map = node_map
    self.nodes = HeftHelper.to_nodes(resource_manager.get_resources())
    # Not implemented here yet: construct initial mapping,
    # eliminate all already scheduled tasks.
def __init__(self,
             workflow,
             resource_manager,
             estimator,
             task_map,
             node_map,
             fixed_schedule_part):
    """Keep references to the scheduling inputs.

    ``fixed_schedule_part`` is the immutable portion of the schedule: a new
    schedule must account for it but must not modify anything inside it.

    :param workflow: workflow to schedule
    :param resource_manager: provides ``get_resources()``, from which the
        node collection ``self.nodes`` is derived
    :param estimator: stored unchanged
    :param task_map: stored unchanged (TODO in original: build it here)
    :param node_map: stored unchanged (TODO in original: build it here)
    :param fixed_schedule_part: stored unchanged
    """
    resources = resource_manager.get_resources()

    self.workflow = workflow
    self.nodes = HeftHelper.to_nodes(resources)
    self.estimator = estimator
    self.task_map = task_map  ## TODO: Build it
    self.node_map = node_map  ## TODO: Build it
    self.fixed_schedule_part = fixed_schedule_part
    # Still to do: construct initial mapping; eliminate all already scheduled tasks.
def run(self, current_cleaned_schedule):
    """Schedule the remaining tasks starting from ``current_cleaned_schedule``.

    Contract stated by the original author: the given schedule contains only
    finished and executing tasks (all unfinished and failed ones have been
    removed) and already accounts for down and newly added nodes. The
    algorithm does not check for added nodes by itself.

    Procedure: get all unscheduled tasks, keep them in ``self.wf_jobs``
    order, and map them on the existing nodes according to the schedule.

    :param current_cleaned_schedule: schedule to extend
    :return: result of ``self.mapping``
    """
    nodes = self.get_nodes()

    ## TODO: check if it sorted properly
    ids_to_plan = set()
    for task in HeftHelper.get_tasks_for_planning(self.workflow, current_cleaned_schedule):
        ids_to_plan.add(task.id)

    ordered = list(filter(lambda task: task.id in ids_to_plan, self.wf_jobs))

    workflow_jobs = [(self.workflow, ordered)]
    return self.mapping(workflow_jobs, current_cleaned_schedule.mapping, nodes,
                        self.commcost, self.compcost)
def get_by_softreq(self, soft_reqs):
    """Return the public nodes that are not down and satisfy ``soft_reqs``.

    A node satisfies the requirement when ``soft_reqs`` is contained in
    ``node.soft`` or the node advertises ``SoftItem.ANY_SOFT``.

    :param soft_reqs: software requirement looked up in ``node.soft``
    :return: list of matching nodes
    """
    matching = []
    for node in HeftHelper.to_nodes(self.public_resources):
        if node.state != Node.Down and (
                (soft_reqs in node.soft) or (SoftItem.ANY_SOFT in node.soft)):
            matching.append(node)
    return matching
def generate_public_resources(self):
    """Generate three hard-coded public resources with 15, 12 and 9 nodes.

    Node ``j`` of each resource gets flops from the repeating pattern
    10, 15, 25, 25, 30. (An earlier, disabled variant added +5 / +10 to
    these values.)

    :return: tuple ``(resources, reliability_map, probability_estimator)``
        where ``reliability_map`` maps every node name to 0.9 and
        ``probability_estimator(dt, comp_estimation, transfer_estimation)``
        is an erf-based curve centred at the summed estimations
    """
    ## TODO: remake it later
    node_counts = (15, 12, 9)
    flops_cycle = (10, 15, 25, 25, 30)

    resources = list()
    for i in range(0, len(node_counts)):
        res = Resource("public_res_" + str(i))
        resources.append(res)
        for j in range(0, node_counts[i]):
            node = Node(res.name + "_node_" + str(j), res, [SoftItem.ANY_SOFT])
            node.flops = flops_cycle[j % len(flops_cycle)]
            res.nodes.add(node)

    reliability_map = {}
    for node in HeftHelper.to_nodes(resources):
        reliability_map[node.name] = 0.9

    def probability_estimator(dt, comp_estimation, transfer_estimation):
        M = comp_estimation + transfer_estimation
        sigma = 0.1 * M
        return 0.5 * (1 + math.erf((dt - M) / sigma))

    return (resources, reliability_map, probability_estimator)
# NOTE(review): HeftHelper, run_heft, tools and numpy are used below but are not
# imported in the visible part of this script -- presumably imported above; confirm.
from heft.algs.pso.gapso import run_gapso
from heft.algs.pso.sdpso import run_pso, update, schedule_to_position, construct_solution, MappingParticle, \
    Velocity, Position
from heft.core.CommonComponents.ExperimentalManagers import ExperimentResourceManager
from heft.core.environment.Utility import Utility, wf
from heft.algs.common.mapordschedule import build_schedule, MAPPING_SPECIE, ORDERING_SPECIE, ordering_from_schedule, \
    mapping_from_schedule
from heft.experiments.cga.mobjective.utility import SimpleTimeCostEstimator
from heft.core.environment.ResourceGenerator import ResourceGenerator as rg
from heft.algs.common.mapordschedule import fitness as basefitness

# Experiment fixture: the "Montage_50" workflow on resources generated from
# [10, 15, 25, 30] (presumably per-node flops -- TODO confirm against rg.r).
_wf = wf("Montage_50")
rm = ExperimentResourceManager(rg.r([10, 15, 25, 30]))
estimator = SimpleTimeCostEstimator(comp_time_cost=0, transf_time_cost=0, transferMx=None,
                                    ideal_flops=20, transfer_time=100)

# HEFT rank order and HEFT schedule; the HEFT makespan is printed.
sorted_tasks = HeftHelper.heft_rank(_wf, rm, estimator)
heft_schedule = run_heft(_wf, rm, estimator)
print(Utility.makespan(heft_schedule))

# Per-generation statistics over the first fitness value of each individual.
stats = tools.Statistics(lambda ind: ind.fitness.values[0])
stats.register("avg", numpy.mean)
stats.register("std", numpy.std)
stats.register("min", numpy.min)
stats.register("max", numpy.max)

logbook = tools.Logbook()
def isCloudNode(self, node):
    """Tell whether ``node`` belongs to the public resources, judged by name.

    :param node: node whose ``name`` is looked up
    :return: ``True`` if some node of ``self.public_resources`` has the same
        name, otherwise ``False``
    """
    wanted = node.name
    public_names = []
    for nd in HeftHelper.to_nodes(self.public_resources):
        public_names.append(nd.name)
    return wanted in public_names
# NOTE(review): HeftHelper, run_heft, schedule_to_position, MappingParticle,
# generate, deepcopy and random are used below but are not imported in the
# visible part of this script -- presumably imported above; confirm.
from heft.core.CommonComponents.ExperimentalManagers import ExperimentResourceManager
from heft.core.environment.Utility import Utility, wf
from heft.algs.common.mapordschedule import build_schedule, MAPPING_SPECIE, ORDERING_SPECIE
from heft.experiments.aggregate_utilities import interval_statistics, interval_stat_string
from heft.experiments.cga.mobjective.utility import SimpleTimeCostEstimator
from heft.core.environment.ResourceGenerator import ResourceGenerator as rg
from heft.experiments.cga.utilities.common import repeat

# Experiment fixture: the "Montage_75" workflow on resources generated from
# [10, 15, 25, 30] (presumably per-node flops -- TODO confirm against rg.r).
_wf = wf("Montage_75")
rm = ExperimentResourceManager(rg.r([10, 15, 25, 30]))
estimator = SimpleTimeCostEstimator(comp_time_cost=0, transf_time_cost=0, transferMx=None,
                                    ideal_flops=20, transfer_time=100)

# HEFT rank order, HEFT schedule, and the HEFT schedule converted to a
# position with an empty velocity attached.
sorted_tasks = HeftHelper.heft_rank(_wf, rm, estimator)
heft_schedule = run_heft(_wf, rm, estimator)
heft_mapping = schedule_to_position(heft_schedule)
heft_mapping.velocity = MappingParticle.Velocity({})

# Population generator for n particles.
# NOTE(review): random.random() returns values in [0.0, 1.0), so the
# "> 1.0" test is never true and the HEFT position is never seeded; every
# particle comes from generate(...). Looks like seeding was switched off on
# purpose -- confirm.
heft_gen = lambda n: [
    deepcopy(heft_mapping) if random.random() > 1.0 else generate(_wf, rm, estimator, 1)[0]
    for _ in range(n)
]

# Algorithm constants (presumably PSO inertia / acceleration coefficients,
# generation count and population size -- TODO confirm at the call site).
W, C1, C2 = 0.1, 0.6, 0.2
GEN, N = 300, 50
def default_fixed_schedule_part(resource_manager):
    """Build the default fixed schedule part: a Schedule with no items on any node.

    :param resource_manager: object exposing ``get_resources()``
    :return: ``Schedule`` whose mapping assigns ``[]`` to every node
    """
    resources = resource_manager.get_resources()
    all_nodes = HeftHelper.to_nodes(resources)
    return Schedule(dict((node, []) for node in all_nodes))
def generate_public_resources(self):
    """Create the hard-coded set of public resources and their metadata.

    Three resources named ``public_res_0..2`` are created with 15, 12 and 9
    nodes. Node flops follow the table 10, 15, 25, 25, 30 repeated three
    times. (A disabled earlier variant used these values plus 5 or 10.)

    :return: ``(resources, reliability_map, probability_estimator)``;
        ``reliability_map`` gives 0.9 for every node name and
        ``probability_estimator`` evaluates
        ``0.5 * (1 + erf((dt - M) / (0.1 * M)))`` with ``M`` the sum of the
        computation and transfer estimations
    """
    ## TODO: remake it later
    flops_by_index = [10, 15, 25, 25, 30] * 3

    resources = list()
    for i, node_count in enumerate((15, 12, 9)):
        res = Resource("public_res_" + str(i))
        resources.append(res)
        for j in range(0, node_count):
            node = Node(res.name + "_node_" + str(j), res, [SoftItem.ANY_SOFT])
            node.flops = flops_by_index[j]
            res.nodes.add(node)

    nodes = HeftHelper.to_nodes(resources)
    reliability_map = dict((node.name, 0.9) for node in nodes)

    def probability_estimator(dt, comp_estimation, transfer_estimation):
        M = comp_estimation + transfer_estimation
        sigma = 0.1 * M
        scaled = (dt - M) / sigma
        return 0.5 * (1 + math.erf(scaled))

    return (resources, reliability_map, probability_estimator)
def get_nodes(self):
    """Return the nodes of all resources currently reported by the resource manager.

    :return: result of ``HeftHelper.to_nodes`` for
        ``self.resource_manager.get_resources()``
    """
    return HeftHelper.to_nodes(self.resource_manager.get_resources())