def median_progress_rate_speedup(self, prefix): """ Returns how fast the job would have run if all tasks had the median progress rate. """ total_median_progress_rate_runtime = 0 runtimes_for_combined_stages = [] all_start_finish_times = [] for id, stage in self.stages.iteritems(): median_rate_runtimes = stage.task_runtimes_with_median_progress_rate() if id in self.stages_to_combine: runtimes_for_combined_stages.extend(median_rate_runtimes) else: no_stragglers_runtime, start_finish_times = simulate.simulate( median_rate_runtimes, concurrency.get_max_concurrency(stage.tasks)) start_finish_times_adjusted = [ (start + total_median_progress_rate_runtime, finish + total_median_progress_rate_runtime) \ for start, finish in start_finish_times] total_median_progress_rate_runtime += no_stragglers_runtime all_start_finish_times.append(start_finish_times_adjusted) print "No stragglers runtime: ", no_stragglers_runtime print "MAx concurrency: ", concurrency.get_max_concurrency(stage.tasks) if len(runtimes_for_combined_stages) > 0: no_stragglers_runtime, start_finish_times = simulate.simulate( runtimes_for_combined_stages, self.combined_stages_concurrency) start_finish_times_adjusted = [ (start + total_median_progress_rate_runtime, finish + total_median_progress_rate_runtime) \ for start, finish in start_finish_times] total_median_progress_rate_runtime += no_stragglers_runtime all_start_finish_times.append(start_finish_times_adjusted) self.write_simulated_waterfall(all_start_finish_times, "%s_sim_median_progress_rate" % prefix) return total_median_progress_rate_runtime * 1.0 / self.get_simulated_runtime()
def get_simulated_runtime(self, waterfall_prefix=""):
    """ Simulates the job stage by stage and returns the total simulated runtime.

    The result should be approximately the original runtime of the job, except that it
    doesn't include scheduler delay.

    Tasks are fed to the simulator ordered by start time. Stages outside
    self.stages_to_combine are simulated individually at their own maximum concurrency; the
    tasks of the stages in self.stages_to_combine are pooled and simulated together with
    self.combined_stages_concurrency.

    If a non-empty waterfall_prefix is passed in, a waterfall plot of the simulated runtimes
    is written under the name "<waterfall_prefix>_simulated".
    """
    by_start_time = lambda task: task.start_time

    elapsed = 0
    pooled_tasks = []
    waterfall = []
    for stage_id, stage in self.stages.iteritems():
        if stage_id in self.stages_to_combine:
            pooled_tasks.extend(stage.tasks)
            continue

        ordered = sorted(stage.tasks, key=by_start_time)
        stage_runtime, intervals = simulate.simulate(
            [task.runtime() for task in ordered],
            concurrency.get_max_concurrency(ordered))
        # Offset the stage by the runtime accumulated from the stages simulated so far.
        waterfall.append(
            [(start + elapsed, finish + elapsed) for start, finish in intervals])
        elapsed += stage_runtime

    if pooled_tasks:
        ordered = sorted(pooled_tasks, key=by_start_time)
        combined_runtime, intervals = simulate.simulate(
            [task.runtime() for task in ordered], self.combined_stages_concurrency)
        # The combined stages are shifted back by their own simulated runtime
        # (presumably so that they are drawn ahead of the other stages -- TODO confirm).
        waterfall.append(
            [(start - combined_runtime, finish - combined_runtime)
             for start, finish in intervals])
        elapsed += combined_runtime

    if waterfall_prefix:
        self.write_simulated_waterfall(waterfall, "%s_simulated" % waterfall_prefix)
    return elapsed
def get_simulated_runtime(self, waterfall_prefix=""):
    """ Simulates the job stage by stage and returns the total simulated runtime.

    The result should be approximately the original runtime of the job, except that it
    doesn't include scheduler delay.

    Tasks are fed to the simulator ordered by start time. Stages outside
    self.stages_to_combine are simulated individually at their own maximum concurrency; the
    tasks of the stages in self.stages_to_combine are pooled and simulated together with
    self.combined_stages_concurrency.

    If a non-empty waterfall_prefix is passed in, a waterfall plot of the simulated runtimes
    is written under the name "<waterfall_prefix>_simulated".
    """
    by_start_time = lambda task: task.start_time

    elapsed = 0
    pooled_tasks = []
    waterfall = []
    for stage_id, stage in self.stages.iteritems():
        if stage_id in self.stages_to_combine:
            pooled_tasks.extend(stage.tasks)
            continue

        ordered = sorted(stage.tasks, key=by_start_time)
        stage_runtime, intervals = simulate.simulate(
            [task.runtime() for task in ordered],
            concurrency.get_max_concurrency(ordered))
        # Offset the stage by the runtime accumulated from the stages simulated so far.
        waterfall.append(
            [(start + elapsed, finish + elapsed) for start, finish in intervals])
        elapsed += stage_runtime

    if pooled_tasks:
        ordered = sorted(pooled_tasks, key=by_start_time)
        combined_runtime, intervals = simulate.simulate(
            [task.runtime() for task in ordered], self.combined_stages_concurrency)
        # The combined stages are shifted back by their own simulated runtime
        # (presumably so that they are drawn ahead of the other stages -- TODO confirm).
        waterfall.append(
            [(start - combined_runtime, finish - combined_runtime)
             for start, finish in intervals])
        elapsed += combined_runtime

    if waterfall_prefix:
        self.write_simulated_waterfall(waterfall, "%s_simulated" % waterfall_prefix)
    return elapsed
def __str__(self):
    """ Returns a one-line, human-readable summary of the stage.

    The summary covers the task count, average/max task runtime, start time and duration,
    maximum concurrency, input/output megabytes, and the first element of each straggler
    breakdown. The JIT field is always reported as -1.
    """
    max_task_runtime = max([t.runtime() for t in self.tasks])
    first_task = self.tasks[0]
    input_method = "shuffle" if first_task.has_fetch else first_task.input_read_method

    template = (
        "%s tasks (avg runtime: %s, max runtime: %s) Start: %s, runtime: %s, "
        "Max concurrency: %s, "
        "Input MB: %s (from %s), Output MB: %s, Straggers: %s, Progress rate straggers: %s, "
        "Progress rate stragglers explained by scheduler delay (%s), HDFS read (%s), "
        "HDFS and read (%s), GC (%s), Network (%s), JIT (%s), output rate stragglers: %s")
    fields = (
        len(self.tasks),
        self.average_task_runtime(),
        max_task_runtime,
        self.start_time,
        self.finish_time() - self.start_time,
        concurrency.get_max_concurrency(self.tasks),
        self.input_mb(),
        input_method,
        self.output_mb(),
        self.traditional_stragglers(),
        self.progress_rate_stragglers()[0],
        self.scheduler_delay_stragglers()[0],
        self.hdfs_read_stragglers()[0],
        self.hdfs_read_and_scheduler_delay_stragglers()[0],
        self.gc_stragglers()[0],
        self.network_stragglers()[0],
        # Do not compute the JIT stragglers here! Screws up the calculation.
        -1,
        self.output_progress_rate_stragglers()[0])
    return template % fields
def replace_stragglers_with_median_speedup(self, threshold_fn):
    """ Returns how much faster the job would have run if there were no stragglers.

    For each stage, passes the list of task runtimes into threshold_fn, which should return a
    threshold runtime. Every task whose runtime is greater than or equal to that threshold is
    treated as a straggler and its runtime is replaced with the stage's median runtime; the
    stage is then re-simulated. Stages in self.stages_to_combine are pooled and simulated
    together using self.combined_stages_concurrency.

    For example, to replace the tasks with the longest 5% of runtimes with the median:
      self.replace_stragglers_with_median_speedup(
          lambda runtimes: numpy.percentile(runtimes, 95))

    Per-stage diagnostics (actual duration, simulated original runtime, simulated
    no-straggler runtime, straggler count) are printed along the way.

    Returns the simulated no-straggler runtime divided by self.get_simulated_runtime().
    Raises AssertionError if threshold_fn yields a threshold that marks a task faster than
    the median as a straggler.
    """
    self.print_heading("Computing speedup from replacing straggler tasks with median")
    total_no_stragglers_runtime = 0
    start_and_runtimes_for_combined_stages = []
    original_start_and_runtimes_for_combined_stages = []
    num_stragglers_combined_stages = 0
    for stage_id, stage in self.stages.iteritems():
        runtimes = [task.runtime() for task in stage.tasks]
        median_runtime = numpy.percentile(runtimes, 50)
        threshold_runtime = threshold_fn(runtimes)
        no_straggler_start_and_runtimes = []
        num_stragglers = 0
        # NOTE(review): tasks are ordered by runtime here, whereas get_simulated_runtime
        # orders them by start time -- confirm that the difference is intended.
        sorted_stage_tasks = sorted(stage.tasks, key=lambda t: t.runtime())
        for task in sorted_stage_tasks:
            if task.runtime() >= threshold_runtime:
                # Replacing a straggler must never make the task slower.
                assert(median_runtime <= task.runtime())
                no_straggler_start_and_runtimes.append((task.start_time, median_runtime))
                num_stragglers += 1
            else:
                no_straggler_start_and_runtimes.append((task.start_time, task.runtime()))

        if stage_id in self.stages_to_combine:
            start_and_runtimes_for_combined_stages.extend(no_straggler_start_and_runtimes)
            original_start_and_runtimes_for_combined_stages.extend(
                [(t.start_time, t.runtime()) for t in stage.tasks])
            num_stragglers_combined_stages += num_stragglers
        else:
            max_concurrency = concurrency.get_max_concurrency(stage.tasks)
            no_stragglers_runtime = simulate.simulate(
                [x[1] for x in no_straggler_start_and_runtimes], max_concurrency)[0]
            total_no_stragglers_runtime += no_stragglers_runtime
            original_runtime = simulate.simulate(
                [task.runtime() for task in sorted_stage_tasks], max_concurrency)[0]
            print ("%s: Original: %s, Orig (sim): %s, no stragg: %s (%s stragglers)" %
                   (stage_id, stage.finish_time() - stage.start_time, original_runtime,
                    no_stragglers_runtime, num_stragglers))

    if len(start_and_runtimes_for_combined_stages) > 0:
        # The "Original" span must come from the unmodified task runtimes; deriving it from
        # the straggler-replaced list would understate the real finish time.
        original_start_time = min(
            [x[0] for x in original_start_and_runtimes_for_combined_stages])
        original_finish_time = max(
            [x[0] + x[1] for x in original_start_and_runtimes_for_combined_stages])
        # Simulate the pooled tasks in start-time order.
        start_and_runtimes_for_combined_stages.sort()
        runtimes_for_combined_stages = [x[1] for x in start_and_runtimes_for_combined_stages]
        new_runtime = simulate.simulate(
            runtimes_for_combined_stages, self.combined_stages_concurrency)[0]
        original_runtime = simulate.simulate(
            [x[1] for x in sorted(original_start_and_runtimes_for_combined_stages)],
            self.combined_stages_concurrency)[0]
        print ("Combined: Original: %s, Orig (sim): %s, no stragg: %s (%s stragglers)" %
               (original_finish_time - original_start_time, original_runtime, new_runtime,
                num_stragglers_combined_stages))
        total_no_stragglers_runtime += new_runtime

    return total_no_stragglers_runtime * 1.0 / self.get_simulated_runtime()
def replace_stragglers_with_median_speedup(self, threshold_fn):
    """ Returns how much faster the job would have run if there were no stragglers.

    For each stage, passes the list of task runtimes into threshold_fn, which should return a
    threshold runtime. Every task whose runtime is greater than or equal to that threshold is
    treated as a straggler and its runtime is replaced with the stage's median runtime; the
    stage is then re-simulated. Stages in self.stages_to_combine are pooled and simulated
    together using self.combined_stages_concurrency.

    For example, to replace the tasks with the longest 5% of runtimes with the median:
      self.replace_stragglers_with_median_speedup(
          lambda runtimes: numpy.percentile(runtimes, 95))

    Per-stage diagnostics (actual duration, simulated original runtime, simulated
    no-straggler runtime, straggler count) are printed along the way.

    Returns the simulated no-straggler runtime divided by self.get_simulated_runtime().
    Raises AssertionError if threshold_fn yields a threshold that marks a task faster than
    the median as a straggler.
    """
    self.print_heading("Computing speedup from replacing straggler tasks with median")
    total_no_stragglers_runtime = 0
    start_and_runtimes_for_combined_stages = []
    original_start_and_runtimes_for_combined_stages = []
    num_stragglers_combined_stages = 0
    for stage_id, stage in self.stages.iteritems():
        runtimes = [task.runtime() for task in stage.tasks]
        median_runtime = numpy.percentile(runtimes, 50)
        threshold_runtime = threshold_fn(runtimes)
        no_straggler_start_and_runtimes = []
        num_stragglers = 0
        # NOTE(review): tasks are ordered by runtime here, whereas get_simulated_runtime
        # orders them by start time -- confirm that the difference is intended.
        sorted_stage_tasks = sorted(stage.tasks, key=lambda t: t.runtime())
        for task in sorted_stage_tasks:
            if task.runtime() >= threshold_runtime:
                # Replacing a straggler must never make the task slower.
                assert(median_runtime <= task.runtime())
                no_straggler_start_and_runtimes.append((task.start_time, median_runtime))
                num_stragglers += 1
            else:
                no_straggler_start_and_runtimes.append((task.start_time, task.runtime()))

        if stage_id in self.stages_to_combine:
            start_and_runtimes_for_combined_stages.extend(no_straggler_start_and_runtimes)
            original_start_and_runtimes_for_combined_stages.extend(
                [(t.start_time, t.runtime()) for t in stage.tasks])
            num_stragglers_combined_stages += num_stragglers
        else:
            max_concurrency = concurrency.get_max_concurrency(stage.tasks)
            no_stragglers_runtime = simulate.simulate(
                [x[1] for x in no_straggler_start_and_runtimes], max_concurrency)[0]
            total_no_stragglers_runtime += no_stragglers_runtime
            original_runtime = simulate.simulate(
                [task.runtime() for task in sorted_stage_tasks], max_concurrency)[0]
            print ("%s: Original: %s, Orig (sim): %s, no stragg: %s (%s stragglers)" %
                   (stage_id, stage.finish_time() - stage.start_time, original_runtime,
                    no_stragglers_runtime, num_stragglers))

    if len(start_and_runtimes_for_combined_stages) > 0:
        # The "Original" span must come from the unmodified task runtimes; deriving it from
        # the straggler-replaced list would understate the real finish time.
        original_start_time = min(
            [x[0] for x in original_start_and_runtimes_for_combined_stages])
        original_finish_time = max(
            [x[0] + x[1] for x in original_start_and_runtimes_for_combined_stages])
        # Simulate the pooled tasks in start-time order.
        start_and_runtimes_for_combined_stages.sort()
        runtimes_for_combined_stages = [x[1] for x in start_and_runtimes_for_combined_stages]
        new_runtime = simulate.simulate(
            runtimes_for_combined_stages, self.combined_stages_concurrency)[0]
        original_runtime = simulate.simulate(
            [x[1] for x in sorted(original_start_and_runtimes_for_combined_stages)],
            self.combined_stages_concurrency)[0]
        print ("Combined: Original: %s, Orig (sim): %s, no stragg: %s (%s stragglers)" %
               (original_finish_time - original_start_time, original_runtime, new_runtime,
                num_stragglers_combined_stages))
        total_no_stragglers_runtime += new_runtime

    return total_no_stragglers_runtime * 1.0 / self.get_simulated_runtime()
def add_tasks_to_totals(unsorted_tasks):
    """ Simulates the given tasks with base and faster runtimes and accumulates the results.

    The simulated base runtime is added to total_time[0] and the simulated faster runtime to
    total_faster_time[0]; both accumulators, as well as compute_base_runtime and
    compute_faster_runtime, come from the enclosing scope.
    """
    # Order by start time rather than finish time: otherwise the longest tasks end up being
    # run last, which can artificially inflate the job completion time.
    ordered = sorted(unsorted_tasks, key=lambda task: task.start_time)
    slots = concurrency.get_max_concurrency(ordered)

    simulated = []
    for per_task_runtime, accumulator in ((compute_base_runtime, total_time),
                                          (compute_faster_runtime, total_faster_time)):
        stage_runtime = simulate.simulate(
            [per_task_runtime(task) for task in ordered], slots)[0]
        accumulator[0] += stage_runtime
        simulated.append(stage_runtime)

    base_runtime, faster_runtime = simulated
    print("Base: %s, faster: %s" % (base_runtime, faster_runtime))
def add_tasks_to_totals(unsorted_tasks):
    """ Simulates the given tasks with base and faster runtimes and accumulates the results.

    The simulated base runtime is added to total_time[0] and the simulated faster runtime to
    total_faster_time[0]; both accumulators, as well as compute_base_runtime and
    compute_faster_runtime, come from the enclosing scope.
    """
    # Order by start time rather than finish time: otherwise the longest tasks end up being
    # run last, which can artificially inflate the job completion time.
    ordered = sorted(unsorted_tasks, key=lambda task: task.start_time)
    slots = concurrency.get_max_concurrency(ordered)

    simulated = []
    for per_task_runtime, accumulator in ((compute_base_runtime, total_time),
                                          (compute_faster_runtime, total_faster_time)):
        stage_runtime = simulate.simulate(
            [per_task_runtime(task) for task in ordered], slots)[0]
        accumulator[0] += stage_runtime
        simulated.append(stage_runtime)

    base_runtime, faster_runtime = simulated
    print("Base: %s, faster: %s" % (base_runtime, faster_runtime))
def initialize_job(self, time, initialTime): """ Should be called after adding all events to the job. """ # Drop empty stages. self.submittingTime = time stages_to_drop = [] for id, s in self.stages.iteritems(): if len(s.tasks) == 0: stages_to_drop.append(id) for id in stages_to_drop: print "Dropping stage %s" % id del self.stages[id] # Compute the amount of overlapped time between stages # (there should just be two stages, at the beginning, that overlap and run concurrently). # This computation assumes that not more than two stages overlap. #print " ", ["%s: %s tasks" % (id, len(s.tasks)) for id, s in self.stages.iteritems()] start_and_finish_times = [(id, s.start_time, s.conservative_finish_time()) for id, s in self.stages.iteritems()] start_and_finish_times.sort(key = lambda x: x[1]) self.overlap = 0 old_end = 0 previous_id = "" self.stages_to_combine = set() ts = 0.0 for id, start, finish in start_and_finish_times: print "id, submission, start, finish, runTime, duration" print id, self.submittingTime-initialTime, start-initialTime, finish-initialTime, finish-start, finish-self.submittingTime return finish-self.submittingTime if start < old_end: self.overlap += old_end - start print " Overlap:", self.overlap, "between ", id, "and", previous_id self.stages_to_combine.add(id) self.stages_to_combine.add(previous_id) old_end = max(old_end, finish) if finish > old_end: old_end = finish previous_id = id # print "Stages to combine: ", self.stages_to_combine self.combined_stages_concurrency = -1 if len(self.stages_to_combine) > 0: tasks_for_combined_stages = [] for stage_id in self.stages_to_combine: tasks_for_combined_stages.extend(self.stages[stage_id].tasks) self.combined_stages_concurrency = concurrency.get_max_concurrency(tasks_for_combined_stages)
def initialize_job(self): """ Should be called after adding all events to the job. """ # Drop empty stages. stages_to_drop = [] for id, s in self.stages.iteritems(): if len(s.tasks) == 0: stages_to_drop.append(id) for id in stages_to_drop: print "Dropping stage %s" % id del self.stages[id] # Compute the amount of overlapped time between stages # (there should just be two stages, at the beginning, that overlap and run concurrently). # This computation assumes that not more than two stages overlap. print[ "%s: %s tasks" % (id, len(s.tasks)) for id, s in self.stages.iteritems() ] start_and_finish_times = [(id, s.start_time, s.conservative_finish_time()) for id, s in self.stages.iteritems()] start_and_finish_times.sort(key=lambda x: x[1]) self.overlap = 0 old_end = 0 previous_id = "" self.stages_to_combine = set() for id, start, finish in start_and_finish_times: if start < old_end: self.overlap += old_end - start print "Overlap:", self.overlap, "between ", id, "and", previous_id self.stages_to_combine.add(id) self.stages_to_combine.add(previous_id) old_end = max(old_end, finish) if finish > old_end: old_end = finish previous_id = id print "Stages to combine: ", self.stages_to_combine self.combined_stages_concurrency = -1 if len(self.stages_to_combine) > 0: tasks_for_combined_stages = [] for stage_id in self.stages_to_combine: tasks_for_combined_stages.extend(self.stages[stage_id].tasks) self.combined_stages_concurrency = concurrency.get_max_concurrency( tasks_for_combined_stages)
def __str__(self):
    """ Returns a one-line, human-readable summary of the stage.

    The summary covers the task count, average/max task runtime, start time and duration,
    maximum concurrency, input/output megabytes, and the first element of each straggler
    breakdown (including the JIT stragglers).
    """
    max_task_runtime = max([t.runtime() for t in self.tasks])
    first_task = self.tasks[0]
    input_method = "shuffle" if first_task.has_fetch else first_task.input_read_method

    template = (
        "%s tasks (avg runtime: %s, max runtime: %s) Start: %s, runtime: %s, "
        "Max concurrency: %s, "
        "Input MB: %s (from %s), Output MB: %s, Straggers: %s, Progress rate straggers: %s, "
        "Progress rate stragglers explained by scheduler delay (%s), HDFS read (%s), "
        "HDFS and read (%s), GC (%s), Network (%s), JIT (%s), output rate stragglers: %s")
    fields = (
        len(self.tasks),
        self.average_task_runtime(),
        max_task_runtime,
        self.start_time,
        self.finish_time() - self.start_time,
        concurrency.get_max_concurrency(self.tasks),
        self.input_mb(),
        input_method,
        self.output_mb(),
        self.traditional_stragglers(),
        self.progress_rate_stragglers()[0],
        self.scheduler_delay_stragglers()[0],
        self.hdfs_read_stragglers()[0],
        self.hdfs_read_and_scheduler_delay_stragglers()[0],
        self.gc_stragglers()[0],
        self.network_stragglers()[0],
        self.jit_stragglers()[0],
        self.output_progress_rate_stragglers()[0])
    return template % fields
def no_stragglers_perfect_parallelism_speedup(self):
    """ Returns how fast the job would have run with task time spread perfectly over the slots.

    For each stage outside self.stages_to_combine the ideal runtime is the stage's total
    runtime divided by its maximum concurrency. The total runtime of the stages in
    self.stages_to_combine is summed and divided by self.combined_stages_concurrency.

    Returns the sum of these ideal runtimes divided by self.get_simulated_runtime().
    """
    ideal_runtime = 0
    pooled_task_time = 0
    for stage_id, stage in self.stages.iteritems():
        if stage_id in self.stages_to_combine:
            pooled_task_time += stage.total_runtime()
            continue
        stage_ideal = float(stage.total_runtime()) / concurrency.get_max_concurrency(stage.tasks)
        print("New runtime: %s, original runtime: %s" %
              (stage_ideal, stage.finish_time() - stage.start_time))
        ideal_runtime += stage_ideal

    print("Total runtime combined: %s (concurrency %d" %
          (pooled_task_time, self.combined_stages_concurrency))
    ideal_runtime += float(pooled_task_time) / self.combined_stages_concurrency

    print("Getting simulated runtime")
    simulated_actual_runtime = self.get_simulated_runtime()
    print("Ideal runtime for all: %s, simulated: %s" %
          (ideal_runtime, simulated_actual_runtime))
    return ideal_runtime / simulated_actual_runtime
def no_stragglers_perfect_parallelism_speedup(self):
    """ Returns how fast the job would have run with task time spread perfectly over the slots.

    For each stage outside self.stages_to_combine the ideal runtime is the stage's total
    runtime divided by its maximum concurrency. The total runtime of the stages in
    self.stages_to_combine is summed and divided by self.combined_stages_concurrency.

    Returns the sum of these ideal runtimes divided by self.get_simulated_runtime().
    """
    ideal_runtime = 0
    pooled_task_time = 0
    for stage_id, stage in self.stages.iteritems():
        if stage_id in self.stages_to_combine:
            pooled_task_time += stage.total_runtime()
            continue
        stage_ideal = float(stage.total_runtime()) / concurrency.get_max_concurrency(stage.tasks)
        print("New runtime: %s, original runtime: %s" %
              (stage_ideal, stage.finish_time() - stage.start_time))
        ideal_runtime += stage_ideal

    print("Total runtime combined: %s (concurrency %d" %
          (pooled_task_time, self.combined_stages_concurrency))
    ideal_runtime += float(pooled_task_time) / self.combined_stages_concurrency

    print("Getting simulated runtime")
    simulated_actual_runtime = self.get_simulated_runtime()
    print("Ideal runtime for all: %s, simulated: %s" %
          (ideal_runtime, simulated_actual_runtime))
    return ideal_runtime / simulated_actual_runtime
def replace_all_tasks_with_average_speedup(self, prefix):
    """ Returns how much faster the job would have run if there were no stragglers.

    Stragglers are eliminated by giving every task in a stage that stage's average task
    runtime. Stages outside self.stages_to_combine are simulated one after another at their
    own maximum concurrency; the stages in self.stages_to_combine are pooled and simulated
    together using self.combined_stages_concurrency.

    A waterfall of the simulated (start, finish) pairs is written under the name
    "<prefix>_sim_no_stragglers".

    Returns the simulated averaged runtime divided by self.get_simulated_runtime().
    """
    self.print_heading("Computing speedup by averaging out stragglers")
    elapsed = 0
    pooled_runtimes = []
    waterfall = []
    for stage_id, stage in self.stages.iteritems():
        averaged_runtimes = [stage.average_task_runtime()] * len(stage.tasks)
        if stage_id in self.stages_to_combine:
            pooled_runtimes.extend(averaged_runtimes)
            continue

        stage_runtime, intervals = simulate.simulate(
            averaged_runtimes, concurrency.get_max_concurrency(stage.tasks))
        # Adjust the start and finish times based on when the stage started.
        waterfall.append(
            [(start + elapsed, finish + elapsed) for start, finish in intervals])
        elapsed += stage_runtime

    if pooled_runtimes:
        combined_runtime, intervals = simulate.simulate(
            pooled_runtimes, self.combined_stages_concurrency)
        # The subtraction is a hack to put the combined stages at the beginning, which
        # is when they usually occur.
        waterfall.append(
            [(start - combined_runtime, finish - combined_runtime)
             for start, finish in intervals])
        elapsed += combined_runtime

    self.write_simulated_waterfall(waterfall, "%s_sim_no_stragglers" % prefix)
    return elapsed * 1.0 / self.get_simulated_runtime()
def replace_all_tasks_with_median_speedup(self):
    """ Returns how much faster the job would have run if there were no stragglers.

    Removes stragglers by replacing all task runtimes with the median runtime for tasks in
    the stage. Stages outside self.stages_to_combine are simulated individually at their own
    maximum concurrency; the stages in self.stages_to_combine are pooled and simulated
    together using self.combined_stages_concurrency.

    Returns the simulated median-only runtime divided by self.get_simulated_runtime().
    """
    total_no_stragglers_runtime = 0
    runtimes_for_combined_stages = []
    for stage_id, stage in self.stages.iteritems():
        runtimes = [task.runtime() for task in stage.tasks]
        # Compute the median once and reuse it for every task in the stage.
        median_runtime = numpy.median(runtimes)
        no_straggler_runtimes = [median_runtime] * len(stage.tasks)
        if stage_id in self.stages_to_combine:
            runtimes_for_combined_stages.extend(no_straggler_runtimes)
        else:
            total_no_stragglers_runtime += simulate.simulate(
                no_straggler_runtimes, concurrency.get_max_concurrency(stage.tasks))[0]

    if len(runtimes_for_combined_stages) > 0:
        total_no_stragglers_runtime += simulate.simulate(
            runtimes_for_combined_stages, self.combined_stages_concurrency)[0]
    return total_no_stragglers_runtime * 1.0 / self.get_simulated_runtime()
def replace_all_tasks_with_median_speedup(self):
    """ Returns how much faster the job would have run if there were no stragglers.

    Removes stragglers by replacing all task runtimes with the median runtime for tasks in
    the stage. Stages outside self.stages_to_combine are simulated individually at their own
    maximum concurrency; the stages in self.stages_to_combine are pooled and simulated
    together using self.combined_stages_concurrency.

    Returns the simulated median-only runtime divided by self.get_simulated_runtime().
    """
    total_no_stragglers_runtime = 0
    runtimes_for_combined_stages = []
    for stage_id, stage in self.stages.iteritems():
        runtimes = [task.runtime() for task in stage.tasks]
        # Compute the median once and reuse it for every task in the stage.
        median_runtime = numpy.median(runtimes)
        no_straggler_runtimes = [median_runtime] * len(stage.tasks)
        if stage_id in self.stages_to_combine:
            runtimes_for_combined_stages.extend(no_straggler_runtimes)
        else:
            total_no_stragglers_runtime += simulate.simulate(
                no_straggler_runtimes, concurrency.get_max_concurrency(stage.tasks))[0]

    if len(runtimes_for_combined_stages) > 0:
        total_no_stragglers_runtime += simulate.simulate(
            runtimes_for_combined_stages, self.combined_stages_concurrency)[0]
    return total_no_stragglers_runtime * 1.0 / self.get_simulated_runtime()
def replace_all_tasks_with_average_speedup(self, prefix):
    """ Returns how much faster the job would have run if there were no stragglers.

    Stragglers are eliminated by giving every task in a stage that stage's average task
    runtime. Stages outside self.stages_to_combine are simulated one after another at their
    own maximum concurrency; the stages in self.stages_to_combine are pooled and simulated
    together using self.combined_stages_concurrency.

    A waterfall of the simulated (start, finish) pairs is written under the name
    "<prefix>_sim_no_stragglers".

    Returns the simulated averaged runtime divided by self.get_simulated_runtime().
    """
    self.print_heading("Computing speedup by averaging out stragglers")
    elapsed = 0
    pooled_runtimes = []
    waterfall = []
    for stage_id, stage in self.stages.iteritems():
        averaged_runtimes = [stage.average_task_runtime()] * len(stage.tasks)
        if stage_id in self.stages_to_combine:
            pooled_runtimes.extend(averaged_runtimes)
            continue

        stage_runtime, intervals = simulate.simulate(
            averaged_runtimes, concurrency.get_max_concurrency(stage.tasks))
        # Adjust the start and finish times based on when the stage started.
        waterfall.append(
            [(start + elapsed, finish + elapsed) for start, finish in intervals])
        elapsed += stage_runtime

    if pooled_runtimes:
        combined_runtime, intervals = simulate.simulate(
            pooled_runtimes, self.combined_stages_concurrency)
        # The subtraction is a hack to put the combined stages at the beginning, which
        # is when they usually occur.
        waterfall.append(
            [(start - combined_runtime, finish - combined_runtime)
             for start, finish in intervals])
        elapsed += combined_runtime

    self.write_simulated_waterfall(waterfall, "%s_sim_no_stragglers" % prefix)
    return elapsed * 1.0 / self.get_simulated_runtime()
print "Overlap:", self.overlap, "between ", id, "and", previous_id self.stages_to_combine.add(id) self.stages_to_combine.add(previous_id) old_end = max(old_end, finish) if finish > old_end: old_end = finish previous_id = id print "Stages to combine: ", self.stages_to_combine self.combined_stages_concurrency = -1 if len(self.stages_to_combine) > 0: tasks_for_combined_stages = [] for stage_id in self.stages_to_combine: tasks_for_combined_stages.extend(self.stages[stage_id].tasks) self.combined_stages_concurrency = concurrency.get_max_concurrency(tasks_for_combined_stages) def all_tasks(self): """ Returns a list of all tasks. """ return [task for stage in self.stages.values() for task in stage.tasks] def print_stage_info(self): for id, stage in self.stages.iteritems(): print "STAGE %s: %s" % (id, stage.verbose_str()) def print_heading(self, text): print "\n******** %s ********" % text def get_simulated_runtime(self, waterfall_prefix=""): """ Returns the simulated runtime for the job.