class FcfsScheduler(Scheduler):
    """First-come-first-served scheduler: jobs start strictly in submission order."""

    def __init__(self, options):
        super(FcfsScheduler, self).__init__(options)
        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])
        # FIFO queue of jobs that have been submitted but not yet started.
        self.waiting_queue_of_jobs = []

    def new_events_on_job_submission(self, job, current_time):
        # Queue the new arrival, then start every queued job that now fits.
        self.cpu_snapshot.archive_old_slices(current_time)
        self.waiting_queue_of_jobs.append(job)
        started = self._schedule_jobs(current_time)
        return [JobStartEvent(current_time, started_job) for started_job in started]

    def new_events_on_job_termination(self, job, current_time):
        # Reclaim the unused tail of the finished job, then start waiting jobs.
        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        started = self._schedule_jobs(current_time)
        return [JobStartEvent(current_time, started_job) for started_job in started]

    def _schedule_jobs(self, current_time):
        """Start jobs from the head of the queue while processors are free.

        Stops at the first job that does not fit — strict FCFS never skips
        over a blocked head job.
        """
        started_jobs = []
        while self.waiting_queue_of_jobs:
            head = self.waiting_queue_of_jobs[0]
            free_now = self.cpu_snapshot.free_processors_available_at(current_time)
            if free_now < head.num_required_processors:
                break  # head job blocks everything behind it
            self.waiting_queue_of_jobs.pop(0)
            self.cpu_snapshot.assignJob(head, current_time)
            started_jobs.append(head)
        return started_jobs
class ConservativeScheduler(Scheduler):
    """Conservative backfilling: every job receives a firm reservation at
    submission time, and reservations may only move earlier afterwards."""

    def __init__(self, options):
        super(ConservativeScheduler, self).__init__(options)
        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])
        # All jobs not yet terminated, in submission order.
        self.unfinished_jobs_by_submit_time = []

    def new_events_on_job_submission(self, job, current_time):
        # Reserve the earliest feasible slot for the new job immediately.
        self.cpu_snapshot.archive_old_slices(current_time)
        self.unfinished_jobs_by_submit_time.append(job)
        self.cpu_snapshot.assignJobEarliest(job, current_time)
        return [JobStartEvent(job.start_to_run_at_time, job)]

    def new_events_on_job_termination(self, job, current_time):
        """Drop the finished job's unused tail (it may have ended before its
        declared duration), then try to pull the remaining reservations
        earlier; returns the resulting JobStartEvent list."""
        self.cpu_snapshot.archive_old_slices(current_time)
        self.unfinished_jobs_by_submit_time.remove(job)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        return self._reschedule_jobs(current_time)

    def _reschedule_jobs(self, current_time):
        events = []
        for pending_job in self.unfinished_jobs_by_submit_time:
            # Jobs already running cannot move (preemption is not allowed).
            if pending_job.start_to_run_at_time <= current_time:
                continue
            old_start = pending_job.start_to_run_at_time
            self.cpu_snapshot.delJobFromCpuSlices(pending_job)
            self.cpu_snapshot.assignJobEarliest(pending_job, current_time)
            # Rescheduling may only move a job earlier, never later.
            assert old_start >= pending_job.start_to_run_at_time
            if old_start != pending_job.start_to_run_at_time:
                events.append(JobStartEvent(pending_job.start_to_run_at_time, pending_job))
        return events
class FcfsScheduler(Scheduler):
    """First-come-first-served scheduler: jobs start strictly in submission order."""

    def __init__(self, options):
        super(FcfsScheduler, self).__init__(options)
        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])
        # FIFO queue of submitted-but-not-started jobs.
        self.waiting_queue_of_jobs = []

    def new_events_on_job_submission(self, job, current_time):
        """Queue the new job and return start events for every job that can run now."""
        self.cpu_snapshot.archive_old_slices(current_time)
        self.waiting_queue_of_jobs.append(job)
        return [
            JobStartEvent(current_time, job)
            for job in self._schedule_jobs(current_time)
        ]

    def new_events_on_job_termination(self, job, current_time):
        """Reclaim the finished job's unused tail, then start any waiting jobs that fit."""
        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        return [
            JobStartEvent(current_time, job)
            for job in self._schedule_jobs(current_time)
        ]

    def _schedule_jobs(self, current_time):
        """Start jobs from the head of the queue while processors are free.

        Stops at the first job that does not fit (strict FCFS: no skipping).
        """
        result = []
        while len(self.waiting_queue_of_jobs) > 0:
            job = self.waiting_queue_of_jobs[0]
            if self.cpu_snapshot.free_processors_available_at(current_time) >= job.num_required_processors:
                self.waiting_queue_of_jobs.pop(0)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
            else:
                # Head job blocks: nothing behind it may start.
                break
        return result
class ConservativeScheduler(Scheduler):
    """Conservative backfilling: every job gets a reservation at submission
    time; reservations are only ever moved earlier on terminations."""

    def __init__(self, options):
        super(ConservativeScheduler, self).__init__(options)
        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])
        # All not-yet-terminated jobs, kept in submission order.
        self.unfinished_jobs_by_submit_time = []

    def new_events_on_job_submission(self, job, current_time):
        """Reserve the earliest feasible slot for the job and return its start event."""
        self.cpu_snapshot.archive_old_slices(current_time)
        self.unfinished_jobs_by_submit_time.append(job)
        self.cpu_snapshot.assignJobEarliest(job, current_time)
        return [JobStartEvent(job.start_to_run_at_time, job)]

    def new_events_on_job_termination(self, job, current_time):
        """
        Here we delete the tail of job if it was ended before the duration
        declaration. It then reschedules the remaining (not yet started)
        jobs and returns the resulting collection of JobStartEvents.
        """
        self.cpu_snapshot.archive_old_slices(current_time)
        self.unfinished_jobs_by_submit_time.remove(job)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        return self._reschedule_jobs(current_time)

    def _reschedule_jobs(self, current_time):
        """Try to pull every pending reservation earlier; emit events for moved jobs."""
        newEvents = []
        for job in self.unfinished_jobs_by_submit_time:
            if job.start_to_run_at_time <= current_time:
                continue  # job started to run before, so it cannot be rescheduled (preemptions are not allowed)
            prev_start_to_run_at_time = job.start_to_run_at_time
            self.cpu_snapshot.delJobFromCpuSlices(job)
            self.cpu_snapshot.assignJobEarliest(job, current_time)
            # A reschedule may only move the job earlier, never later.
            assert prev_start_to_run_at_time >= job.start_to_run_at_time
            if prev_start_to_run_at_time != job.start_to_run_at_time:
                newEvents.append(JobStartEvent(job.start_to_run_at_time, job))
        return newEvents
class LogScheduler(Scheduler): def __init__(self, num_processors): super(LogScheduler, self).__init__(num_processors) self.cpu_snapshot = CpuSnapshot(num_processors) self.waiting_queue_of_jobs = [] def new_events_on_job_submission(self, job, current_time): self.cpu_snapshot.archive_old_slices(current_time) result = [] #self.waiting_queue_of_jobs.append(job) result.append(JobStartEvent(current_time+job.actual_wait_time, job)) #return [ # JobStartEvent(current_time, job) # for job in self._log_schedule_jobs(current_time) #] return result def new_events_on_job_termination(self, job, current_time): self.cpu_snapshot.archive_old_slices(current_time) self.cpu_snapshot.delTailofJobFromCpuSlices(job) return [] """return [ JobStartEvent(current_time, job) for job in self._log_schedule_jobs(current_time) ]""" def _schedule_jobs(self, current_time): result = [] while len(self.waiting_queue_of_jobs) > 0: job = self.waiting_queue_of_jobs[0] if self.cpu_snapshot.free_processors_available_at(current_time) >= job.num_required_processors: self.waiting_queue_of_jobs.pop(0) self.cpu_snapshot.assignJob(job, current_time) result.append(job) else: break return result def _log_schedule_jobs(self, current_time): #by Siddharth result = [] return result """while len(self.waiting_queue_of_jobs) > 0:
class EasyBackfillScheduler(Scheduler):
    """EASY backfilling: the head of the waiting list holds a reservation;
    later jobs may jump ahead only while that reservation is not delayed."""

    def __init__(self, options):
        super(EasyBackfillScheduler, self).__init__(options)
        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])
        # Jobs submitted but not yet started, in submission order.
        self.unscheduled_jobs = []

    def new_events_on_job_submission(self, just_submitted_job, current_time):
        """
        Here we first add the new job to the waiting list. We then try to
        schedule the jobs in the waiting list, returning a collection of
        new JobStartEvents.
        """
        # TODO: a probable performance bottleneck because we reschedule all the
        # jobs. Knowing that only one new job is added allows more efficient
        # scheduling here.
        self.cpu_snapshot.archive_old_slices(current_time)
        self.unscheduled_jobs.append(just_submitted_job)
        retl = []
        # Scheduling is only attempted when the newly submitted job itself
        # could start right now; otherwise the list waits for a termination.
        if (self.cpu_snapshot.free_processors_available_at(current_time) >= just_submitted_job.num_required_processors):
            for job in self._schedule_jobs(current_time):
                retl.append(JobStartEvent(current_time, job))
        return retl

    def new_events_on_job_termination(self, job, current_time):
        """
        Here we first delete the tail of the just terminated job (in case
        it's done before user estimation time). We then try to schedule the
        jobs in the waiting list, returning a collection of new JobStartEvents.
        """
        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        return [
            JobStartEvent(current_time, job)
            for job in self._schedule_jobs(current_time)
        ]

    def _schedule_jobs(self, current_time):
        "Schedules jobs that can run right now, and returns them"
        jobs = self._schedule_head_of_list(current_time)
        jobs += self._backfill_jobs(current_time)
        return jobs

    def _schedule_head_of_list(self, current_time):
        """Start jobs strictly from the front of the list while they fit."""
        result = []
        while True:
            if len(self.unscheduled_jobs) == 0:
                break
            # Try to schedule the first job
            if self.cpu_snapshot.free_processors_available_at(current_time) >= self.unscheduled_jobs[0].num_required_processors:
                job = self.unscheduled_jobs.pop(0)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
            else:
                # first job can't be scheduled
                break
        return result

    def _backfill_jobs(self, current_time):
        """
        Find jobs that can be backfilled and update the cpu snapshot.
        DEPRECATED FUNCTION !!!!!!
        """
        if len(self.unscheduled_jobs) <= 1:
            return []
        result = []
        tail_of_waiting_list = list_copy(self.unscheduled_jobs[1:])
        # Temporarily reserve the head job at its earliest slot so backfill
        # candidates cannot delay it; the reservation is removed afterwards.
        first_job = self.unscheduled_jobs[0]
        self.cpu_snapshot.assignJobEarliest(first_job, current_time)
        for job in tail_of_waiting_list:
            if self.cpu_snapshot.canJobStartNow(job, current_time):
                job.is_backfilled = 1
                self.unscheduled_jobs.remove(job)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
        self.cpu_snapshot.unAssignJob(first_job)
        return result
class EasyPlusPlusScheduler(Scheduler):
    """ This algorithm implements the algorithm in the paper of Tsafrir, Etzion, Feitelson, june 2007? """

    # Signals the driver that this scheduler needs a run-time predictor.
    I_NEED_A_PREDICTOR = True

    def __init__(self, options):
        super(EasyPlusPlusScheduler, self).__init__(options)
        self.init_predictor(options)
        self.init_corrector(options)
        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])
        # Jobs submitted but not yet started.
        self.unscheduled_jobs = []

    def new_events_on_job_submission(self, job, current_time):
        """Predict the job's run time, queue it, and start whatever fits now."""
        self.cpu_snapshot.archive_old_slices(current_time)
        self.predictor.predict(job, current_time, self.running_jobs)
        # Remember the very first prediction for later analysis.
        if not hasattr(job,"initial_prediction"):
            job.initial_prediction=job.predicted_run_time
        self.unscheduled_jobs.append(job)
        return [
            JobStartEvent(current_time, job)
            for job in self._schedule_jobs(current_time)
        ]

    def new_events_on_job_termination(self, job, current_time):
        """Feed the actual run time back into the predictor, reclaim the job's
        unused tail, and start any jobs that now fit."""
        self.predictor.fit(job, current_time)
        # The "ninetynine" corrector additionally maintains a percentile estimator.
        if self.corrector.__name__=="ninetynine":
            self.pestimator.fit(job.actual_run_time/job.user_estimated_run_time)
        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        return [
            JobStartEvent(current_time, job)
            for job in self._schedule_jobs(current_time)
        ]

    def new_events_on_job_under_prediction(self, job, current_time):
        """A running job outlived its prediction: extend it with a corrected one."""
        pass #assert job.predicted_run_time <= job.user_estimated_run_time
        # Track how many times this job has been under-predicted.
        if not hasattr(job,"num_underpredict"):
            job.num_underpredict = 0
        else:
            job.num_underpredict += 1
        if self.corrector.__name__=="ninetynine":
            new_predicted_run_time = self.corrector(self.pestimator,job,current_time)
        else:
            new_predicted_run_time = self.corrector(job, current_time)
        #set the new predicted runtime
        self.cpu_snapshot.assignTailofJobToTheCpuSlices(job, new_predicted_run_time)
        job.predicted_run_time = new_predicted_run_time
        return [JobStartEvent(current_time, job)]

    def _schedule_jobs(self, current_time):
        "Schedules jobs that can run right now, and returns them"
        jobs = self._schedule_head_of_list(current_time)
        jobs += self._backfill_jobs(current_time)
        return jobs

    def _schedule_head_of_list(self, current_time):
        """Start jobs strictly from the front of the list while they fit."""
        result = []
        while True:
            if len(self.unscheduled_jobs) == 0:
                break
            # Try to schedule the first job
            if self.cpu_snapshot.free_processors_available_at(current_time) >= self.unscheduled_jobs[0].num_required_processors:
                job = self.unscheduled_jobs.pop(0)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
            else:
                # first job can't be scheduled
                break
        return result

    def _backfill_jobs(self, current_time):
        """Backfill tail jobs in SJF order around a temporary head-job reservation."""
        if len(self.unscheduled_jobs) <= 1:
            return []
        result = []
        first_job = self.unscheduled_jobs[0]
        tail = list_copy(self.unscheduled_jobs[1:])
        # Shortest-job-first order maximizes backfill opportunities.
        tail_of_jobs_by_sjf_order = sorted(tail, key=sjf_sort_key)
        # Reserve the head job so backfilled jobs cannot delay it.
        self.cpu_snapshot.assignJobEarliest(first_job, current_time)
        for job in tail_of_jobs_by_sjf_order:
            if self.cpu_snapshot.canJobStartNow(job, current_time):
                job.is_backfilled = 1
                self.unscheduled_jobs.remove(job)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
        # Remove the temporary reservation.
        self.cpu_snapshot.delJobFromCpuSlices(first_job)
        return result
class OrigProbabilisticEasyScheduler(Scheduler):
    """Probabilistic EASY backfilling (a version of Feitelson & Nissimov, June 2007).

    A tail job is backfilled only if the estimated probability that it delays
    the reserved head job stays within `threshold`.  The probabilities come
    from per-user empirical run-time distributions binned by powers of two.
    """

    def __init__(self, num_processors, threshold = 0.2, window_size=150):
        super(OrigProbabilisticEasyScheduler, self).__init__(num_processors)
        self.threshold = threshold
        self.window_size = window_size # a parameter for the distribution
        self.cpu_snapshot = CpuSnapshot(num_processors)
        # user_id -> Distribution of that user's past job run times.
        self.user_distribution = {}
        self.unscheduled_jobs = []
        self.currently_running_jobs = []
        # M[n, c]: probability that the first n running jobs free at least c
        # processors by a given time; allocated once and reused by bottle_neck().
        self.M = {}
        for c in xrange(self.num_processors + 1):
            for n in xrange(self.num_processors + 1):
                self.M[c, n] = 0.0
        self.max_user_rounded_estimated_run_time = 0
        self.prev_max_user_rounded_estimated_run_time = 0

    def new_events_on_job_submission(self, job, current_time):
        """Register the job, widen user distributions if needed, start what fits."""
        rounded_up_estimated_time = _round_time_up(job.user_estimated_run_time)

        if rounded_up_estimated_time > self.max_user_rounded_estimated_run_time:
            self.prev_max_user_rounded_estimated_run_time = self.max_user_rounded_estimated_run_time
            self.max_user_rounded_estimated_run_time = rounded_up_estimated_time

        if not self.user_distribution.has_key(job.user_id):
            self.user_distribution[job.user_id] = Distribution(job, self.window_size)
        self.user_distribution[job.user_id].touch(2 * self.max_user_rounded_estimated_run_time)

        # A new global maximum forces every active user's bins to cover it.
        if self.prev_max_user_rounded_estimated_run_time < self.max_user_rounded_estimated_run_time:
            for tmp_job in self.currently_running_jobs:
                self.user_distribution[tmp_job.user_id].touch(2 * self.max_user_rounded_estimated_run_time)

        self.cpu_snapshot.archive_old_slices(current_time)
        self.unscheduled_jobs.append(job)
        return [
            JobStartEvent(current_time, job)
            for job in self._schedule_jobs(current_time)
        ]

    def new_events_on_job_termination(self, job, current_time):
        """Record the actual run time, reclaim the job's tail, start waiting jobs."""
        self.user_distribution[job.user_id].add_job(job)
        self.currently_running_jobs.remove(job)
        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        return [
            JobStartEvent(current_time, job)
            for job in self._schedule_jobs(current_time)
        ]

    def _schedule_jobs(self, current_time):
        "Schedules jobs that can run right now, and returns them"
        jobs = self._schedule_head_of_list(current_time)
        jobs += self._backfill_jobs(current_time)
        return jobs

    def _schedule_head_of_list(self, current_time):
        """Start jobs strictly from the front of the list while they fit."""
        result = []
        while True:
            if len(self.unscheduled_jobs) == 0:
                break
            # Try to schedule the first job.
            if self.cpu_snapshot.free_processors_available_at(current_time) >= self.unscheduled_jobs[0].num_required_processors:
                job = self.unscheduled_jobs.pop(0)
                self.currently_running_jobs.append(job)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
            else:
                break  # first job can't be scheduled
        return result

    def _backfill_jobs(self, current_time):
        """Backfill tail jobs whose risk of delaying the head job is acceptable."""
        if len(self.unscheduled_jobs) <= 1:
            return []
        result = []
        tail = list_copy(self.unscheduled_jobs[1:])
        for job in tail:
            if self.can_be_probabilistically_backfilled(job, current_time):
                self.unscheduled_jobs.remove(job)
                self.currently_running_jobs.append(job)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
        return result

    def can_be_probabilistically_backfilled(self, job, current_time):
        """True when `job` fits right now and its accumulated probability of
        delaying the head job (over doubling time horizons) is within the
        threshold."""
        assert len(self.unscheduled_jobs) >= 2
        assert job in self.unscheduled_jobs[1:]

        if self.cpu_snapshot.free_processors_available_at(current_time) < job.num_required_processors:
            return False

        first_job = self.unscheduled_jobs[0]
        prediction = 0.0
        max_bottle_neck = 0.0
        t = 1
        # Horizons double up to twice the user's estimate (distribution range).
        while t < 2 * job.user_estimated_run_time:
            job_probability_to_end_at_t = self.probability_to_end_at(t, job)
            max_bottle_neck = max(max_bottle_neck,
                                  self.bottle_neck(t, job, first_job, current_time))
            prediction += job_probability_to_end_at_t * max_bottle_neck
            t = t * 2

        return prediction <= self.threshold

    def bottle_neck(self, time, second_job, first_job, current_time):
        """Probability that running `second_job` now leaves too few free
        processors for `first_job` at `time`, via a dynamic program over
        the currently running jobs."""
        C = first_job.num_required_processors + second_job.num_required_processors
        K = min(self.num_processors, C)
        # M[n, c] is the probability that the first n running jobs will
        # release at least c processors at `time`.
        M = self.M
        num_of_currently_running_jobs = len(self.currently_running_jobs)

        for c in xrange(K + 1):
            M[0, c] = 0.0
        for n in xrange(1, num_of_currently_running_jobs + 1):
            M[n, 0] = 1.0

        for n in xrange(1, num_of_currently_running_jobs + 1):
            job_n = self.currently_running_jobs[n - 1]  # the n'th job: a list has a zero index
            job_n_required_processors = job_n.num_required_processors
            Pn = self.probability_of_running_job_to_end_upto(time, current_time, job_n)
            for c in xrange(1, job_n_required_processors):
                val = M[n - 1, c]
                M[n, c] = val + (1.0 - val) * Pn
            for c in xrange(job_n_required_processors, K + 1):
                val = M[n - 1, c]
                M[n, c] = val + (M[n, c - job_n_required_processors] - val) * Pn

        last_row_index = num_of_currently_running_jobs
        if C <= K:
            result = M[last_row_index, first_job.num_required_processors] - M[last_row_index, C]
        else:
            result = M[last_row_index, first_job.num_required_processors]

        # Clamp numerical noise into [0, 1].
        if result < 0:
            result = 0.0
        elif result > 1:
            # BUGFIX: the original assigned to a misspelled name (`reuslt`),
            # leaving `result` above 1 and tripping the assertion below.
            result = 1.0
        assert 0 <= result <= 1
        return result

    def probability_of_running_job_to_end_upto(self, time, current_time, job):
        """Probability (from the owner's distribution) that the running `job`
        ends within `time` from now, conditioned on its elapsed run time."""
        run_time = current_time - job.start_to_run_at_time
        rounded_down_run_time = _round_time_down(run_time)
        rounded_up_estimated_remaining_duration = _round_time_up(
            job.user_estimated_run_time - rounded_down_run_time)
        if time >= rounded_up_estimated_remaining_duration:
            return 1.0

        num_of_jobs_in_first_bins = 0
        num_of_jobs_in_middle_bins = 0.0
        num_of_jobs_in_last_bins = 0
        job_distribution = self.user_distribution[job.user_id]

        for (key, value) in job_distribution.bins.iteritems():
            if key > rounded_up_estimated_remaining_duration:
                num_of_jobs_in_last_bins += value    # beyond the estimate: irrelevant
            elif key <= rounded_down_run_time:
                num_of_jobs_in_first_bins += value   # job already outlived these: irrelevant
            elif key <= time + rounded_down_run_time:
                num_of_jobs_in_middle_bins += value
            elif time + rounded_down_run_time > key / 2:
                # Partial overlap with bin [key/2, key): count proportionally.
                num_of_jobs_in_middle_bins += float(
                    value * (time + rounded_down_run_time - (key / 2))) / (key / 2)
            # else: at the tail of the middle bin the job won't terminate,
            # because of conditional probability.

        num_of_irrelevant_jobs = num_of_jobs_in_first_bins + num_of_jobs_in_last_bins
        num_of_relevant_jobs = job_distribution.number_of_jobs_added - num_of_irrelevant_jobs
        assert 0 <= num_of_jobs_in_middle_bins <= num_of_relevant_jobs, \
            str(num_of_jobs_in_middle_bins)+str(" ")+str(num_of_relevant_jobs)
        # +0.1 avoids division by zero when no relevant jobs exist.
        result = num_of_jobs_in_middle_bins / (num_of_relevant_jobs + 0.1)
        return result

    def probability_to_end_at(self, time, job):
        """Probability that `job` ends exactly in the distribution bin at
        `time`, excluding bins beyond twice the user's estimate."""
        job_distribution = self.user_distribution[job.user_id]
        assert job_distribution.bins.has_key(time) == True

        num_of_jobs_in_last_bins = 0
        rounded_up_user_estimated_run_time = 2 * job.user_estimated_run_time - 1
        for key in job_distribution.bins.keys():
            if key > rounded_up_user_estimated_run_time:
                num_of_jobs_in_last_bins += job_distribution.bins[key]

        num_of_relevant_jobs = job_distribution.number_of_jobs_added - num_of_jobs_in_last_bins
        assert 0 <= job_distribution.bins[time] <= num_of_relevant_jobs,\
            str(time)+str(" ")+ str(job_distribution.bins[time])+str(" ")+str(num_of_relevant_jobs)
        # +0.1 avoids division by zero when no relevant jobs exist.
        result = float(job_distribution.bins[time]) / (num_of_relevant_jobs + 0.1)
        return result
class EasyPlusPlusScheduler(Scheduler):
    """ This algorithm implements the algorithm in the paper of Tsafrir, Etzion, Feitelson, june 2007? """

    def __init__(self, num_processors):
        super(EasyPlusPlusScheduler, self).__init__(num_processors)
        self.cpu_snapshot = CpuSnapshot(num_processors)
        self.unscheduled_jobs = []
        # Last two actual run times per user; their average predicts the next job.
        self.user_run_time_prev = {}
        self.user_run_time_last = {}

    def new_events_on_job_submission(self, job, current_time):
        """Queue the job (initializing the user's history on first sight)
        and start whatever fits now."""
        if not self.user_run_time_last.has_key(job.user_id):
            self.user_run_time_prev[job.user_id] = None
            self.user_run_time_last[job.user_id] = None
        self.cpu_snapshot.archive_old_slices(current_time)
        self.unscheduled_jobs.append(job)
        return [
            JobStartEvent(current_time, job)
            for job in self._schedule_jobs(current_time)
        ]

    def new_events_on_job_termination(self, job, current_time):
        """Shift the user's run-time history, reclaim the job's unused tail,
        and start any jobs that now fit."""
        assert self.user_run_time_last.has_key(job.user_id) == True
        assert self.user_run_time_prev.has_key(job.user_id) == True
        self.user_run_time_prev[job.user_id] = self.user_run_time_last[job.user_id]
        self.user_run_time_last[job.user_id] = job.actual_run_time
        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        return [
            JobStartEvent(current_time, job)
            for job in self._schedule_jobs(current_time)
        ]

    def new_events_on_job_under_prediction(self, job, current_time):
        """A running job outlived its prediction: fall back to the user estimate."""
        assert job.predicted_run_time <= job.user_estimated_run_time
        self.cpu_snapshot.assignTailofJobToTheCpuSlices(job)
        job.predicted_run_time = job.user_estimated_run_time
        return []

    def _schedule_jobs(self, current_time):
        "Schedules jobs that can run right now, and returns them"
        # Refresh predictions: average of the user's last two run times,
        # capped by the user's own estimate.
        for job in self.unscheduled_jobs:
            if self.user_run_time_prev[job.user_id] != None:
                average = int((self.user_run_time_last[job.user_id] + self.user_run_time_prev[job.user_id]) / 2)
                job.predicted_run_time = min(job.user_estimated_run_time, average)
        jobs = self._schedule_head_of_list(current_time)
        jobs += self._backfill_jobs(current_time)
        return jobs

    def _schedule_head_of_list(self, current_time):
        """Start jobs strictly from the front of the list while they fit."""
        result = []
        while True:
            if len(self.unscheduled_jobs) == 0:
                break
            # Try to schedule the first job
            if self.cpu_snapshot.free_processors_available_at(current_time) >= self.unscheduled_jobs[0].num_required_processors:
                job = self.unscheduled_jobs.pop(0)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
            else:
                # first job can't be scheduled
                break
        return result

    def _backfill_jobs(self, current_time):
        """Backfill tail jobs in SJF order around a temporary head-job reservation."""
        if len(self.unscheduled_jobs) <= 1:
            return []
        result = []
        first_job = self.unscheduled_jobs[0]
        tail = list_copy(self.unscheduled_jobs[1:])
        # Shortest-job-first order maximizes backfill opportunities.
        tail_of_jobs_by_sjf_order = sorted(tail, key=sjf_sort_key)
        # Reserve the head job so backfilled jobs cannot delay it.
        self.cpu_snapshot.assignJobEarliest(first_job, current_time)
        for job in tail_of_jobs_by_sjf_order:
            if self.cpu_snapshot.canJobStartNow(job, current_time):
                self.unscheduled_jobs.remove(job)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
        # Remove the temporary reservation.
        self.cpu_snapshot.delJobFromCpuSlices(first_job)
        return result
class OrigProbabilisticEasyScheduler(Scheduler):
    """Probabilistic EASY backfilling (a version of Feitelson & Nissimov, June 2007).

    A tail job is backfilled only if the estimated probability that it delays
    the reserved head job stays within `threshold`.  The probabilities come
    from per-user empirical run-time distributions binned by powers of two.
    """

    def __init__(self, num_processors, threshold=0.2, window_size=150):
        super(OrigProbabilisticEasyScheduler, self).__init__(num_processors)
        self.threshold = threshold
        self.window_size = window_size  # a parameter for the distribution
        self.cpu_snapshot = CpuSnapshot(num_processors)
        # user_id -> Distribution of that user's past job run times.
        self.user_distribution = {}
        self.unscheduled_jobs = []
        self.currently_running_jobs = []
        # M[n, c]: probability that the first n running jobs free at least c
        # processors by a given time; allocated once and reused by bottle_neck().
        self.M = {}
        for c in xrange(self.num_processors + 1):
            for n in xrange(self.num_processors + 1):
                self.M[c, n] = 0.0
        self.max_user_rounded_estimated_run_time = 0
        self.prev_max_user_rounded_estimated_run_time = 0

    def new_events_on_job_submission(self, job, current_time):
        """Register the job, widen user distributions if needed, start what fits."""
        rounded_up_estimated_time = _round_time_up(job.user_estimated_run_time)

        if rounded_up_estimated_time > self.max_user_rounded_estimated_run_time:
            self.prev_max_user_rounded_estimated_run_time = self.max_user_rounded_estimated_run_time
            self.max_user_rounded_estimated_run_time = rounded_up_estimated_time

        if not self.user_distribution.has_key(job.user_id):
            self.user_distribution[job.user_id] = Distribution(job, self.window_size)
        self.user_distribution[job.user_id].touch(2 * self.max_user_rounded_estimated_run_time)

        # A new global maximum forces every active user's bins to cover it.
        if self.prev_max_user_rounded_estimated_run_time < self.max_user_rounded_estimated_run_time:
            for tmp_job in self.currently_running_jobs:
                self.user_distribution[tmp_job.user_id].touch(2 * self.max_user_rounded_estimated_run_time)

        self.cpu_snapshot.archive_old_slices(current_time)
        self.unscheduled_jobs.append(job)
        return [
            JobStartEvent(current_time, job)
            for job in self._schedule_jobs(current_time)
        ]

    def new_events_on_job_termination(self, job, current_time):
        """Record the actual run time, reclaim the job's tail, start waiting jobs."""
        self.user_distribution[job.user_id].add_job(job)
        self.currently_running_jobs.remove(job)
        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        return [
            JobStartEvent(current_time, job)
            for job in self._schedule_jobs(current_time)
        ]

    def _schedule_jobs(self, current_time):
        "Schedules jobs that can run right now, and returns them"
        jobs = self._schedule_head_of_list(current_time)
        jobs += self._backfill_jobs(current_time)
        return jobs

    def _schedule_head_of_list(self, current_time):
        """Start jobs strictly from the front of the list while they fit."""
        result = []
        while True:
            if len(self.unscheduled_jobs) == 0:
                break
            # Try to schedule the first job.
            if self.cpu_snapshot.free_processors_available_at(current_time) >= self.unscheduled_jobs[0].num_required_processors:
                job = self.unscheduled_jobs.pop(0)
                self.currently_running_jobs.append(job)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
            else:
                break  # first job can't be scheduled
        return result

    def _backfill_jobs(self, current_time):
        """Backfill tail jobs whose risk of delaying the head job is acceptable."""
        if len(self.unscheduled_jobs) <= 1:
            return []
        result = []
        tail = list_copy(self.unscheduled_jobs[1:])
        for job in tail:
            if self.can_be_probabilistically_backfilled(job, current_time):
                self.unscheduled_jobs.remove(job)
                self.currently_running_jobs.append(job)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
        return result

    def can_be_probabilistically_backfilled(self, job, current_time):
        """True when `job` fits right now and its accumulated probability of
        delaying the head job (over doubling time horizons) is within the
        threshold."""
        assert len(self.unscheduled_jobs) >= 2
        assert job in self.unscheduled_jobs[1:]

        if self.cpu_snapshot.free_processors_available_at(current_time) < job.num_required_processors:
            return False

        first_job = self.unscheduled_jobs[0]
        prediction = 0.0
        max_bottle_neck = 0.0
        t = 1
        # Horizons double up to twice the user's estimate (distribution range).
        while t < 2 * job.user_estimated_run_time:
            job_probability_to_end_at_t = self.probability_to_end_at(t, job)
            max_bottle_neck = max(max_bottle_neck,
                                  self.bottle_neck(t, job, first_job, current_time))
            prediction += job_probability_to_end_at_t * max_bottle_neck
            t = t * 2

        return prediction <= self.threshold

    def bottle_neck(self, time, second_job, first_job, current_time):
        """Probability that running `second_job` now leaves too few free
        processors for `first_job` at `time`, via a dynamic program over
        the currently running jobs."""
        C = first_job.num_required_processors + second_job.num_required_processors
        K = min(self.num_processors, C)
        # M[n, c] is the probability that the first n running jobs will
        # release at least c processors at `time`.
        M = self.M
        num_of_currently_running_jobs = len(self.currently_running_jobs)

        for c in xrange(K + 1):
            M[0, c] = 0.0
        for n in xrange(1, num_of_currently_running_jobs + 1):
            M[n, 0] = 1.0

        for n in xrange(1, num_of_currently_running_jobs + 1):
            job_n = self.currently_running_jobs[n - 1]  # the n'th job: a list has a zero index
            job_n_required_processors = job_n.num_required_processors
            Pn = self.probability_of_running_job_to_end_upto(time, current_time, job_n)
            for c in xrange(1, job_n_required_processors):
                val = M[n - 1, c]
                M[n, c] = val + (1.0 - val) * Pn
            for c in xrange(job_n_required_processors, K + 1):
                val = M[n - 1, c]
                M[n, c] = val + (M[n, c - job_n_required_processors] - val) * Pn

        last_row_index = num_of_currently_running_jobs
        if C <= K:
            result = M[last_row_index, first_job.num_required_processors] - M[last_row_index, C]
        else:
            result = M[last_row_index, first_job.num_required_processors]

        # Clamp numerical noise into [0, 1].
        if result < 0:
            result = 0.0
        elif result > 1:
            # BUGFIX: the original assigned to a misspelled name (`reuslt`),
            # leaving `result` above 1 and tripping the assertion below.
            result = 1.0
        assert 0 <= result <= 1
        return result

    def probability_of_running_job_to_end_upto(self, time, current_time, job):
        """Probability (from the owner's distribution) that the running `job`
        ends within `time` from now, conditioned on its elapsed run time."""
        run_time = current_time - job.start_to_run_at_time
        rounded_down_run_time = _round_time_down(run_time)
        rounded_up_estimated_remaining_duration = _round_time_up(
            job.user_estimated_run_time - rounded_down_run_time)
        if time >= rounded_up_estimated_remaining_duration:
            return 1.0

        num_of_jobs_in_first_bins = 0
        num_of_jobs_in_middle_bins = 0.0
        num_of_jobs_in_last_bins = 0
        job_distribution = self.user_distribution[job.user_id]

        for (key, value) in job_distribution.bins.iteritems():
            if key > rounded_up_estimated_remaining_duration:
                num_of_jobs_in_last_bins += value    # beyond the estimate: irrelevant
            elif key <= rounded_down_run_time:
                num_of_jobs_in_first_bins += value   # job already outlived these: irrelevant
            elif key <= time + rounded_down_run_time:
                num_of_jobs_in_middle_bins += value
            elif time + rounded_down_run_time > key / 2:
                # Partial overlap with bin [key/2, key): count proportionally.
                num_of_jobs_in_middle_bins += float(
                    value * (time + rounded_down_run_time - (key / 2))) / (key / 2)
            # else: at the tail of the middle bin the job won't terminate,
            # because of conditional probability.

        num_of_irrelevant_jobs = num_of_jobs_in_first_bins + num_of_jobs_in_last_bins
        num_of_relevant_jobs = job_distribution.number_of_jobs_added - num_of_irrelevant_jobs
        assert 0 <= num_of_jobs_in_middle_bins <= num_of_relevant_jobs, \
            str(num_of_jobs_in_middle_bins)+str(" ")+str(num_of_relevant_jobs)
        # +0.1 avoids division by zero when no relevant jobs exist.
        result = num_of_jobs_in_middle_bins / (num_of_relevant_jobs + 0.1)
        return result

    def probability_to_end_at(self, time, job):
        """Probability that `job` ends exactly in the distribution bin at
        `time`, excluding bins beyond twice the user's estimate."""
        job_distribution = self.user_distribution[job.user_id]
        assert job_distribution.bins.has_key(time) == True

        num_of_jobs_in_last_bins = 0
        rounded_up_user_estimated_run_time = 2 * job.user_estimated_run_time - 1
        for key in job_distribution.bins.keys():
            if key > rounded_up_user_estimated_run_time:
                num_of_jobs_in_last_bins += job_distribution.bins[key]

        num_of_relevant_jobs = job_distribution.number_of_jobs_added - num_of_jobs_in_last_bins
        assert 0 <= job_distribution.bins[time] <= num_of_relevant_jobs,\
            str(time)+str(" ")+ str(job_distribution.bins[time])+str(" ")+str(num_of_relevant_jobs)
        # +0.1 avoids division by zero when no relevant jobs exist.
        result = float(job_distribution.bins[time]) / (num_of_relevant_jobs + 0.1)
        return result
class EasyPlusPlusScheduler(Scheduler):
    """ This algorithm implements the algorithm in the paper of Tsafrir, Etzion, Feitelson, june 2007? """

    def __init__(self, num_processors):
        super(EasyPlusPlusScheduler, self).__init__(num_processors)
        self.cpu_snapshot = CpuSnapshot(num_processors)
        self.unscheduled_jobs = []
        # Last two actual run times per user; their average predicts the next job.
        self.user_run_time_prev = {}
        self.user_run_time_last = {}

    def new_events_on_job_submission(self, job, current_time):
        """Queue the job (initializing the user's history on first sight)
        and start whatever fits now."""
        if not self.user_run_time_last.has_key(job.user_id):
            self.user_run_time_prev[job.user_id] = None
            self.user_run_time_last[job.user_id] = None
        self.cpu_snapshot.archive_old_slices(current_time)
        self.unscheduled_jobs.append(job)
        return [
            JobStartEvent(current_time, job)
            for job in self._schedule_jobs(current_time)
        ]

    def new_events_on_job_termination(self, job, current_time):
        """Shift the user's run-time history, reclaim the job's unused tail,
        and start any jobs that now fit."""
        assert self.user_run_time_last.has_key(job.user_id) == True
        assert self.user_run_time_prev.has_key(job.user_id) == True
        self.user_run_time_prev[job.user_id] = self.user_run_time_last[job.user_id]
        self.user_run_time_last[job.user_id] = job.actual_run_time
        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        return [
            JobStartEvent(current_time, job)
            for job in self._schedule_jobs(current_time)
        ]

    def new_events_on_job_under_prediction(self, job, current_time):
        """A running job outlived its prediction: fall back to the user estimate."""
        assert job.predicted_run_time <= job.user_estimated_run_time
        self.cpu_snapshot.assignTailofJobToTheCpuSlices(job)
        job.predicted_run_time = job.user_estimated_run_time
        return []

    def _schedule_jobs(self, current_time):
        "Schedules jobs that can run right now, and returns them"
        # Refresh predictions: average of the user's last two run times,
        # capped by the user's own estimate.
        for job in self.unscheduled_jobs:
            if self.user_run_time_prev[job.user_id] != None:
                average = int((self.user_run_time_last[job.user_id] + self.user_run_time_prev[job.user_id])/ 2)
                job.predicted_run_time = min (job.user_estimated_run_time, average)
        jobs = self._schedule_head_of_list(current_time)
        jobs += self._backfill_jobs(current_time)
        return jobs

    def _schedule_head_of_list(self, current_time):
        """Start jobs strictly from the front of the list while they fit."""
        result = []
        while True:
            if len(self.unscheduled_jobs) == 0:
                break
            # Try to schedule the first job
            if self.cpu_snapshot.free_processors_available_at(current_time) >= self.unscheduled_jobs[0].num_required_processors:
                job = self.unscheduled_jobs.pop(0)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
            else:
                # first job can't be scheduled
                break
        return result

    def _backfill_jobs(self, current_time):
        """Backfill tail jobs in SJF order around a temporary head-job reservation."""
        if len(self.unscheduled_jobs) <= 1:
            return []
        result = []
        first_job = self.unscheduled_jobs[0]
        tail = list_copy(self.unscheduled_jobs[1:])
        # Shortest-job-first order maximizes backfill opportunities.
        tail_of_jobs_by_sjf_order = sorted(tail, key=sjf_sort_key)
        # Reserve the head job so backfilled jobs cannot delay it.
        self.cpu_snapshot.assignJobEarliest(first_job, current_time)
        for job in tail_of_jobs_by_sjf_order:
            if self.cpu_snapshot.canJobStartNow(job, current_time):
                self.unscheduled_jobs.remove(job)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
        # Remove the temporary reservation.
        self.cpu_snapshot.delJobFromCpuSlices(first_job)
        return result
class EasyBackfillScheduler(Scheduler):
    """EASY backfilling: start jobs FCFS from the head of the queue, then let
    later jobs jump ahead only when they cannot delay the first queued job."""

    def __init__(self, num_processors):
        super(EasyBackfillScheduler, self).__init__(num_processors)
        self.cpu_snapshot = CpuSnapshot(num_processors)
        self.unscheduled_jobs = []

    def new_events_on_job_submission(self, just_submitted_job, current_time):
        """Queue the new job, then start whatever can run now; returns a
        JobStartEvent for every job started at current_time."""
        # TODO: rescheduling the whole queue on each single-job submission is
        # a probable performance bottleneck; an incremental pass would suffice.
        self.cpu_snapshot.archive_old_slices(current_time)
        self.unscheduled_jobs.append(just_submitted_job)
        started = self._schedule_jobs(current_time)
        return [JobStartEvent(current_time, each) for each in started]

    def new_events_on_job_termination(self, job, current_time):
        """Trim the terminated job's reserved tail (it may have finished before
        its user estimate), then start whatever can run now."""
        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        started = self._schedule_jobs(current_time)
        return [JobStartEvent(current_time, each) for each in started]

    def _schedule_jobs(self, current_time):
        """Start every job that can run right now and return them."""
        started = self._schedule_head_of_list(current_time)
        started += self._backfill_jobs(current_time)
        # accounting counter -- presumably initialized by the Scheduler base; confirm
        self.totalScheduledJobs += len(started)
        return started

    def _schedule_head_of_list(self, current_time):
        """Pop jobs FCFS from the queue head while enough processors are free."""
        started = []
        while self.unscheduled_jobs:
            head = self.unscheduled_jobs[0]
            free_now = self.cpu_snapshot.free_processors_available_at(current_time)
            if free_now < head.num_required_processors:
                break  # the head job must wait; FCFS forbids starting anything behind it here
            self.unscheduled_jobs.pop(0)
            self.cpu_snapshot.assignJob(head, current_time)
            started.append(head)
        return started

    def _backfill_jobs(self, current_time):
        """Start queued jobs beyond the head that fit right now without
        delaying the head job; updates the cpu snapshot and returns them."""
        if len(self.unscheduled_jobs) <= 1:
            return []
        backfilled = []
        for candidate in list_copy(self.unscheduled_jobs[1:]):
            if not self.canBeBackfilled(candidate, current_time):
                continue
            self.unscheduled_jobs.remove(candidate)
            self.cpu_snapshot.assignJob(candidate, current_time)
            backfilled.append(candidate)
        return backfilled

    def canBeBackfilled(self, second_job, current_time):
        """True iff second_job can start now without delaying the earliest
        possible start of the job at the queue head."""
        assert len(self.unscheduled_jobs) >= 2
        assert second_job in self.unscheduled_jobs[1:]
        free_now = self.cpu_snapshot.free_processors_available_at(current_time)
        if free_now < second_job.num_required_processors:
            return False
        # Reserve the head job at its earliest slot in a scratch snapshot; if
        # second_job still fits now, it is "independent" of the head job.
        shadow = self.cpu_snapshot.copy()
        shadow.assignJobEarliest(self.unscheduled_jobs[0], current_time)
        return shadow.canJobStartNow(second_job, current_time)
class EasyPlusPlusScheduler(Scheduler):
    """EASY++ scheduler (Tsafrir, Etzion, Feitelson, 2007) driven by a
    pluggable runtime predictor and an under-prediction corrector: EASY
    backfilling where backfill candidates are tried in SJF order using
    predicted (rather than user-estimated) runtimes.

    NOTE(review): another class with this same name appears earlier in this
    file; at import time this later definition shadows the earlier one.
    """
    I_NEED_A_PREDICTOR = True

    def __init__(self, options):
        super(EasyPlusPlusScheduler, self).__init__(options)
        self.init_predictor(options)
        self.init_corrector(options)
        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])
        self.unscheduled_jobs = []
        # NOTE(review): debug trace of (actual, predicted) runtimes; the handle
        # is never closed and the filename is hard-coded -- consider removing
        # it or closing it in a teardown hook.
        self.ff = open("times-epp-sgd.txt", 'w')

    def new_events_on_job_submission(self, job, current_time):
        """Predict the new job's runtime, queue it, and start every job that
        can run at current_time."""
        self.cpu_snapshot.archive_old_slices(current_time)
        self.predictor.predict(job, current_time, self.running_jobs)
        self.ff.write("%d\t%d\n" % (job.actual_run_time, job.predicted_run_time))
        self.ff.flush()
        if not hasattr(job, "initial_prediction"):
            job.initial_prediction = job.predicted_run_time
        self.unscheduled_jobs.append(job)
        return [JobStartEvent(current_time, started)
                for started in self._schedule_jobs(current_time)]

    def new_events_on_job_termination(self, job, current_time):
        """Feed the finished job back to the predictor, free its unused
        reservation tail, and start every now-runnable job."""
        self.predictor.fit(job, current_time)
        if self.corrector.__name__ == "ninetynine":
            # BUGFIX: force true division -- under Python 2 the original
            # integer division truncated the actual/estimated ratio to 0 or 1,
            # starving the percentile estimator of real data.
            self.pestimator.fit(float(job.actual_run_time) / job.user_estimated_run_time)
        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        return [JobStartEvent(current_time, started)
                for started in self._schedule_jobs(current_time)]

    def new_events_on_job_under_prediction(self, job, current_time):
        """The job ran past its prediction: ask the corrector for a new
        prediction and extend the job's reservation accordingly."""
        assert job.predicted_run_time <= job.user_estimated_run_time
        if not hasattr(job, "num_underpredict"):
            job.num_underpredict = 0
        else:
            job.num_underpredict += 1
        # The "ninetynine" corrector additionally consumes the percentile estimator.
        if self.corrector.__name__ == "ninetynine":
            new_predicted_run_time = self.corrector(self.pestimator, job, current_time)
        else:
            new_predicted_run_time = self.corrector(job, current_time)
        # Re-reserve the job's tail under the corrected prediction.
        self.cpu_snapshot.assignTailofJobToTheCpuSlices(job, new_predicted_run_time)
        job.predicted_run_time = new_predicted_run_time
        return [JobStartEvent(current_time, job)]

    def _schedule_jobs(self, current_time):
        """Start jobs that can run right now: FCFS head first, then backfill."""
        jobs = self._schedule_head_of_list(current_time)
        jobs += self._backfill_jobs(current_time)
        return jobs

    def _schedule_head_of_list(self, current_time):
        """Start jobs in strict FCFS order while processors suffice; return them."""
        result = []
        while len(self.unscheduled_jobs) > 0:
            if (self.cpu_snapshot.free_processors_available_at(current_time)
                    >= self.unscheduled_jobs[0].num_required_processors):
                job = self.unscheduled_jobs.pop(0)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
            else:
                break  # FCFS: the head job blocks everything behind it
        return result

    def _backfill_jobs(self, current_time):
        """Backfill waiting jobs in SJF order without delaying the head job;
        marks each backfilled job with is_backfilled = 1.

        The head job is temporarily reserved at its earliest slot so that any
        candidate that still starts now is provably independent of it; the
        temporary reservation is removed before returning.
        """
        if len(self.unscheduled_jobs) <= 1:
            return []
        result = []
        first_job = self.unscheduled_jobs[0]
        tail = list_copy(self.unscheduled_jobs[1:])
        tail_of_jobs_by_sjf_order = sorted(tail, key=sjf_sort_key)
        self.cpu_snapshot.assignJobEarliest(first_job, current_time)
        for job in tail_of_jobs_by_sjf_order:
            if self.cpu_snapshot.canJobStartNow(job, current_time):
                job.is_backfilled = 1
                self.unscheduled_jobs.remove(job)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
        self.cpu_snapshot.delJobFromCpuSlices(first_job)
        return result
class EasyBackfillScheduler(Scheduler):
    """EASY backfill scheduler: FCFS at the queue head plus backfilling of
    later jobs that cannot delay the head job's earliest start.

    NOTE(review): an essentially identical class of the same name appears
    earlier in this file; at import time this later definition shadows the
    earlier one -- consider deduplicating.
    """

    def __init__(self, num_processors):
        super(EasyBackfillScheduler, self).__init__(num_processors)
        self.cpu_snapshot = CpuSnapshot(num_processors)
        self.unscheduled_jobs = []

    def new_events_on_job_submission(self, just_submitted_job, current_time):
        """Append the job to the waiting list and launch all now-runnable jobs,
        returning their JobStartEvents."""
        # TODO: only one job was added, yet the whole queue is rescheduled --
        # a likely performance bottleneck.
        self.cpu_snapshot.archive_old_slices(current_time)
        self.unscheduled_jobs.append(just_submitted_job)
        return [JobStartEvent(current_time, launched)
                for launched in self._schedule_jobs(current_time)]

    def new_events_on_job_termination(self, job, current_time):
        """Release the finished job's unused reservation tail (it may have
        ended before its user estimate), then launch all now-runnable jobs."""
        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        return [JobStartEvent(current_time, launched)
                for launched in self._schedule_jobs(current_time)]

    def _schedule_jobs(self, current_time):
        """Launch every job that can start at current_time; return the list."""
        launched = self._schedule_head_of_list(current_time)
        launched.extend(self._backfill_jobs(current_time))
        # accounting counter -- presumably maintained by the Scheduler base; confirm
        self.totalScheduledJobs += len(launched)
        return launched

    def _schedule_head_of_list(self, current_time):
        """Greedily launch jobs in strict FCFS order from the queue front."""
        launched = []
        while self.unscheduled_jobs and (
                self.cpu_snapshot.free_processors_available_at(current_time)
                >= self.unscheduled_jobs[0].num_required_processors):
            front = self.unscheduled_jobs.pop(0)
            self.cpu_snapshot.assignJob(front, current_time)
            launched.append(front)
        return launched

    def _backfill_jobs(self, current_time):
        """Launch non-head jobs that fit now without pushing back the head job;
        updates the cpu snapshot and returns the backfilled jobs."""
        if len(self.unscheduled_jobs) <= 1:
            return []
        launched = []
        for waiting in list_copy(self.unscheduled_jobs[1:]):
            if self.canBeBackfilled(waiting, current_time):
                self.unscheduled_jobs.remove(waiting)
                self.cpu_snapshot.assignJob(waiting, current_time)
                launched.append(waiting)
        return launched

    def canBeBackfilled(self, second_job, current_time):
        """Return True when second_job can start immediately without delaying
        the earliest start of the job at the queue head."""
        assert len(self.unscheduled_jobs) >= 2
        assert second_job in self.unscheduled_jobs[1:]
        if (self.cpu_snapshot.free_processors_available_at(current_time)
                < second_job.num_required_processors):
            return False
        # Tentatively reserve the head job at its earliest slot in a copy of
        # the snapshot; if second_job still starts now, the two are independent.
        trial = self.cpu_snapshot.copy()
        trial.assignJobEarliest(self.unscheduled_jobs[0], current_time)
        return trial.canJobStartNow(second_job, current_time)