class FcfsScheduler(Scheduler):
    """First-come-first-served scheduler: jobs start strictly in submission order."""

    def __init__(self, options):
        super(FcfsScheduler, self).__init__(options)
        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])
        # FIFO queue of jobs that have been submitted but not yet started.
        self.waiting_queue_of_jobs = []

    def new_events_on_job_submission(self, job, current_time):
        # Queue the new arrival, then start every queued job that now fits.
        self.cpu_snapshot.archive_old_slices(current_time)
        self.waiting_queue_of_jobs.append(job)
        started = self._schedule_jobs(current_time)
        return [JobStartEvent(current_time, started_job) for started_job in started]

    def new_events_on_job_termination(self, job, current_time):
        # Reclaim the unused tail of the finished job, then start waiting jobs.
        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        started = self._schedule_jobs(current_time)
        return [JobStartEvent(current_time, started_job) for started_job in started]

    def _schedule_jobs(self, current_time):
        """Start jobs from the head of the queue while processors are free.

        Stops at the first job that does not fit — strict FCFS never skips
        over a blocked head job.
        """
        started_jobs = []
        while self.waiting_queue_of_jobs:
            head = self.waiting_queue_of_jobs[0]
            free_now = self.cpu_snapshot.free_processors_available_at(current_time)
            if free_now < head.num_required_processors:
                break  # head job blocks everything behind it
            self.waiting_queue_of_jobs.pop(0)
            self.cpu_snapshot.assignJob(head, current_time)
            started_jobs.append(head)
        return started_jobs
class ConservativeScheduler(Scheduler):
    """Conservative backfilling: every job receives a firm reservation at
    submission time, and reservations may only move earlier afterwards."""

    def __init__(self, options):
        super(ConservativeScheduler, self).__init__(options)
        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])
        # All jobs not yet terminated, in submission order.
        self.unfinished_jobs_by_submit_time = []

    def new_events_on_job_submission(self, job, current_time):
        # Reserve the earliest feasible slot for the new job immediately.
        self.cpu_snapshot.archive_old_slices(current_time)
        self.unfinished_jobs_by_submit_time.append(job)
        self.cpu_snapshot.assignJobEarliest(job, current_time)
        return [JobStartEvent(job.start_to_run_at_time, job)]

    def new_events_on_job_termination(self, job, current_time):
        """Drop the finished job's unused tail (it may have ended before its
        declared duration), then try to pull the remaining reservations
        earlier; returns the resulting JobStartEvent list."""
        self.cpu_snapshot.archive_old_slices(current_time)
        self.unfinished_jobs_by_submit_time.remove(job)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        return self._reschedule_jobs(current_time)

    def _reschedule_jobs(self, current_time):
        events = []
        for pending_job in self.unfinished_jobs_by_submit_time:
            # Jobs already running cannot move (preemption is not allowed).
            if pending_job.start_to_run_at_time <= current_time:
                continue
            old_start = pending_job.start_to_run_at_time
            self.cpu_snapshot.delJobFromCpuSlices(pending_job)
            self.cpu_snapshot.assignJobEarliest(pending_job, current_time)
            # Rescheduling may only move a job earlier, never later.
            assert old_start >= pending_job.start_to_run_at_time
            if old_start != pending_job.start_to_run_at_time:
                events.append(JobStartEvent(pending_job.start_to_run_at_time, pending_job))
        return events
class FcfsScheduler(Scheduler):
    """First-come-first-served scheduler: jobs start strictly in submission order."""

    def __init__(self, options):
        super(FcfsScheduler, self).__init__(options)
        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])
        # FIFO queue of submitted-but-not-started jobs.
        self.waiting_queue_of_jobs = []

    def new_events_on_job_submission(self, job, current_time):
        """Queue the new job and return start events for every job that can run now."""
        self.cpu_snapshot.archive_old_slices(current_time)
        self.waiting_queue_of_jobs.append(job)
        return [
            JobStartEvent(current_time, job)
            for job in self._schedule_jobs(current_time)
        ]

    def new_events_on_job_termination(self, job, current_time):
        """Reclaim the finished job's unused tail, then start any waiting jobs that fit."""
        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        return [
            JobStartEvent(current_time, job)
            for job in self._schedule_jobs(current_time)
        ]

    def _schedule_jobs(self, current_time):
        """Start jobs from the head of the queue while processors are free.

        Stops at the first job that does not fit (strict FCFS: no skipping).
        """
        result = []
        while len(self.waiting_queue_of_jobs) > 0:
            job = self.waiting_queue_of_jobs[0]
            if self.cpu_snapshot.free_processors_available_at(current_time) >= job.num_required_processors:
                self.waiting_queue_of_jobs.pop(0)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
            else:
                # Head job blocks: nothing behind it may start.
                break
        return result
class ConservativeScheduler(Scheduler):
    """Conservative backfilling: every job gets a reservation at submission
    time; reservations are only ever moved earlier on terminations."""

    def __init__(self, options):
        super(ConservativeScheduler, self).__init__(options)
        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])
        # All not-yet-terminated jobs, kept in submission order.
        self.unfinished_jobs_by_submit_time = []

    def new_events_on_job_submission(self, job, current_time):
        """Reserve the earliest feasible slot for the job and return its start event."""
        self.cpu_snapshot.archive_old_slices(current_time)
        self.unfinished_jobs_by_submit_time.append(job)
        self.cpu_snapshot.assignJobEarliest(job, current_time)
        return [JobStartEvent(job.start_to_run_at_time, job)]

    def new_events_on_job_termination(self, job, current_time):
        """
        Here we delete the tail of job if it was ended before the duration
        declaration. It then reschedules the remaining (not yet started)
        jobs and returns the resulting collection of JobStartEvents.
        """
        self.cpu_snapshot.archive_old_slices(current_time)
        self.unfinished_jobs_by_submit_time.remove(job)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        return self._reschedule_jobs(current_time)

    def _reschedule_jobs(self, current_time):
        """Try to pull every pending reservation earlier; emit events for moved jobs."""
        newEvents = []
        for job in self.unfinished_jobs_by_submit_time:
            if job.start_to_run_at_time <= current_time:
                continue  # job started to run before, so it cannot be rescheduled (preemptions are not allowed)
            prev_start_to_run_at_time = job.start_to_run_at_time
            self.cpu_snapshot.delJobFromCpuSlices(job)
            self.cpu_snapshot.assignJobEarliest(job, current_time)
            # A reschedule may only move the job earlier, never later.
            assert prev_start_to_run_at_time >= job.start_to_run_at_time
            if prev_start_to_run_at_time != job.start_to_run_at_time:
                newEvents.append(JobStartEvent(job.start_to_run_at_time, job))
        return newEvents
class LogScheduler(Scheduler): def __init__(self, num_processors): super(LogScheduler, self).__init__(num_processors) self.cpu_snapshot = CpuSnapshot(num_processors) self.waiting_queue_of_jobs = [] def new_events_on_job_submission(self, job, current_time): self.cpu_snapshot.archive_old_slices(current_time) result = [] #self.waiting_queue_of_jobs.append(job) result.append(JobStartEvent(current_time+job.actual_wait_time, job)) #return [ # JobStartEvent(current_time, job) # for job in self._log_schedule_jobs(current_time) #] return result def new_events_on_job_termination(self, job, current_time): self.cpu_snapshot.archive_old_slices(current_time) self.cpu_snapshot.delTailofJobFromCpuSlices(job) return [] """return [ JobStartEvent(current_time, job) for job in self._log_schedule_jobs(current_time) ]""" def _schedule_jobs(self, current_time): result = [] while len(self.waiting_queue_of_jobs) > 0: job = self.waiting_queue_of_jobs[0] if self.cpu_snapshot.free_processors_available_at(current_time) >= job.num_required_processors: self.waiting_queue_of_jobs.pop(0) self.cpu_snapshot.assignJob(job, current_time) result.append(job) else: break return result def _log_schedule_jobs(self, current_time): #by Siddharth result = [] return result """while len(self.waiting_queue_of_jobs) > 0:
class EasyBackfillScheduler(Scheduler):
    """EASY backfilling: the head of the waiting list holds a reservation;
    later jobs may jump ahead only while that reservation is not delayed."""

    def __init__(self, options):
        super(EasyBackfillScheduler, self).__init__(options)
        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])
        # Jobs submitted but not yet started, in submission order.
        self.unscheduled_jobs = []

    def new_events_on_job_submission(self, just_submitted_job, current_time):
        """
        Here we first add the new job to the waiting list. We then try to
        schedule the jobs in the waiting list, returning a collection of
        new JobStartEvents.
        """
        # TODO: a probable performance bottleneck because we reschedule all the
        # jobs. Knowing that only one new job is added allows more efficient
        # scheduling here.
        self.cpu_snapshot.archive_old_slices(current_time)
        self.unscheduled_jobs.append(just_submitted_job)
        retl = []
        # Scheduling is only attempted when the newly submitted job itself
        # could start right now; otherwise the list waits for a termination.
        if (self.cpu_snapshot.free_processors_available_at(current_time) >= just_submitted_job.num_required_processors):
            for job in self._schedule_jobs(current_time):
                retl.append(JobStartEvent(current_time, job))
        return retl

    def new_events_on_job_termination(self, job, current_time):
        """
        Here we first delete the tail of the just terminated job (in case
        it's done before user estimation time). We then try to schedule the
        jobs in the waiting list, returning a collection of new JobStartEvents.
        """
        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        return [
            JobStartEvent(current_time, job)
            for job in self._schedule_jobs(current_time)
        ]

    def _schedule_jobs(self, current_time):
        "Schedules jobs that can run right now, and returns them"
        jobs = self._schedule_head_of_list(current_time)
        jobs += self._backfill_jobs(current_time)
        return jobs

    def _schedule_head_of_list(self, current_time):
        """Start jobs strictly from the front of the list while they fit."""
        result = []
        while True:
            if len(self.unscheduled_jobs) == 0:
                break
            # Try to schedule the first job
            if self.cpu_snapshot.free_processors_available_at(current_time) >= self.unscheduled_jobs[0].num_required_processors:
                job = self.unscheduled_jobs.pop(0)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
            else:
                # first job can't be scheduled
                break
        return result

    def _backfill_jobs(self, current_time):
        """
        Find jobs that can be backfilled and update the cpu snapshot.
        DEPRECATED FUNCTION !!!!!!
        """
        if len(self.unscheduled_jobs) <= 1:
            return []
        result = []
        tail_of_waiting_list = list_copy(self.unscheduled_jobs[1:])
        # Temporarily reserve the head job at its earliest slot so backfill
        # candidates cannot delay it; the reservation is removed afterwards.
        first_job = self.unscheduled_jobs[0]
        self.cpu_snapshot.assignJobEarliest(first_job, current_time)
        for job in tail_of_waiting_list:
            if self.cpu_snapshot.canJobStartNow(job, current_time):
                job.is_backfilled = 1
                self.unscheduled_jobs.remove(job)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
        self.cpu_snapshot.unAssignJob(first_job)
        return result
class EasyPlusPlusScheduler(Scheduler):
    """ This algorithm implements the algorithm in the paper of Tsafrir, Etzion, Feitelson, june 2007? """

    # Signals the driver that this scheduler needs a run-time predictor.
    I_NEED_A_PREDICTOR = True

    def __init__(self, options):
        super(EasyPlusPlusScheduler, self).__init__(options)
        self.init_predictor(options)
        self.init_corrector(options)
        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])
        # Jobs submitted but not yet started.
        self.unscheduled_jobs = []

    def new_events_on_job_submission(self, job, current_time):
        """Predict the job's run time, queue it, and start whatever fits now."""
        self.cpu_snapshot.archive_old_slices(current_time)
        self.predictor.predict(job, current_time, self.running_jobs)
        # Remember the very first prediction for later analysis.
        if not hasattr(job,"initial_prediction"):
            job.initial_prediction=job.predicted_run_time
        self.unscheduled_jobs.append(job)
        return [
            JobStartEvent(current_time, job)
            for job in self._schedule_jobs(current_time)
        ]

    def new_events_on_job_termination(self, job, current_time):
        """Feed the actual run time back into the predictor, reclaim the job's
        unused tail, and start any jobs that now fit."""
        self.predictor.fit(job, current_time)
        # The "ninetynine" corrector additionally maintains a percentile estimator.
        if self.corrector.__name__=="ninetynine":
            self.pestimator.fit(job.actual_run_time/job.user_estimated_run_time)
        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        return [
            JobStartEvent(current_time, job)
            for job in self._schedule_jobs(current_time)
        ]

    def new_events_on_job_under_prediction(self, job, current_time):
        """A running job outlived its prediction: extend it with a corrected one."""
        pass #assert job.predicted_run_time <= job.user_estimated_run_time
        # Track how many times this job has been under-predicted.
        if not hasattr(job,"num_underpredict"):
            job.num_underpredict = 0
        else:
            job.num_underpredict += 1
        if self.corrector.__name__=="ninetynine":
            new_predicted_run_time = self.corrector(self.pestimator,job,current_time)
        else:
            new_predicted_run_time = self.corrector(job, current_time)
        #set the new predicted runtime
        self.cpu_snapshot.assignTailofJobToTheCpuSlices(job, new_predicted_run_time)
        job.predicted_run_time = new_predicted_run_time
        return [JobStartEvent(current_time, job)]

    def _schedule_jobs(self, current_time):
        "Schedules jobs that can run right now, and returns them"
        jobs = self._schedule_head_of_list(current_time)
        jobs += self._backfill_jobs(current_time)
        return jobs

    def _schedule_head_of_list(self, current_time):
        """Start jobs strictly from the front of the list while they fit."""
        result = []
        while True:
            if len(self.unscheduled_jobs) == 0:
                break
            # Try to schedule the first job
            if self.cpu_snapshot.free_processors_available_at(current_time) >= self.unscheduled_jobs[0].num_required_processors:
                job = self.unscheduled_jobs.pop(0)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
            else:
                # first job can't be scheduled
                break
        return result

    def _backfill_jobs(self, current_time):
        """Backfill tail jobs in SJF order around a temporary head-job reservation."""
        if len(self.unscheduled_jobs) <= 1:
            return []
        result = []
        first_job = self.unscheduled_jobs[0]
        tail = list_copy(self.unscheduled_jobs[1:])
        # Shortest-job-first order maximizes backfill opportunities.
        tail_of_jobs_by_sjf_order = sorted(tail, key=sjf_sort_key)
        # Reserve the head job so backfilled jobs cannot delay it.
        self.cpu_snapshot.assignJobEarliest(first_job, current_time)
        for job in tail_of_jobs_by_sjf_order:
            if self.cpu_snapshot.canJobStartNow(job, current_time):
                job.is_backfilled = 1
                self.unscheduled_jobs.remove(job)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
        # Remove the temporary reservation.
        self.cpu_snapshot.delJobFromCpuSlices(first_job)
        return result
class OrigProbabilisticEasyScheduler(Scheduler):
    """Probabilistic EASY backfilling (a version of Feitelson & Nissimov, June 2007).

    A tail job is backfilled only if the estimated probability that it delays
    the reserved head job stays within `threshold`.  The probabilities come
    from per-user empirical run-time distributions binned by powers of two.
    """

    def __init__(self, num_processors, threshold = 0.2, window_size=150):
        super(OrigProbabilisticEasyScheduler, self).__init__(num_processors)
        self.threshold = threshold
        self.window_size = window_size # a parameter for the distribution
        self.cpu_snapshot = CpuSnapshot(num_processors)
        # user_id -> Distribution of that user's past job run times.
        self.user_distribution = {}
        self.unscheduled_jobs = []
        self.currently_running_jobs = []
        # M[n, c]: probability that the first n running jobs free at least c
        # processors by a given time; allocated once and reused by bottle_neck().
        self.M = {}
        for c in xrange(self.num_processors + 1):
            for n in xrange(self.num_processors + 1):
                self.M[c, n] = 0.0
        self.max_user_rounded_estimated_run_time = 0
        self.prev_max_user_rounded_estimated_run_time = 0

    def new_events_on_job_submission(self, job, current_time):
        """Register the job, widen user distributions if needed, start what fits."""
        rounded_up_estimated_time = _round_time_up(job.user_estimated_run_time)

        if rounded_up_estimated_time > self.max_user_rounded_estimated_run_time:
            self.prev_max_user_rounded_estimated_run_time = self.max_user_rounded_estimated_run_time
            self.max_user_rounded_estimated_run_time = rounded_up_estimated_time

        if not self.user_distribution.has_key(job.user_id):
            self.user_distribution[job.user_id] = Distribution(job, self.window_size)
        self.user_distribution[job.user_id].touch(2 * self.max_user_rounded_estimated_run_time)

        # A new global maximum forces every active user's bins to cover it.
        if self.prev_max_user_rounded_estimated_run_time < self.max_user_rounded_estimated_run_time:
            for tmp_job in self.currently_running_jobs:
                self.user_distribution[tmp_job.user_id].touch(2 * self.max_user_rounded_estimated_run_time)

        self.cpu_snapshot.archive_old_slices(current_time)
        self.unscheduled_jobs.append(job)
        return [
            JobStartEvent(current_time, job)
            for job in self._schedule_jobs(current_time)
        ]

    def new_events_on_job_termination(self, job, current_time):
        """Record the actual run time, reclaim the job's tail, start waiting jobs."""
        self.user_distribution[job.user_id].add_job(job)
        self.currently_running_jobs.remove(job)
        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        return [
            JobStartEvent(current_time, job)
            for job in self._schedule_jobs(current_time)
        ]

    def _schedule_jobs(self, current_time):
        "Schedules jobs that can run right now, and returns them"
        jobs = self._schedule_head_of_list(current_time)
        jobs += self._backfill_jobs(current_time)
        return jobs

    def _schedule_head_of_list(self, current_time):
        """Start jobs strictly from the front of the list while they fit."""
        result = []
        while True:
            if len(self.unscheduled_jobs) == 0:
                break
            # Try to schedule the first job.
            if self.cpu_snapshot.free_processors_available_at(current_time) >= self.unscheduled_jobs[0].num_required_processors:
                job = self.unscheduled_jobs.pop(0)
                self.currently_running_jobs.append(job)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
            else:
                break  # first job can't be scheduled
        return result

    def _backfill_jobs(self, current_time):
        """Backfill tail jobs whose risk of delaying the head job is acceptable."""
        if len(self.unscheduled_jobs) <= 1:
            return []
        result = []
        tail = list_copy(self.unscheduled_jobs[1:])
        for job in tail:
            if self.can_be_probabilistically_backfilled(job, current_time):
                self.unscheduled_jobs.remove(job)
                self.currently_running_jobs.append(job)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
        return result

    def can_be_probabilistically_backfilled(self, job, current_time):
        """True when `job` fits right now and its accumulated probability of
        delaying the head job (over doubling time horizons) is within the
        threshold."""
        assert len(self.unscheduled_jobs) >= 2
        assert job in self.unscheduled_jobs[1:]

        if self.cpu_snapshot.free_processors_available_at(current_time) < job.num_required_processors:
            return False

        first_job = self.unscheduled_jobs[0]
        prediction = 0.0
        max_bottle_neck = 0.0
        t = 1
        # Horizons double up to twice the user's estimate (distribution range).
        while t < 2 * job.user_estimated_run_time:
            job_probability_to_end_at_t = self.probability_to_end_at(t, job)
            max_bottle_neck = max(max_bottle_neck,
                                  self.bottle_neck(t, job, first_job, current_time))
            prediction += job_probability_to_end_at_t * max_bottle_neck
            t = t * 2

        return prediction <= self.threshold

    def bottle_neck(self, time, second_job, first_job, current_time):
        """Probability that running `second_job` now leaves too few free
        processors for `first_job` at `time`, via a dynamic program over
        the currently running jobs."""
        C = first_job.num_required_processors + second_job.num_required_processors
        K = min(self.num_processors, C)
        # M[n, c] is the probability that the first n running jobs will
        # release at least c processors at `time`.
        M = self.M
        num_of_currently_running_jobs = len(self.currently_running_jobs)

        for c in xrange(K + 1):
            M[0, c] = 0.0
        for n in xrange(1, num_of_currently_running_jobs + 1):
            M[n, 0] = 1.0

        for n in xrange(1, num_of_currently_running_jobs + 1):
            job_n = self.currently_running_jobs[n - 1]  # the n'th job: a list has a zero index
            job_n_required_processors = job_n.num_required_processors
            Pn = self.probability_of_running_job_to_end_upto(time, current_time, job_n)
            for c in xrange(1, job_n_required_processors):
                val = M[n - 1, c]
                M[n, c] = val + (1.0 - val) * Pn
            for c in xrange(job_n_required_processors, K + 1):
                val = M[n - 1, c]
                M[n, c] = val + (M[n, c - job_n_required_processors] - val) * Pn

        last_row_index = num_of_currently_running_jobs
        if C <= K:
            result = M[last_row_index, first_job.num_required_processors] - M[last_row_index, C]
        else:
            result = M[last_row_index, first_job.num_required_processors]

        # Clamp numerical noise into [0, 1].
        if result < 0:
            result = 0.0
        elif result > 1:
            # BUGFIX: the original assigned to a misspelled name (`reuslt`),
            # leaving `result` above 1 and tripping the assertion below.
            result = 1.0
        assert 0 <= result <= 1
        return result

    def probability_of_running_job_to_end_upto(self, time, current_time, job):
        """Probability (from the owner's distribution) that the running `job`
        ends within `time` from now, conditioned on its elapsed run time."""
        run_time = current_time - job.start_to_run_at_time
        rounded_down_run_time = _round_time_down(run_time)
        rounded_up_estimated_remaining_duration = _round_time_up(
            job.user_estimated_run_time - rounded_down_run_time)
        if time >= rounded_up_estimated_remaining_duration:
            return 1.0

        num_of_jobs_in_first_bins = 0
        num_of_jobs_in_middle_bins = 0.0
        num_of_jobs_in_last_bins = 0
        job_distribution = self.user_distribution[job.user_id]

        for (key, value) in job_distribution.bins.iteritems():
            if key > rounded_up_estimated_remaining_duration:
                num_of_jobs_in_last_bins += value    # beyond the estimate: irrelevant
            elif key <= rounded_down_run_time:
                num_of_jobs_in_first_bins += value   # job already outlived these: irrelevant
            elif key <= time + rounded_down_run_time:
                num_of_jobs_in_middle_bins += value
            elif time + rounded_down_run_time > key / 2:
                # Partial overlap with bin [key/2, key): count proportionally.
                num_of_jobs_in_middle_bins += float(
                    value * (time + rounded_down_run_time - (key / 2))) / (key / 2)
            # else: at the tail of the middle bin the job won't terminate,
            # because of conditional probability.

        num_of_irrelevant_jobs = num_of_jobs_in_first_bins + num_of_jobs_in_last_bins
        num_of_relevant_jobs = job_distribution.number_of_jobs_added - num_of_irrelevant_jobs
        assert 0 <= num_of_jobs_in_middle_bins <= num_of_relevant_jobs, \
            str(num_of_jobs_in_middle_bins)+str(" ")+str(num_of_relevant_jobs)
        # +0.1 avoids division by zero when no relevant jobs exist.
        result = num_of_jobs_in_middle_bins / (num_of_relevant_jobs + 0.1)
        return result

    def probability_to_end_at(self, time, job):
        """Probability that `job` ends exactly in the distribution bin at
        `time`, excluding bins beyond twice the user's estimate."""
        job_distribution = self.user_distribution[job.user_id]
        assert job_distribution.bins.has_key(time) == True

        num_of_jobs_in_last_bins = 0
        rounded_up_user_estimated_run_time = 2 * job.user_estimated_run_time - 1
        for key in job_distribution.bins.keys():
            if key > rounded_up_user_estimated_run_time:
                num_of_jobs_in_last_bins += job_distribution.bins[key]

        num_of_relevant_jobs = job_distribution.number_of_jobs_added - num_of_jobs_in_last_bins
        assert 0 <= job_distribution.bins[time] <= num_of_relevant_jobs,\
            str(time)+str(" ")+ str(job_distribution.bins[time])+str(" ")+str(num_of_relevant_jobs)
        # +0.1 avoids division by zero when no relevant jobs exist.
        result = float(job_distribution.bins[time]) / (num_of_relevant_jobs + 0.1)
        return result
class EasyPlusPlusScheduler(Scheduler):
    """ This algorithm implements the algorithm in the paper of Tsafrir, Etzion, Feitelson, june 2007? """

    def __init__(self, num_processors):
        super(EasyPlusPlusScheduler, self).__init__(num_processors)
        self.cpu_snapshot = CpuSnapshot(num_processors)
        self.unscheduled_jobs = []
        # Last two actual run times per user; their average predicts the next job.
        self.user_run_time_prev = {}
        self.user_run_time_last = {}

    def new_events_on_job_submission(self, job, current_time):
        """Queue the job (initializing the user's history on first sight)
        and start whatever fits now."""
        if not self.user_run_time_last.has_key(job.user_id):
            self.user_run_time_prev[job.user_id] = None
            self.user_run_time_last[job.user_id] = None
        self.cpu_snapshot.archive_old_slices(current_time)
        self.unscheduled_jobs.append(job)
        return [
            JobStartEvent(current_time, job)
            for job in self._schedule_jobs(current_time)
        ]

    def new_events_on_job_termination(self, job, current_time):
        """Shift the user's run-time history, reclaim the job's unused tail,
        and start any jobs that now fit."""
        assert self.user_run_time_last.has_key(job.user_id) == True
        assert self.user_run_time_prev.has_key(job.user_id) == True
        self.user_run_time_prev[job.user_id] = self.user_run_time_last[job.user_id]
        self.user_run_time_last[job.user_id] = job.actual_run_time
        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        return [
            JobStartEvent(current_time, job)
            for job in self._schedule_jobs(current_time)
        ]

    def new_events_on_job_under_prediction(self, job, current_time):
        """A running job outlived its prediction: fall back to the user estimate."""
        assert job.predicted_run_time <= job.user_estimated_run_time
        self.cpu_snapshot.assignTailofJobToTheCpuSlices(job)
        job.predicted_run_time = job.user_estimated_run_time
        return []

    def _schedule_jobs(self, current_time):
        "Schedules jobs that can run right now, and returns them"
        # Refresh predictions: average of the user's last two run times,
        # capped by the user's own estimate.
        for job in self.unscheduled_jobs:
            if self.user_run_time_prev[job.user_id] != None:
                average = int((self.user_run_time_last[job.user_id] + self.user_run_time_prev[job.user_id]) / 2)
                job.predicted_run_time = min(job.user_estimated_run_time, average)
        jobs = self._schedule_head_of_list(current_time)
        jobs += self._backfill_jobs(current_time)
        return jobs

    def _schedule_head_of_list(self, current_time):
        """Start jobs strictly from the front of the list while they fit."""
        result = []
        while True:
            if len(self.unscheduled_jobs) == 0:
                break
            # Try to schedule the first job
            if self.cpu_snapshot.free_processors_available_at(current_time) >= self.unscheduled_jobs[0].num_required_processors:
                job = self.unscheduled_jobs.pop(0)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
            else:
                # first job can't be scheduled
                break
        return result

    def _backfill_jobs(self, current_time):
        """Backfill tail jobs in SJF order around a temporary head-job reservation."""
        if len(self.unscheduled_jobs) <= 1:
            return []
        result = []
        first_job = self.unscheduled_jobs[0]
        tail = list_copy(self.unscheduled_jobs[1:])
        # Shortest-job-first order maximizes backfill opportunities.
        tail_of_jobs_by_sjf_order = sorted(tail, key=sjf_sort_key)
        # Reserve the head job so backfilled jobs cannot delay it.
        self.cpu_snapshot.assignJobEarliest(first_job, current_time)
        for job in tail_of_jobs_by_sjf_order:
            if self.cpu_snapshot.canJobStartNow(job, current_time):
                self.unscheduled_jobs.remove(job)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
        # Remove the temporary reservation.
        self.cpu_snapshot.delJobFromCpuSlices(first_job)
        return result
class OrigProbabilisticEasyScheduler(Scheduler):
    """Probabilistic EASY backfilling (a version of Feitelson & Nissimov, June 2007).

    A tail job is backfilled only if the estimated probability that it delays
    the reserved head job stays within `threshold`.  The probabilities come
    from per-user empirical run-time distributions binned by powers of two.
    """

    def __init__(self, num_processors, threshold=0.2, window_size=150):
        super(OrigProbabilisticEasyScheduler, self).__init__(num_processors)
        self.threshold = threshold
        self.window_size = window_size  # a parameter for the distribution
        self.cpu_snapshot = CpuSnapshot(num_processors)
        # user_id -> Distribution of that user's past job run times.
        self.user_distribution = {}
        self.unscheduled_jobs = []
        self.currently_running_jobs = []
        # M[n, c]: probability that the first n running jobs free at least c
        # processors by a given time; allocated once and reused by bottle_neck().
        self.M = {}
        for c in xrange(self.num_processors + 1):
            for n in xrange(self.num_processors + 1):
                self.M[c, n] = 0.0
        self.max_user_rounded_estimated_run_time = 0
        self.prev_max_user_rounded_estimated_run_time = 0

    def new_events_on_job_submission(self, job, current_time):
        """Register the job, widen user distributions if needed, start what fits."""
        rounded_up_estimated_time = _round_time_up(job.user_estimated_run_time)

        if rounded_up_estimated_time > self.max_user_rounded_estimated_run_time:
            self.prev_max_user_rounded_estimated_run_time = self.max_user_rounded_estimated_run_time
            self.max_user_rounded_estimated_run_time = rounded_up_estimated_time

        if not self.user_distribution.has_key(job.user_id):
            self.user_distribution[job.user_id] = Distribution(job, self.window_size)
        self.user_distribution[job.user_id].touch(2 * self.max_user_rounded_estimated_run_time)

        # A new global maximum forces every active user's bins to cover it.
        if self.prev_max_user_rounded_estimated_run_time < self.max_user_rounded_estimated_run_time:
            for tmp_job in self.currently_running_jobs:
                self.user_distribution[tmp_job.user_id].touch(2 * self.max_user_rounded_estimated_run_time)

        self.cpu_snapshot.archive_old_slices(current_time)
        self.unscheduled_jobs.append(job)
        return [
            JobStartEvent(current_time, job)
            for job in self._schedule_jobs(current_time)
        ]

    def new_events_on_job_termination(self, job, current_time):
        """Record the actual run time, reclaim the job's tail, start waiting jobs."""
        self.user_distribution[job.user_id].add_job(job)
        self.currently_running_jobs.remove(job)
        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        return [
            JobStartEvent(current_time, job)
            for job in self._schedule_jobs(current_time)
        ]

    def _schedule_jobs(self, current_time):
        "Schedules jobs that can run right now, and returns them"
        jobs = self._schedule_head_of_list(current_time)
        jobs += self._backfill_jobs(current_time)
        return jobs

    def _schedule_head_of_list(self, current_time):
        """Start jobs strictly from the front of the list while they fit."""
        result = []
        while True:
            if len(self.unscheduled_jobs) == 0:
                break
            # Try to schedule the first job.
            if self.cpu_snapshot.free_processors_available_at(current_time) >= self.unscheduled_jobs[0].num_required_processors:
                job = self.unscheduled_jobs.pop(0)
                self.currently_running_jobs.append(job)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
            else:
                break  # first job can't be scheduled
        return result

    def _backfill_jobs(self, current_time):
        """Backfill tail jobs whose risk of delaying the head job is acceptable."""
        if len(self.unscheduled_jobs) <= 1:
            return []
        result = []
        tail = list_copy(self.unscheduled_jobs[1:])
        for job in tail:
            if self.can_be_probabilistically_backfilled(job, current_time):
                self.unscheduled_jobs.remove(job)
                self.currently_running_jobs.append(job)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
        return result

    def can_be_probabilistically_backfilled(self, job, current_time):
        """True when `job` fits right now and its accumulated probability of
        delaying the head job (over doubling time horizons) is within the
        threshold."""
        assert len(self.unscheduled_jobs) >= 2
        assert job in self.unscheduled_jobs[1:]

        if self.cpu_snapshot.free_processors_available_at(current_time) < job.num_required_processors:
            return False

        first_job = self.unscheduled_jobs[0]
        prediction = 0.0
        max_bottle_neck = 0.0
        t = 1
        # Horizons double up to twice the user's estimate (distribution range).
        while t < 2 * job.user_estimated_run_time:
            job_probability_to_end_at_t = self.probability_to_end_at(t, job)
            max_bottle_neck = max(max_bottle_neck,
                                  self.bottle_neck(t, job, first_job, current_time))
            prediction += job_probability_to_end_at_t * max_bottle_neck
            t = t * 2

        return prediction <= self.threshold

    def bottle_neck(self, time, second_job, first_job, current_time):
        """Probability that running `second_job` now leaves too few free
        processors for `first_job` at `time`, via a dynamic program over
        the currently running jobs."""
        C = first_job.num_required_processors + second_job.num_required_processors
        K = min(self.num_processors, C)
        # M[n, c] is the probability that the first n running jobs will
        # release at least c processors at `time`.
        M = self.M
        num_of_currently_running_jobs = len(self.currently_running_jobs)

        for c in xrange(K + 1):
            M[0, c] = 0.0
        for n in xrange(1, num_of_currently_running_jobs + 1):
            M[n, 0] = 1.0

        for n in xrange(1, num_of_currently_running_jobs + 1):
            job_n = self.currently_running_jobs[n - 1]  # the n'th job: a list has a zero index
            job_n_required_processors = job_n.num_required_processors
            Pn = self.probability_of_running_job_to_end_upto(time, current_time, job_n)
            for c in xrange(1, job_n_required_processors):
                val = M[n - 1, c]
                M[n, c] = val + (1.0 - val) * Pn
            for c in xrange(job_n_required_processors, K + 1):
                val = M[n - 1, c]
                M[n, c] = val + (M[n, c - job_n_required_processors] - val) * Pn

        last_row_index = num_of_currently_running_jobs
        if C <= K:
            result = M[last_row_index, first_job.num_required_processors] - M[last_row_index, C]
        else:
            result = M[last_row_index, first_job.num_required_processors]

        # Clamp numerical noise into [0, 1].
        if result < 0:
            result = 0.0
        elif result > 1:
            # BUGFIX: the original assigned to a misspelled name (`reuslt`),
            # leaving `result` above 1 and tripping the assertion below.
            result = 1.0
        assert 0 <= result <= 1
        return result

    def probability_of_running_job_to_end_upto(self, time, current_time, job):
        """Probability (from the owner's distribution) that the running `job`
        ends within `time` from now, conditioned on its elapsed run time."""
        run_time = current_time - job.start_to_run_at_time
        rounded_down_run_time = _round_time_down(run_time)
        rounded_up_estimated_remaining_duration = _round_time_up(
            job.user_estimated_run_time - rounded_down_run_time)
        if time >= rounded_up_estimated_remaining_duration:
            return 1.0

        num_of_jobs_in_first_bins = 0
        num_of_jobs_in_middle_bins = 0.0
        num_of_jobs_in_last_bins = 0
        job_distribution = self.user_distribution[job.user_id]

        for (key, value) in job_distribution.bins.iteritems():
            if key > rounded_up_estimated_remaining_duration:
                num_of_jobs_in_last_bins += value    # beyond the estimate: irrelevant
            elif key <= rounded_down_run_time:
                num_of_jobs_in_first_bins += value   # job already outlived these: irrelevant
            elif key <= time + rounded_down_run_time:
                num_of_jobs_in_middle_bins += value
            elif time + rounded_down_run_time > key / 2:
                # Partial overlap with bin [key/2, key): count proportionally.
                num_of_jobs_in_middle_bins += float(
                    value * (time + rounded_down_run_time - (key / 2))) / (key / 2)
            # else: at the tail of the middle bin the job won't terminate,
            # because of conditional probability.

        num_of_irrelevant_jobs = num_of_jobs_in_first_bins + num_of_jobs_in_last_bins
        num_of_relevant_jobs = job_distribution.number_of_jobs_added - num_of_irrelevant_jobs
        assert 0 <= num_of_jobs_in_middle_bins <= num_of_relevant_jobs, \
            str(num_of_jobs_in_middle_bins)+str(" ")+str(num_of_relevant_jobs)
        # +0.1 avoids division by zero when no relevant jobs exist.
        result = num_of_jobs_in_middle_bins / (num_of_relevant_jobs + 0.1)
        return result

    def probability_to_end_at(self, time, job):
        """Probability that `job` ends exactly in the distribution bin at
        `time`, excluding bins beyond twice the user's estimate."""
        job_distribution = self.user_distribution[job.user_id]
        assert job_distribution.bins.has_key(time) == True

        num_of_jobs_in_last_bins = 0
        rounded_up_user_estimated_run_time = 2 * job.user_estimated_run_time - 1
        for key in job_distribution.bins.keys():
            if key > rounded_up_user_estimated_run_time:
                num_of_jobs_in_last_bins += job_distribution.bins[key]

        num_of_relevant_jobs = job_distribution.number_of_jobs_added - num_of_jobs_in_last_bins
        assert 0 <= job_distribution.bins[time] <= num_of_relevant_jobs,\
            str(time)+str(" ")+ str(job_distribution.bins[time])+str(" ")+str(num_of_relevant_jobs)
        # +0.1 avoids division by zero when no relevant jobs exist.
        result = float(job_distribution.bins[time]) / (num_of_relevant_jobs + 0.1)
        return result
class EasyPlusPlusScheduler(Scheduler):
    """ This algorithm implements the algorithm in the paper of Tsafrir, Etzion, Feitelson, june 2007? """

    def __init__(self, num_processors):
        super(EasyPlusPlusScheduler, self).__init__(num_processors)
        self.cpu_snapshot = CpuSnapshot(num_processors)
        self.unscheduled_jobs = []
        # Last two actual run times per user; their average predicts the next job.
        self.user_run_time_prev = {}
        self.user_run_time_last = {}

    def new_events_on_job_submission(self, job, current_time):
        """Queue the job (initializing the user's history on first sight)
        and start whatever fits now."""
        if not self.user_run_time_last.has_key(job.user_id):
            self.user_run_time_prev[job.user_id] = None
            self.user_run_time_last[job.user_id] = None
        self.cpu_snapshot.archive_old_slices(current_time)
        self.unscheduled_jobs.append(job)
        return [
            JobStartEvent(current_time, job)
            for job in self._schedule_jobs(current_time)
        ]

    def new_events_on_job_termination(self, job, current_time):
        """Shift the user's run-time history, reclaim the job's unused tail,
        and start any jobs that now fit."""
        assert self.user_run_time_last.has_key(job.user_id) == True
        assert self.user_run_time_prev.has_key(job.user_id) == True
        self.user_run_time_prev[job.user_id] = self.user_run_time_last[job.user_id]
        self.user_run_time_last[job.user_id] = job.actual_run_time
        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        return [
            JobStartEvent(current_time, job)
            for job in self._schedule_jobs(current_time)
        ]

    def new_events_on_job_under_prediction(self, job, current_time):
        """A running job outlived its prediction: fall back to the user estimate."""
        assert job.predicted_run_time <= job.user_estimated_run_time
        self.cpu_snapshot.assignTailofJobToTheCpuSlices(job)
        job.predicted_run_time = job.user_estimated_run_time
        return []

    def _schedule_jobs(self, current_time):
        "Schedules jobs that can run right now, and returns them"
        # Refresh predictions: average of the user's last two run times,
        # capped by the user's own estimate.
        for job in self.unscheduled_jobs:
            if self.user_run_time_prev[job.user_id] != None:
                average = int((self.user_run_time_last[job.user_id] + self.user_run_time_prev[job.user_id])/ 2)
                job.predicted_run_time = min (job.user_estimated_run_time, average)
        jobs = self._schedule_head_of_list(current_time)
        jobs += self._backfill_jobs(current_time)
        return jobs

    def _schedule_head_of_list(self, current_time):
        """Start jobs strictly from the front of the list while they fit."""
        result = []
        while True:
            if len(self.unscheduled_jobs) == 0:
                break
            # Try to schedule the first job
            if self.cpu_snapshot.free_processors_available_at(current_time) >= self.unscheduled_jobs[0].num_required_processors:
                job = self.unscheduled_jobs.pop(0)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
            else:
                # first job can't be scheduled
                break
        return result

    def _backfill_jobs(self, current_time):
        """Backfill tail jobs in SJF order around a temporary head-job reservation."""
        if len(self.unscheduled_jobs) <= 1:
            return []
        result = []
        first_job = self.unscheduled_jobs[0]
        tail = list_copy(self.unscheduled_jobs[1:])
        # Shortest-job-first order maximizes backfill opportunities.
        tail_of_jobs_by_sjf_order = sorted(tail, key=sjf_sort_key)
        # Reserve the head job so backfilled jobs cannot delay it.
        self.cpu_snapshot.assignJobEarliest(first_job, current_time)
        for job in tail_of_jobs_by_sjf_order:
            if self.cpu_snapshot.canJobStartNow(job, current_time):
                self.unscheduled_jobs.remove(job)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
        # Remove the temporary reservation.
        self.cpu_snapshot.delJobFromCpuSlices(first_job)
        return result
class EasyBackfillScheduler(Scheduler):
    """EASY backfilling: start jobs FCFS from the head of the queue, then let
    later jobs jump ahead only when they cannot delay the first queued job."""

    def __init__(self, num_processors):
        super(EasyBackfillScheduler, self).__init__(num_processors)
        self.cpu_snapshot = CpuSnapshot(num_processors)
        self.unscheduled_jobs = []

    def new_events_on_job_submission(self, just_submitted_job, current_time):
        """Queue the new job, then start whatever can run now; returns a
        JobStartEvent for every job started at current_time."""
        # TODO: rescheduling the whole queue on each single-job submission is
        # a probable performance bottleneck; an incremental pass would suffice.
        self.cpu_snapshot.archive_old_slices(current_time)
        self.unscheduled_jobs.append(just_submitted_job)
        started = self._schedule_jobs(current_time)
        return [JobStartEvent(current_time, each) for each in started]

    def new_events_on_job_termination(self, job, current_time):
        """Trim the terminated job's reserved tail (it may have finished before
        its user estimate), then start whatever can run now."""
        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        started = self._schedule_jobs(current_time)
        return [JobStartEvent(current_time, each) for each in started]

    def _schedule_jobs(self, current_time):
        """Start every job that can run right now and return them."""
        started = self._schedule_head_of_list(current_time)
        started += self._backfill_jobs(current_time)
        # accounting counter -- presumably initialized by the Scheduler base; confirm
        self.totalScheduledJobs += len(started)
        return started

    def _schedule_head_of_list(self, current_time):
        """Pop jobs FCFS from the queue head while enough processors are free."""
        started = []
        while self.unscheduled_jobs:
            head = self.unscheduled_jobs[0]
            free_now = self.cpu_snapshot.free_processors_available_at(current_time)
            if free_now < head.num_required_processors:
                break  # the head job must wait; FCFS forbids starting anything behind it here
            self.unscheduled_jobs.pop(0)
            self.cpu_snapshot.assignJob(head, current_time)
            started.append(head)
        return started

    def _backfill_jobs(self, current_time):
        """Start queued jobs beyond the head that fit right now without
        delaying the head job; updates the cpu snapshot and returns them."""
        if len(self.unscheduled_jobs) <= 1:
            return []
        backfilled = []
        for candidate in list_copy(self.unscheduled_jobs[1:]):
            if not self.canBeBackfilled(candidate, current_time):
                continue
            self.unscheduled_jobs.remove(candidate)
            self.cpu_snapshot.assignJob(candidate, current_time)
            backfilled.append(candidate)
        return backfilled

    def canBeBackfilled(self, second_job, current_time):
        """True iff second_job can start now without delaying the earliest
        possible start of the job at the queue head."""
        assert len(self.unscheduled_jobs) >= 2
        assert second_job in self.unscheduled_jobs[1:]
        free_now = self.cpu_snapshot.free_processors_available_at(current_time)
        if free_now < second_job.num_required_processors:
            return False
        # Reserve the head job at its earliest slot in a scratch snapshot; if
        # second_job still fits now, it is "independent" of the head job.
        shadow = self.cpu_snapshot.copy()
        shadow.assignJobEarliest(self.unscheduled_jobs[0], current_time)
        return shadow.canJobStartNow(second_job, current_time)
class EasyPlusPlusScheduler(Scheduler):
    """EASY++ scheduler (Tsafrir, Etzion, Feitelson, 2007) driven by a
    pluggable runtime predictor and an under-prediction corrector: EASY
    backfilling where backfill candidates are tried in SJF order using
    predicted (rather than user-estimated) runtimes.

    NOTE(review): another class with this same name appears earlier in this
    file; at import time this later definition shadows the earlier one.
    """
    I_NEED_A_PREDICTOR = True

    def __init__(self, options):
        super(EasyPlusPlusScheduler, self).__init__(options)
        self.init_predictor(options)
        self.init_corrector(options)
        self.cpu_snapshot = CpuSnapshot(self.num_processors, options["stats"])
        self.unscheduled_jobs = []
        # NOTE(review): debug trace of (actual, predicted) runtimes; the handle
        # is never closed and the filename is hard-coded -- consider removing
        # it or closing it in a teardown hook.
        self.ff = open("times-epp-sgd.txt", 'w')

    def new_events_on_job_submission(self, job, current_time):
        """Predict the new job's runtime, queue it, and start every job that
        can run at current_time."""
        self.cpu_snapshot.archive_old_slices(current_time)
        self.predictor.predict(job, current_time, self.running_jobs)
        self.ff.write("%d\t%d\n" % (job.actual_run_time, job.predicted_run_time))
        self.ff.flush()
        if not hasattr(job, "initial_prediction"):
            job.initial_prediction = job.predicted_run_time
        self.unscheduled_jobs.append(job)
        return [JobStartEvent(current_time, started)
                for started in self._schedule_jobs(current_time)]

    def new_events_on_job_termination(self, job, current_time):
        """Feed the finished job back to the predictor, free its unused
        reservation tail, and start every now-runnable job."""
        self.predictor.fit(job, current_time)
        if self.corrector.__name__ == "ninetynine":
            # BUGFIX: force true division -- under Python 2 the original
            # integer division truncated the actual/estimated ratio to 0 or 1,
            # starving the percentile estimator of real data.
            self.pestimator.fit(float(job.actual_run_time) / job.user_estimated_run_time)
        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        return [JobStartEvent(current_time, started)
                for started in self._schedule_jobs(current_time)]

    def new_events_on_job_under_prediction(self, job, current_time):
        """The job ran past its prediction: ask the corrector for a new
        prediction and extend the job's reservation accordingly."""
        assert job.predicted_run_time <= job.user_estimated_run_time
        if not hasattr(job, "num_underpredict"):
            job.num_underpredict = 0
        else:
            job.num_underpredict += 1
        # The "ninetynine" corrector additionally consumes the percentile estimator.
        if self.corrector.__name__ == "ninetynine":
            new_predicted_run_time = self.corrector(self.pestimator, job, current_time)
        else:
            new_predicted_run_time = self.corrector(job, current_time)
        # Re-reserve the job's tail under the corrected prediction.
        self.cpu_snapshot.assignTailofJobToTheCpuSlices(job, new_predicted_run_time)
        job.predicted_run_time = new_predicted_run_time
        return [JobStartEvent(current_time, job)]

    def _schedule_jobs(self, current_time):
        """Start jobs that can run right now: FCFS head first, then backfill."""
        jobs = self._schedule_head_of_list(current_time)
        jobs += self._backfill_jobs(current_time)
        return jobs

    def _schedule_head_of_list(self, current_time):
        """Start jobs in strict FCFS order while processors suffice; return them."""
        result = []
        while len(self.unscheduled_jobs) > 0:
            if (self.cpu_snapshot.free_processors_available_at(current_time)
                    >= self.unscheduled_jobs[0].num_required_processors):
                job = self.unscheduled_jobs.pop(0)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
            else:
                break  # FCFS: the head job blocks everything behind it
        return result

    def _backfill_jobs(self, current_time):
        """Backfill waiting jobs in SJF order without delaying the head job;
        marks each backfilled job with is_backfilled = 1.

        The head job is temporarily reserved at its earliest slot so that any
        candidate that still starts now is provably independent of it; the
        temporary reservation is removed before returning.
        """
        if len(self.unscheduled_jobs) <= 1:
            return []
        result = []
        first_job = self.unscheduled_jobs[0]
        tail = list_copy(self.unscheduled_jobs[1:])
        tail_of_jobs_by_sjf_order = sorted(tail, key=sjf_sort_key)
        self.cpu_snapshot.assignJobEarliest(first_job, current_time)
        for job in tail_of_jobs_by_sjf_order:
            if self.cpu_snapshot.canJobStartNow(job, current_time):
                job.is_backfilled = 1
                self.unscheduled_jobs.remove(job)
                self.cpu_snapshot.assignJob(job, current_time)
                result.append(job)
        self.cpu_snapshot.delJobFromCpuSlices(first_job)
        return result
class EasyBackfillScheduler(Scheduler):
    """EASY backfill scheduler: FCFS at the queue head plus backfilling of
    later jobs that cannot delay the head job's earliest start.

    NOTE(review): an essentially identical class of the same name appears
    earlier in this file; at import time this later definition shadows the
    earlier one -- consider deduplicating.
    """

    def __init__(self, num_processors):
        super(EasyBackfillScheduler, self).__init__(num_processors)
        self.cpu_snapshot = CpuSnapshot(num_processors)
        self.unscheduled_jobs = []

    def new_events_on_job_submission(self, just_submitted_job, current_time):
        """Append the job to the waiting list and launch all now-runnable jobs,
        returning their JobStartEvents."""
        # TODO: only one job was added, yet the whole queue is rescheduled --
        # a likely performance bottleneck.
        self.cpu_snapshot.archive_old_slices(current_time)
        self.unscheduled_jobs.append(just_submitted_job)
        return [JobStartEvent(current_time, launched)
                for launched in self._schedule_jobs(current_time)]

    def new_events_on_job_termination(self, job, current_time):
        """Release the finished job's unused reservation tail (it may have
        ended before its user estimate), then launch all now-runnable jobs."""
        self.cpu_snapshot.archive_old_slices(current_time)
        self.cpu_snapshot.delTailofJobFromCpuSlices(job)
        return [JobStartEvent(current_time, launched)
                for launched in self._schedule_jobs(current_time)]

    def _schedule_jobs(self, current_time):
        """Launch every job that can start at current_time; return the list."""
        launched = self._schedule_head_of_list(current_time)
        launched.extend(self._backfill_jobs(current_time))
        # accounting counter -- presumably maintained by the Scheduler base; confirm
        self.totalScheduledJobs += len(launched)
        return launched

    def _schedule_head_of_list(self, current_time):
        """Greedily launch jobs in strict FCFS order from the queue front."""
        launched = []
        while self.unscheduled_jobs and (
                self.cpu_snapshot.free_processors_available_at(current_time)
                >= self.unscheduled_jobs[0].num_required_processors):
            front = self.unscheduled_jobs.pop(0)
            self.cpu_snapshot.assignJob(front, current_time)
            launched.append(front)
        return launched

    def _backfill_jobs(self, current_time):
        """Launch non-head jobs that fit now without pushing back the head job;
        updates the cpu snapshot and returns the backfilled jobs."""
        if len(self.unscheduled_jobs) <= 1:
            return []
        launched = []
        for waiting in list_copy(self.unscheduled_jobs[1:]):
            if self.canBeBackfilled(waiting, current_time):
                self.unscheduled_jobs.remove(waiting)
                self.cpu_snapshot.assignJob(waiting, current_time)
                launched.append(waiting)
        return launched

    def canBeBackfilled(self, second_job, current_time):
        """Return True when second_job can start immediately without delaying
        the earliest start of the job at the queue head."""
        assert len(self.unscheduled_jobs) >= 2
        assert second_job in self.unscheduled_jobs[1:]
        if (self.cpu_snapshot.free_processors_available_at(current_time)
                < second_job.num_required_processors):
            return False
        # Tentatively reserve the head job at its earliest slot in a copy of
        # the snapshot; if second_job still starts now, the two are independent.
        trial = self.cpu_snapshot.copy()
        trial.assignJobEarliest(self.unscheduled_jobs[0], current_time)
        return trial.canJobStartNow(second_job, current_time)