def __init__(self, options):
    """Create the per-user history tables and the online linear model.

    ``options`` is read for:

    * ``"loss"`` -- ``"squaredloss"``, ``"absloss"`` or
      ``"weightedsquaredloss"``;
    * ``"eta"`` -- forwarded to ``NAG`` as its third argument;
    * ``"max_runtime"`` (optional) -- stored on the instance, ``False``
      when absent.

    Raises ValueError when ``options["loss"]`` names no supported loss.
    """
    # Per-user memory of the three most recently finished jobs, plus the
    # feature vectors of jobs that were predicted but not yet fitted.
    self.user_job_last1 = {}
    self.user_job_last2 = {}
    self.user_job_last3 = {}
    self.job_x = {}

    # Machine learning part: linear model + loss + optimizer.
    linear_model = LinearModel(self.n_features)

    # The loss classes are imported lazily so that only the selected
    # implementation has to be importable.
    loss_name = options["loss"]
    if loss_name == "squaredloss":
        from valopt.losses.squared_loss import SquaredLoss
        loss = SquaredLoss(linear_model)
    elif loss_name == "absloss":
        from valopt.losses.abs_loss import AbsLoss
        loss = AbsLoss(linear_model)
    elif loss_name == "weightedsquaredloss":
        from valopt.losses.weighted_squared_loss import WeightedSquaredLoss
        loss = WeightedSquaredLoss(linear_model)
    else:
        raise ValueError("predictor config error: no valid loss specified.")

    self.max_runtime = options.get("max_runtime", False)

    self.model = NAG(linear_model, loss, options["eta"], verbose=False)
def __init__(self, options):
    """Create the per-user history tables and the online linear model.

    ``options`` is read for:

    * ``"loss"`` -- ``"squaredloss"``, ``"absloss"`` or
      ``"weightedsquaredloss"``;
    * ``"eta"`` -- forwarded to ``NAG`` as its third argument;
    * ``"max_runtime"`` (optional) -- stored on the instance, ``False``
      when absent.

    Raises ValueError when ``options["loss"]`` names no supported loss.
    """
    # Per-user memory of the three most recently finished jobs, plus the
    # feature vectors of jobs that were predicted but not yet fitted.
    self.user_job_last1 = {}
    self.user_job_last2 = {}
    self.user_job_last3 = {}
    self.job_x = {}

    # Machine learning part: linear model + loss + optimizer.
    linear_model = LinearModel(self.n_features)

    # The loss classes are imported lazily so that only the selected
    # implementation has to be importable.
    loss_name = options["loss"]
    if loss_name == "squaredloss":
        from valopt.losses.squared_loss import SquaredLoss
        loss = SquaredLoss(linear_model)
    elif loss_name == "absloss":
        from valopt.losses.abs_loss import AbsLoss
        loss = AbsLoss(linear_model)
    elif loss_name == "weightedsquaredloss":
        from valopt.losses.weighted_squared_loss import WeightedSquaredLoss
        loss = WeightedSquaredLoss(linear_model)
    else:
        raise ValueError("predictor config error: no valid loss specified.")

    self.max_runtime = options.get("max_runtime", False)

    self.model = NAG(linear_model, loss, options["eta"], verbose=False)
def __init__(self, options):
    """Build the per-user bookkeeping tables, the loss and the optimizer.

    All settings are read from ``options["scheduler"]["predictor"]``:

    * ``quadratic`` / ``cubic`` -- enable product features and enlarge
      ``self.n_features`` accordingly (``cubic`` is only looked at when
      ``quadratic`` is set).
    * ``max_runtime`` (optional) -- stored as ``self.max_runtime``
      (``False`` when absent) and passed to ``SquaredLoss`` as
      ``maxloss``.
    * ``loss`` -- ``"squaredloss"`` or ``"composite"``. A composite loss
      also reads ``leftside``/``leftparam``, ``rightside``/``rightparam``
      (each side one of ``"abs"``, ``"square"``, ``"exp"``) and
      ``threshold``.
    * ``lambda`` (optional) -- when present, ``regularization`` must be
      ``"l1"`` or ``"l2"``.
    * ``gd`` -- ``"NAG"`` or ``"sNAG"``; ``eta`` is forwarded to it.
    * ``weight`` -- falsy for a constant weight of 1, otherwise a Python
      expression over ``m`` (requested processors), ``r`` (actual run
      time) and ``log`` (``math.log``).

    Raises:
        ValueError: on an unknown loss, curve side, regularizer or
            gradient-descent algorithm.
    """
    # Data structures for storing info
    self.user_job_last3 = {}
    self.user_job_last2 = {}
    self.user_job_last1 = {}
    self.user_sum_runtimes = {}
    self.user_sum_cores = {}
    self.user_n_jobs = {}
    self.job_x = {}
    self.user_last_ending = {}

    # Statistics oriented.
    self.last_loss = 0

    cfg = options["scheduler"]["predictor"]

    # Feature-space size. The right-hand sides read the class-level
    # n_features; the assignment then shadows it on the instance.
    if cfg["quadratic"]:
        print("Using predictor with quadratic features")
        self.quadratic = True
        if cfg["cubic"]:
            self.cubic = True
            self.n_features = int(3 * self.n_features
                                  + kPn(self.n_features, 2)
                                  + kPn(self.n_features, 3))
        else:
            self.n_features = int(3 * self.n_features
                                  + 2 * kPn(self.n_features, 2))
            self.cubic = False
    else:
        self.cubic = False
        self.quadratic = False

    # machine learning thing
    linear_model = LinearModel(self.n_features)

    self.max_runtime = cfg.get("max_runtime", False)

    def make_curve(side_key, param_key):
        """Instantiate the loss curve selected by ``cfg[side_key]``."""
        kind = cfg[side_key]
        if kind == "abs":
            from valopt.losses.losscurves.abs import Abscurve as curve_cls
        elif kind == "square":
            from valopt.losses.losscurves.square import Squarecurve as curve_cls
        elif kind == "exp":
            from valopt.losses.losscurves.exp import Expcurve as curve_cls
        else:
            raise ValueError(
                "predictor config error: no %s specified" % side_key)
        return curve_cls(linear_model, cfg[param_key])

    if cfg["loss"] == "squaredloss":
        from valopt.losses.squared_loss import SquaredLoss
        loss = SquaredLoss(linear_model, maxloss=self.max_runtime)
    elif cfg["loss"] == "composite":
        from valopt.losses.composite import CompositeLoss
        leftside = make_curve("leftside", "leftparam")
        rightside = make_curve("rightside", "rightparam")
        # CompositeLoss takes the right curve before the left one.
        loss = CompositeLoss(linear_model, rightside, leftside,
                             cfg["threshold"])
    else:
        raise ValueError("predictor config error: no valid loss specified.")

    if "lambda" in cfg:
        from valopt.losses.regularized_loss import RegularizedLoss
        if cfg["regularization"] == "l1":
            from valopt.losses.regularizations.l1 import L1
            regularizer = L1()
        elif cfg["regularization"] == "l2":
            from valopt.losses.regularizations.l2 import L2
            regularizer = L2()
        else:
            raise ValueError(
                "predictor config error: lambda present and no valid "
                "regularizer specified.")
        loss = RegularizedLoss(linear_model, loss, regularizer, cfg["lambda"])

    if cfg["gd"] == "NAG":
        from valopt.algos.nag import NAG
        self.model = NAG(linear_model, loss, cfg["eta"], verbose=False)
    elif cfg["gd"] == "sNAG":
        from valopt.algos.snag import sNAG
        self.model = sNAG(linear_model, loss, cfg["eta"], verbose=False)
    else:
        # Without this branch self.model would stay undefined and the
        # error would only surface later as an AttributeError.
        raise ValueError(
            "predictor config error: no valid gd algorithm specified.")

    # NOTE(security): the weight expression comes from the configuration
    # and is run through eval(); only use trusted configuration files.
    wstr = compile(cfg["weight"] if cfg["weight"] else "1",
                   "<string>", "eval")

    def weight(job):
        """Evaluate the configured weight expression for ``job``."""
        # m, r and log look unused but are the names the configured
        # expression may refer to; eval() resolves them from this scope.
        m = float(job.num_required_processors)
        r = float(job.actual_run_time)
        log = math.log
        return eval(wstr)

    self.weight = weight
class PredictorSgdlinear(Predictor):
    """Online run-time predictor: linear model over per-user history features.

    ``predict`` builds (or reuses) a feature vector for a job and writes
    ``job.predicted_run_time``; ``fit`` is called when the job ends and
    feeds the stored vector and the actual run time to the optimizer.
    """

    # Number of base features; __init__ enlarges it on the instance when
    # quadratic/cubic product features are enabled.
    n_features = 19

    def __init__(self, options):
        """Build the per-user bookkeeping tables, the loss and the optimizer.

        All settings are read from ``options["scheduler"]["predictor"]``:

        * ``quadratic`` / ``cubic`` -- enable product features and enlarge
          ``self.n_features`` accordingly (``cubic`` is only looked at when
          ``quadratic`` is set).
        * ``max_runtime`` (optional) -- stored as ``self.max_runtime``
          (``False`` when absent), passed to ``SquaredLoss`` as ``maxloss``
          and used by ``predict`` to cap predictions.
        * ``loss`` -- ``"squaredloss"`` or ``"composite"``. A composite
          loss also reads ``leftside``/``leftparam``,
          ``rightside``/``rightparam`` (each side one of ``"abs"``,
          ``"square"``, ``"exp"``) and ``threshold``.
        * ``lambda`` (optional) -- when present, ``regularization`` must be
          ``"l1"`` or ``"l2"``.
        * ``gd`` -- ``"NAG"`` or ``"sNAG"``; ``eta`` is forwarded to it.
        * ``weight`` -- falsy for a constant weight of 1, otherwise a
          Python expression over ``m`` (requested processors), ``r``
          (actual run time) and ``log`` (``math.log``).

        Raises:
            ValueError: on an unknown loss, curve side, regularizer or
                gradient-descent algorithm.
        """
        # Data structures for storing info
        self.user_job_last3 = {}
        self.user_job_last2 = {}
        self.user_job_last1 = {}
        self.user_sum_runtimes = {}
        self.user_sum_cores = {}
        self.user_n_jobs = {}
        self.job_x = {}
        self.user_last_ending = {}

        # Statistics oriented.
        self.last_loss = 0

        cfg = options["scheduler"]["predictor"]

        # Feature-space size. The right-hand sides read the class-level
        # n_features; the assignment then shadows it on the instance.
        if cfg["quadratic"]:
            print("Using predictor with quadratic features")
            self.quadratic = True
            if cfg["cubic"]:
                self.cubic = True
                self.n_features = int(3 * self.n_features
                                      + kPn(self.n_features, 2)
                                      + kPn(self.n_features, 3))
            else:
                self.n_features = int(3 * self.n_features
                                      + 2 * kPn(self.n_features, 2))
                self.cubic = False
        else:
            self.cubic = False
            self.quadratic = False

        # machine learning thing
        linear_model = LinearModel(self.n_features)

        self.max_runtime = cfg.get("max_runtime", False)

        def make_curve(side_key, param_key):
            """Instantiate the loss curve selected by ``cfg[side_key]``."""
            kind = cfg[side_key]
            if kind == "abs":
                from valopt.losses.losscurves.abs import Abscurve as curve_cls
            elif kind == "square":
                from valopt.losses.losscurves.square import Squarecurve as curve_cls
            elif kind == "exp":
                from valopt.losses.losscurves.exp import Expcurve as curve_cls
            else:
                raise ValueError(
                    "predictor config error: no %s specified" % side_key)
            return curve_cls(linear_model, cfg[param_key])

        if cfg["loss"] == "squaredloss":
            from valopt.losses.squared_loss import SquaredLoss
            loss = SquaredLoss(linear_model, maxloss=self.max_runtime)
        elif cfg["loss"] == "composite":
            from valopt.losses.composite import CompositeLoss
            leftside = make_curve("leftside", "leftparam")
            rightside = make_curve("rightside", "rightparam")
            # CompositeLoss takes the right curve before the left one.
            loss = CompositeLoss(linear_model, rightside, leftside,
                                 cfg["threshold"])
        else:
            raise ValueError(
                "predictor config error: no valid loss specified.")

        if "lambda" in cfg:
            from valopt.losses.regularized_loss import RegularizedLoss
            if cfg["regularization"] == "l1":
                from valopt.losses.regularizations.l1 import L1
                regularizer = L1()
            elif cfg["regularization"] == "l2":
                from valopt.losses.regularizations.l2 import L2
                regularizer = L2()
            else:
                raise ValueError(
                    "predictor config error: lambda present and no valid "
                    "regularizer specified.")
            loss = RegularizedLoss(linear_model, loss, regularizer,
                                   cfg["lambda"])

        if cfg["gd"] == "NAG":
            from valopt.algos.nag import NAG
            self.model = NAG(linear_model, loss, cfg["eta"], verbose=False)
        elif cfg["gd"] == "sNAG":
            from valopt.algos.snag import sNAG
            self.model = sNAG(linear_model, loss, cfg["eta"], verbose=False)
        else:
            # Without this branch self.model would stay undefined and the
            # error would only surface later as an AttributeError.
            raise ValueError(
                "predictor config error: no valid gd algorithm specified.")

        # NOTE(security): the weight expression comes from the
        # configuration and is run through eval(); only use trusted
        # configuration files.
        wstr = compile(cfg["weight"] if cfg["weight"] else "1",
                       "<string>", "eval")

        def weight(job):
            """Evaluate the configured weight expression for ``job``."""
            # m, r and log look unused but are the names the configured
            # expression may refer to; eval() resolves them from here.
            m = float(job.num_required_processors)
            r = float(job.actual_run_time)
            log = math.log
            return eval(wstr)

        self.weight = weight

    @staticmethod
    def _capped_last_runtime(previous_job, job, current_time):
        """Return the run-time feature derived from one earlier job.

        Falls back to the user estimate of ``job`` when there is no earlier
        job; the result never exceeds that estimate.
        """
        estimate = job.user_estimated_run_time
        if previous_job is None:
            last = estimate
        elif (previous_job.submit_time + previous_job.actual_run_time
              > current_time):
            last = previous_job.actual_run_time
        else:
            last = current_time - previous_job.submit_time
        return float(min(estimate, last))

    def make_x(self, job, current_time, list_running_jobs):
        """Make a feature vector from a job, the current time and system state.

        Also creates the per-user bookkeeping entries for ``job.user_id``
        the first time that user is seen.

        Layout of the returned list (length ``self.n_features``):

        * x[0]  constant 1 (turns the linear model into an affine one)
        * x[1..3]  run-time features of the user's last three ended jobs
        * x[4]  user estimated run time
        * x[5]  mean of x[1], x[2]
        * x[6]  0.33 * (x[1] + x[2] + x[3]) once three jobs are known
        * x[7]  mean run time of the user's ended jobs
        * x[8]  time since a job of the user last ended
        * x[9]  requested cores divided by the user's mean cores
        * x[10..13]  cores, summed elapsed time, count and longest elapsed
          time of the user's running jobs
        * x[14..17]  cos/sin of time of day and of time of week
        * x[18]  requested cores
        * x[19..]  product features when ``quadratic``/``cubic`` are set;
          unused trailing slots stay 0
        """
        x = [0] * self.n_features
        uid = job.user_id

        # checks on user internal memory
        self.user_job_last1.setdefault(uid, None)
        self.user_job_last2.setdefault(uid, None)
        self.user_job_last3.setdefault(uid, None)
        self.user_sum_cores.setdefault(uid, 0.0)
        self.user_sum_runtimes.setdefault(uid, 0.0)
        self.user_n_jobs.setdefault(uid, 0.0)
        self.user_last_ending.setdefault(uid, 0.0)

        j1 = self.user_job_last1[uid]
        j2 = self.user_job_last2[uid]
        j3 = self.user_job_last3[uid]
        estimate = float(job.user_estimated_run_time)
        cores = float(job.num_required_processors)

        # Turning linear model into affine model
        x[0] = 1.0

        # Last three run times, each capped by the user estimate
        x[1] = self._capped_last_runtime(j1, job, current_time)
        x[2] = self._capped_last_runtime(j2, job, current_time)
        x[3] = self._capped_last_runtime(j3, job, current_time)

        # Required_time (aka user estimated run time)
        x[4] = estimate

        # Moving averages
        if j3 is not None:
            # NOTE(review): 0.33 is kept as in the original; it is only an
            # approximation of 1/3 and changing it would alter the features.
            x[6] = 0.33 * (x[1] + x[2] + x[3])
            x[5] = 0.5 * (x[1] + x[2])
        elif j2 is not None:
            x[5] = 0.5 * (x[1] + x[2])
            x[6] = x[5]
        elif j1 is not None:
            x[5] = x[1]
            x[6] = x[5]
        else:
            x[5] = estimate
            x[6] = x[5]

        # User run time mean
        n_jobs = self.user_n_jobs[uid]
        if n_jobs != 0:
            x[7] = float(self.user_sum_runtimes[uid]) / float(n_jobs)
        else:
            x[7] = 0.0

        # T since last job ending of this user (0.0 means "never ended")
        last_ending = self.user_last_ending[uid]
        if last_ending != 0.0:
            x[8] = float(current_time - last_ending)
        else:
            x[8] = 0.0

        # Ratio of this job's cores to the user's mean cores
        if n_jobs != 0.0:
            coremean = float(self.user_sum_cores[uid]) / float(n_jobs)
            x[9] = cores / coremean
        else:
            x[9] = 0.0

        running_mine = [j for j in list_running_jobs if j.user_id == uid]
        # total cores running by this user
        x[10] = float(sum(j.num_required_processors for j in running_mine))
        # sum of elapsed run time of already running jobs of the user
        lengths_running = [current_time - j.start_to_run_at_time
                           for j in running_mine]
        x[11] = float(sum(lengths_running))
        # amount of jobs of this user already running
        x[12] = float(len(running_mine))
        # length of longest job of user already running
        x[13] = float(max(lengths_running)) if lengths_running else 0.0

        # second of day, mapped onto the unit circle
        sec_of_day = (2.0 * math.pi * float(job.submit_time % (3600 * 24))
                      / (3600.0 * 24.0))
        x[14] = math.cos(sec_of_day)
        x[15] = math.sin(sec_of_day)

        # day of week through seconds, mapped onto the unit circle
        day_of_week = (2.0 * math.pi
                       * float(job.submit_time % (3600 * 24 * 7))
                       / (3600.0 * 24.0 * 7.0))
        x[16] = math.cos(day_of_week)
        x[17] = math.sin(day_of_week)

        # Job cores
        x[18] = cores

        if self.quadratic:
            i = 19
            # NOTE(review): the cross products use x[1:17] (x[17] and x[18]
            # are left out) while the powers below use x[1..18]; kept as in
            # the original because changing it alters the feature layout.
            for a, b in itertools.combinations(x[1:17], 2):
                x[i] = a * b
                i += 1
            for k in range(1, 19):
                x[i] = x[k] * x[k]
                i += 1
            # __init__ only enables cubic together with quadratic.
            if self.cubic:
                for a, b, c in itertools.combinations(x[1:17], 3):
                    x[i] = a * b * c
                    i += 1
                for k in range(1, 19):
                    x[i] = x[k] * x[k] * x[k]
                    i += 1

        return x

    def store_x(self, job, x):
        """Store x for a given job if it is not already stored."""
        if job not in self.job_x:
            self.job_x[job] = x

    def pop_x(self, job):
        """Retrieve x for a given job and delete it from memory.

        Raises ValueError when no vector was stored for ``job``.
        """
        x = self.job_x.pop(job, None)
        if x is None:
            raise ValueError("Predictor internal x memory failed.")
        return x

    def predict(self, job, current_time, list_running_jobs):
        """Modify the predicted_run_time of a job.

        Called when a job is submitted to the system. The prediction is an
        integer of at least 1, never above ``job.user_estimated_run_time``
        and, when ``max_runtime`` is configured, never above that either.
        """
        if job not in self.job_x:
            x = self.make_x(job, current_time, list_running_jobs)
            self.store_x(job, x)
        else:
            x = self.job_x[job]

        # make the prediction
        raw = abs(self.model.predict(x))
        predicted = int(max(1.0, int(raw)))
        predicted = int(min(predicted, job.user_estimated_run_time))
        job.predicted_run_time = predicted
        # max_runtime is False (or 0) when no cap is configured.
        if self.max_runtime:
            job.predicted_run_time = int(
                max(1.0, min(predicted, self.max_runtime)))

    def fit(self, job, current_time):
        """Add a job to the learning algorithm.

        Called when a job ends: updates the per-user history with ``job``
        and fits the model on the feature vector stored by ``predict``.

        Raises ValueError when no feature vector was stored for ``job``.
        """
        # pop x from internal data
        x = self.pop_x(job)
        uid = job.user_id

        # make_x must have created the per-user entries beforehand.
        assert uid in self.user_job_last1
        assert uid in self.user_job_last2
        assert uid in self.user_job_last3
        assert uid in self.user_sum_runtimes
        assert uid in self.user_sum_cores
        assert uid in self.user_n_jobs
        assert uid in self.user_last_ending

        # shift the user's previous run time history
        self.user_job_last3[uid] = self.user_job_last2[uid]
        self.user_job_last2[uid] = self.user_job_last1[uid]
        self.user_job_last1[uid] = job
        self.user_n_jobs[uid] += 1
        self.user_sum_runtimes[uid] += job.actual_run_time
        self.user_sum_cores[uid] += job.num_required_processors
        self.user_last_ending[uid] = current_time

        # fit the model
        self.model.fit(x, job.actual_run_time, w=self.weight(job))
class PredictorSgdlinearSimple(Predictor):
    """Two-feature online run-time predictor.

    The features are the user estimated run time and a value derived from
    the user's last one or two ended jobs.
    """

    # Internal info
    n_features = 2

    def __init__(self, options):
        """Create the per-user history tables and the online linear model.

        ``options`` is read for ``"loss"`` (``"squaredloss"``,
        ``"absloss"`` or ``"weightedsquaredloss"``), ``"eta"`` (forwarded
        to ``NAG``) and the optional ``"max_runtime"`` (``False`` when
        absent).

        Raises ValueError when ``options["loss"]`` names no supported loss.
        """
        # Data structures for storing info
        self.user_job_last3 = {}
        self.user_job_last2 = {}
        self.user_job_last1 = {}
        self.job_x = {}

        # machine learning thing
        linear_model = LinearModel(self.n_features)
        if options["loss"] == "squaredloss":
            from valopt.losses.squared_loss import SquaredLoss
            loss = SquaredLoss(linear_model)
        elif options["loss"] == "absloss":
            from valopt.losses.abs_loss import AbsLoss
            loss = AbsLoss(linear_model)
        elif options["loss"] == "weightedsquaredloss":
            from valopt.losses.weighted_squared_loss import WeightedSquaredLoss
            loss = WeightedSquaredLoss(linear_model)
        else:
            raise ValueError(
                "predictor config error: no valid loss specified.")

        self.max_runtime = options.get("max_runtime", False)

        self.model = NAG(linear_model, loss, options["eta"], verbose=False)

    @staticmethod
    def _last_runtime(previous_job, current_time):
        """Return the run-time value used for an earlier job of the user."""
        if (previous_job.submit_time + previous_job.actual_run_time
                > current_time):
            return previous_job.actual_run_time
        return current_time - previous_job.submit_time

    def make_x(self, job, current_time, list_running_jobs):
        """Make a feature vector from a job and the current time.

        ``list_running_jobs`` is accepted but not used by this predictor.

        Returns a float32 array where x[0] is the user estimated run time
        and x[1] is the mean of the last two run-time values (or the last
        one, or the estimate when there is no history), capped by the
        estimate.
        """
        x = np.empty(self.n_features, dtype=np.float32)
        uid = job.user_id

        # checks on user internal memory
        self.user_job_last1.setdefault(uid, None)
        self.user_job_last2.setdefault(uid, None)
        self.user_job_last3.setdefault(uid, None)

        estimate = job.user_estimated_run_time
        j1 = self.user_job_last1[uid]
        j2 = self.user_job_last2[uid]

        # Required_time (aka user estimated run time)
        x[0] = estimate

        # Moving averages
        if j2 is not None:
            # fit() shifts the history, so j1 is set whenever j2 is.
            last1 = self._last_runtime(j1, current_time)
            last2 = self._last_runtime(j2, current_time)
            # Divide by 2.0 so integer inputs are not truncated before the
            # float conversion under Python 2.
            average = (last1 + last2) / 2.0
            x[1] = min(estimate, average)
        elif j1 is not None:
            x[1] = min(estimate, self._last_runtime(j1, current_time))
        else:
            x[1] = estimate

        return x

    def store_x(self, job, x):
        """Store x for a given job if it is not already stored."""
        if job not in self.job_x:
            self.job_x[job] = x

    def pop_x(self, job):
        """Retrieve x for a given job and delete it from memory.

        Raises ValueError when no vector was stored for ``job``.
        """
        # A None sentinel is used because x is a NumPy array: comparing an
        # array with [] is an elementwise operation, not a "missing" test.
        x = self.job_x.pop(job, None)
        if x is None:
            raise ValueError("Predictor internal x memory failed.")
        return x

    def predict(self, job, current_time, list_running_jobs):
        """Modify the predicted_run_time of a job.

        Called when a job is submitted to the system.
        """
        x = self.make_x(job, current_time, list_running_jobs)
        self.store_x(job, x)

        # make the prediction
        job.predicted_run_time = abs(self.model.predict(x))
        # max_runtime is False (or 0) when no cap is configured.
        if self.max_runtime:
            job.predicted_run_time = min(job.predicted_run_time,
                                         self.max_runtime)

    def fit(self, job, current_time):
        """Add a job to the learning algorithm.

        Called when a job ends: shifts the user's job history and fits the
        model on the feature vector stored by ``predict``.

        Raises ValueError when no feature vector was stored for ``job``.
        """
        # pop x from internal data
        x = self.pop_x(job)
        uid = job.user_id

        # make_x must have created the per-user entries beforehand.
        assert uid in self.user_job_last1
        assert uid in self.user_job_last2
        assert uid in self.user_job_last3

        # shift the user's previous run time history
        self.user_job_last3[uid] = self.user_job_last2[uid]
        self.user_job_last2[uid] = self.user_job_last1[uid]
        self.user_job_last1[uid] = job

        # NOTE(review): min(1, cores) is 1 for any job with at least one
        # core, so the core count never influences p, and it divides by
        # zero for 0 cores -- max(1, cores) was probably intended. Kept
        # unchanged because it alters training; confirm with the authors.
        scale = min(1, job.num_required_processors)
        self.model.fit(x, job.actual_run_time,
                       p=10 * np.log(1 + (job.actual_run_time / scale)))
class PredictorSgdlinearSimple(Predictor):
    """Two-feature online run-time predictor.

    The features are the user estimated run time and a value derived from
    the user's last one or two ended jobs.
    """

    # Internal info
    n_features = 2

    def __init__(self, options):
        """Create the per-user history tables and the online linear model.

        ``options`` is read for ``"loss"`` (``"squaredloss"``,
        ``"absloss"`` or ``"weightedsquaredloss"``), ``"eta"`` (forwarded
        to ``NAG``) and the optional ``"max_runtime"`` (``False`` when
        absent).

        Raises ValueError when ``options["loss"]`` names no supported loss.
        """
        # Data structures for storing info
        self.user_job_last3 = {}
        self.user_job_last2 = {}
        self.user_job_last1 = {}
        self.job_x = {}

        # machine learning thing
        linear_model = LinearModel(self.n_features)
        if options["loss"] == "squaredloss":
            from valopt.losses.squared_loss import SquaredLoss
            loss = SquaredLoss(linear_model)
        elif options["loss"] == "absloss":
            from valopt.losses.abs_loss import AbsLoss
            loss = AbsLoss(linear_model)
        elif options["loss"] == "weightedsquaredloss":
            from valopt.losses.weighted_squared_loss import WeightedSquaredLoss
            loss = WeightedSquaredLoss(linear_model)
        else:
            raise ValueError(
                "predictor config error: no valid loss specified.")

        self.max_runtime = options.get("max_runtime", False)

        self.model = NAG(linear_model, loss, options["eta"], verbose=False)

    @staticmethod
    def _last_runtime(previous_job, current_time):
        """Return the run-time value used for an earlier job of the user."""
        if (previous_job.submit_time + previous_job.actual_run_time
                > current_time):
            return previous_job.actual_run_time
        return current_time - previous_job.submit_time

    def make_x(self, job, current_time, list_running_jobs):
        """Make a feature vector from a job and the current time.

        ``list_running_jobs`` is accepted but not used by this predictor.

        Returns a float32 array where x[0] is the user estimated run time
        and x[1] is the mean of the last two run-time values (or the last
        one, or the estimate when there is no history), capped by the
        estimate.
        """
        x = np.empty(self.n_features, dtype=np.float32)
        uid = job.user_id

        # checks on user internal memory
        self.user_job_last1.setdefault(uid, None)
        self.user_job_last2.setdefault(uid, None)
        self.user_job_last3.setdefault(uid, None)

        estimate = job.user_estimated_run_time
        j1 = self.user_job_last1[uid]
        j2 = self.user_job_last2[uid]

        # Required_time (aka user estimated run time)
        x[0] = estimate

        # Moving averages
        if j2 is not None:
            # fit() shifts the history, so j1 is set whenever j2 is.
            last1 = self._last_runtime(j1, current_time)
            last2 = self._last_runtime(j2, current_time)
            # Divide by 2.0 so integer inputs are not truncated before the
            # float conversion under Python 2.
            average = (last1 + last2) / 2.0
            x[1] = min(estimate, average)
        elif j1 is not None:
            x[1] = min(estimate, self._last_runtime(j1, current_time))
        else:
            x[1] = estimate

        return x

    def store_x(self, job, x):
        """Store x for a given job if it is not already stored."""
        if job not in self.job_x:
            self.job_x[job] = x

    def pop_x(self, job):
        """Retrieve x for a given job and delete it from memory.

        Raises ValueError when no vector was stored for ``job``.
        """
        # A None sentinel is used because x is a NumPy array: comparing an
        # array with [] is an elementwise operation, not a "missing" test.
        x = self.job_x.pop(job, None)
        if x is None:
            raise ValueError("Predictor internal x memory failed.")
        return x

    def predict(self, job, current_time, list_running_jobs):
        """Modify the predicted_run_time of a job.

        Called when a job is submitted to the system.
        """
        x = self.make_x(job, current_time, list_running_jobs)
        self.store_x(job, x)

        # make the prediction
        job.predicted_run_time = abs(self.model.predict(x))
        # max_runtime is False (or 0) when no cap is configured.
        if self.max_runtime:
            job.predicted_run_time = min(job.predicted_run_time,
                                         self.max_runtime)

    def fit(self, job, current_time):
        """Add a job to the learning algorithm.

        Called when a job ends: shifts the user's job history and fits the
        model on the feature vector stored by ``predict``.

        Raises ValueError when no feature vector was stored for ``job``.
        """
        # pop x from internal data
        x = self.pop_x(job)
        uid = job.user_id

        # make_x must have created the per-user entries beforehand.
        assert uid in self.user_job_last1
        assert uid in self.user_job_last2
        assert uid in self.user_job_last3

        # shift the user's previous run time history
        self.user_job_last3[uid] = self.user_job_last2[uid]
        self.user_job_last2[uid] = self.user_job_last1[uid]
        self.user_job_last1[uid] = job

        # NOTE(review): min(1, cores) is 1 for any job with at least one
        # core, so the core count never influences p, and it divides by
        # zero for 0 cores -- max(1, cores) was probably intended. Kept
        # unchanged because it alters training; confirm with the authors.
        scale = min(1, job.num_required_processors)
        self.model.fit(x, job.actual_run_time,
                       p=10 * np.log(1 + (job.actual_run_time / scale)))
def __init__(self, options):
    """Build the per-user bookkeeping tables, the loss and the optimizer.

    All settings are read from ``options["scheduler"]["predictor"]``:

    * ``quadratic`` / ``cubic`` -- enable product features and enlarge
      ``self.n_features`` accordingly (``cubic`` is only looked at when
      ``quadratic`` is set).
    * ``max_runtime`` (optional) -- stored as ``self.max_runtime``
      (``False`` when absent) and passed to ``SquaredLoss`` as
      ``maxloss``.
    * ``loss`` -- ``"squaredloss"`` or ``"composite"``. A composite loss
      also reads ``leftside``/``leftparam``, ``rightside``/``rightparam``
      (each side one of ``"abs"``, ``"square"``, ``"exp"``) and
      ``threshold``.
    * ``lambda`` (optional) -- when present, ``regularization`` must be
      ``"l1"`` or ``"l2"``.
    * ``gd`` -- ``"NAG"`` or ``"sNAG"``; ``eta`` is forwarded to it.
    * ``weight`` -- falsy for a constant weight of 1, otherwise a Python
      expression over ``m`` (requested processors), ``r`` (actual run
      time) and ``log`` (``math.log``).

    Raises:
        ValueError: on an unknown loss, curve side, regularizer or
            gradient-descent algorithm.
    """
    # Data structures for storing info
    self.user_job_last3 = {}
    self.user_job_last2 = {}
    self.user_job_last1 = {}
    self.user_sum_runtimes = {}
    self.user_sum_cores = {}
    self.user_n_jobs = {}
    self.job_x = {}
    self.user_last_ending = {}

    # Statistics oriented.
    self.last_loss = 0

    cfg = options["scheduler"]["predictor"]

    # Feature-space size. The right-hand sides read the class-level
    # n_features; the assignment then shadows it on the instance.
    if cfg["quadratic"]:
        print("Using predictor with quadratic features")
        self.quadratic = True
        if cfg["cubic"]:
            self.cubic = True
            self.n_features = int(3 * self.n_features
                                  + kPn(self.n_features, 2)
                                  + kPn(self.n_features, 3))
        else:
            self.n_features = int(3 * self.n_features
                                  + 2 * kPn(self.n_features, 2))
            self.cubic = False
    else:
        self.cubic = False
        self.quadratic = False

    # machine learning thing
    linear_model = LinearModel(self.n_features)

    self.max_runtime = cfg.get("max_runtime", False)

    def make_curve(side_key, param_key):
        """Instantiate the loss curve selected by ``cfg[side_key]``."""
        kind = cfg[side_key]
        if kind == "abs":
            from valopt.losses.losscurves.abs import Abscurve as curve_cls
        elif kind == "square":
            from valopt.losses.losscurves.square import Squarecurve as curve_cls
        elif kind == "exp":
            from valopt.losses.losscurves.exp import Expcurve as curve_cls
        else:
            raise ValueError(
                "predictor config error: no %s specified" % side_key)
        return curve_cls(linear_model, cfg[param_key])

    if cfg["loss"] == "squaredloss":
        from valopt.losses.squared_loss import SquaredLoss
        loss = SquaredLoss(linear_model, maxloss=self.max_runtime)
    elif cfg["loss"] == "composite":
        from valopt.losses.composite import CompositeLoss
        leftside = make_curve("leftside", "leftparam")
        rightside = make_curve("rightside", "rightparam")
        # CompositeLoss takes the right curve before the left one.
        loss = CompositeLoss(linear_model, rightside, leftside,
                             cfg["threshold"])
    else:
        raise ValueError("predictor config error: no valid loss specified.")

    if "lambda" in cfg:
        from valopt.losses.regularized_loss import RegularizedLoss
        if cfg["regularization"] == "l1":
            from valopt.losses.regularizations.l1 import L1
            regularizer = L1()
        elif cfg["regularization"] == "l2":
            from valopt.losses.regularizations.l2 import L2
            regularizer = L2()
        else:
            raise ValueError(
                "predictor config error: lambda present and no valid "
                "regularizer specified.")
        loss = RegularizedLoss(linear_model, loss, regularizer, cfg["lambda"])

    if cfg["gd"] == "NAG":
        from valopt.algos.nag import NAG
        self.model = NAG(linear_model, loss, cfg["eta"], verbose=False)
    elif cfg["gd"] == "sNAG":
        from valopt.algos.snag import sNAG
        self.model = sNAG(linear_model, loss, cfg["eta"], verbose=False)
    else:
        # Without this branch self.model would stay undefined and the
        # error would only surface later as an AttributeError.
        raise ValueError(
            "predictor config error: no valid gd algorithm specified.")

    # NOTE(security): the weight expression comes from the configuration
    # and is run through eval(); only use trusted configuration files.
    wstr = compile(cfg["weight"] if cfg["weight"] else "1",
                   "<string>", "eval")

    def weight(job):
        """Evaluate the configured weight expression for ``job``."""
        # m, r and log look unused but are the names the configured
        # expression may refer to; eval() resolves them from this scope.
        m = float(job.num_required_processors)
        r = float(job.actual_run_time)
        log = math.log
        return eval(wstr)

    self.weight = weight
class PredictorSgdlinear(Predictor): #Internal info n_features = 19 def __init__(self, options): #Data structures for storing info self.user_job_last3 = {} self.user_job_last2 = {} self.user_job_last1 = {} self.user_sum_runtimes = {} self.user_sum_cores = {} self.user_n_jobs = {} self.job_x = {} self.user_last_ending = {} #Statistics oriented. self.last_loss = 0 if options["scheduler"]["predictor"]["quadratic"]: print("Using predictor with quadratic features") self.quadratic = True if options["scheduler"]["predictor"]["cubic"]: self.cubic = True self.n_features = int(3 * self.n_features + kPn(self.n_features, 2) + kPn(self.n_features, 3)) else: self.n_features = int(3 * self.n_features + 2 * kPn(self.n_features, 2)) self.cubic = False else: self.cubic = False self.quadratic = False #machine learning thing m = LinearModel(self.n_features) if "max_runtime" in options["scheduler"]["predictor"].keys(): self.max_runtime = options["scheduler"]["predictor"]["max_runtime"] else: self.max_runtime = False if options["scheduler"]["predictor"]["loss"] == "squaredloss": from valopt.losses.squared_loss import SquaredLoss l = SquaredLoss(m, maxloss=self.max_runtime) elif options["scheduler"]["predictor"]["loss"] == "composite": from valopt.losses.composite import CompositeLoss if options["scheduler"]["predictor"]["leftside"] == "abs": from valopt.losses.losscurves.abs import Abscurve leftside = Abscurve( m, options["scheduler"]["predictor"]["leftparam"]) elif options["scheduler"]["predictor"]["leftside"] == "square": from valopt.losses.losscurves.square import Squarecurve leftside = Squarecurve( m, options["scheduler"]["predictor"]["leftparam"]) elif options["scheduler"]["predictor"]["leftside"] == "exp": from valopt.losses.losscurves.exp import Expcurve leftside = Expcurve( m, options["scheduler"]["predictor"]["leftparam"]) else: raise ValueError( "predictor config error: no leftside specified") if options["scheduler"]["predictor"]["rightside"] == "abs": from 
valopt.losses.losscurves.abs import Abscurve rightside = Abscurve( m, options["scheduler"]["predictor"]["rightparam"]) elif options["scheduler"]["predictor"]["rightside"] == "square": from valopt.losses.losscurves.square import Squarecurve rightside = Squarecurve( m, options["scheduler"]["predictor"]["rightparam"]) elif options["scheduler"]["predictor"]["rightside"] == "exp": from valopt.losses.losscurves.exp import Expcurve rightside = Expcurve( m, options["scheduler"]["predictor"]["rightparam"]) else: raise ValueError( "predictor config error: no rightside specified") l = CompositeLoss(m, rightside, leftside, options["scheduler"]["predictor"]["threshold"]) else: raise ValueError( "predictor config error: no valid loss specified.") if "lambda" in options["scheduler"]["predictor"].keys(): if options["scheduler"]["predictor"]["regularization"] == "l1": from valopt.losses.regularizations.l1 import L1 from valopt.losses.regularized_loss import RegularizedLoss l = RegularizedLoss( m, l, L1(), options["scheduler"]["predictor"]["lambda"]) elif options["scheduler"]["predictor"]["regularization"] == "l2": from valopt.losses.regularizations.l2 import L2 from valopt.losses.regularized_loss import RegularizedLoss l = RegularizedLoss( m, l, L2(), options["scheduler"]["predictor"]["lambda"]) else: raise ValueError( "predictor config error: lambda present and no valid regularizer specified." 
) if options["scheduler"]["predictor"]["gd"] == "NAG": from valopt.algos.nag import NAG self.model = NAG(m, l, options["scheduler"]["predictor"]["eta"], verbose=False) elif options["scheduler"]["predictor"]["gd"] == "sNAG": from valopt.algos.snag import sNAG self.model = sNAG(m, l, options["scheduler"]["predictor"]["eta"], verbose=False) if not options["scheduler"]["predictor"]["weight"]: wstr = compile("1", "<string>", "eval") else: wstr = compile(options["scheduler"]["predictor"]["weight"], "<string>", "eval") def weight(job): m = float(job.num_required_processors) r = float(job.actual_run_time) #log=np.log log = math.log return eval(wstr) self.weight = weight def make_x(self, job, current_time, list_running_jobs): """Make a vector from a job. requires job, current time and system state.""" #x=np.empty(self.n_features,dtype=np.float32) x = [0] * self.n_features #checks on user internal memory if not self.user_job_last1.has_key(job.user_id): self.user_job_last1[job.user_id] = None if not self.user_job_last2.has_key(job.user_id): self.user_job_last2[job.user_id] = None if not self.user_job_last3.has_key(job.user_id): self.user_job_last3[job.user_id] = None if not self.user_sum_cores.has_key(job.user_id): self.user_sum_cores[job.user_id] = 0.0 if not self.user_sum_runtimes.has_key(job.user_id): self.user_sum_runtimes[job.user_id] = 0.0 if not self.user_n_jobs.has_key(job.user_id): self.user_n_jobs[job.user_id] = 0.0 if not self.user_last_ending.has_key(job.user_id): self.user_last_ending[job.user_id] = 0.0 #TODO:make x #x[0] is 1 #x[1] is last user run time #x[2] is last user run time2 #x[3] is last user run time3 #x[4] is user request #x[5] is moving average(3) #x[6] is moving average(2) #x[7] is user runtime mean #x[8] is time since last time a job of the user ended. 
#Turning linear model into affine model x[0] = 1.0 #Last runtime if self.user_job_last1[job.user_id] != None: j1 = self.user_job_last1[job.user_id] if j1.submit_time + j1.actual_run_time > current_time: last = float(j1.actual_run_time) else: last = float(current_time - j1.submit_time) else: last = float(job.user_estimated_run_time) x[1] = float(min(job.user_estimated_run_time, last)) #Last runtime2 if self.user_job_last2[job.user_id] != None: j2 = self.user_job_last2[job.user_id] if j2.submit_time + j2.actual_run_time > current_time: last = j2.actual_run_time else: last = current_time - j2.submit_time else: last = job.user_estimated_run_time x[2] = float(min(job.user_estimated_run_time, last)) #Last runtime3 if self.user_job_last3[job.user_id] != None: j3 = self.user_job_last3[job.user_id] if j3.submit_time + j3.actual_run_time > current_time: last = j3.actual_run_time else: last = current_time - j3.submit_time else: last = job.user_estimated_run_time x[3] = float(min(job.user_estimated_run_time, last)) #Required_time (aka user estimated run time) x[4] = float(job.user_estimated_run_time) #Moving averages if self.user_job_last3[job.user_id] != None: x[6] = 0.33 * (x[1] + x[2] + x[3]) x[5] = 0.5 * (x[1] + x[2]) elif self.user_job_last2[job.user_id] != None: x[5] = 0.5 * (x[1] + x[2]) x[6] = x[5] elif self.user_job_last1[job.user_id] != None: x[5] = x[1] x[6] = x[5] else: x[5] = float(job.user_estimated_run_time) x[6] = x[5] #User run time mean if not self.user_n_jobs[job.user_id] == 0: x[7] = float(self.user_sum_runtimes[job.user_id]) / float( self.user_n_jobs[job.user_id]) #print "ifed" #print x[7] else: x[7] = 0.0 #print "elsed" #T since Last job ending of this user if not self.user_last_ending[job.user_id] == 0.0: x[8] = float(current_time - self.user_last_ending[job.user_id]) else: x[8] = 0.0 #Ratio of Cores from user mean to this one. 
#User cores mean if not self.user_n_jobs[job.user_id] == 0.0: coremean = float(self.user_sum_cores[job.user_id]) / float( self.user_n_jobs[job.user_id]) x[9] = float(job.num_required_processors) / coremean else: x[9] = 0.0 running_mine = [ j for j in list_running_jobs if j.user_id == job.user_id ] #total cores running by this user x[10] = float(sum([j.num_required_processors for j in running_mine])) #sum of runtime of already running jobs of the user lengths_running = [ current_time - j.start_to_run_at_time for j in running_mine ] x[11] = float(sum(lengths_running)) #amount of jobs of this user already running x[12] = float(len(running_mine)) #length of longest job of user already running if len(lengths_running) == 0: x[13] = 0.0 else: x[13] = float(max(lengths_running)) #second of day sec_of_day = 2.0 * math.pi * float(job.submit_time % (3600 * 24)) / (3600.0 * 24.0) #cos second of day x[14] = math.cos(sec_of_day) #sin second of day x[15] = math.sin(sec_of_day) # #day of week trough seconds: day_of_week = 2.0 * math.pi * float( job.submit_time % (3600 * 24 * 7)) / (3600.0 * 24.0 * 7.0) #cos day of week x[16] = math.cos(day_of_week) # #sin day of week x[17] = math.sin(day_of_week) # #Job cores x[18] = float(job.num_required_processors) if self.quadratic: i = 19 for a, b in itertools.combinations(x[1:17], 2): x[i] = a * b i += 1 for k in range(1, 19): x[i] = x[k] * x[k] i += 1 #for k in range(1,19): #x[i]=1/max(0.001,x[k]) #i+=1 #for a,b in itertools.combinations(x[1:17],2): #x[i]=1/max(0.001,a*b) #i+=1 if self.cubic: for a, b, c in itertools.combinations(x[1:17], 3): x[i] = a * b * c i += 1 for k in range(1, 19): x[i] = x[k] * x[k] * x[k] i += 1 #if self.cubic: #for a,b,c in itertools.combinations(x[0:17],3): #x[i]=a*b*c #i+=1 return x def store_x(self, job, x): """store x for a given job if its not already stored""" if job not in self.job_x: self.job_x[job] = x def pop_x(self, job): """retrieve x for a given job and delete it from memory""" x = self.job_x.pop(job, 
[]) if x == []: raise ValueError("Predictor internal x memory failed.") return x def predict(self, job, current_time, list_running_jobs): """ Modify the predicted_run_time of a job. Called when a job is submitted to the system. """ if not job in self.job_x: #make x x = self.make_x(job, current_time, list_running_jobs) #store x self.store_x(job, x) else: x = self.job_x[job] #make the prediction fff = abs(self.model.predict(x)) job.predicted_run_time = int(max(1.0, int(fff))) job.predicted_run_time = int( min(job.predicted_run_time, job.user_estimated_run_time)) if not self.max_runtime == False: job.predicted_run_time = int( max(1.0, min(job.predicted_run_time, self.max_runtime))) #return self.model.loss.loss(x,job.actual_run_time,self.weight(job)) def fit(self, job, current_time): """ Add a job to the learning algorithm. Called when a job end. """ #pop x from internal data x = self.pop_x(job) #updating our data #store user previous run time history assert self.user_job_last1.has_key(job.user_id) == True assert self.user_job_last2.has_key(job.user_id) == True assert self.user_job_last3.has_key(job.user_id) == True assert self.user_sum_runtimes.has_key(job.user_id) == True assert self.user_sum_cores.has_key(job.user_id) == True assert self.user_n_jobs.has_key(job.user_id) == True assert self.user_last_ending.has_key(job.user_id) == True self.user_job_last3[job.user_id] = self.user_job_last2[job.user_id] self.user_job_last2[job.user_id] = self.user_job_last1[job.user_id] self.user_job_last1[job.user_id] = job self.user_n_jobs[job.user_id] += 1 self.user_sum_runtimes[job.user_id] += job.actual_run_time self.user_sum_cores[job.user_id] += job.num_required_processors self.user_last_ending[job.user_id] = current_time #fit the model self.model.fit(x, job.actual_run_time, w=self.weight(job))