def validate_resource_name(resource_name):
    """Check that resource_name is usable as the name of a new resource.

    A name is rejected when it is blank, when a configuration directory
    with that name already exists under resources_dir, or when a row with
    that name is already present in the mod_appkernel ``resource`` table
    or the mod_akrr ``resources`` table.  Every rejection is logged.

    Returns True when the name is free, False otherwise.
    """
    if not resource_name.strip():
        logging.error("Bad name for resource, try a different name")
        return False

    # the configuration directory must not exist yet
    file_path = os.path.abspath(os.path.join(resources_dir, resource_name))
    if os.path.exists(file_path):
        logging.error("Resource configuration directory (%s) for resource with name %s already present on file system, try a different name" % (file_path, resource_name))
        return False

    def already_registered(cursor, query):
        # True when the query returns at least one row for this name
        cursor.execute(query, (resource_name,))
        return len(cursor.fetchall()) != 0

    # the name must not be registered in mod_appkernel
    dbAK, curAK = akrr.getAKDB(True)
    if already_registered(curAK, '''SELECT * FROM resource WHERE nickname=%s'''):
        logging.error("Resource with name %s already present in mod_appkernel DB, try a different name" % (resource_name,))
        return False

    # the name must not be registered in mod_akrr
    db, cur = akrr.getDB(True)
    if already_registered(cur, '''SELECT * FROM resources WHERE name=%s'''):
        logging.error("Resource with name %s already present in mod_akrr DB, try a different name" % (resource_name,))
        return False

    return True
def UpdateSubTasks(self):
    """Force a re-check of the subtasks of this task.

    Sets next_check_time to the current time for every subtask returned by
    GetSubTaskInfo() and commits the change.
    """
    subtasks = self.GetSubTaskInfo()

    update_sql = '''UPDATE ACTIVETASKS SET next_check_time=%s WHERE task_id=%s ;'''
    db, cur = akrr.getDB()
    for subtask in subtasks:
        # each record is [task_id, status, datetimestamp, resource, app, task_param]
        subtask_id = subtask[0]
        cur.execute(update_sql, (datetime.datetime.today(), subtask_id))

    db.commit()
    cur.close()
    del db
def GetSubTaskInfo(self):
    """Collect the active tasks whose master task is this task.

    Returns a list of
    [task_id, status, datetimestamp, resource, app, task_param] lists,
    ordered by task_id, where task_param is the evaluated parameter
    dictionary of the subtask.
    """
    db, cur = akrr.getDB()

    # SQL only pre-filters on text; the exact masterTaskID match is done below
    task_id_pattern = "%%%d%%" % (self.task_id,)
    cur.execute('''SELECT task_id,status,datetimestamp,resource,app,task_param FROM ACTIVETASKS WHERE task_param LIKE %s AND task_param LIKE '%%masterTaskID%%' ORDER BY task_id ASC ''', (task_id_pattern,))
    rows = cur.fetchall()

    subTaskInfo = []
    for row in rows:
        task_id, status, datetimestamp, resource, app, raw_param = row
        # NOTE(review): eval() executes whatever is stored in the task_param
        # column; confirm that only trusted code writes to ACTIVETASKS.
        params = eval(raw_param)
        if params['masterTaskID'] != self.task_id:
            continue
        subTaskInfo.append([task_id, status, datetimestamp, resource, app, params])

    cur.close()
    del db
    return subTaskInfo
def generate_resource_config(resource_id, resource_name, queuing_system):
    """Create the configuration file and database entries of a new resource.

    Loads the slurm and pbs queue templates, creates the resource directory
    (skipped when args.test is set), stores the path of resource.inp.py in
    the module-level resource_cfg_filename and hands the template selected
    by queuing_system to create_resource_template().  Unless args.test is
    set, the resource is then registered in the mod_appkernel ``resource``
    table and the mod_akrr ``resources`` table if it is not there yet.
    """
    global resource_cfg_filename

    logging.info("Initiating %s at AKRR" % (resource_name,))

    # both templates are always loaded, whichever scheduler is requested
    queues = {}
    for scheduler in ('slurm', 'pbs'):
        template_path = os.path.join(akrr.curdir, 'templates', 'template.{0}.inp.py')
        queues[scheduler] = retrieve_queue_template(template_path, scheduler)

    if not args.test:
        os.mkdir(os.path.join(resources_dir, resource_name), 0o700)

    file_path = os.path.abspath(os.path.join(resources_dir, resource_name, 'resource.inp.py'))
    resource_cfg_filename = file_path

    create_resource_template(file_path, queues[queuing_system], queues[queuing_system])

    if not args.test:
        select_ak_resource = '''SELECT * FROM resource WHERE nickname=%s'''

        # add entry to mod_appkernel.resource
        dbAK, curAK = akrr.getAKDB(True)
        curAK.execute(select_ak_resource, (resource_name,))
        if len(curAK.fetchall()) == 0:
            curAK.execute('''INSERT INTO resource (resource,nickname,description,enabled,visible,xdmod_resource_id) VALUES(%s,%s,%s,0,0,%s);''', (resource_name, resource_name, resource_name, resource_id))
            dbAK.commit()

        # read the row back to learn the id used by mod_appkernel
        curAK.execute(select_ak_resource, (resource_name,))
        ak_rows = curAK.fetchall()
        resource_id_in_AKDB = ak_rows[0]['resource_id']

        # add entry to mod_akrr.resources
        db, cur = akrr.getDB(True)
        cur.execute('''SELECT * FROM resources WHERE name=%s''', (resource_name,))
        if len(cur.fetchall()) == 0:
            cur.execute('''INSERT INTO resources (id,xdmod_resource_id,name,enabled) VALUES(%s,%s,%s,%s);''', (resource_id_in_AKDB, resource_id, resource_name, 0))
            db.commit()

            logging.info("Resource configuration is in " + file_path)
def CreateBatchJobScriptAndSubmitIt(self):
    """Generate the bundle job script, upload it and submit it to the queue.

    The method opens an ssh session to the resource, creates the remote
    directories, assembles the template variables (resource and app
    settings, the default walltime stored in the DB, resource and app
    parameters), stacks the job scripts of all subtasks into
    ``subTasksExecution``, renders ``batchJobTemplate``, copies the script
    to the resource and submits it.  The job id reported by the scheduler
    is written to ``job.id`` in the task directory and copied to every
    subtask directory.

    Returns:
        datetime.timedelta of one minute on success.  On failure either a
        three second timedelta (too many failed submissions, the task is
        switched to "PushToDB") or akrr.RepeateAfterFailsToSubmitToTheQueue.

    No exception leaves the method unless the error handler itself fails;
    the failure is recorded in self.status and self.statusinfo.
    """
    self.JobScriptName = self.appName + ".job"
    print("### Creating batch job script and submitting it to remote machine")

    sh = None
    try:
        sh = akrr.sshResource(self.resource)

        def CheckAndCreateDir(self, sh, d):
            """Create remote directory d if needed and verify that it exists."""
            cmd = "if [ ! -d \"%s\" ]\n then mkdir \"%s\"\n fi" % (d, d)
            akrr.sshCommand(sh, cmd)
            cmd = "if [ -d \"%s\" ]\n then \necho EXIST\n else echo DOESNOTEXIST\n fi" % (d)
            msg = akrr.sshCommand(sh, cmd)
            if msg.find("DOESNOTEXIST") >= 0:
                raise akrr.akrrError(akrr.ERROR_REMOTE_FILES, "Can not create directory %s on %s." % (d, self.resource['name']))

        # akrrdata, then the directory of the app, then the one of the task
        CheckAndCreateDir(self, sh, self.resource['akrrdata'])
        CheckAndCreateDir(self, sh, os.path.join(self.resource['akrrdata'], self.appName))
        CheckAndCreateDir(self, sh, self.remoteTaskDir)

        # all following remote commands run inside remoteTaskDir
        akrr.sshCommand(sh, "cd %s" % (self.remoteTaskDir))

        # Default walltime from the DB.  This lookup is best effort: when it
        # fails the walllimit from the configuration is used, so errors are
        # deliberately not propagated.
        dbdefaults = {}
        try:
            db, cur = akrr.getDB()
            try:
                cur.execute('''SELECT resource,app,resource_param,app_param FROM ACTIVETASKS WHERE task_id=%s ;''', (self.task_id,))
                raw = cur.fetchall()
                (resource, app, resource_param, app_param) = raw[0]

                cur.execute("""SELECT walllimit FROM akrr_default_walllimit WHERE resource=%s AND app=%s AND resource_param=%s AND app_param=%s """, (resource, app, resource_param, app_param))
                raw = cur.fetchall()
                if len(raw) > 0:
                    dbdefaults['walllimit'] = raw[0][0]
            finally:
                # release the cursor even when a query fails
                cur.close()
                del db
        except Exception:
            pass

        # later dictionaries override earlier ones
        batchvars = {}
        for di in [self.resource, self.app, dbdefaults, self.resourceParam, self.appParam]:
            batchvars.update(di)

        # stack the subtasks
        subTaskInfo = self.GetSubTaskInfo()
        if batchvars['shuffleSubtasks']:
            random.shuffle(subTaskInfo)

        subTasksExecution = ""
        for subtask_id, subtask_status, subtask_datetimestamp, subtask_resource, subtask_app, subtask_task_param in subTaskInfo:
            remoteSubTaskDir = self.GetRemoteTaskDir(self.resource['akrrdata'], subtask_app, subtask_datetimestamp)
            SubTaskJobScriptName = self.GetJobScriptName(subtask_app)
            SubTaskJobScriptPath = os.path.join(remoteSubTaskDir, SubTaskJobScriptName)

            subTasksExecution += "cd " + remoteSubTaskDir + "\n"
            subTasksExecution += "echo Starting " + subtask_app + "\n"
            subTasksExecution += self.resource['shell'] + " " + SubTaskJobScriptPath + " > stdout 2> stderr\n"
            subTasksExecution += "echo Done with " + subtask_app + "\n" + "\n"
        batchvars['subTasksExecution'] = subTasksExecution

        # calculate NNodes and NCores; the node count is rounded up when
        # only the core count is given
        if 'nnodes' in batchvars:
            tmpNNodes = batchvars['nnodes']
            tmpNCores = tmpNNodes * batchvars['ppn']
        else:
            tmpNCores = batchvars['ncores']
            tmpNNodes = tmpNCores // batchvars['ppn']
            if tmpNCores % batchvars['ppn'] != 0:
                tmpNNodes += 1

        batchvars['akrrNCores'] = tmpNCores
        batchvars['akrrNNodes'] = tmpNNodes

        # set batchvars remaps
        batchvars['akrrPPN'] = batchvars['ppn']
        batchvars['akrrNCoresToBorder'] = batchvars['akrrPPN'] * batchvars['akrrNNodes']
        batchvars['akrrTaskWorkingDir'] = self.remoteTaskDir
        # walllimit is divided by 60 to get the HH:MM:00 form
        wall_hours, wall_minutes = divmod(int(batchvars['walllimit']), 60)
        batchvars['akrrWallTimeLimit'] = "%02d:%02d:00" % (wall_hours, wall_minutes)
        batchvars['localPATH'] = akrr.sshCommand(sh, "echo $PATH").strip()
        batchvars['akrrAppKerName'] = self.app['name']
        batchvars['akrrResourceName'] = self.resource['name']
        batchvars['akrrTimeStamp'] = self.timeStamp
        if batchvars['akrrNNodes'] == 1:
            batchvars['akrrPPN4NodesOrCores4OneNode'] = batchvars['akrrNCores']
        else:
            batchvars['akrrPPN4NodesOrCores4OneNode'] = batchvars['akrrPPN']
        if 'nodeListSetterTemplate' not in batchvars:
            batchvars['nodeListSetterTemplate'] = batchvars['nodeListSetter'][batchvars['batchScheduler']]

        # process templates
        batchvars['akrrCommonCommands'] = akrr.formatRecursively(batchvars['akrrCommonCommandsTemplate'], batchvars, keepDoubleBrakets=True)
        batchvars['akrrCommonCleanup'] = akrr.formatRecursively(batchvars['akrrCommonCleanupTemplate'], batchvars, keepDoubleBrakets=True)

        # do parameters adjustment
        if 'process_params' in batchvars:
            batchvars['process_params'](batchvars)

        # generate job script; "with" closes the file even if write() fails
        jobScript = akrr.formatRecursively(self.resource["batchJobTemplate"], batchvars)
        localJobScriptPath = os.path.join(self.taskDir, "jobfiles", self.JobScriptName)
        with open(localJobScriptPath, "w") as fout:
            fout.write(jobScript)
        akrr.scpToResource(self.resource, localJobScriptPath, os.path.join(self.remoteTaskDir))
        akrr.sshCommand(sh, "cat %s " % (self.JobScriptName))

        # send to queue
        from string import Template
        sendToQueue = Template(submitCommands[self.resource['batchScheduler']]).substitute(scriptPath=self.JobScriptName)
        msg = akrr.sshCommand(sh, sendToQueue)
        matchObj = re.search(jidExtractPatterns[self.resource['batchScheduler']], msg, re.M | re.S)
        if not matchObj:
            raise akrr.akrrError(akrr.ERROR_REMOTE_JOB, "Can't get job id. " + msg)
        try:
            JobID = int(matchObj.group(1))
        except (IndexError, TypeError, ValueError):
            raise akrr.akrrError(akrr.ERROR_REMOTE_JOB, "Can't get job id. " + msg)

        akrr.sshCommand(sh, "echo %d > job.id" % (JobID))

        # cp job id to subtasks
        for subtask_id, subtask_status, subtask_datetimestamp, subtask_resource, subtask_app, subtask_task_param in subTaskInfo:
            remoteSubTaskDir = self.GetRemoteTaskDir(self.resource['akrrdata'], subtask_app, subtask_datetimestamp)
            akrr.sshCommand(sh, "cp job.id %s" % (remoteSubTaskDir))

        self.RemoteJobID = JobID
        self.TimeJobSubmetedToRemoteQueue = datetime.datetime.today()

        sh.sendline("exit")
        sh.close(force=True)
        # mark the session as closed so the error handler does not touch it
        sh = None
        print("\nRemoteJobID= %s" % (self.RemoteJobID,))
        print("copying files from remote machine")
        akrr.scpFromResource(self.resource, os.path.join(self.remoteTaskDir, "*"), os.path.join(self.taskDir, "jobfiles"), "-r")

        # update DB time_submitted_to_queue
        db, cur = akrr.getDB()
        try:
            cur.execute('''UPDATE ACTIVETASKS SET time_submitted_to_queue=%s WHERE task_id=%s ;''', (datetime.datetime.today().strftime("%Y-%m-%d %H:%M:%S"), self.task_id))
            # commit as UpdateSubTasks does; without it a transactional
            # storage engine discards the update when the connection closes
            db.commit()
        finally:
            cur.close()
            del db

        self.status = "Created batch job script and have submitted it to remote queue."
        self.statusinfo = "Remote job ID is %d" % (self.RemoteJobID)
        self.ToDoNextString = "CheckTheJobOnRemoteMachine"

        # check first time in 1 minute
        return datetime.timedelta(days=0, hours=0, minutes=1)
    except Exception:
        if sh is not None:
            sh.sendline("exit")
            sh.close(force=True)
            del sh
        self.status = "ERROR Can not created batch job script and submit it to remote queue"
        self.statusinfo = traceback.format_exc()
        if akrr.max_fails_to_submit_to_the_queue >= 0:
            if hasattr(self, "FailsToSubmitToTheQueue"):
                self.FailsToSubmitToTheQueue += 1
                if self.FailsToSubmitToTheQueue > akrr.max_fails_to_submit_to_the_queue:
                    # stop execution of the task and submit results to db
                    self.ToDoNextString = "PushToDB"
                    resultFile = os.path.join(self.taskDir, "result.xml")
                    self.WriteErrorXML(resultFile)
                    return datetime.timedelta(seconds=3)
            else:
                self.FailsToSubmitToTheQueue = 1
        else:
            self.FatalErrorsCount += 1

        akrr.printException(self.status)
        return akrr.RepeateAfterFailsToSubmitToTheQueue
def GenerateBatchJobScript(self):
    """Render the batch job script of this task and write it to disk.

    Template variables are merged, in increasing priority, from the
    resource settings, the app settings, the per-resource app settings
    (``appkernelOnResource``), the default walltime stored in the DB, the
    resource parameters and the app parameters.  When ``autoWalltimeLimit``
    is enabled the walltime limit is re-estimated from previous runs; that
    estimation is best effort and never aborts the generation.  The script
    is written to <taskDir>/jobfiles/<JobScriptName>.

    Raises:
        Any exception raised while reading the default walltime from the
        DB or while building and writing the script is propagated.  For
        the latter, self.status and self.statusinfo are set first.
    """
    if not hasattr(self, 'JobScriptName'):
        self.JobScriptName = self.GetJobScriptName(self.appName)

    # get walltime from DB; errors propagate to the caller
    dbdefaults = {}
    db, cur = akrr.getDB()
    try:
        cur.execute('''SELECT resource,app,resource_param,app_param FROM ACTIVETASKS WHERE task_id=%s ;''', (self.task_id,))
        raw = cur.fetchall()
        if len(raw) > 0:
            (resource, app, resource_param, app_param) = raw[0]

            cur.execute("""SELECT walllimit FROM akrr_default_walllimit WHERE resource=%s AND app=%s AND resource_param=%s AND app_param=%s """, (resource, app, resource_param, app_param))
            raw = cur.fetchall()
            if len(raw) > 0:
                dbdefaults['walllimit'] = raw[0][0]
    finally:
        # release the cursor even when a query fails
        cur.close()
        del db

    # create job-script
    try:
        batchvars = {}

        # settings of the app specific to this resource, else its 'default'
        appkernelOnResource = {}
        if 'appkernelOnResource' in self.app:
            if self.resourceName in self.app['appkernelOnResource']:
                appkernelOnResource = self.app['appkernelOnResource'][self.resourceName]
            elif 'default' in self.app['appkernelOnResource']:
                appkernelOnResource = self.app['appkernelOnResource']['default']

        # later dictionaries override earlier ones
        for di in [self.resource, self.app, appkernelOnResource, dbdefaults, self.resourceParam, self.appParam]:
            batchvars.update(di)

        # get autowalltime limit
        try:
            if batchvars.get('autoWalltimeLimit') == True:
                print("\nautoWalltimeLimit is on, trying to estimate walltime limit...")
                autoWalltimeLimitOverhead = 1.2
                if 'autoWalltimeLimitOverhead' in batchvars:
                    autoWalltimeLimitOverhead = batchvars['autoWalltimeLimitOverhead'] + 1.0

                # query last 20 executions of this appkernel on that
                # resource with that node count
                db, cur = akrr.getDB(True)
                try:
                    cur.execute('''SELECT resource,reporter,reporternickname,collected,status,walltime FROM akrr_xdmod_instanceinfo WHERE `resource`=%s AND `reporternickname` = %s ORDER BY `akrr_xdmod_instanceinfo`.`collected` DESC LIMIT 0 , 20''', (self.resource['name'], "%s.%d" % (self.app['name'], batchvars['nnodes'])))
                    raw = cur.fetchall()
                finally:
                    cur.close()
                    del db

                nruns = len(raw)
                # rows are ordered newest first, so the first five are the
                # five most recent runs
                lastFiveRunsSuccessfull = True
                maxwalltime = 0.0
                for i, r in enumerate(raw):
                    if i < 5 and r['status'] == 0:
                        lastFiveRunsSuccessfull = False
                    if r['status'] == 1 and r['walltime'] > maxwalltime:
                        maxwalltime = r['walltime']

                if nruns < 5:
                    # the run count was missing from this message before
                    print("There are only %d previous run, need at least 5 for walltime limit autoset" % (nruns,))
                elif not lastFiveRunsSuccessfull:
                    print("One of last 5 runs have failed. Would not use autoset.")
                else:
                    # longest successful run, in seconds, scaled by the
                    # overhead and rounded up to minutes
                    newWallLimit = int(autoWalltimeLimitOverhead * maxwalltime / 60.0 + 0.99)
                    print("Max walltime was %.1f s, will change walltime limit from %.1f minutes to %d minutes" % (maxwalltime, batchvars['walllimit'], newWallLimit))
                    batchvars['walllimit'] = newWallLimit
                print("")
        except Exception as e:
            # best effort: keep the configured walllimit, but say why the
            # estimation was skipped instead of failing silently
            print("Can not estimate walltime limit automatically (%s), will use the configured one" % (e,))

        # calculate NNodes and NCores; the node count is rounded up when
        # only the core count is given
        if 'nnodes' in batchvars:
            tmpNNodes = batchvars['nnodes']
            tmpNCores = tmpNNodes * batchvars['ppn']
        else:
            tmpNCores = batchvars['ncores']
            tmpNNodes = tmpNCores // batchvars['ppn']
            if tmpNCores % batchvars['ppn'] != 0:
                tmpNNodes += 1

        batchvars['akrrNCores'] = tmpNCores
        batchvars['akrrNNodes'] = tmpNNodes

        # set batchvars remaps
        batchvars['akrrPPN'] = batchvars['ppn']
        batchvars['akrrNCoresToBorder'] = batchvars['akrrPPN'] * batchvars['akrrNNodes']
        batchvars['akrrTaskWorkingDir'] = self.remoteTaskDir
        # walllimit is divided by 60 to get the HH:MM:00 form
        wall_hours, wall_minutes = divmod(int(batchvars['walllimit']), 60)
        batchvars['akrrWallTimeLimit'] = "%02d:%02d:00" % (wall_hours, wall_minutes)
        batchvars['akrrAppKerName'] = self.app['name']
        batchvars['akrrResourceName'] = self.resource['name']
        batchvars['akrrTimeStamp'] = self.timeStamp
        if batchvars['akrrNNodes'] == 1:
            batchvars['akrrPPN4NodesOrCores4OneNode'] = batchvars['akrrNCores']
        else:
            batchvars['akrrPPN4NodesOrCores4OneNode'] = batchvars['akrrPPN']
        if 'nodeListSetterTemplate' not in batchvars:
            batchvars['nodeListSetterTemplate'] = batchvars['nodeListSetter'][batchvars['batchScheduler']]

        # process templates
        batchvars['akrrCommonCommands'] = akrr.formatRecursively(batchvars['akrrCommonCommandsTemplate'], batchvars, keepDoubleBrakets=True)
        batchvars['akrrCommonCleanup'] = akrr.formatRecursively(batchvars['akrrCommonCleanupTemplate'], batchvars, keepDoubleBrakets=True)

        # specially for IOR request two nodes for single node benchmark,
        # one for read and one for write: the header is rendered from a
        # copy of the variables with doubled node and core counts
        if batchvars['requestTwoNodesForOneNodeAppKer'] == True and batchvars['akrrNNodes'] == 1 and 'batchJobHeaderTemplate' in batchvars:
            batchvars2 = copy.deepcopy(batchvars)
            batchvars2['akrrNCores'] = 2 * batchvars['akrrNCores']
            batchvars2['akrrNNodes'] = 2 * batchvars['akrrNNodes']
            batchvars2['akrrNCoresToBorder'] = 2 * batchvars['akrrNCoresToBorder']
            batchvars2['akrrPPN4NodesOrCores4OneNode'] = batchvars['akrrPPN']
            batchvars['batchJobHeaderTemplate'] = akrr.formatRecursively(batchvars2['batchJobHeaderTemplate'], batchvars2)

        # do parameters adjustment
        if 'process_params' in batchvars:
            batchvars['process_params'](batchvars)

        # generate job script; "with" closes the file even if write() fails
        jobScript = akrr.formatRecursively(self.resource["batchJobTemplate"], batchvars)
        jobScriptFullPath = os.path.join(self.taskDir, "jobfiles", self.JobScriptName)
        with open(jobScriptFullPath, "w") as fout:
            fout.write(jobScript)
    except Exception:
        self.status = "ERROR: Can not created batch job script"
        self.statusinfo = traceback.format_exc()
        akrr.printException(self.status)
        # bare raise keeps the original traceback
        raise
def CreateBatchJobScriptAndSubmitIt(self, doNotSubmitToQueue=False):
    """Generate the job script, upload it and, if independent, submit it.

    The method opens an ssh session to the resource, creates the remote
    directories, calls GenerateBatchJobScript() and copies the script to
    the remote task directory.  A task without 'masterTaskID' in
    self.taskParam is submitted to the queue and the job id reported by
    the scheduler is stored in ``job.id``; a subtask is only prepared and
    left for its master task to execute, with RemoteJobID set to 0.

    Args:
        doNotSubmitToQueue: when true, stop after the script has been
            copied to the resource and return None.

    Returns:
        None when doNotSubmitToQueue is true; a one minute
        datetime.timedelta for a submitted independent task; a timedelta of
        111*365 days for a subtask.  On failure either a three second
        timedelta (the task is switched to "PushToDB") or
        akrr.repeat_after_fails_to_submit_to_the_queue.
    """
    self.JobScriptName = self.GetJobScriptName(self.appName)
    print("### Creating batch job script and submitting it to remote machine")

    sh = None
    try:
        sh = akrr.sshResource(self.resource)

        def CheckAndCreateDir(self, sh, d):
            """Create remote directory d if needed and verify that it exists."""
            cmd = "if [ ! -d \"%s\" ]\n then mkdir \"%s\"\n fi" % (d, d)
            akrr.sshCommand(sh, cmd)
            cmd = "if [ -d \"%s\" ]\n then \necho EXIST\n else echo DOESNOTEXIST\n fi" % (d)
            msg = akrr.sshCommand(sh, cmd)
            if msg.find("DOESNOTEXIST") >= 0:
                raise akrr.akrrError(akrr.ERROR_REMOTE_FILES, "Can not create directory %s on %s." % (d, self.resource['name']))

        # akrrdata, then the directory of the app, then the one of the task
        CheckAndCreateDir(self, sh, self.resource['akrrdata'])
        CheckAndCreateDir(self, sh, os.path.join(self.resource['akrrdata'], self.appName))
        CheckAndCreateDir(self, sh, self.remoteTaskDir)

        # all following remote commands run inside remoteTaskDir
        akrr.sshCommand(sh, "cd %s" % (self.remoteTaskDir))

        self.GenerateBatchJobScript()

        akrr.scpToResource(self.resource, os.path.join(self.taskDir, "jobfiles", self.JobScriptName), os.path.join(self.remoteTaskDir))
        if doNotSubmitToQueue:
            # close the ssh session before the early return; it used to be
            # left open on this path
            sh.sendline("exit")
            sh.close(force=True)
            sh = None
            return
        akrr.sshCommand(sh, "cat %s " % (self.JobScriptName))

        # send to queue
        from string import Template
        # assumes taskParam is a dict -- TODO confirm
        isIndependentTask = 'masterTaskID' not in self.taskParam
        JobID = 0
        if isIndependentTask:
            # i.e. submit to queue only if task is independent
            sendToQueue = Template(submitCommands[self.resource['batchScheduler']]).substitute(scriptPath=self.JobScriptName)
            msg = akrr.sshCommand(sh, sendToQueue)
            matchObj = re.search(jidExtractPatterns[self.resource['batchScheduler']], msg, re.M | re.S)
            if not matchObj:
                raise akrr.akrrError(akrr.ERROR_REMOTE_JOB, "Can't get job id:\n" + msg)
            try:
                JobID = int(matchObj.group(1))
            except (IndexError, TypeError, ValueError):
                raise akrr.akrrError(akrr.ERROR_REMOTE_JOB, "Can't get job id:\n" + msg)

            akrr.sshCommand(sh, "echo %d > job.id" % (JobID))

        self.RemoteJobID = JobID
        self.TimeJobSubmetedToRemoteQueue = datetime.datetime.today()

        sh.sendline("exit")
        sh.close(force=True)
        # mark the session as closed so the error handler does not touch it
        sh = None
        print("\nRemoteJobID= %s" % (self.RemoteJobID,))
        print("copying files from remote machine")
        akrr.scpFromResource(self.resource, os.path.join(self.remoteTaskDir, "*"), os.path.join(self.taskDir, "jobfiles"), "-r")

        # update DB time_submitted_to_queue
        db, cur = akrr.getDB()
        try:
            cur.execute('''UPDATE ACTIVETASKS SET time_submitted_to_queue=%s WHERE task_id=%s ;''', (datetime.datetime.today().strftime("%Y-%m-%d %H:%M:%S"), self.task_id))
            # without a commit a transactional storage engine discards the
            # update when the connection is closed
            db.commit()
        finally:
            cur.close()
            del db

        if isIndependentTask:
            self.status = "Created batch job script and have submitted it to remote queue."
            self.statusinfo = "Remote job ID is %d" % (self.RemoteJobID)
            self.ToDoNextString = "CheckTheJobOnRemoteMachine"

            # check first time in 1 minute
            return datetime.timedelta(days=0, hours=0, minutes=1)

        # i.e. this is a subtask
        self.status = "Created batch job script."
        self.statusinfo = "Created batch job script. Waiting for master task to execute it."
        self.ToDoNextString = "CheckTheJobOnRemoteMachine"

        # master task will update the time when it will finish task execution
        return datetime.timedelta(days=111 * 365)
    except Exception:
        if sh is not None:
            sh.sendline("exit")
            sh.close(force=True)
            del sh
        self.status = "ERROR Can not created batch job script and submit it to remote queue"
        self.statusinfo = traceback.format_exc()
        if akrr.max_fails_to_submit_to_the_queue >= 0:
            if hasattr(self, "FailsToSubmitToTheQueue"):
                self.FailsToSubmitToTheQueue += 1
                # .get(): a task without 'test_run' must not raise KeyError
                # from inside the error handler
                isTestRun = self.taskParam.get('test_run') == True
                if (self.FailsToSubmitToTheQueue > akrr.max_fails_to_submit_to_the_queue or
                        (isTestRun and self.FailsToSubmitToTheQueue >= 2)):
                    # stop execution of the task and submit results to db
                    self.ToDoNextString = "PushToDB"
                    resultFile = os.path.join(self.taskDir, "result.xml")
                    self.WriteErrorXML(resultFile)
                    return datetime.timedelta(seconds=3)
            else:
                self.FailsToSubmitToTheQueue = 1
        else:
            self.FatalErrorsCount += 1

        akrr.printException(self.status)
        return akrr.repeat_after_fails_to_submit_to_the_queue
#check if AK is in DB if True: #add entry to mod_appkernel.resource dbAK,curAK=akrr.getAKDB(True) curAK.execute('''SELECT * FROM app_kernel_def WHERE ak_base_name=%s''', (app_name,)) ak_in_AKDB = curAK.fetchall() if len(ak_in_AKDB)==0: curAK.execute('''INSERT INTO app_kernel_def (name,ak_base_name,processor_unit,enabled, description, visible) VALUES(%s,%s,'node',0,%s,0);''', (app_name,app_name,app_name)) dbAK.commit() curAK.execute('''SELECT * FROM app_kernel_def WHERE ak_base_name=%s''', (app_name,)) ak_in_AKDB = curAK.fetchall()[0] #add entry to mod_akrr.resource db,cur=akrr.getDB(True) cur.execute('''SELECT * FROM app_kernels WHERE name=%s''', (app_name,)) ak_in_DB = cur.fetchall() if len(ak_in_DB)==0: cur.execute('''INSERT INTO app_kernels (id,name,enabled,nodes_list) VALUES(%s,%s,0,'1,2,4,8');''', (ak_in_AKDB['ak_def_id'],app_name)) db.commit() ############################################################################################### #connect to resource log("#"*80) log("Validating resource accessibility. Connecting to %s."%(resource['name'])) if resource['sshPrivateKeyFile']!=None and os.path.isfile(resource['sshPrivateKeyFile'])==False: logerr("Can not access ssh private key (%s)"""%(resource['sshPrivateKeyFile'],))