def _warnSharedOutputFiles(Session):
    """Print a red warning for every output file recorded on more than one Job."""
    outputFiles = Session.query(Job).with_entities(
        Job.outputFile).group_by(Job.outputFile).all()
    for row in outputFiles:
        sharedFile = row[0]
        # Jobs without an output file cannot clash with each other.
        if sharedFile is None:
            continue
        # NOTE(review): LIKE treats '_' and '%' in the file name as wildcards,
        # so unrelated files may be counted together - confirm this is intended.
        jobsThisOF = Session.query(Job).filter(
            Job.outputFile.like(sharedFile)).count()
        if jobsThisOF > 1:
            print(
                coloured(
                    'Warning:' + str(jobsThisOF) +
                    ' job(s) have shared output file: \n' + sharedFile + '\n',
                    'red'))


def syncCampaign(Session):
    """Import jobs reported by the server that are missing from the database.

    The job list is fetched with ``Client.getAllJobs()``.  A reported job is
    imported when it has both a ``pandaid`` and a ``jobname``, matches no
    stored ``Job`` on either value, has a name longer than 37 characters and
    ``unpackServerName`` extracts a campaign name from it.  The campaign is
    created if it does not exist yet.  Imported jobs are stored with the
    placeholder script ``"unknown"``; afterwards each affected campaign is
    asked to recreate and then refresh its jobs through ``updateJobs``.
    Finally, if anything was imported, output files shared by several jobs
    are reported once.

    Errors while handling a single job or campaign are logged and rolled
    back so that the remaining ones are still processed.

    Args:
        Session: database session used for every query and commit.

    Returns:
        None.

    Raises:
        SystemExit: with code 1 when the job list cannot be retrieved or
            decoded.
    """
    try:
        output = Client.getAllJobs()
        if output[0] != 0:
            raise Exception("Server error")
        serverJobs = json.loads(output[1])['jobs']
    except Exception:
        logging.error(traceback.format_exc())
        Session.rollback()
        sys.exit(1)

    # campaign id -> panda ids of the jobs that were missing locally
    jobsByCampaign = {}
    for j in serverJobs:
        try:
            if not (j['pandaid'] and j['jobname']):
                continue

            # Check for a pre-existing job with this panda id or server name.
            # The queries are only evaluated by .first(), so the second one
            # is skipped when the first already finds a match.
            # NOTE(review): LIKE treats '_' and '%' as wildcards - confirm
            # that pattern matching (rather than equality) is intended.
            isExistingPandaID = Session.query(Job).filter(
                Job.pandaID.like(j['pandaid']))
            isExistingJobName = Session.query(Job).filter(
                Job.serverName.like(j['jobname']))
            if (isExistingPandaID.first() is not None
                    or isExistingJobName.first() is not None):
                continue

            if len(j['jobname']) <= 37:
                continue

            # See if the job name fits the expected format.
            campaignName, iterable, _ = unpackServerName(j['jobname'])
            if not campaignName:
                # Without a campaign there is nothing to attach the job to.
                continue

            campaign = Session.query(Campaign).filter(
                Campaign.name.like(campaignName)).first()
            if campaign is None:
                campaign = Campaign(name=campaignName,
                                    lastUpdate=datetime.datetime.utcnow())
                Session.add(campaign)
                Session.commit()

            # We can't recover the job script from the monitor output - that
            # is done with another query below.
            # In some instances the panda server can report a null substatus;
            # these are converted to empty strings to fulfil database rules.
            job = Job(script="unknown",
                      campaignID=campaign.id,
                      pandaID=j['pandaid'],
                      serverName=j['jobname'],
                      status=j['jobstatus'],
                      subStatus=j['jobsubstatus'] or "")
            if iterable:
                job.iterable = iterable
            Session.add(job)
            Session.commit()

            # Record that this campaign/job id pair was missing, but only
            # after it has been committed.
            jobsByCampaign.setdefault(campaign.id, []).append(job.pandaID)
        except Exception:
            logging.error(traceback.format_exc())
            Session.rollback()

    # Each job has to be queried individually to get its job parameters.
    for campaignID, missingJobs in jobsByCampaign.items():
        try:
            camp = Session.query(Campaign).get(campaignID)
            # Recreate the jobs that were missing
            camp.updateJobs(Session, recreate=True, jobs_to_query=missingJobs)
            # Now update them all to make sure everything is legit
            camp.updateJobs(Session)
        except Exception:
            logging.error(traceback.format_exc())
            Session.rollback()

    # The duplicate check looks at the whole Job table, so it is run once
    # after all campaigns were refreshed instead of once per campaign.
    if jobsByCampaign:
        try:
            _warnSharedOutputFiles(Session)
        except Exception:
            logging.error(traceback.format_exc())
            Session.rollback()
    return None
def submitCampaign(Session, jobsFile):
    """Submit every job listed in a campaign file and record it in the database.

    The file is parsed with ``submissionTools.PandaJobsJSONParser``.  The
    campaign named in it (with colons removed) is looked up and created when
    missing.  Each entry of ``campdef['jobs']`` is then turned into a ``Job``
    row and a job spec, submitted through ``Client.submitJobs`` and committed
    with status ``submitted`` or ``failed``.

    NOTE(review): this source also defines
    ``submitCampaign(Session, campSpecFile, listFile)``; if both live in the
    same module the later definition replaces the earlier one.

    Args:
        Session: database session used for every query and commit.
        jobsFile: path of the campaign description handed to the parser.

    Returns:
        None.

    Raises:
        SystemExit: with code 1 when the file cannot be parsed or the
            campaign cannot be loaded or created.
    """
    try:
        campdef = submissionTools.PandaJobsJSONParser.parse(jobsFile)
        # Don't let colons into campaign names: ':' separates the fields of
        # the server-side job name built below.  The lookup uses the cleaned
        # name as well, otherwise a name containing ':' would never be found
        # again after it was stored without the colons.
        campName = re.sub(':', '', campdef['campaign'])
        campaign = Session.query(Campaign).filter(
            Campaign.name.like(campName)).first()
        if campaign is None:
            campaign = Campaign(name=campName,
                                lastUpdate=datetime.datetime.utcnow())
            Session.add(campaign)
            Session.commit()
    except Exception:
        logging.error(traceback.format_exc())
        Session.rollback()
        sys.exit(1)

    for j in campdef['jobs']:
        nodes = j['nodes']
        walltime = j['walltime']
        # Required key; read but not forwarded to the job spec.
        queuename = j['queuename']
        command = j['command']
        # outputFile and iterable are optional and may be absent or null.
        try:
            outputFile = j['outputFile'].strip()
        except (KeyError, AttributeError):
            outputFile = None
        try:
            iterable = j['iterable'].strip()
        except (KeyError, AttributeError):
            iterable = None
        # Used in console messages so a job without an iterable still reports.
        label = iterable if iterable else ''

        # Check to see if this is a duplicate output file
        jobsThisOF = Session.query(Job).filter(
            Job.outputFile.like(outputFile)).count()
        if jobsThisOF > 0:
            print(
                coloured(
                    'Warning:' + str(jobsThisOF) +
                    ' job(s) already exist with output file: \n' +
                    outputFile + '\n', 'red'))

        dbJob = Job(script=command,
                    nodes=nodes,
                    wallTime=walltime,
                    status="To Submit",
                    subStatus="To Submit",
                    campaignID=campaign.id,
                    outputFile=outputFile)

        # Server name layout: c:<campaign>:[i:<iterable>:][oF:<file>:]<uuid>
        serverName = 'c:' + campaign.name + ':'
        if iterable:
            serverName += 'i:' + iterable + ':'
        if outputFile:
            # Panda Server doesn't like slashes in its job names
            serverName += 'oF:' + re.sub('/', ';', outputFile) + ':'
        uniqueSuffix = subprocess.check_output('uuidgen')
        if not isinstance(uniqueSuffix, str):
            # Python 3 returns bytes, which cannot be appended to a str.
            uniqueSuffix = uniqueSuffix.decode()
        # NOTE(review): the uuidgen output keeps its trailing newline, as
        # before - confirm whether the server name should be stripped.
        serverName += uniqueSuffix
        dbJob.serverName = serverName
        dbJob.iterable = iterable

        jobSpec = submissionTools.createJobSpec(walltime=walltime,
                                                command=command,
                                                outputFile=outputFile,
                                                nodes=nodes,
                                                jobName=dbJob.serverName)
        s, o = Client.submitJobs([jobSpec])
        try:
            print(o)
            dbJob.pandaID = o[0][0]
            dbJob.status = 'submitted'
            dbJob.subStatus = 'submitted'
            print(coloured(label + ", " + str(o[0][0]) + "\n", 'green'))
        except Exception:
            logging.error(traceback.format_exc())
            print(coloured(label + " job failed to submit\n", 'red'))
            dbJob.status = 'failed'
            dbJob.subStatus = 'failed'
        Session.add(dbJob)
        Session.commit()
    return None
def submitCampaign(Session, campSpecFile, listFile):
    """Submit one job per list entry from a campaign job template.

    The campaign spec is parsed with ``submissionTools.PandaJobsJSONParser``
    and the named campaign is looked up, or created when missing.  When
    ``listFile`` is given, every line of it is one iterable and each
    ``<iter>`` placeholder in the template's command and output file is
    replaced by it; otherwise a single job is submitted from the unmodified
    template.  Each job is committed before submission and committed again
    with status ``submitted`` or ``failed`` afterwards.

    NOTE(review): this source also defines
    ``submitCampaign(Session, jobsFile)``; if both live in the same module
    the later definition replaces the earlier one.

    Args:
        Session: database session used for every query and commit.
        campSpecFile: path of the campaign description handed to the parser.
        listFile: path of a text file with one iterable per line, or a falsy
            value to submit the template once.

    Returns:
        None.

    Raises:
        SystemExit: with code 1 when the spec cannot be parsed or the
            campaign cannot be loaded or created.
    """
    try:
        campdef = submissionTools.PandaJobsJSONParser.parse(campSpecFile)
        campaign = Session.query(Campaign).filter(
            Campaign.name.like(campdef['campaign'])).first()
        if campaign is None:
            campaign = Campaign(name=campdef['campaign'],
                                lastUpdate=datetime.datetime.utcnow())
            Session.add(campaign)
            Session.commit()
    except Exception:
        logging.error(traceback.format_exc())
        Session.rollback()
        sys.exit(1)

    template = campdef['jobtemplate']
    nodes = template['nodes']
    walltime = template['walltime']
    # Required key; read but not forwarded to the job spec.
    queuename = template['queuename']
    command = template['command']
    try:
        outputFile = template['outputFile']
    except KeyError:
        # The output file is optional in the template.
        outputFile = None

    if listFile:
        with open(listFile, 'r') as f:
            iterList = [line.replace("\n", "") for line in f]
    else:
        iterList = ['']

    for iterable in iterList:
        if listFile:
            # Literal replacement: the iterable must not be interpreted as a
            # regex replacement template (backslashes, group references).
            jobCommand = command.replace('<iter>', iterable)
            if outputFile is None:
                jobOutput = None
            else:
                jobOutput = outputFile.replace('<iter>', iterable)
        else:
            jobCommand = command
            jobOutput = outputFile

        # subStatus is set up front, as the sibling submission code does; a
        # comment in syncCampaign indicates the database rejects null values.
        dbJob = Job(script=jobCommand,
                    nodes=nodes,
                    wallTime=walltime,
                    status="To Submit",
                    subStatus="To Submit",
                    campaignID=campaign.id,
                    outputFile=jobOutput)
        uniqueSuffix = subprocess.check_output('uuidgen')
        if not isinstance(uniqueSuffix, str):
            # Python 3 returns bytes, which cannot be appended to a str.
            uniqueSuffix = uniqueSuffix.decode()
        # NOTE(review): the uuidgen output keeps its trailing newline, as
        # before - confirm whether the server name should be stripped.
        # 'serverName' is the attribute the rest of this source queries and
        # sets on Job; the former lowercase 'servername' only created a
        # separate attribute on the instance.
        dbJob.serverName = campaign.name + uniqueSuffix
        if listFile:
            dbJob.iterable = iterable
        Session.add(dbJob)
        Session.commit()

        jobSpec = submissionTools.createJobSpec(walltime=walltime,
                                                command=jobCommand,
                                                outputFile=jobOutput,
                                                nodes=nodes,
                                                jobName=dbJob.serverName)
        s, o = Client.submitJobs([jobSpec])
        try:
            dbJob.pandaID = o[0][0]
            dbJob.status = 'submitted'
            dbJob.subStatus = 'submitted'
            print(
                coloured(iterable.strip() + ", " + str(o[0][0]) + "\n",
                         'green'))
        except Exception:
            logging.error(traceback.format_exc())
            print(coloured(iterable.strip() + " job failed to submit\n",
                           'red'))
            dbJob.status = 'failed'
            dbJob.subStatus = 'failed'
        Session.commit()
    return None