def main():
    """Convert run numbers to data-taking periods or vice versa (CLI entry point)."""
    arg_parser = argparse.ArgumentParser(
        description='This script returns the runs for a given period or vice versa. For more help type \"python PeriodRunConverter.py -h\"',
        prog='PeriodRunConverter',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    arg_parser = SetupArgParser(arg_parser)
    opts = arg_parser.parse_args()
    # Instantiate the converter once up front (presumably warms its cache) -- TODO confirm
    GetPeriodRunConverter()
    # Print the first/last run of every period of every requested year
    for year in opts.year:
        prettyPrint(preamble="Found the following periods for year", data="%d" % (year), width=30, separator=":")
        for period in GetPeriods(year):
            first_run, last_run = GetPeriodRunConverter().GetFirstRunLastRun(year, period, opts.project)
            prettyPrint(preamble="Period %s from " % (period), data="%d --- %d" % (first_run, last_run), width=40, separator="***")
    got_runs = len(opts.runNumber) > 0
    got_periods = len(opts.period) > 0
    if got_runs and got_periods:
        # Both given: cross-check that the runs fall inside the periods
        logging.info('Both runNumber (%s) and period (%s) were given, checking if given runs are included in given periods...' % (opts.runNumber, opts.period))
        logging.info(RunInPeriod(opts.runNumber, opts.year, opts.period))
    elif got_runs:
        # Only run numbers: look up the period of the first run
        logging.info('A runNumber (%s) was given, checking corresponding period...' % (opts.runNumber))
        logging.info(GetPeriodFromRun(opts.runNumber[0]))
    elif got_periods:
        # Only periods: list all runs belonging to them
        logging.info('A period (%s) was given, checking corresponding runs for year(s) %s ...' % (opts.period, opts.year))
        logging.info(" ".join(["%d" % (r) for r in GetRunsFromPeriod(opts.year, opts.period)]))
    else:
        logging.error('Please specify at least one runNumber or one period. For help use "python PeriodRunConverter.py -h"')
def print_banner(self):
    """Print a banner summarising the cluster-engine job configuration.

    Converted from Python-2 print statements to print() calls for
    consistency with the Python-3-style blocks in this file, and fixed
    the "LogIdr" -> "LogDir" label typo.
    """
    print("#####################################################################################################")
    print(" ClusterEngine for job %s " % self.__jobName)
    print("#####################################################################################################")
    prettyPrint("JobName", self.job_name())
    # fixed label typo (was "LogIdr")
    prettyPrint("LogDir", self.log_dir())
    prettyPrint("BuildDir", self.build_dir())
    prettyPrint("TmpDir", self.tmp_dir())
    prettyPrint("outputDir", self.out_dir())
def getTarBallOptions(renew=False):
    """Return the prun/pathena command-line options controlling the tarball.

    If no TarBall.tgz exists in the test area (or *renew* forced its
    removal), options to create a fresh tarball are returned; otherwise
    the existing tarball is reused via --inTarBall.

    @param renew  Delete an existing TarBall.tgz first, forcing a rebuild.
    @return       List of command-line option strings.
    """
    os.chdir(TESTAREA)
    tarball = TESTAREA + "/TarBall.tgz"
    if renew and os.path.isfile(tarball):
        prettyPrint("Delete exisiting TarBall", tarball)
        os.system("rm %s/TarBall.tgz" % (TESTAREA))
    if os.path.isfile(tarball):
        # reuse the tarball that is already present
        return ["--inTarBall=TarBall.tgz"]
    # no tarball yet: request creation of a new one
    return [
        "--extFile=*.root",
        "--outTarBall=TarBall.tgz",
        "--excludeFile=\"*.svn*\",\"*.git*\",\"*.pyc\",\"*.*~\",\"*.tex\",\"*.tmp\",\"*.pdf\",\"*.png\",\"*.log\",\"*.dat\",\"*.core\",\"*README*\""
    ]
def createFileList(dsname, options):
    """Write a text file listing all file replicas of dataset *dsname*.

    The list is written to <options.OutDir>/<dsname>.txt; any rucio scope
    prefix ("scope:") and trailing slash are stripped from the file name.
    A pre-existing list file is removed first.

    Converted Python-2 print statements to print() and replaced the
    `== False` / `== True` comparisons with idiomatic truth tests, for
    consistency with the Python-3-style blocks in this file.

    @param dsname   Dataset name, optionally prefixed with "scope:".
    @param options  Parsed options; RSE, protocols and OutDir are used.
    """
    prettyPrint('Creating file list for', dsname)
    DS = GetDataSetFiles(dsname, options.RSE, options.protocols)
    if not DS:
        print("No datasets found")
        return
    # strip the rucio scope prefix, e.g. "user.x:user.x.ds" -> "user.x.ds"
    if dsname.find(":") > -1:
        dsname = dsname[dsname.find(":") + 1:]
    OutDir = options.OutDir
    if not os.path.exists(OutDir):
        print("mkdir -p " + OutDir)
        os.system("mkdir -p " + OutDir)
    filelistname = OutDir + "/" + dsname.rstrip('/') + ".txt"
    if os.path.exists(filelistname):
        print("Remove the old FileList")
        os.system("rm " + filelistname)
    WriteList(DS, filelistname)
def GetDataSetFiles(dsname, RSE, protocols):
    """Query rucio for the file replicas of *dsname* on a given storage element.

    Runs ``rucio list-file-replicas`` and parses its tabular output: on each
    line the column immediately following the one containing the RSE name is
    taken as the replica path/URL.

    Fixes: the Python-2-only ``commands`` module (removed in Python 3) is
    replaced by ``subprocess.getoutput``; the bare ``except:`` is narrowed to
    ``IndexError`` (the only thing ``LineInfo[i + 1]`` can raise here);
    Python-2 print statements converted to print().

    @param dsname     Dataset name to query.
    @param RSE        Rucio storage element whose replicas are wanted.
    @param protocols  Comma-separated list of access protocols.
    @return           List of replica paths (possibly empty).
    """
    import subprocess  # local import: Python-3 replacement for 'commands'
    prettyPrint("Get the files of the dataset", dsname)
    DSReplicas = subprocess.getoutput(
        "rucio list-file-replicas --protocols %s --rse %s %s " % (protocols, RSE, dsname))
    DS = []
    for line in DSReplicas.split("\n"):
        Entry = None
        LineInfo = line.split()
        for i, column in enumerate(LineInfo):
            if RSE in column:
                try:
                    Entry = LineInfo[i + 1]
                    break
                except IndexError:
                    # RSE name was the last column on this line -- malformed row
                    print("There was some strange noise here ", column)
        if Entry:
            print(Entry)
            DS.append(Entry)
    return DS
def getTarBallOptions(renew=False):
    """Build the pathena/prun tarball options for grid submission.

    A gzipped tarball (https://en.wikipedia.org/wiki/Tar_(computing)) of the
    local build is shipped with the grid job as TarBall.tgz in the TestArea.
    If no tarball exists (or *renew* removed the old one), options creating
    a fresh one are returned; otherwise the existing tarball is reused.
    Beware: a stale TarBall.tgz silently shadows recompiled code -- delete
    it manually or pass renew=True.

    @param renew  Delete an existing TarBall.tgz, even if present, and rebuild.
    @return       List of command-line options for athena specifying the tarball.
    """
    os.chdir(TESTAREA)
    tarball_path = TESTAREA + "/TarBall.tgz"
    if renew and os.path.isfile(tarball_path):
        prettyPrint("Delete exisiting TarBall", tarball_path)
        os.system("rm %s/TarBall.tgz" % (TESTAREA))
    if os.path.isfile(tarball_path):
        # an (old) tarball is present: submit with it
        return ["--inTarBall=TarBall.tgz"]
    # otherwise request creation of a brand new tarball
    return [
        "--extFile=*.root",
        "--outTarBall=TarBall.tgz",
        "--excludeFile=\"*.svn*\",\"*.git*\",\"*.pyc\",\"*.*~\",\"*.tex\",\"*.tmp\",\"*.pdf\",\"*.png\",\"*.log\",\"*.dat\",\"*.core\",\"*README*\",\"XAMPPplotting/data/*\""
    ]
def __schedule_jobs(self, HoldJobs, sub_job="", RequireOk=True):
    """Translate the hold-job list into a slurm '--dependency' option string.

    @param HoldJobs   Jobs to wait for. Each entry is either a job name (str)
                      or a tuple (name, sel) where sel is a list of array
                      indices, or the int -1 requesting a one-to-one
                      'aftercorr' array dependency.
    @param sub_job    Name of the sub-job being submitted ('' = whole job).
    @param RequireOk  Use 'afterok' (held jobs must succeed) instead of 'after'.
    @return           Slurm dependency option string, or '' if nothing to hold on.
    """
    prettyPrint(
        "",
        "#############################################################################"
    )
    if len(sub_job) == 0:
        prettyPrint("Submit cluster job:", self.job_name())
    else:
        prettyPrint("Submit job: ", "%s in %s" % (sub_job, self.job_name()))
    prettyPrint(
        "",
        "#############################################################################"
    )
    info_written = False
    to_hold = []
    dependency_str = ""
    for H in HoldJobs:
        ids = self.__slurm_id(H)
        if len(ids) > 0:
            if not info_written:
                prettyPrint(
                    "Hold %s until" % (sub_job if len(sub_job) else self.job_name()), "")
                info_written = True
            prettyPrint("",
                        H if isinstance(H, str) else
                        ("%s [%s]" % (H[0], ",".join(str(h) for h in H[1]))
                         if isinstance(H[1], list) else "%s [1 by 1]" % (H[0])),
                        width=32,
                        separator='*')
            ### Usual dependency on entire jobs or certain subjobs in an array
            if isinstance(H, str) or isinstance(H[1], list):
                to_hold += ids
            elif isinstance(H[1], int) and H[1] == -1:
                dependency_str += " --dependency=aftercorr:%s " % (
                    ":".join(ids))
            else:
                logging.error("<schedule_jobs> Invalid object ")
                logging.error(H)
                exit(1)
    # BUG FIX: the original returned "" whenever to_hold was empty, which
    # silently discarded any accumulated 'aftercorr' dependency_str. Check
    # dependency_str first so aftercorr-only dependencies survive.
    if len(dependency_str) > 0:
        return dependency_str
    if len(to_hold) == 0:
        return ""
    return " --dependency=" + ("afterok:" if RequireOk else "after:") + ":".join(to_hold)
def __schedule_jobs(self, to_hold, sub_job):
    """Translate the hold-job list into a PBS '-W depend=...' option string.

    @param to_hold  Jobs to wait for; each entry is a job name (str) or a
                    tuple (name, selector) forwarded to __jobName_to_jobID.
    @param sub_job  Name of the sub-job being submitted ('' = whole job).
    @return         '-W depend=afterok:...' (or 'afterany' for Clean /
                    Copy-LCK sub-jobs) string, '' if nothing to hold on.
    """
    To_Hold = ""
    prettyPrint(
        "",
        "#############################################################################"
    )
    if len(sub_job) == 0:
        prettyPrint("Submit cluster job:", self.job_name())
    else:
        prettyPrint(
            "Submit job: ",
            "%s in %s" % (self.subjob_name(sub_job), self.job_name()))
    prettyPrint(
        "",
        "#############################################################################"
    )
    info_written = False
    for H in to_hold:
        if not info_written:
            prettyPrint(
                "Hold %s until" % (self.subjob_name(sub_job)
                                   if len(sub_job) else self.job_name()), "")
            # BUG FIX: info_written was never set True, so the header line
            # was re-printed for every held job (the slurm sibling sets it).
            info_written = True
        if isinstance(H, str):
            To_Hold = ":".join([
                "%s" % s for s in ([To_Hold] + self.__jobName_to_jobID(H))
                if s != ""
            ])
        elif isinstance(H, tuple):
            To_Hold = ":".join([
                "%s" % s
                for s in ([To_Hold] + self.__jobName_to_jobID(H[0], H[1]))
                if s != ""
            ])
        else:
            logging.error("<_schedule_jobs>: Invalid object")
            logging.error(H)
            exit(1)
        prettyPrint("", H if isinstance(H, str) else H[0])
    if (len(To_Hold)):
        # Clean / Copy-LCK jobs must run even if the held jobs failed
        if "Clean" in self.subjob_name(
                sub_job) or "Copy-LCK" in self.subjob_name(sub_job):
            To_Hold = "-W depend=afterany:" + To_Hold
        else:
            To_Hold = "-W depend=afterok:" + To_Hold
    return To_Hold
def SubmitJobs(RunOptions, AnalysisOptions):
    """
    @brief Submit grid jobs using pathena with XAMPP
    @param RunOptions The run options (input/output DS, tarball, test mode, ...)
    @param AnalysisOptions The analysis options forwarded verbatim to pathena
    """
    CheckPandaSetup()
    RUCIO_ACC = os.getenv("RUCIO_ACCOUNT")
    os.chdir(TESTAREA)
    PRUN_Options = []
    PRUN_Options += getTarBallOptions(renew=RunOptions.newTarBall)
    InDSList = RunOptions.inputDS
    # destSE of '-1' means "no group disk replication requested"
    GroupDisk = RunOptions.destSE != '-1'
    Duplicate = RunOptions.DuplicateTask
    InputDataSets = []
    FilesPerJob = RunOptions.nFilesPerJob
    nJobs = RunOptions.nJobs
    OutFiles = [RunOptions.outFile]
    #################################################
    # Assemble the dataset list
    #################################################
    #The path is directly given
    if os.path.exists(InDSList):
        InputDataSets += ReadListFromFile(InDSList)
    # if not, it should be a DS or a comma-separated list of DS
    elif ',' in InDSList:
        InputDataSets += InDSList.split(",")
    #Assume there is only one DS given to the script
    else:
        InputDataSets.append(InDSList)
    ###############################################
    # No dataset could be extracted from the list
    ###############################################
    if len(InputDataSets) == 0:
        prettyPrint('ERROR', 'No input dataset found')
        exit(1)
    # Optionally verify in rucio that each input dataset actually exists
    if RunOptions.noAmiCheck:
        print('WARNIG: The check of the existence of the inputDS is disabled, which should be fine.')
    else:
        sys.stdout.write('Checking in rucio if dataset(s) is exist(s)..')
        sys.stdout.flush()
        DSCandidates = InputDataSets
        InputDataSets = []
        FoundAll = True
        for D in DSCandidates:
            sys.stdout.write('.')
            sys.stdout.flush()
            D = D.replace("/", "").strip()
            if not DoesDSExists(D):
                print('ERROR: The input DS %s is not known!' % D)
                FoundAll = False
            else:
                InputDataSets.append(D)
        if FoundAll:
            print(' Successful!')
    if len(InputDataSets) == 0:
        print('ERROR: There are no valid input DS')
        exit(1)
    PRUN_Options.append("--inDS=\"%s\"" % ",".join(InputDataSets))
    # Sets the OutputDataSetName: user scope by default, group scope if a
    # production role was requested
    Scope = "user.%s" % (RUCIO_ACC) if len(RunOptions.productionRole) == 0 else "group.%s" % (RunOptions.productionRole)
    OutDS = "%s.%s" % (Scope, RunOptions.outDS)
    if len(RunOptions.productionRole) > 0:
        PRUN_Options += ["--official", "--voms=atlas:/atlas/%s/Role=production" % (RunOptions.productionRole)]
    PRUN_Options.append('--outDS=\"%s\"' % OutDS.replace('\r', ''))
    PRUN_Options.append("--express")
    PRUN_Options.append("--useShortLivedReplicas")
    # Additional Options parsing to the prun Command
    # Test mode: single job on a single file, no retries, no replication
    if RunOptions.Test == True:
        GroupDisk = False
        Duplicate = True
        FilesPerJob = 1
        nJobs = 1
        PRUN_Options.append("--nFiles=1")
        PRUN_Options.append("--disableAutoRetry")
    if GroupDisk:
        PRUN_Options.append("--destSE=%s" % RunOptions.destSE)
    if Duplicate:
        PRUN_Options.append("--allowTaskDuplication")
    if nJobs > 0:
        PRUN_Options.append("--nJobs=%d " % nJobs)
    PRUN_Options.append("--nFilesPerJob=%i" % FilesPerJob)
    #PRUN_Options.append("--useNewCode")
    PRUN_Options.append("--mergeOutput")
    # Print a summary banner of the environment and the job options
    print(
        "################################################################################################################################################################"
    )
    print(" XAMPP on the grid")
    print(
        "################################################################################################################################################################"
    )
    prettyPrint('USER', os.getenv("USER"))
    prettyPrint('RUCIO', RUCIO_ACC)
    prettyPrint('WORKINGDIR', TESTAREA)
    prettyPrint('TODAY', time.strftime("%Y-%m-%d"))
    prettyPrint("ATLASPROJECT", ATLASPROJECT)
    prettyPrint("ATLASVERSION", ATLASVERSION)
    prettyPrint("TESTAREA", TESTAREA)
    print(
        "################################################################################################################################################################"
    )
    print(" JobOptions")
    print(
        "################################################################################################################################################################"
    )
    prettyPrint('InputDataSets:', '', separator='')
    for ds in InputDataSets:
        prettyPrint('', ds, width=32, separator='-')
    prettyPrint('FilesPerJob', str(FilesPerJob))
    if nJobs > 0:
        prettyPrint('NumberOfJobs', str(nJobs))
    prettyPrint('OutputContainer', OutDS)
    MakeTarBall = not os.path.isfile(TESTAREA + "/TarBall.tgz")
    prettyPrint('CreateTarBall', ("Yes" if MakeTarBall else "No"))
    if not MakeTarBall:
        prettyPrint('', 'Using already existing tarball located at:', width=32, separator='->')
        prettyPrint('', '%s/TarBall.tgz' % TESTAREA, width=34, separator='')
    prettyPrint('JobOptions', RunOptions.jobOptions)
    prettyPrint('RunWithSystematics', "Yes" if not RunOptions.noSyst else "No")
    print(
        "################################################################################################################################################################"
    )
    ### Tell athena that a grid-job is running -->
    ### each job has a single type of datasets
    Command = "pathena %s %s" % (BringToAthenaStyle(RunOptions.jobOptions), " ".join(PRUN_Options + AnalysisOptions))
    print('\nSubmitting using command:\n%s\n...' % Command)
    os.system(Command)
def SubmitJobs(RunOptions, AnalysisOptions):
    """Submit one pathena grid job per input dataset (MuonAnalysis variant).

    Unlike the XAMPP SubmitJobs above, this version reads the dataset list
    from a hard-coded campaign directory and submits a separate pathena
    command per dataset, each with its own output container name.

    NOTE(review): contains large blocks of dead code kept as triple-quoted
    strings and commented-out lines; left untouched here.

    @param RunOptions       Parsed run options (inputDS, DSCampaign, Test, ...)
    @param AnalysisOptions  Analysis options; NOTE(review): overwritten with []
                            below, so the caller's value is discarded.
    """
    os.chdir(TESTAREA)
    PRUN_Options = []
    PRUN_Options += getTarBallOptions(renew=RunOptions.newTarBall)
    InDSList = RunOptions.inputDS
    # destSE of '-1' means "no group disk replication requested"
    GroupDisk = RunOptions.destSE != '-1'
    Duplicate = RunOptions.DuplicateTask
    InputDataSets = []
    OutFiles = [RunOptions.outputSuffix]
    FilesPerJob = RunOptions.FilesPerJob
    nJobs = RunOptions.nJobs
    Campaign = RunOptions.DSCampaign
    #################################################
    # Assemble the datasets list
    #################################################
    # hard-coded, user-specific sample-list location -- TODO make configurable
    InDSListDir = "/srv01/cgrp/users/jpham/Wanalysis_AthAnalysis21.2.69_withSUSYTools/data/%s/" % (
        Campaign)
    if os.path.exists(InDSListDir) == False:
        print(
            "ERROR: Campaign is invalid. Could not find the directory containing the sample lists."
        )
        exit(1)
    # check if inputDS was a file
    if os.path.exists(InDSListDir + InDSList):
        InputDataSets.extend(
            ReadListFromDSFile(InDSListDir + InDSList, Campaign))
    # if not, it should be a DS or a comma-separated list of DS
    else:
        if ',' in InDSList:
            for DS in InDSList.split(','):
                InputDataSets.append(DS)
        else:
            InputDataSets.append(InDSList)
    print(InputDataSets)
    #The path is directly given
    # NOTE(review): the triple-quoted block below is dead code (an unused
    # string expression), presumably the rucio existence check inherited
    # from the XAMPP variant -- kept verbatim.
    """if os.path.exists(InDSList): InputDataSets += ReadListFromFile(InDSList) # if not, it should be a DS or a comma-separated list of DS elif ',' in InDSList: InputDataSets += InDSList.split(",") #Assume there is only one DS given to the script else: InputDataSets.append(InDSList) ############################################### # No dataset could be extracted from the list ############################################### if RunOptions.noAmiCheck: print('WARNIG: The check of the existence of the inputDS is disabled, which should be fine.') else: sys.stdout.write('Checking in rucio if dataset(s) is exist(s)..') sys.stdout.flush() DSCandidates = InputDataSets InputDataSets = [] FoundAll = True for D in DSCandidates: sys.stdout.write('.') sys.stdout.flush() D = D.replace("/", "").strip() if not DoesDSExists(D): print('ERROR: The input DS %s is not known!' % D) FoundAll = False else: InputDataSets.append(D) if FoundAll: print(' Successful!')"""
    if len(InputDataSets) == 0:
        # NOTE(review): prints an error but does NOT exit (exit is commented
        # out) -- submission loop below simply runs zero times.
        print('ERROR: There are no valid input DS')
        #exit(1)
    #PRUN_Options.append("--inDS=\"%s\"" % ",".join(InputDataSets))
    #PRUN_Options.append("--inDS=\"%s\""%ConvertListToString(InputDataSets,","))
    # Sets the OutputDataSetName
    JobName = RunOptions.analysis
    if RunOptions.jobName != None and RunOptions.jobName != "":
        JobName = RunOptions.jobName
    if len(JobName) == 0:
        print("ERROR: Please give a JobName")
        exit(1)
    # NOTE(review): two more dead triple-quoted blocks kept verbatim.
    """for dataset in InputDataSets: DSID = dataset.split(".")[1].split(".")[0] TAG = dataset.split(".")[-1].split("/")[0] print ('submit of single DSID: %s with tag: %s'%(DSID,TAG))"""
    """if RunOptions.outDS == '': if len(InputDataSets) == 1: DSID = InputDataSets[0].split(".")[1].split(".")[0] TAG = InputDataSets[0].split(".")[-1].split("/")[0] print( 'submit of single DSID: %s with tag: %s' % (DSID, TAG)) OutDS = "user.%s.%s.%s.%s.%s" % (RUCIO_ACC, DSID, TAG, TODAY, JobName) else: OutDS = "user.%s.%s.%s" % (RUCIO_ACC, TODAY, JobName) else: OutDS = "user.%s.%s" % (RUCIO_ACC, RunOptions.outDS) PRUN_Options.append("--outDS=\"%s\"" % OutDS)"""
    # Additional Options parsing to the prun Command
    # Test mode: single job on a single file, no retries
    if RunOptions.Test == True:
        GroupDisk = False
        Duplicate = True
        FilesPerJob = 1
        nJobs = 1
        PRUN_Options.append("--nFiles=1")
        PRUN_Options.append("--disableAutoRetry")
    #if GroupDisk: RunOptions.append("--destSE=%s" % RunOptions.destSE)
    #if Duplicate: RunOptions.append("--allowTaskDuplication")
    #if nJobs > 0: RunOptions.append("--nJobs=%d " % nJobs)
    PRUN_Options.append("--mergeOutput")
    if RunOptions.exSite:
        PRUN_Options.append("--excludedSite=%s" % EXCLUDEDSITE)
    # NOTE(review): the caller-supplied AnalysisOptions are discarded here,
    # and FilesPerJob is overridden to 50 regardless of the options above
    # (including Test mode's value of 1) -- confirm whether intentional.
    AnalysisOptions = []
    FilesPerJob = 50
    if RunOptions.debug:
        PRUN_Options.append("--debug")
    if RunOptions.nevents:
        PRUN_Options.append("--evtMax %i" % RunOptions.nevents)
    if RunOptions.skipEvents:
        PRUN_Options.append("--skipEvents %i" % RunOptions.skipEvents)
    #PRUN_Options.append("--runModus grid")
    PRUN_Options.append("--nFilesPerJob=%i" % FilesPerJob)
    #PRUN_Options.append("--outputs=\"%s\""%ConvertListToString(OutFiles ,","))
    # Print a summary banner of the environment and the job options
    print(
        "################################################################################################################################################################"
    )
    print(
        " MuonAnalysis on the grid"
    )
    print(
        "################################################################################################################################################################"
    )
    prettyPrint('USER', USERNAME)
    prettyPrint('RUCIO', RUCIO_ACC)
    prettyPrint('WORKINGDIR', TESTAREA)
    prettyPrint('TODAY', time.strftime("%Y-%m-%d"))
    prettyPrint("ATLASPROJECT", ATLASPROJECT)
    prettyPrint("ATLASVERSION", ATLASVERSION)
    prettyPrint("TESTAREA", TESTAREA)
    print(
        "################################################################################################################################################################"
    )
    print(
        " JobOptions"
    )
    print(
        "################################################################################################################################################################"
    )
    prettyPrint('InputDataSets:', '', separator='')
    for ds in InputDataSets:
        prettyPrint('', ds, width=32, separator='-')
    prettyPrint('FilesPerJob', str(FilesPerJob))
    if nJobs > 0:
        prettyPrint('NumberOfJobs', str(nJobs))
    #prettyPrint('OutputContainer', OutDS)
    MakeTarBall = not os.path.isfile(TESTAREA + "/TarBall.tgz")
    prettyPrint('CreateTarBall', ("Yes" if MakeTarBall else "No"))
    if not MakeTarBall:
        prettyPrint('', 'Using already existing tarball located at:', width=32, separator='->')
        prettyPrint('', '%s/TarBall.tgz' % TESTAREA, width=34, separator='')
    prettyPrint('JobOptions', RunOptions.jobOptions)
    prettyPrint('RunWithSystematics', "Yes" if not RunOptions.noSyst else "No")
    print(
        "################################################################################################################################################################"
    )
    AnalysisOptions.reverse()
    #print('\nSubmitting using command:\n%s\n...' % Command)
    #print(str(" ".join(PRUN_Options))
    #print(str(AssembleConfigArgument(AnalysisOptions)))
    #print(str(RunOptions.jobOptions))
    #os.system(Command)
    # Submit one pathena task per input dataset
    for dataset in InputDataSets:
        # DSID/TAG extracted from the rucio dataset naming convention --
        # assumes name like scope.DSID.<...>.TAG[/] -- TODO confirm
        DSID = dataset.split(".")[1].split(".")[0]
        TAG = dataset.split(".")[-1].split("/")[0]
        print('DSID: %s with tag: %s' % (DSID, TAG))
        #OutDS = "user.%s.%s.%s.%s.%s"%(RUCIO_ACC,DSID,TAG,TODAY,JobName)
        print("InputContainer: " + dataset)
        OutDS = "user.%s.%s.%s.%s.%s" % (RUCIO_ACC, DSID, TAG, TODAY, JobName)
        #PRUN_Options.append("--outDS=\"%s\"" % OutDS)
        print("OutputContainer: " + OutDS)
        prettyPrint('OutputContainer', OutDS)
        #Command = "prun --exec=\"python XAMPPanalyses/python/runHeavyIon.py %s\" --inDS %s --outDS %s %s"%(ConvertListToString (AnalysisOptions, " ") , dataset, OutDS, ConvertListToString(PRUN_Options , " "))
        Command = ""
        Command = "pathena --inDS=%s --outDS=%s %s %s %s" % (
            dataset, OutDS, " ".join(PRUN_Options),
            AssembleConfigArgument(AnalysisOptions), RunOptions.jobOptions)
        #Command = "pathena %s %s %s %s" %(ConvertListToString (AnalysisOptions, " ") , dataset, OutDS, ConvertListToString(PRUN_Options , " "))
        print('\nSubmitting using command:\n%s\n...' % Command)
        os.system(Command)
    exit(0)
def __shedule_jobs(self, to_hold, sub_job):
    """Translate the hold-job list into SGE '-hold_jid' options.

    @param to_hold  Jobs to wait for; each entry is a job name (str) or a
                    tuple whose first element is the job name.
    @param sub_job  Name of the sub-job being submitted ('' = whole job).
    @return         String of ' -hold_jid <name>' options ('' if none).
    """
    To_Hold = ""
    prettyPrint(
        "",
        "#############################################################################"
    )
    if len(sub_job) == 0:
        prettyPrint("Submit cluster job:", self.job_name())
    else:
        prettyPrint("Submit job: ", "%s in %s" % (sub_job, self.job_name()))
    prettyPrint(
        "",
        "#############################################################################"
    )
    info_written = False
    # BUG FIX: the original iterated over To_Hold (the empty result string)
    # instead of the to_hold argument, so the loop never ran and the
    # function always returned '' -- no hold options were ever emitted.
    for H in to_hold:
        if not info_written:
            prettyPrint(
                "Hold %s until" % (sub_job if len(sub_job) else self.job_name()), "")
            # BUG FIX: mark the header as printed (was never set, so the
            # header would repeat for every held job)
            info_written = True
        if isinstance(H, str):
            To_Hold += " -hold_jid %s" % (H)
        elif isinstance(H, tuple):
            To_Hold += " -hold_jid %s" % (H[0])
        else:
            # converted from a Python-2 print statement
            print("ERROR <_shedule_jobs>: Invalid object", H)
            exit(1)
        prettyPrint("", H if isinstance(H, str) else H[0])
    return To_Hold