def updateBlackList(black_list):
    """Merge ``black_list`` into 'BlackListedPRWdatasets.txt' and push the
    updated list to the main XAMPPbase repository on a dedicated branch.

    Does nothing if every entry of ``black_list`` is already contained in
    the current black list.
    """
    current_black = getPRWblackList()
    if IsListIn(black_list, current_black): return
    current_black = ClearFromDuplicates(current_black + black_list)
    current_dir = os.getcwd()
    FileName = os.path.realpath(
        ResolvePath("XAMPPbase/BlackListedPRWdatasets.txt"))
    Pkg_Dir = os.path.realpath(ResolvePath("XAMPPbase"))
    ###############################################################################
    # Find out the current branch to propagate only                               #
    # the updated List to the main repository. Other changes regarding            #
    # side developments of the package should not be propagated yet               #
    ###############################################################################
    upstream = setupGITupstream()
    current_branch = getBranch()
    os.chdir(Pkg_Dir)
    # Branch name encodes the date and the submitting user
    new_branch = "PRW_%s_%s" % (time.strftime("%Y%m%d"), USERNAME)
    if current_branch:
        # Commit pending local changes first, otherwise the checkout below fails
        os.system(
            "git commit -am \"Commit changes of all files in order to push the 'BlackListedPRWdatasets.txt'\""
        )
    print "INFO: Create new branch %s to update the BlackListedPRWdatasets " % (
        new_branch)
    os.system("git checkout -b %s %s/master" % (new_branch, upstream))
    WriteList(sorted(current_black), FileName)
    os.system("git add BlackListedPRWdatasets.txt")
    os.system(
        "git commit BlackListedPRWdatasets.txt -m \"Updated the list of black prw files\""
    )
    os.system("git push %s %s" % (upstream, new_branch))
    # Restore the branch the user was working on before, if any
    if current_branch: os.system("git checkout %s" % (current_branch))
    os.chdir(current_dir)
def evaluate_cut_flows(options, Proccessed_Smp=[], analysis="XAMPPbase"):
    """Produce reference cutflow files for the CI.

    For every (Sample, Out_File) pair in ``Proccessed_Smp`` the raw and --
    for MC only -- the weighted cutflow of each region in
    ``options.regions`` is dumped via printCutFlow.py into
    ``options.ciDir``/reference_cutflows/. Files of failed cutflow runs
    are removed again.
    """
    for Sample, Out_File in Proccessed_Smp:
        if not os.path.exists(Out_File):
            print "ERROR: No such file or directory %s. Skip sample" % (
                Out_File)
            continue
        ### Execute the cutflow commands for each region
        cflow_dir = "%s/reference_cutflows/" % (options.ciDir)
        CreateDirectory(cflow_dir, False)
        for region in options.regions:
            CI_file = "%s/%s_%s_%s.txt" % (cflow_dir, Sample, analysis, region)
            Cflow_Cmd = "python %s -i %s -a %s | tee %s" % (ResolvePath(
                "XAMPPbase/python/printCutFlow.py"), Out_File, region, CI_file)
            if os.system(Cflow_Cmd) != 0:
                # Remove the half-written reference file of the failed run
                print "ERROR: Could not process cutflow %s in file %s" % (
                    region, Out_File)
                del_cmd = "rm %s" % (CI_file)
                os.system(del_cmd)
            # Derive the weighted-cutflow file name from the raw one
            CI_file_weighted = "%s_weighted.txt" % (
                CI_file[:CI_file.rfind(".")])
            #### Skip data files to be added to the weighted cutflow
            if Sample.lower().find("data") != -1: continue
            Cflow_Cmd = "python %s -i %s -a %s --weighted | tee %s " % (
                ResolvePath("XAMPPbase/python/printCutFlow.py"), Out_File,
                region, CI_file_weighted)
            if os.system(Cflow_Cmd) != 0:
                print "ERROR: Could not process cutflow %s in file %s" % (
                    region, Out_File)
                del_cmd = "rm %s" % (CI_file_weighted)
                os.system(del_cmd)
def insertPRWUser(user):
    """Add ``user`` to 'UsersWhoSubmittedPRW.txt' and push the updated list
    to the main XAMPPbase repository on a dedicated branch.

    Does nothing if the user is already listed.
    """
    Users = getUsersSubmittedPRW()
    if user in Users: return
    Users += [user]
    current_dir = os.getcwd()
    FileName = os.path.realpath(
        ResolvePath("XAMPPbase/UsersWhoSubmittedPRW.txt"))
    Pkg_Dir = os.path.realpath(ResolvePath("XAMPPbase"))
    ###############################################################################
    # Find out the current branch to propagate only                               #
    # the updated List to the main repository. Other changes regarding            #
    # side developments of the package should not be propagated yet               #
    ###############################################################################
    upstream = setupGITupstream()
    current_branch = getBranch()
    os.chdir(Pkg_Dir)
    # One branch per user; dots (e.g. from grid nicknames) are replaced
    new_branch = "PRW_" + user.replace(".", "_")
    if current_branch:
        # Commit pending local changes first, otherwise the checkout below fails
        os.system(
            "git commit -am \"Commit changes of all files in order to push the 'UsersWhoSubmittedPRW.txt'\""
        )
    print "INFO: Create new branch %s to update the UsersWhoSubmittedPRW " % (
        new_branch)
    os.system("git checkout -b %s %s/master" % (new_branch, upstream))
    print "INFO: %s submitted to the grid prw_config jobs. Add him to the common list such that others can download his files" % (
        user)
    WriteList(sorted(Users), FileName)
    os.system("git add UsersWhoSubmittedPRW.txt")
    os.system(
        "git commit UsersWhoSubmittedPRW.txt -m \"Added %s to the list of users who submitted a prw config creation job\""
        % (user))
    os.system("git push %s %s" % (upstream, new_branch))
    # Restore the branch the user was working on before, if any
    if current_branch: os.system("git checkout %s" % (current_branch))
    os.chdir(current_dir)
def getPRWblackList(): FileName = ResolvePath("XAMPPbase/BlackListedPRWdatasets.txt") if not FileName: print "ERROR: The file XAMPPbase/data/BlackListedPRWdatasets.txt could not be found in the repository" print "ERROR: Did you delete it by accident? Please check!!!!" sys.exit(1) return sorted(ReadListFromFile(FileName))
def getUsersSubmittedPRW(): FileName = ResolvePath("XAMPPbase/UsersWhoSubmittedPRW.txt") if not FileName: print "ERROR: The file XAMPPbase/data/UsersWhoSubmittedPRW.txt could not be found in the repository" print "ERROR: Did you delete it by accident? Please check!!!!" sys.exit(1) return sorted(ReadListFromFile(FileName))
def getBranch():
    """Return the name of the currently checked-out git branch of the
    XAMPPbase package, or None if HEAD is detached ('(no branch)').

    Fix: the raw 'git branch' line starts with '* ' -- previously that
    marker was kept, so callers building 'git checkout %s' commands ended
    up running 'git checkout * <name>', where the '*' is shell-globbed.
    The marker is now stripped off.
    """
    current_dir = os.getcwd()
    Pkg_Dir = os.path.realpath(ResolvePath("XAMPPbase"))
    os.chdir(Pkg_Dir)
    branch = None
    for B in commands.getoutput("git branch").split("\n"):
        # The active branch is prefixed with '*'; a detached HEAD shows
        # up as '* (no branch)' and is deliberately ignored
        if B.startswith("*") and B.find("(no branch)") == -1:
            branch = B.lstrip("*").strip()
    os.chdir(current_dir)
    return branch
def GetKinematicCutFromConfFile(Path, CutName): CutValue = -1 with open(ResolvePath(Path)) as InFile: for line in InFile: if line.find(CutName) > -1: return float(line.replace(CutName + ":", "").strip()) print "WARNING: Could not find the property %s in ST config file %s" % ( CutName, Path) return CutValue
def link_to_copy_area(self, config_file):
    """Copy ``config_file`` into the engine's config area under a
    randomized name (keeping the file extension) and return the new
    path, or None if the file cannot be resolved."""
    resolved = ResolvePath(config_file)
    if not resolved:
        return None
    ### Make sure the copy area exists
    CreateDirectory(self.config_dir(), False)
    ### Random base name, but keep the extension of the input file
    extension = config_file[config_file.rfind(".") + 1:]
    destination = "%s/%s.%s" % (self.config_dir(), id_generator(45), extension)
    os.system("cp %s %s" % (resolved, destination))
    return destination
def GetPropertyFromConfFile(Path, CutName): with open(ResolvePath(Path)) as InFile: for line in InFile: line = line.strip() if line.find("#") > -1: line = line[:line.find("#")] if line.find(CutName) > -1: return line.replace(CutName + ":", "").strip() print "WARNING: Could not find the property %s in ST config file %s" % ( CutName, Path) return ""
def getGITremotes():
    """Return a dict {remote_name: url} of the git remotes configured for
    the XAMPPbase package.

    'git remote --verbose' prints one line per remote and direction
    (fetch/push); the dict naturally collapses those duplicates.

    Fix: lines with fewer than two fields (e.g. the single empty line
    produced when no remote is configured) previously raised an
    IndexError on line.split()[0]; they are now skipped.
    """
    remotes = {}
    current_dir = os.getcwd()
    Pkg_Dir = os.path.realpath(ResolvePath("XAMPPbase"))
    os.chdir(Pkg_Dir)
    for line in commands.getoutput("git remote --verbose").split("\n"):
        fields = line.split()
        if len(fields) < 2:
            continue
        remotes[fields[0]] = fields[1]
    os.chdir(current_dir)
    return remotes
def set_cluster_control_module(self, control_location):
    """Resolve ``control_location`` and install it as the cluster control
    module by copying it into the config area. Only the first call has
    an effect; later calls are ignored."""
    # The control module may only be installed once per engine
    if len(self.__cluster_control_file):
        logging.debug("Cluster control module has already been set")
        return
    resolved_module = ResolvePath(control_location)
    if not resolved_module:
        logging.error("Could not set the cluster control module")
        return
    logging.info("Set the Cluster control module to %s" % (control_location))
    self.__cluster_control_file = self.link_to_copy_area(resolved_module)
def setupCIparser():
    """Build and return the argparse parser used to update the CI
    reference cutflows.

    Returns:
        argparse.ArgumentParser carrying all CI-related options
        (directories, regions, EOS locations, athena settings).
    """
    parser = argparse.ArgumentParser(
        prog='Update reference cutflows',
        conflict_handler='resolve',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--ciDir",
                        help="Directory where the CI files are located.",
                        default=ResolvePath("XAMPPbase/test"))
    parser.add_argument("--joboptions",
                        help="Which job options shall be run",
                        default="XAMPPbase/runXAMPPbase.py")
    parser.add_argument("--regions",
                        help="what are the regions to consider",
                        default=[],
                        nargs="+",
                        required=True)
    parser.add_argument("--serviceAccount",
                        help="what is the name of the service account to use",
                        default="xampp")
    parser.add_argument(
        "--EOSpath",
        help="Where are all the reference samples located on EOS",
        default="root://eoshome.cern.ch//eos/user/x/xampp/ci/base/")
    # Default scratch location is derived from the batch submission defaults
    parser.add_argument(
        "--TEMPdir",
        help="Where to store all of the temporary files",
        default="%s/CI_temp" %
        (setupBatchSubmitArgParser().get_default("BaseFolder")))
    parser.add_argument("--athenaArgParser",
                        help="Which athena argument parser shall be used",
                        default=ResolvePath("XAMPPbase/python/runAthena.py"))
    parser.add_argument("--evtMax",
                        help="Limit the number of events to run on",
                        default=-1,
                        type=int)
    parser.add_argument('--noSyst',
                        help='run without systematic uncertainties',
                        action='store_true',
                        default=False)
    return parser
def main():
    """Run the script given via --Cmd inside an encapsulated environment
    for the local cluster engine."""
    run_options = getArgumentParser().parse_args()
    ### The environment variables are already encapsulated in the script of usage
    ### Find the location of the script to execute
    script_location = ResolvePath(run_options.Cmd)
    if not script_location:
        logging.error("%s does not exist" % (run_options.Cmd))
        exit(1)
    ### Grant execute permission before launching
    os.system("chmod 0755 %s" % (script_location))
    ### Submit it and propagate the exit code
    exit(os.system(script_location))
def readXAMPPplottingInputConfig(config, sample=None):
    """Parse an XAMPPplotting input config and return the list of input
    files, following 'Import' statements recursively. The most recent
    'SampleName' is threaded through to imported configs."""
    input_files = []
    with open(ResolvePath(config)) as conf_file:
        for raw_line in conf_file:
            entry = raw_line.strip()
            # Ignore comment lines and empty lines
            if entry.startswith('#'):
                continue
            if entry.startswith('SampleName '):
                sample = entry.replace('SampleName ', '').replace('\n', '')
            if entry.startswith('Input '):
                input_files += entry.replace('Input ', '').replace('\n', '').split()
            if entry.startswith("Import "):
                nested_config = entry.replace('Import', '').replace('\n', '').strip()
                input_files += readXAMPPplottingInputConfig(nested_config, sample)
    return input_files
def pack_environment(self, env_vars, script):
    """Copy ``script`` into the config area and wrap it, together with the
    given environment variables, into a self-contained bash script.

    Returns the path of the generated wrapper script, or False if
    ``script`` cannot be resolved.
    """
    exec_script = self.link_to_copy_area(script)
    if not exec_script: return False
    # The ship file exports the requested environment variables
    ship_file = self.write_ship_file(env_vars)
    if self.run_singularity():
        # Inside a container the original ship file and script are handed
        # over via dedicated CONTAINER_* variables and the generic
        # Singularity launcher becomes the script that is executed
        ship_file = self.write_ship_file([
            ("CONTAINER_SCRIPT", exec_script),
            ("CONTAINER_IMAGE", self.singularity_container()),
            ("CONTAINER_SHIPING_FILE", ship_file),
        ])
        exec_script = self.link_to_copy_area(
            ResolvePath("ClusterSubmission/Singularity.sh"))
    env_script = WriteList([
        "#!/bin/bash",
        "source %s" % (ship_file),
        "source %s" % (exec_script)
    ], "%s/EnvScript_%s.sh" % (self.config_dir(), id_generator(50)))
    # Make the wrapper executable (owner only)
    os.system("chmod 0700 %s" % (env_script))
    return env_script
def _cmd_exec(self):
    """Run this thread's script through the exeScript helper and return
    True if it finished with exit code 0.

    The script is first packed into an environment wrapper so each
    thread can set its own environment variables independently; all
    output is redirected into the thread's log file.
    """
    if not os.path.exists(self.__script_to_exe):
        logging.error("<_cmd_exec>: Could not find %s" %
                      (self.__script_to_exe))
        return False
    ### Threads can set their own enviroment variables without affecting the others
    os.system("chmod 0700 %s" % (self.__script_to_exe))
    # thread_number() == -1 marks a single job; otherwise it is one task
    # of an array job
    if self.thread_number() == -1:
        logging.info("<_cmd_exec> Start job %s" % (self.name()))
    else:
        logging.info(
            "<_cmd_exec> Start task %d/%d in job %s" %
            (self.thread_number(), self.thread_engine().get_array_size(
                task_name=self.name()), self.name()))
    cmd_file = self.thread_engine().pack_environment(
        env_vars=self.__env_vars, script=self.__script_to_exe)
    return os.system("python %s --Cmd %s > %s 2>&1" %
                     (ResolvePath("ClusterSubmission/exeScript.py"), cmd_file,
                      self.log_file())) == 0
def _cmd_exec(self):
    """Run this thread's script through the exeScript helper, passing the
    environment variables on the command line; returns True if the
    script finished with exit code 0.
    """
    if not os.path.exists(self.__script_to_exe):
        print "ERROR <_cmd_exec>: Could not find %s" % (
            self.__script_to_exe)
        return False
    ### Threads can set their own enviroment variables without affecting the others
    os.system("chmod 0755 %s" % (self.__script_to_exe))
    print "INFO <_cmd_exec> Start %s to process %s" % (
        self.name(), self.__script_to_exe)
    return os.system(
        "python %s --Cmd %s --envVars %s > %s/%s%s.log 2>&1" % (
            ResolvePath("ClusterSubmission/exeScript.py"),
            self.__script_to_exe,
            # <var> <value> pairs flattened into one blank-separated string
            " ".join(
                ["%s %s" % (var, value) for var, value in self.__env_vars]),
            self.thread_engine().log_dir(),
            self.name(),
            # Array tasks get their task number appended to the log name
            "" if self.thread_number() < 1 else "_%d" % (self.thread_number()),
        )) == 0
def __init__(
        self,
        cluster_engine=None,
        jobOptions="",
        input_ds=None,
        run_time="19:59:59",
        dcache_dir="",
        alg_opt="",  ### Extra options of the algorithm like noSyst... etc
        vmem=2000,
        events_per_job=100000,
        hold_jobs=None,
        files_per_merge=10,
        final_split=1,
):
    """Set up an analysis job: store the engine and job configuration,
    then schedule every dataset in ``input_ds``. If a dataset cannot be
    prepared, the config area is wiped and nothing is scheduled.

    Fixes: ``input_ds``/``hold_jobs`` used mutable default arguments
    (shared between calls), and the early exit used ``return False``,
    which raises ``TypeError: __init__() should return None`` at runtime.
    """
    ### Avoid the shared-state pitfall of mutable default arguments
    input_ds = input_ds if input_ds is not None else []
    hold_jobs = hold_jobs if hold_jobs is not None else []
    self.__cluster_engine = cluster_engine
    ### Job splitting configurations
    self.__events_per_job = events_per_job
    self.__dcache_dir = dcache_dir
    self.__dcache_loc = ResolvePath(dcache_dir)
    ### analysis job configurations
    self.__job_options = jobOptions
    self.__alg_opt = alg_opt
    self.__run_time = run_time
    self.__vmem = vmem
    ### Hold jobs (copied so later caller-side mutations have no effect)
    self.__hold_jobs = [H for H in hold_jobs]
    ### Merging
    self.__merge_interfaces = []
    self.__files_per_merge_itr = files_per_merge
    self.__final_split = final_split
    self.__nsheduled = 0
    for ds in sorted(input_ds):
        if not self.__prepare_input(ds):
            # Scheduling failed -> reset the config area and bail out.
            # __init__ must return None; 'return False' would raise.
            CreateDirectory(self.engine().config_dir(), True)
            self.__nsheduled = 0
            return
def getGRL(year=[15, 16, 17, 18], flavour='GRL', config='ClusterSubmission/GRL.json'):
    """Get from json file either
         - default Good Run Lists (flavour='GRL') or
         - default lumi calc files (flavour='lumiCalc') or
         - default actual mu pile-up reweigthing files (flavour='actualMu'))
       as a list of strings.
       Can be called without arguments to give just GRLs for all years or
       with a specific (list of) year(s). Years may be given as 17, 2017
       or '17'.
       Default input is config='ClusterSubmission/GRL.json'
    """
    if isinstance(year, list):
        myYears = ClearFromDuplicates([str(y) for y in year if y < 100] +
                                      [str(y - 2000) for y in year if y > 2000])
    elif isinstance(year, int) or isinstance(year, str):
        # Fix: a str year ('17') previously hit 'year - 2000' and raised.
        # Normalize to int first so both 17 and 2017 (int or str) work.
        y = int(year)
        myYears = [str(y)] if y < 100 else [str(y - 2000)]
    global m_GRLdict
    if not m_GRLdict:
        # Load the json database only once and cache it module-wide;
        # 'with' closes the file handle (previously leaked)
        with open(ResolvePath(config), 'r') as grl_file:
            m_GRLdict = json.load(grl_file)
    try:
        if flavour == 'actualMu' and ('15' in myYears or '16' in myYears):
            logging.warning("actual mu PRW is only avaliable for data17 and data18.")
            if not ('17' in myYears or '18' in myYears):
                logging.error("The request is ill-defined and does not make sense.")
                raise NameError('actual mu PRW is only avaliable for data17 and data18, not for data15 or data16')
        return [str(value) for key, value in m_GRLdict[flavour].items() if (value and key in ['data' + y for y in myYears])]
    except Exception as e:
        logging.error("Error when accessing GRL/lumiCalc/actualMu information!")
        raise (e)
def setupGITupstream(upstream="upstream"):
    """Make sure a git remote named ``upstream`` pointing to the central
    XAMPP repository exists for the XAMPPbase package and fetch from it.

    Returns the remote name. NOTE(review): if a remote named ``upstream``
    exists but does NOT point to the XAMPP_GIT group, the function falls
    through and implicitly returns None -- confirm whether that is
    intended by the callers.
    """
    current_dir = os.getcwd()
    Pkg_Dir = os.path.realpath(ResolvePath("XAMPPbase"))
    remotes = getGITremotes()
    if len(remotes) == 0:
        print "ERROR: No remote GIT repository has been found. How did you get the code?"
        exit(1)
    if upstream in remotes.iterkeys():
        # Remote already configured -- fetch only if it points to the
        # central XAMPP group
        if remotes[upstream].split("/")[-2] == XAMPP_GIT:
            os.chdir(Pkg_Dir)
            os.system("git fetch %s" % (upstream))
            os.chdir(current_dir)
            return upstream
    else:
        print "INFO: Add the original XAMPPbase repository to the remote list"
        # Derive the central URL by swapping the user/group segment of an
        # arbitrary existing remote URL with the central XAMPP_GIT group
        URLs = [R for R in remotes.itervalues()]
        UP_URL = URLs[0]
        ToReplace = UP_URL.split("/")[-2]
        UP_URL = UP_URL.replace(ToReplace, XAMPP_GIT)
        os.chdir(Pkg_Dir)
        os.system("git remote add %s %s" % (upstream, UP_URL))
        os.system("git fetch %s" % (upstream))
        os.chdir(current_dir)
        return upstream
def AssembleIO(): #-------------------------------------------------------------- # Reduce the event loop spam a bit #-------------------------------------------------------------- from AthenaCommon.Logging import logging recoLog = logging.getLogger('MuonAnalysis I/O') recoLog.info('****************** STARTING the job *****************') if os.path.exists("%s/athfile-cache.ascii.gz" % (os.getcwd())): recoLog.info( "Old athfile-cache found. Will delete it otherwise athena just freaks out. This little boy." ) os.system("rm %s/athfile-cache.ascii.gz" % (os.getcwd())) from GaudiSvc.GaudiSvcConf import THistSvc from AthenaCommon.JobProperties import jobproperties import AthenaPoolCnvSvc.ReadAthenaPool from AthenaCommon.AthenaCommonFlags import athenaCommonFlags as acf from AthenaServices.AthenaServicesConf import AthenaEventLoopMgr from AthenaCommon.AppMgr import ServiceMgr from ClusterSubmission.Utils import ReadListFromFile, ResolvePath, IsROOTFile from MuonAnalysis.Utils import IsTextFile ServiceMgr += AthenaEventLoopMgr(EventPrintoutInterval=1000000) ServiceMgr += THistSvc() OutFileName = "AnalysisOutput.root" if not "outFile" in globals( ) else outFile ServiceMgr.THistSvc.Output += [ "MuonAnalysis DATAFILE='{}' OPT='RECREATE'".format(OutFileName) ] recoLog.info("Will save the job's output to " + OutFileName) ROOTFiles = [] if "inputFile" in globals(): recoLog.info("Use the following %s as input" % (inputFile)) ROOTFiles = [] ResolvedInFile = ResolvePath(inputFile) if inputFile.startswith('root://'): ROOTFiles.append(inputFile) elif ResolvedInFile and os.path.isfile(ResolvedInFile): if IsTextFile(ResolvedInFile): ROOTFiles = ReadListFromFile(ResolvedInFile) else: ROOTFiles.append(ResolvedInFile) elif ResolvedInFile and os.path.isdir(ResolvedInFile): for DirEnt in os.listdir(ResolvedInFile): if IsROOTFile(DirEnt): if DirEnt.find(ResolvedInFile) != -1: ROOTFiles.append(DirEnt) else: ROOTFiles.append("%s/%s" % (ResolvedInFile, DirEnt)) else: raise RuntimeError("Invalid 
input " + inputFile) if len(ROOTFiles) == 0: raise RuntimeError("No ROOT files could be loaded as input") ServiceMgr.EventSelector.InputCollections = ROOTFiles acf.FilesInput = ROOTFiles if "nevents" in globals(): recoLog.info("Only run on %i events" % (int(nevents))) theApp.EvtMax = int(nevents) if "nskip" in globals(): recoLog.info("Skip the first %i events" % (int(nskip))) ServiceMgr.EventSelector.SkipEvents = int(nskip) """if isData(): recoLog.info("We're running over data today")
help='Which derivation should be written to the file lists ', default='SUSY2') parser.add_argument("--min_ptag", help="Which ptag should the derivation at least have", default=-1, type=int) parser.add_argument("--max_ptag", help="Which ptag should the derivation at least have", default=-1, type=int) return parser if __name__ == "__main__": RunOptions = setupArgParser().parse_args() Sample_Dir = ResolvePath(RunOptions.ListDir) No_AOD = [] TO_REQUEST = [] if not Sample_Dir: logging.error("ERROR: Please give a valid directory") exit(1) for File in os.listdir(Sample_Dir): if os.path.isdir("%s/%s" % (Sample_Dir, File)): continue logging.info("Update file list %s" % (File)) DataSets = sorted( ClearFromDuplicates([ GetPRW_datasetID(DS) for DS in ReadListFromFile("%s/%s" % (Sample_Dir, File))
def configurePRWtool(offset=0):
    """Collect the pileup-reweighting (PRW) config files for the current
    job and return (sorted config file list, lumi-calc config).

    ``offset`` is added to every MC channel number when looking up the
    per-DSID PRW files in the central cvmfs repository.

    Fix: the AFII/Fullsim mixing error message was a string literal
    broken by a stray line break (a syntax error); it is re-joined into
    one valid literal.
    """
    from AthenaCommon.AppMgr import ServiceMgr
    from PyUtils import AthFile
    from ClusterSubmission.Utils import ResolvePath, ClearFromDuplicates
    recoLog = logging.getLogger('XAMPP getPrwConfig')
    use1516Data = isData()
    use17Data = isData()
    use18Data = isData()
    ### The actual mu config file is needed to activate the actual mu reweighting recommended for mc16d & mc16e
    ### https://indico.cern.ch/event/712774/contributions/2928042/attachments/1614637/2565496/prw_mc16d.pdf
    prwConfig_mc16a = []
    prwConfig_mc16d = getGRL(17, flavour='actualMu')
    prwConfig_mc16e = getGRL(18, flavour='actualMu')
    run_channel = [] if isData() else [(getRunNumbersMC(), getMCChannelNumber() + offset)]
    athArgs = getAthenaArgs()
    if not isData() and (len(ServiceMgr.EventSelector.InputCollections) > 1 and athArgs.parseFilesForPRW):
        recoLog.info("Run a local job. Try to find foreach job the prw-config file")
        for i, in_file in enumerate(ServiceMgr.EventSelector.InputCollections):
            recoLog.info("Look up the channel number for %s" % (in_file))
            ### That file is used to read the meta-data we do not need to open it twice
            if i == 0: continue
            af = AthFile.fopen(in_file)
            # AFII samples are identified via the AtlfastII/Fast tag-info keys
            afII = not isData() and 'tag_info' in af.fileinfos and len(
                [key for key in af.fileinfos['tag_info'].iterkeys() if 'AtlfastII' in key or 'Fast' in key]) > 0
            mc_runNumber = af.fileinfos["run_number"][0] if len(af.fileinfos["run_number"]) > 0 else -1
            mc_channel = af.fileinfos["mc_channel_number"][0] if not isData() and len(af.fileinfos["mc_channel_number"]) > 0 else -1
            ## If the user mixes AFII with fullsim calibration
            ## the resuls are likely to mismatch. We must prevent this and kill
            ## the job
            if afII != isAF2():
                recoLog.error("You are mixing AFII with Fullsim files. Scale-factors and jet calibration are largely affected. Please fix")
                exit(1)
            run_channel += [(mc_runNumber, mc_channel + offset)]
    ## Find the central repo
    for period_num, mc_channel in run_channel:
        # period numbers: 284500 -> mc16a, 300000 -> mc16d, 310000 -> mc16e
        if period_num == 284500:
            config_file = ResolvePath("dev/PileupReweighting/share/DSID{dsid_short}xxx/pileup_mc16a_dsid{dsid}_{sim}.root".format(
                dsid_short=str(mc_channel)[0:3], dsid=mc_channel, sim="AFII" if isAF2() else "FS"))
            use1516Data = True
            if not config_file: continue
            prwConfig_mc16a += [config_file]
        elif period_num == 300000:
            config_file = ResolvePath("dev/PileupReweighting/share/DSID{dsid_short}xxx/pileup_mc16d_dsid{dsid}_{sim}.root".format(
                dsid_short=str(mc_channel)[0:3], dsid=mc_channel, sim="AFII" if isAF2() else "FS"))
            use17Data = True
            if not config_file: continue
            prwConfig_mc16d += [config_file]
        elif period_num == 310000:
            config_file = ResolvePath("dev/PileupReweighting/share/DSID{dsid_short}xxx/pileup_mc16e_dsid{dsid}_{sim}.root".format(
                dsid_short=str(mc_channel)[0:3], dsid=mc_channel, sim="AFII" if isAF2() else "FS"))
            use18Data = True
            if not config_file: continue
            prwConfig_mc16e += [config_file]
        else:
            recoLog.warning("Nothing has been found for the sample %d in prw period %d" % (mc_channel, period_num))
            continue
    ConfigFiles = []
    if use1516Data: ConfigFiles += prwConfig_mc16a
    if use17Data: ConfigFiles += prwConfig_mc16d
    if use18Data: ConfigFiles += prwConfig_mc16e
    return sorted(ClearFromDuplicates(ConfigFiles)), getLumiCalcConfig(use1516Data=use1516Data, use17Data=use17Data, use18Data=use18Data)
import os, argparse, ROOT from ClusterSubmission.Utils import WriteList, IsROOTFile, ResolvePath from XAMPPbase.CreateMergedNTUP_PILEUP import readPRWchannels from pprint import pprint if __name__ == "__main__": parser = argparse.ArgumentParser( prog='CompareNTUP_PILEUP', description= 'This script searches for NTUP_PILEUP derivations in rucio (or takes a given list) and sorts the datasets by their AMI-tags. Then it donwloads and merges them accordingly.', formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('--oldPRWDir', help='Path to the previous files', default=ResolvePath("XAMPPbase/PRWFiles")) parser.add_argument('--newPRWDir', help='Path to the new file', required=True) parser.add_argument('--uniteFiles', help="Put everything which was in the old file also in the new one", default=False, action="store_true") RunOptions = parser.parse_args() files_in_old = [f for f in os.listdir(RunOptions.oldPRWDir) if IsROOTFile(f)] files_in_new = [f for f in os.listdir(RunOptions.newPRWDir) if IsROOTFile(f)] MyxSecDB = ROOT.SUSY.CrossSectionDB() for new in files_in_new: if not new in files_in_old: print "WARNING: Strange the file %s is new. Is it a new campaign?" continue chan_in_old = readPRWchannels("%s/%s" % (RunOptions.oldPRWDir, new)) chan_in_new = readPRWchannels("%s/%s" % (RunOptions.newPRWDir, new))
prog='exScript', formatter_class=argparse.ArgumentDefaultsHelpFormatter, description= "Helper script used to provide encapsulated environments for local submission", ) parser.add_argument('--Cmd', help='Location where the command list is stored', required=True) parser.add_argument('--envVars', help='List of variables to execute', required=True, nargs="+", default=[]) options = parser.parse_args() ### Arguments are parsed <var_name> <value> --> odd numbers indicate one of the components is missing if len(options.envVars) % 2 == 1: print "ERROR: Please give to every variable a value to assign" exit(1) ### Export the environment variables for i in range(0, len(options.envVars), 2): os.environ[options.envVars[i]] = options.envVars[i + 1] ### Find the location of the script to execute cmd_to_exec = ResolvePath(options.Cmd) if not cmd_to_exec: print "ERROR: %d does not exist" % (options.Cmd) exit(1) ### Make sure that we can execute it os.system("chmod 0755 %s" % (cmd_to_exec)) ### Submit it exit(os.system(cmd_to_exec))