def __init__(self, tuples, # A list or dictionary of input file paths. name=None, out_dir=None, # The directory where the output should go. out_file=None, # The name of the output file (including ".root") save=True, v=False, tt_names=["analyzer/events"], # The names of the input TTrees count=True, ): # Arguments and variables: self.name = inspect.stack()[1][1][:-3] if name == None else name # This makes the name attribute be the script name if it's not already called something. self.time_string = utilities.time_string()[:-4] # A time string indicating when the analyzer was created self.save = save self.count = count # Organize input: if isinstance(tuples, dict): self.tuples_in = tuples # This is for potential debugging. elif isinstance(tuples, str) or isinstance(tuples, list): if isinstance(tuples, str): tuples = [tuples] if isinstance(tuples, list): if isinstance(tuples[0], str): tuples = {self.name: tuples} else: tuples = {self.Name: tuples} else: print "ERROR (analyzer): \"tuples\" should be a string, list, or dictionary." # Determine if tuples are raw (file locations) or dataset instances: if v: print "Making TChain(s) ..." self.tt_in = {} self.tt_info = {} self.tc = TCanvas("tc_{}".format(name), "tc_{}".format(name), 500, 500) SetOwnership(self.tc, 0) samples = tuples.keys() for sample, tups in tuples.iteritems(): ns = [] # Handle different input schemes (either list of tuples or list of file names): food = 2 if isinstance(tups[0], dataset.dataset) else 1 if food == 2: # A bit KLUDGY: fs = [] for tup in tups: fs += [f if "root://cmsxrootd.fnal.gov/" in f else "root://cmsxrootd.fnal.gov/" + f for f in tup.files] ns += tup.ns tups = fs if v: print "\tMaking TChain(s) for {} ...".format(sample) # self.tt_in[sample] = [] for tt_name in tt_names: if v: print "\t\tTChain named {}:".format(tt_name) tt = TChain(tt_name) for tup in tups: tt.Add(tup) if v: print "\t\t+ {}".format(tup) SetOwnership(tt, 0) if (len(tt_names) > 1) and (len(samples) > 1): key = "_".join([sample, tt_name]) elif (len(tt_names) > 1) and (len(samples) == 1): key = tt_name elif len(tt_names) == 1: key = sample else: print "ERROR (analyzer.__init__): The tuples configuration is weird:\n{}".format(tuples) sys.exit() self.tt_in[key] = tt info = { "ns": ns } self.tt_info[key] = info # self.tc[key] = TCanvas("tc_{}".format(key), "tc_{}".format(key), 500, 500) # SetOwnership(self.tc[key], 0) # ROOT setup: # if v: print "\tSetting up ROOT ..." gROOT.SetStyle("Plain") gStyle.SetTitleBorderSize(0) gStyle.SetPalette(1) gROOT.SetBatch() # Prevent canvas windows from opening # SetOwnership(gROOT, 0) # Organize output: if save: # Set attributes to defaults if they aren't set: ## Output directory: if not out_dir: self.out_dir = "results/{}_{}".format(self.name, self.time_string) if not os.path.exists(self.out_dir): os.makedirs(self.out_dir) ## Output file: if not out_file: self.out_file = "{}_{}.root".format(self.name, self.time_string) # Define new attributes: self.out_path = self.out_dir + "/" + self.out_file ## ROOT output: self.out = TFile(self.out_path, "RECREATE") SetOwnership(self.out, 0) ### Tuple output: self.tt_out = {} for key, tt in self.tt_in.iteritems(): self.tt_out[key] = TTree(key, 'anatuple') SetOwnership(self.tt_out[key], 0) ### Histograms: self.plots = [] # Event loops self.loops = {} for key, tt in self.tt_in.iteritems(): self.loops[key] = event_loop(self, key)
def __init__( self, tuples, # A list or dictionary of input file paths. name=None, out_dir=None, # The directory where the output should go. out_file=None, # The name of the output file (including ".root") save=True, v=False, tt_names=["tuplizer/events"], # The names of the input TTrees count=None, use_condor=False, ): # Basic configuration: gROOT.SetBatch() # Prevent ROOT canvas windows from opening # Arguments and variables: self.name = inspect.stack()[1][ 1][:-3] if name == None else name # This makes the name attribute be the script name if it's not already called something. self.time_string = utilities.time_string( )[:-4] # A time string indicating when the analyzer was created self.save = save self.count = count self.condor = use_condor self.out_file = out_file self.out_dir = out_dir self.tuples_in = tuples # This is useful debugging. self.tuples = {} self.tt_names = tt_names # Organize input: if isinstance(tuples, str): tuples = [tuples] if isinstance(tuples, list): if all(isinstance(tup, str) for tup in tuples): self.tuples = {self.name: tuples} elif all(isinstance(tup, dataset.dataset) for tup in tuples): for tup in tuples: if tup.process not in self.tuples: self.tuples[tup.process] = [] self.tuples[tup.process].append(tup) else: print "ERROR (analyzer): unrecognized input:" print self.tuples_in sys.exit() elif isinstance(tuples, dict): self.tuples = tuples else: print "ERROR (analyzer): \"tuples\" should be a string, list, or dictionary." print "tuples = {}".format(tuples) sys.exit() # Calculate event number if necessary: if self.count == None: if any([ isinstance(tup, str) for tup in utilities.flatten_list( [thing for thing in self.tuples.values()]) ]): self.count = True else: self.count = False ## Create TChains: if v: print "Making TChain(s) ..." self.tt_in = {} self.tt_info = {} self.tc = TCanvas("tc_{}".format(name), "tc_{}".format(name), 500, 500) SetOwnership(self.tc, 0) samples = self.tuples.keys() for sample, tups in self.tuples.items(): ns = [] # Handle different input schemes (either list of tuples or list of file names): food = 2 if isinstance(tups[0], dataset.dataset) else 1 if food == 2: # A bit KLUDGY: fs = [] for tup in tups: fs += [ f if not tup.dir.eos else "root://cmsxrootd.fnal.gov/" + f for f in tup.files ] ns += tup.ns tups = fs if v: print "\tMaking TChain(s) for {} ...".format(sample) # self.tt_in[sample] = [] if not self.tt_names: tt_names = [sample] for tt_name in tt_names: if v: print "\t\tTChain named {}:".format(tt_name) tt = TChain(tt_name) for tup in tups: tt.Add(tup) if v: print "\t\t+ {}".format(tup) SetOwnership(tt, 0) if (len(tt_names) > 1) and (len(samples) > 1): key = "_".join([sample, tt_name]) elif (len(tt_names) > 1) and (len(samples) == 1): key = tt_name elif len(tt_names) == 1: key = sample else: print "ERROR (analyzer.__init__): The tuples configuration is weird:\n{}".format( self.tuples) sys.exit() self.tt_in[key] = tt info = {"ns": ns} self.tt_info[key] = info # self.tc[key] = TCanvas("tc_{}".format(key), "tc_{}".format(key), 500, 500) # SetOwnership(self.tc[key], 0) # ROOT setup: # if v: print "\tSetting up ROOT ..." gROOT.SetStyle("Plain") gStyle.SetTitleBorderSize(0) gStyle.SetPalette(1) # SetOwnership(gROOT, 0) # Organize output: if save: # Set attributes to defaults if they aren't set: ## Output directory: if not out_dir and not self.condor: self.out_dir = "results/{}_{}".format(self.name, self.time_string) elif self.condor: self.out_dir = "." if not os.path.exists(self.out_dir): os.makedirs(self.out_dir) ## Output file: if not out_file and not self.condor: self.out_file = "{}_{}.root".format(self.name, self.time_string) if self.condor: self.out_file = "job_{}.root".format(self.condor) # Define new attributes: self.out_path = self.out_dir + "/" + self.out_file ## ROOT output: self.out = TFile(self.out_path, "RECREATE") SetOwnership(self.out, 0) ### Tuple output: self.tt_out = {} for key, tt in self.tt_in.iteritems(): self.tt_out[key] = TTree(key, 'anatuple') SetOwnership(self.tt_out[key], 0) ### Histograms: self.plots = [] # Event loops self.loops = {} for key, tt in self.tt_in.iteritems(): self.loops[key] = event_loop(self, key)
def main(): # Arguments: a = variables.arguments() miniaods = dataset.fetch_entries("miniaod", a.query) tstring = utilities.time_string()[:-4] suffix = "cutpt{}".format(cut_pt_filter) cmssw_version = cmssw.get_version(parsed=False) for miniaod in miniaods: print "Making condor setup for {} ...".format(miniaod.Name) sample = miniaod.get_sample() # Create groups of input files: groups = [] group = [] n_group = 0 # print miniaod.ns for i, n in enumerate(miniaod.ns): n_group += n group.append(miniaod.files[i]) if (n_group >= n_per) or (i == len(miniaod.ns) - 1): groups.append(group) group = [] n_group = 0 print "\tCreating {} jobs ...".format(len(groups)) # Prepare directories: path = "condor_jobs/tuplizer/{}/{}_{}_{}".format(tstring, miniaod.subprocess, miniaod.generation, suffix) log_path = path + "/logs" if not os.path.exists(path): os.makedirs(path) if not os.path.exists(log_path): os.makedirs(log_path) eos_path = "/store/user/tote/{}/tuple_{}_{}_{}/{}".format( sample.name, miniaod.subprocess, miniaod.generation, suffix, tstring ) # Output path. # Create job scripts: for i, group in enumerate(groups): job_script = "#!/bin/bash\n" job_script += "\n" job_script += "# Untar CMSSW area:\n" job_script += "tar -xzf {}.tar.gz\n".format(cmssw_version) job_script += "cd {}/src/Analyzers/FatjetAnalyzer/test\n".format(cmssw_version) job_script += "\n" job_script += "# Setup CMSSW:\n" job_script += "source /cvmfs/cms.cern.ch/cmsset_default.sh\n" job_script += "eval `scramv1 runtime -sh` #cmsenv\n" job_script += "\n" job_script += "# Run CMSSW:\n" list_str = ",".join(['"{}"'.format(g) for g in group]) out_file = "tuple_{}_{}_{}_{}.root".format(miniaod.subprocess, miniaod.generation, suffix, i + 1) job_script += 'cmsRun tuplizer_cfg.py subprocess="{}" generation="{}" cutPtFilter={} outDir="." outFile="{}" inFile={}'.format( miniaod.subprocess, miniaod.generation, cut_pt_filter, out_file, list_str ) if sample.data: job_script += " data=True".format(sample.data) if sample.mask: job_script += ' mask="{}"'.format(sample.mask) job_script += " &&\n" job_script += "xrdcp -f {} root://cmseos.fnal.gov/{} &&\n".format(out_file, eos_path) job_script += "rm {}\n".format(out_file) with open("{}/job_{}.sh".format(path, i + 1), "w") as out: out.write(job_script) # Create condor configs: for i, group in enumerate(groups): job_config = "universe = vanilla\n" job_config += "Executable = job_{}.sh\n".format(i + 1) job_config += "Should_Transfer_Files = YES\n" job_config += "WhenToTransferOutput = ON_EXIT\n" job_config += "Transfer_Input_Files = {}.tar.gz\n".format(cmssw_version) job_config += 'Transfer_Output_Files = ""\n' job_config += "Output = logs/job_{}.stdout\n".format(i + 1) job_config += "Error = logs/job_{}.stderr\n".format(i + 1) job_config += "Log = logs/job_{}.log\n".format(i + 1) job_config += "notify_user = ${LOGNAME}@FNAL.GOV\n" job_config += "x509userproxy = $ENV(X509_USER_PROXY)\n" job_config += "Queue 1\n" with open("{}/job_{}.jdl".format(path, i + 1), "w") as out: out.write(job_config) # Create run script: run_script = "# Update cache info:\n" run_script += "bash $HOME/condor/cache.sh\n" run_script += "\n" run_script += "# Grid proxy existence & expiration check:\n" run_script += "PCHECK=`voms-proxy-info -timeleft`\n" run_script += 'if [[ ($? -ne 0) || ("$PCHECK" -eq 0) ]]; then\n' run_script += "\tvoms-proxy-init -voms cms --valid 168:00\n" run_script += "fi\n" run_script += "\n" run_script += "# Copy python packages to CMSSW area:\n" run_script += "cp -r $HOME/decortication/decortication $CMSSW_BASE/python\n" run_script += "cp -r $HOME/decortication/resources $CMSSW_BASE/python\n" run_script += "cp -r $HOME/truculence/truculence $CMSSW_BASE/python\n" run_script += "\n" run_script += "# Make tarball:\n" run_script += "echo 'Making a tarball of the CMSSW area ...'\n" run_script += "tar --exclude-caches-all -zcf ${CMSSW_VERSION}.tar.gz -C ${CMSSW_BASE}/.. ${CMSSW_VERSION}\n" run_script += "\n" run_script += "# Prepare EOS:\n" run_script += "eos root://cmseos.fnal.gov mkdir -p {}\n".format(eos_path) run_script += "\n" run_script += "# Submit condor jobs:\n" for i, group in enumerate(groups): run_script += "condor_submit job_{}.jdl\n".format(i + 1) run_script += "\n" run_script += "# Remove tarball:\n" run_script += "#rm ${CMSSW_VERSION}.tar.gz\n" # I if remove this, the jobs might complain. run_script += "\n" run_script += "# Remove python packages:\n" run_script += "#rm -rf $CMSSW_BASE/python/decortication\n" run_script += "#rm -rf $CMSSW_BASE/python/resources\n" run_script += "#rm -rf $CMSSW_BASE/python/truculence\n" with open("{}/run.sh".format(path), "w") as out: out.write(run_script) print "\tThe jobs are in {}".format(path) return True
def create_jobs(self, cmd="", memory=2000, input_files=None): # Create condor jobs for each input file. # if not cmd: # print "ERROR (analyzer.create_jobs): You need to specify a cmd to run for each job." # return False # Define variables: Site = dataset.Site data_dir = Site.get_dir("data") cmssw_version = cmssw.get_version(parsed=False) if not cmd: cmd = "python {}.py -f %%FILE%% -o job_%%N%%.root".format( self.name) tstring = utilities.time_string()[:-4] path = "condor_jobs/{}/{}".format(self.name, tstring) log_path = path + "/logs" # out_path = path + "/results" out_path = os.path.join(data_dir.path, "analyzer_jobs", tstring) # Output path. files_for_condor = [ "{}/{}.py".format(os.getcwd(), self.name), "{}.tar.gz".format(cmssw_version) ] if isinstance(input_files, str): input_files = [input_files] if input_files: input_files = [ os.getcwd() + "/" + f for f in input_files if "/" not in f ] if input_files: files_for_condor.extend(input_files) # Make directories for p in [path, log_path]: if not os.path.exists(p): os.makedirs(p) # Make job files: files = self.get_files(info=True) ## Make job scripts: for i, f_dict in enumerate(files): f = f_dict["file"] if f[:12] == "/store/user/": f = "root://cmseos.fnal.gov/" + f job_name = "analyzer_job{}_{}".format(i + 1, tstring) job_script = "#!/bin/bash\n" job_script += "\n" job_script += "# Untar CMSSW area:\n" job_script += "tar -xzf {}.tar.gz &&\n".format(cmssw_version) for input_file in input_files: input_file = input_file.split("/")[-1] if "CMSSW_" not in f: job_script += "cp {} {}/src/Analyzers/FatjetAnalyzer/test\n".format( input_file, cmssw_version) job_script += "cd {}/src/Analyzers/FatjetAnalyzer/test\n".format( cmssw_version) job_script += "\n" job_script += "# Setup CMSSW:\n" job_script += "source /cvmfs/cms.cern.ch/cmsset_default.sh\n" job_script += "scramv1 b ProjectRename\n" job_script += "eval `scramv1 runtime -sh` #cmsenv\n" job_script += "\n" job_script += cmd.replace("%%FILE%%", f).replace( "%%PROCESS%%", f_dict["process"]).replace("%%N%%", str(i + 1)) + "\n" if data_dir.eos: job_script += "xrdcp -f job_{}.root root://{}/{}\n".format( i + 1, Site.url_eos, out_path) else: job_script += "mv -f job_{}.root {}\n".format(i + 1, out_path) with open("{}/{}.sh".format(path, job_name), "w") as out: out.write(job_script) ## Make condor configs: for i in range(len(files)): job_name = "analyzer_job{}_{}".format(i + 1, tstring) job_config = "universe = vanilla\n" job_config += "Executable = {}.sh\n".format(job_name) job_config += "Should_Transfer_Files = YES\n" job_config += "WhenToTransferOutput = ON_EXIT\n" job_config += "Transfer_Input_Files = {}\n".format( ",".join(files_for_condor)) # job_config += "Output_Destination = results\n" # job_config += "Transfer_Output_Files = job_{}.root\n".format(i+1) job_config += "Transfer_Output_Files = \"\"\n" job_config += "Output = logs/{}.stdout\n".format(job_name) job_config += "Error = logs/{}.stderr\n".format(job_name) job_config += "Log = logs/{}.log\n".format(job_name) if Site.name == "hexcms": job_config += "notify_user = ${LOGNAME}@FNAL.GOV\n" job_config += "x509userproxy = $ENV(HOME)/myproxy\n" else: job_config += "x509userproxy = $ENV(X509_USER_PROXY)\n" job_config += "request_memory = {}\n".format(memory) job_config += "Queue 1\n" with open("{}/{}.jdl".format(path, job_name), "w") as out: out.write(job_config) ## Make run script: run_script = "#!/bin/bash\n" run_script += "\n" run_script += "# Grid proxy existence & expiration check:\n" run_script += "PCHECK=`voms-proxy-info -timeleft`\n" run_script += "if [[ ($? -ne 0) || (\"$PCHECK\" -eq 0) ]]; then\n" run_script += "\tvoms-proxy-init -voms cms --valid 168:00\n" run_script += "fi\n" run_script += "\n" run_script += "# Make tarball:\n" run_script += "echo 'Making a tarball of the CMSSW area ...'\n" run_script += "tar --exclude-caches-all -zcf ${CMSSW_VERSION}.tar.gz -C ${CMSSW_BASE}/.. ${CMSSW_VERSION}\n" run_script += "\n" run_script += "# Prepare output directory:\n" if data_dir.eos: run_script += "eos root://{} mkdir -p {}\n".format( Site.url_eos, out_path) else: run_script += "mkdir -p {}\n".format(out_path) run_script += "\n" run_script += "# Submit condor jobs:\n" for i in range(len(files)): job_name = "analyzer_job{}_{}".format(i + 1, tstring) run_script += "condor_submit {}.jdl\n".format(job_name) with open("{}/run.sh".format(path), "w") as out: out.write(run_script) print "The jobs are in {}".format(path) return path