d = "/".join(jec_path.split("/")[:-1]) prefix = jec_path.split("/")[-1] if not os.path.exists(d): return False txts = [f for f in os.listdir(d) if ".txt" in f] for flavor in flavors: if not "{}_{}_{}_{}PFchs.txt".format(prefix, ("MC", "DATA")[data == True], flavor, algorithm.upper()) in txts: return False return True # /FUNCTIONS: # SET UP: ## Very basic variables: out_dir_default = "~/temp" # This is where output goes when it's not put into EOS by CRAB. cmssw_version = cmssw.get_version() # The CMSSW version that this configuration file is using. ## Construct process: process = cms.Process("fatjets") ## Set up variables and options: options = VarParsing('analysis') ### General options: options.register ('crab', False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "Turn this on from inside crab configuration files." ) options.register ('data', False,
def main():
    """Write condor submission files for the tuplizer, one set per MiniAOD entry.

    For every entry returned by ``dataset.fetch_entries("miniaod", a.query)``
    this creates, under ``condor_jobs/tuplizer/<time>/<subprocess>_<generation>_<suffix>``:

    * ``job_<k>.sh``  -- untars the CMSSW area, runs ``cmsRun tuplizer_cfg.py``
      on one group of input files and copies the result to EOS with ``xrdcp``;
    * ``job_<k>.jdl`` -- the condor submit description for that job;
    * ``run.sh``      -- checks the grid proxy, tars the CMSSW area, creates the
      EOS output directory and submits every ``.jdl``;
    * ``logs/``       -- directory the ``.jdl`` files point stdout/stderr/log at.

    Input files are grouped greedily: files are appended to a group until the
    running sum of ``miniaod.ns`` reaches ``n_per``; the final (possibly
    smaller) group is always kept.

    ``cut_pt_filter`` and ``n_per`` are read as module-level names that are
    defined outside this function.

    Returns:
        True, always.
    """
    # Arguments:
    a = variables.arguments()
    miniaods = dataset.fetch_entries("miniaod", a.query)
    tstring = utilities.time_string()[:-4]
    suffix = "cutpt{}".format(cut_pt_filter)
    cmssw_version = cmssw.get_version(parsed=False)

    for miniaod in miniaods:
        print("Making condor setup for {} ...".format(miniaod.Name))
        sample = miniaod.get_sample()

        # Create groups of input files:
        groups = []
        current = []
        n_current = 0
        i_last = len(miniaod.ns) - 1
        for i, n in enumerate(miniaod.ns):
            n_current += n
            current.append(miniaod.files[i])
            # Close the group once it is large enough, or when the inputs run out.
            if (n_current >= n_per) or (i == i_last):
                groups.append(current)
                current = []
                n_current = 0
        print("\tCreating {} jobs ...".format(len(groups)))

        # Prepare directories:
        path = "condor_jobs/tuplizer/{}/{}_{}_{}".format(
            tstring, miniaod.subprocess, miniaod.generation, suffix
        )
        log_path = path + "/logs"
        for directory in (path, log_path):
            if not os.path.exists(directory):
                os.makedirs(directory)
        eos_path = "/store/user/tote/{}/tuple_{}_{}_{}/{}".format(
            sample.name, miniaod.subprocess, miniaod.generation, suffix, tstring
        )  # Output path.

        # Create job scripts:
        for number, group in enumerate(groups, 1):
            list_str = ",".join(['"{}"'.format(g) for g in group])
            out_file = "tuple_{}_{}_{}_{}.root".format(
                miniaod.subprocess, miniaod.generation, suffix, number
            )
            cms_run = (
                'cmsRun tuplizer_cfg.py subprocess="{}" generation="{}" '
                'cutPtFilter={} outDir="." outFile="{}" inFile={}'
            ).format(
                miniaod.subprocess, miniaod.generation, cut_pt_filter, out_file, list_str
            )
            if sample.data:
                cms_run += " data=True"
            if sample.mask:
                cms_run += ' mask="{}"'.format(sample.mask)

            job_script = "".join([
                "#!/bin/bash\n",
                "\n",
                "# Untar CMSSW area:\n",
                "tar -xzf {}.tar.gz\n".format(cmssw_version),
                "cd {}/src/Analyzers/FatjetAnalyzer/test\n".format(cmssw_version),
                "\n",
                "# Setup CMSSW:\n",
                "source /cvmfs/cms.cern.ch/cmsset_default.sh\n",
                "eval `scramv1 runtime -sh` #cmsenv\n",
                "\n",
                "# Run CMSSW:\n",
                # The "&&" chain removes the local output only after a successful copy.
                cms_run + " &&\n",
                "xrdcp -f {} root://cmseos.fnal.gov/{} &&\n".format(out_file, eos_path),
                "rm {}\n".format(out_file),
            ])
            with open("{}/job_{}.sh".format(path, number), "w") as out:
                out.write(job_script)

        # Create condor configs:
        for number in range(1, len(groups) + 1):
            job_config = "".join([
                "universe = vanilla\n",
                "Executable = job_{}.sh\n".format(number),
                "Should_Transfer_Files = YES\n",
                "WhenToTransferOutput = ON_EXIT\n",
                "Transfer_Input_Files = {}.tar.gz\n".format(cmssw_version),
                'Transfer_Output_Files = ""\n',
                "Output = logs/job_{}.stdout\n".format(number),
                "Error = logs/job_{}.stderr\n".format(number),
                "Log = logs/job_{}.log\n".format(number),
                # Not passed through str.format: the braces here are literal.
                "notify_user = ${LOGNAME}@FNAL.GOV\n",
                "x509userproxy = $ENV(X509_USER_PROXY)\n",
                "Queue 1\n",
            ])
            with open("{}/job_{}.jdl".format(path, number), "w") as out:
                out.write(job_config)

        # Create run script:
        run_lines = [
            # The script uses the bash-only "[[ ... ]]" test, so name the interpreter.
            "#!/bin/bash\n",
            "\n",
            "# Update cache info:\n",
            "bash $HOME/condor/cache.sh\n",
            "\n",
            "# Grid proxy existence & expiration check:\n",
            "PCHECK=`voms-proxy-info -timeleft`\n",
            'if [[ ($? -ne 0) || ("$PCHECK" -eq 0) ]]; then\n',
            "\tvoms-proxy-init -voms cms --valid 168:00\n",
            "fi\n",
            "\n",
            "# Copy python packages to CMSSW area:\n",
            "cp -r $HOME/decortication/decortication $CMSSW_BASE/python\n",
            "cp -r $HOME/decortication/resources $CMSSW_BASE/python\n",
            "cp -r $HOME/truculence/truculence $CMSSW_BASE/python\n",
            "\n",
            "# Make tarball:\n",
            "echo 'Making a tarball of the CMSSW area ...'\n",
            "tar --exclude-caches-all -zcf ${CMSSW_VERSION}.tar.gz -C ${CMSSW_BASE}/.. ${CMSSW_VERSION}\n",
            "\n",
            "# Prepare EOS:\n",
            "eos root://cmseos.fnal.gov mkdir -p {}\n".format(eos_path),
            "\n",
            "# Submit condor jobs:\n",
        ]
        run_lines.extend(
            "condor_submit job_{}.jdl\n".format(number)
            for number in range(1, len(groups) + 1)
        )
        run_lines.extend([
            "\n",
            "# Remove tarball:\n",
            "#rm ${CMSSW_VERSION}.tar.gz\n",  # If I remove this, the jobs might complain.
            "\n",
            "# Remove python packages:\n",
            "#rm -rf $CMSSW_BASE/python/decortication\n",
            "#rm -rf $CMSSW_BASE/python/resources\n",
            "#rm -rf $CMSSW_BASE/python/truculence\n",
        ])
        with open("{}/run.sh".format(path), "w") as out:
            out.write("".join(run_lines))

        print("\tThe jobs are in {}".format(path))

    return True
def create_jobs(self, cmd="", memory=2000, input_files=None):
    """Create condor jobs, one per file returned by ``self.get_files(info=True)``.

    Writes into ``condor_jobs/<self.name>/<time>``:

    * ``analyzer_job<k>_<time>.sh``  -- untars the CMSSW area, copies any extra
      input files into the analyzer test directory, runs ``cmd`` and moves the
      resulting ``job_<k>.root`` to the output directory (``xrdcp`` when the
      data directory is on EOS, ``mv`` otherwise);
    * ``analyzer_job<k>_<time>.jdl`` -- the condor submit description;
    * ``run.sh``                     -- proxy check, tarball creation, output
      directory creation and ``condor_submit`` for every job;
    * ``logs/``                      -- target of the jobs' stdout/stderr/log.

    Args:
        cmd: Command run inside each job. The placeholders ``%%FILE%%``,
            ``%%PROCESS%%`` and ``%%N%%`` are replaced with the input file,
            ``f_dict["process"]`` and the 1-based job number. When empty,
            ``python <self.name>.py -f %%FILE%% -o job_%%N%%.root`` is used.
        memory: Value written to ``request_memory`` in each ``.jdl``.
        input_files: Extra file name(s) to ship with every job; a single
            string or an iterable of strings. ``None`` means no extra files.

    Returns:
        The directory (relative path) the job files were written to.
    """
    # Define variables:
    Site = dataset.Site
    data_dir = Site.get_dir("data")
    cmssw_version = cmssw.get_version(parsed=False)
    if not cmd:
        cmd = "python {}.py -f %%FILE%% -o job_%%N%%.root".format(self.name)
    tstring = utilities.time_string()[:-4]
    path = "condor_jobs/{}/{}".format(self.name, tstring)
    log_path = path + "/logs"
    out_path = os.path.join(data_dir.path, "analyzer_jobs", tstring)  # Output path.
    test_dir = "{}/src/Analyzers/FatjetAnalyzer/test".format(cmssw_version)

    files_for_condor = [
        "{}/{}.py".format(os.getcwd(), self.name),
        "{}.tar.gz".format(cmssw_version),
    ]
    if isinstance(input_files, str):
        input_files = [input_files]
    if input_files:
        # NOTE(review): names that already contain "/" are dropped here rather
        # than kept as-is -- confirm that this is intended.
        input_files = [
            os.getcwd() + "/" + name for name in input_files if "/" not in name
        ]
    else:
        # Normalise None (the default) so the per-job loop below can iterate it.
        input_files = []
    files_for_condor.extend(input_files)

    # Make directories
    for p in [path, log_path]:
        if not os.path.exists(p):
            os.makedirs(p)

    # Make job files:
    files = self.get_files(info=True)
    job_names = [
        "analyzer_job{}_{}".format(number, tstring)
        for number in range(1, len(files) + 1)
    ]

    ## Make job scripts:
    for number, (job_name, f_dict) in enumerate(zip(job_names, files), 1):
        f = f_dict["file"]
        if f[:12] == "/store/user/":
            f = "root://cmseos.fnal.gov/" + f

        # NOTE(review): this condition tests the data file `f`, not the input
        # file being copied; it looks like it may have been meant to skip the
        # CMSSW tarball -- confirm before changing.
        copy_lines = ""
        if "CMSSW_" not in f:
            copy_lines = "".join(
                "cp {} {}\n".format(name.split("/")[-1], test_dir)
                for name in input_files
            )

        job_cmd = (
            cmd.replace("%%FILE%%", f)
            .replace("%%PROCESS%%", f_dict["process"])
            .replace("%%N%%", str(number))
        )
        if data_dir.eos:
            store_line = "xrdcp -f job_{}.root root://{}/{}\n".format(
                number, Site.url_eos, out_path)
        else:
            store_line = "mv -f job_{}.root {}\n".format(number, out_path)

        job_script = "".join([
            "#!/bin/bash\n",
            "\n",
            "# Untar CMSSW area:\n",
            "tar -xzf {}.tar.gz &&\n".format(cmssw_version),
            copy_lines,
            "cd {}\n".format(test_dir),
            "\n",
            "# Setup CMSSW:\n",
            "source /cvmfs/cms.cern.ch/cmsset_default.sh\n",
            "scramv1 b ProjectRename\n",
            "eval `scramv1 runtime -sh` #cmsenv\n",
            "\n",
            job_cmd + "\n",
            store_line,
        ])
        with open("{}/{}.sh".format(path, job_name), "w") as out:
            out.write(job_script)

    ## Make condor configs:
    if Site.name == "hexcms":
        # These lines are not passed through str.format: the braces are literal.
        proxy_lines = (
            "notify_user = ${LOGNAME}@FNAL.GOV\n"
            "x509userproxy = $ENV(HOME)/myproxy\n"
        )
    else:
        proxy_lines = "x509userproxy = $ENV(X509_USER_PROXY)\n"
    for job_name in job_names:
        job_config = "".join([
            "universe = vanilla\n",
            "Executable = {}.sh\n".format(job_name),
            "Should_Transfer_Files = YES\n",
            "WhenToTransferOutput = ON_EXIT\n",
            "Transfer_Input_Files = {}\n".format(",".join(files_for_condor)),
            "Transfer_Output_Files = \"\"\n",
            "Output = logs/{}.stdout\n".format(job_name),
            "Error = logs/{}.stderr\n".format(job_name),
            "Log = logs/{}.log\n".format(job_name),
            proxy_lines,
            "request_memory = {}\n".format(memory),
            "Queue 1\n",
        ])
        with open("{}/{}.jdl".format(path, job_name), "w") as out:
            out.write(job_config)

    ## Make run script:
    if data_dir.eos:
        mkdir_line = "eos root://{} mkdir -p {}\n".format(Site.url_eos, out_path)
    else:
        mkdir_line = "mkdir -p {}\n".format(out_path)
    run_lines = [
        "#!/bin/bash\n",
        "\n",
        "# Grid proxy existence & expiration check:\n",
        "PCHECK=`voms-proxy-info -timeleft`\n",
        "if [[ ($? -ne 0) || (\"$PCHECK\" -eq 0) ]]; then\n",
        "\tvoms-proxy-init -voms cms --valid 168:00\n",
        "fi\n",
        "\n",
        "# Make tarball:\n",
        "echo 'Making a tarball of the CMSSW area ...'\n",
        "tar --exclude-caches-all -zcf ${CMSSW_VERSION}.tar.gz -C ${CMSSW_BASE}/.. ${CMSSW_VERSION}\n",
        "\n",
        "# Prepare output directory:\n",
        mkdir_line,
        "\n",
        "# Submit condor jobs:\n",
    ]
    run_lines.extend(
        "condor_submit {}.jdl\n".format(job_name) for job_name in job_names
    )
    with open("{}/run.sh".format(path), "w") as out:
        out.write("".join(run_lines))

    print("The jobs are in {}".format(path))
    return path