def __init__(self, configDir, outputDir, output_file, executable, projection_module, samples,
             max_files_per_job, era, plot, check_output_files, running_method, num_parallel_jobs,
             pool_id = '', verbose = False, dry_run = False, use_home = False, submission_cmd = None):
    self.configDir = configDir
    self.outputDir = outputDir
    self.executable = executable
    self.projection_module = projection_module
    self.max_num_jobs = 200000
    self.samples = samples
    self.max_files_per_job = max_files_per_job
    self.era = era
    self.plot = plot
    self.check_output_files = check_output_files
    self.verbose = verbose
    self.dry_run = dry_run
    self.use_home = use_home

    if running_method.lower() not in [ "sbatch", "makefile" ]:
        raise ValueError("Invalid running method: %s" % running_method)
    self.running_method = running_method
    self.is_sbatch = self.running_method.lower() == "sbatch"
    self.is_makefile = not self.is_sbatch
    self.makefile = os.path.join(self.configDir, "Makefile_{}".format(self.projection_module))
    self.num_parallel_jobs = num_parallel_jobs
    self.pool_id = pool_id if pool_id else uuid.uuid4()

    self.workingDir = os.getcwd()
    logging.info("Working directory is: %s" % self.workingDir)
    self.template_dir = os.path.join(
        os.getenv('CMSSW_BASE'), 'src', 'tthAnalysis', 'HiggsToTauTau', 'test', 'templates'
    )
    logging.info("Templates directory is: %s" % self.template_dir)

    create_if_not_exists(self.configDir)
    create_if_not_exists(self.outputDir)
    self.output_file = os.path.join(self.outputDir, output_file)
    self.stdout_file_path = os.path.join(self.configDir, "stdout_{}.log".format(self.projection_module))
    self.stderr_file_path = os.path.join(self.configDir, "stderr_{}.log".format(self.projection_module))
    self.sw_ver_file_cfg = os.path.join(self.configDir, "VERSION_{}.log".format(self.projection_module))
    self.sw_ver_file_out = os.path.join(self.outputDir, "VERSION_{}.log".format(self.projection_module))
    self.submission_out = os.path.join(self.configDir, "SUBMISSION.log")
    self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out, \
    self.submission_out = get_log_version((
        self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out,
        self.submission_out
    ))
    check_submission_cmd(self.submission_out, submission_cmd)

    self.sbatchFile_projection = os.path.join(self.configDir, "sbatch_{}.py".format(self.projection_module))
    self.cfgFiles_projection = {}
    self.logFiles_projection = {}
    self.scriptFiles_projection = {}
    self.jobOptions_sbatch = {}
    self.inputFiles = {}
    self.outputFiles_tmp = {}
    self.outputFiles = {}
    self.phoniesToAdd = []
    self.filesToClean = []
    self.targets = []
    self.makefile_target = "sbatch_{}".format(self.projection_module)

    self.dirs = {}
    all_dirs = [ DKEY_CFGS, DKEY_HISTO_TMP, DKEY_HISTO, DKEY_PLOTS, DKEY_LOGS, DKEY_SCRIPTS, DKEY_HADD_RT ]
    cfg_dirs = [ DKEY_CFGS, DKEY_LOGS, DKEY_PLOTS, DKEY_SCRIPTS, DKEY_HADD_RT ]

    ref_genWeightsFile = os.path.join(
        os.environ['CMSSW_BASE'], 'src', 'tthAnalysis', 'HiggsToTauTau', 'data',
        'refGenWeight_{}.txt'.format(self.era)
    )
    self.ref_genWeights = load_refGenWeightsFromFile(ref_genWeightsFile) if projection_module != 'puHist' else {}

    for sample_name, sample_info in self.samples.items():
        if not sample_info['use_it']:
            continue
        process_name = sample_info["process_name_specific"]
        key_dir = getKey(process_name)
        for dir_type in all_dirs:
            if dir_type == DKEY_PLOTS:
                continue
            initDict(self.dirs, [ key_dir, dir_type ])
            if dir_type in cfg_dirs:
                self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, process_name)
            else:
                self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, process_name)
    for dir_type in cfg_dirs:
        initDict(self.dirs, [ dir_type ])
        self.dirs[dir_type] = os.path.join(self.configDir, dir_type)

    self.cvmfs_error_log = {}
    self.num_jobs = {
        'hadd'    : 0,
        'project' : 0,
        'plot'    : 0,
    }
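# Illustrative usage sketch (not from the original source): the class name
# `projectionConfig` and the paths below are hypothetical; only the keyword
# arguments mirror the __init__ signature above. Config files and logs land
# under configDir, histograms and plots under outputDir.
#
#   cfg = projectionConfig(
#       configDir          = '/home/user/projection_cfgs',
#       outputDir          = '/local/user/projection_out',
#       output_file        = 'projections.root',
#       executable         = 'projection',
#       projection_module  = 'puHist',
#       samples            = samples,
#       max_files_per_job  = 10,
#       era                = '2017',
#       plot               = True,
#       check_output_files = True,
#       running_method     = 'sbatch',
#       num_parallel_jobs  = 8,
#   )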
def __init__(self, configDir, outputDir, cfgFile_prodNtuple, samples, max_files_per_job, era,
             preselection_cuts, leptonSelection, hadTauWP, check_output_files, running_method,
             version, num_parallel_jobs, pileup, golden_json, dry_run, isDebug,
             gen_matching_by_index, use_nonnominal, use_home, skip_tools_step, do_sync,
             verbose = False, pool_id = '', submission_cmd = None):
    self.configDir = configDir
    self.outputDir = outputDir
    self.max_num_jobs = 200000
    self.samples = samples
    self.max_files_per_job = max_files_per_job
    self.era = era
    self.preselection_cuts = preselection_cuts
    self.leptonSelection = leptonSelection
    self.hadTauWP = hadTauWP
    self.check_output_files = check_output_files
    self.verbose = verbose
    self.dry_run = dry_run
    self.isDebug = isDebug
    self.gen_matching_by_index = gen_matching_by_index
    self.use_nonnominal = use_nonnominal
    self.use_home = use_home
    self.pileup = pileup
    self.golden_json = golden_json

    if running_method.lower() not in [ "sbatch", "makefile" ]:
        raise ValueError("Invalid running method: %s" % running_method)
    if not os.path.isfile(self.pileup):
        raise ValueError('No such file: %s' % self.pileup)
    self.pileup_histograms = get_pileup_histograms(self.pileup)
    if not os.path.isfile(self.golden_json):
        raise ValueError('No such file: %s' % self.golden_json)

    self.running_method = running_method
    self.is_sbatch = self.running_method.lower() == "sbatch"
    self.is_makefile = not self.is_sbatch
    self.makefile = os.path.join(self.configDir, "Makefile_prodNtuple")
    self.num_parallel_jobs = num_parallel_jobs
    self.skip_tools_step = skip_tools_step
    self.do_sync = do_sync
    self.pool_id = pool_id if pool_id else uuid.uuid4()

    self.workingDir = os.getcwd()
    logging.info("Working directory is: %s" % self.workingDir)
    self.template_dir = os.path.join(
        os.getenv('CMSSW_BASE'), 'src', 'tthAnalysis', 'HiggsToTauTau', 'test', 'templates'
    )
    logging.info("Templates directory is: %s" % self.template_dir)
    self.version = version
    self.samples = samples

    create_if_not_exists(self.configDir)
    create_if_not_exists(self.outputDir)
    self.stdout_file_path = os.path.join(self.configDir, "stdout_prodNtuple.log")
    self.stderr_file_path = os.path.join(self.configDir, "stderr_prodNtuple.log")
    self.sw_ver_file_cfg = os.path.join(self.configDir, "VERSION_prodNtuple.log")
    self.sw_ver_file_out = os.path.join(self.outputDir, "VERSION_prodNtuple.log")
    self.submission_out = os.path.join(self.configDir, "SUBMISSION.log")
    self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out, \
    self.submission_out = get_log_version((
        self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out,
        self.submission_out
    ))
    check_submission_cmd(self.submission_out, submission_cmd)

    self.cfgFile_prodNtuple_original = os.path.join(self.template_dir, cfgFile_prodNtuple)
    self.sbatchFile_prodNtuple = os.path.join(self.configDir, "sbatch_prodNtuple.py")
    self.cfgFiles_prodNtuple_modified = {}
    self.logFiles_prodNtuple = {}
    self.inputFiles = {}
    self.outputFiles = {}
    self.filesToClean = []

    self.dirs = {}
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        process_name = sample_info["process_name_specific"]
        key_dir = getKey(sample_name)
        for dir_type in [ DKEY_CFGS, DKEY_NTUPLES, DKEY_LOGS ]:
            initDict(self.dirs, [ key_dir, dir_type ])
            if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, process_name)
            else:
                self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, process_name)
    for dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
        initDict(self.dirs, [ dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type)
        else:
            self.dirs[dir_type] = os.path.join(self.outputDir, dir_type)

    self.cvmfs_error_log = {}
    self.executable = "produceNtuple.sh"
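# Directory layout produced by the per-sample loop above (descriptive comment
# added for clarity; the placeholders in angle brackets are illustrative):
#
#   self.dirs[getKey(sample_name)][DKEY_CFGS]    -> <configDir>/<DKEY_CFGS>/<process_name>
#   self.dirs[getKey(sample_name)][DKEY_LOGS]    -> <configDir>/<DKEY_LOGS>/<process_name>
#   self.dirs[getKey(sample_name)][DKEY_NTUPLES] -> <outputDir>/<DKEY_NTUPLES>/<process_name>
#
# i.e. job configs and logs stay in the config area, while the produced
# Ntuples are written to the (large) output area.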
def __init__(self, configDir, localDir, outputDir, output_file, executable, samples,
             max_files_per_job, era, binning, use_gen_weight, check_output_files, running_method,
             num_parallel_jobs, pool_id = '', verbose = False, dry_run = False, use_home = False,
             keep_logs = False, submission_cmd = None):
    self.configDir = configDir
    self.localDir = localDir
    self.outputDir = outputDir
    self.executable = executable
    self.max_num_jobs = 200000
    self.samples = samples
    self.max_files_per_job = max_files_per_job
    self.era = era
    self.binning = binning
    self.use_gen_weight = use_gen_weight
    self.check_output_files = check_output_files
    self.verbose = verbose
    self.dry_run = dry_run
    self.use_home = use_home
    self.keep_logs = keep_logs

    if running_method.lower() not in [ "sbatch", "makefile" ]:
        raise ValueError("Invalid running method: %s" % running_method)
    self.running_method = running_method
    self.is_sbatch = self.running_method.lower() == "sbatch"
    self.is_makefile = not self.is_sbatch
    self.makefile = os.path.join(self.localDir, "Makefile_nonResDenom")
    self.num_parallel_jobs = num_parallel_jobs
    self.pool_id = pool_id if pool_id else uuid.uuid4()

    self.workingDir = os.getcwd()
    logging.info("Working directory is: %s" % self.workingDir)
    self.template_dir = os.path.join(
        os.getenv('CMSSW_BASE'), 'src', 'tthAnalysis', 'HiggsToTauTau', 'test', 'templates'
    )
    logging.info("Templates directory is: %s" % self.template_dir)

    create_if_not_exists(self.configDir)
    create_if_not_exists(self.localDir)
    create_if_not_exists(self.outputDir)
    self.output_file = os.path.join(self.outputDir, output_file)
    self.stdout_file_path = os.path.join(self.localDir, "stdout_nonResDenom.log")
    self.stderr_file_path = os.path.join(self.localDir, "stderr_nonResDenom.log")
    self.sw_ver_file_cfg = os.path.join(self.localDir, "VERSION_nonResDenom.log")
    self.sw_ver_file_out = os.path.join(self.outputDir, "VERSION_nonResDenom.log")
    self.submission_out = os.path.join(self.localDir, "SUBMISSION_nonResDenom.log")
    self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out, \
    self.submission_out = get_log_version((
        self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out,
        self.submission_out
    ))
    check_submission_cmd(self.submission_out, submission_cmd)

    self.sbatchFile_nonResDenom = os.path.join(self.localDir, "sbatch_nonResDenom.py")
    self.cfgFiles_nonResDenom = {}
    self.logFiles_nonResDenom = {}
    self.scriptFiles_nonResDenom = {}
    self.jobOptions_sbatch = {}
    self.inputFiles = {}
    self.outputFiles_tmp = {}
    self.outputFiles = {}
    self.phoniesToAdd = []
    self.filesToClean = [ self.configDir ]
    self.targets = []

    self.dirs = {}
    all_dirs = [ DKEY_CFGS, DKEY_HISTO_TMP, DKEY_HISTO, DKEY_PLOTS, DKEY_LOGS, DKEY_SCRIPTS, DKEY_HADD_RT ]
    cfg_dirs = [ DKEY_CFGS, DKEY_LOGS, DKEY_PLOTS, DKEY_SCRIPTS, DKEY_HADD_RT ]

    self.gen_weights = {}
    if self.use_gen_weight:
        ref_genweights = os.path.join(
            os.environ['CMSSW_BASE'], 'src', 'tthAnalysis', 'HiggsToTauTau', 'data',
            'refGenWeight_{}.txt'.format(era)
        )
        with open(ref_genweights, 'r') as f:
            for line in f:
                line_split = line.strip().split()
                assert(len(line_split) == 2)
                sample_name = line_split[0]
                ref_genweight = float(line_split[1])
                assert(sample_name not in self.gen_weights)
                self.gen_weights[sample_name] = ref_genweight

    for sample_name, sample_info in self.samples.items():
        if not sample_info['use_it']:
            continue
        process_name = sample_info["process_name_specific"]
        if self.use_gen_weight:
            assert(re.sub('_duplicate$', '', process_name) in self.gen_weights)
        key_dir = getKey(process_name)
        for dir_type in all_dirs:
            if dir_type == DKEY_PLOTS:
                continue
            initDict(self.dirs, [ key_dir, dir_type ])
            if dir_type in cfg_dirs:
                dir_choice = self.configDir if dir_type == DKEY_CFGS else self.localDir
                self.dirs[key_dir][dir_type] = os.path.join(dir_choice, dir_type, process_name)
            else:
                self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, process_name)
    for dir_type in cfg_dirs:
        initDict(self.dirs, [ dir_type ])
        dir_choice = self.configDir if dir_type == DKEY_CFGS else self.localDir
        self.dirs[dir_type] = os.path.join(dir_choice, dir_type)
        if dir_choice != self.configDir:
            self.filesToClean.append(self.dirs[dir_type])

    self.cvmfs_error_log = {}
    self.num_jobs = {
        'hadd'        : 0,
        'nonResDenom' : 0,
        'plot'        : 0,
    }
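# Note on the refGenWeight_<era>.txt file parsed above (descriptive comment;
# the sample name and value shown are made up for illustration): each line is
# expected to carry exactly two whitespace-separated columns, a sample name
# and its reference generator weight, e.g.
#
#   signal_ggf_nonresonant_hh_4t   0.3412
#
# Duplicate sample names are rejected by the assert, and when use_gen_weight
# is enabled every selected sample (after stripping a trailing '_duplicate')
# must appear in the file.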
def __init__(self, treeName, outputDir, cfgDir, executable_addMEM, samples, era, check_output_files,
             running_method, max_files_per_job, mem_integrations_per_job, max_mem_integrations,
             num_parallel_jobs, leptonSelection, hadTauSelection, integration_choice,
             jet_cleaning_by_index, dry_run, use_nonnominal, use_home, channel, rle_filter_file = '',
             submission_cmd = None, pool_id = '', max_jobs_per_sample = -1):
    self.treeName = treeName
    self.outputDir = outputDir
    self.cfgDir = cfgDir
    self.executable_addMEM = executable_addMEM
    self.mem_integrations_per_job = mem_integrations_per_job
    self.max_files_per_job = max_files_per_job
    self.max_mem_integrations = max_mem_integrations
    self.max_jobs_per_sample = max_jobs_per_sample
    self.samples = samples
    self.era = era
    self.check_output_files = check_output_files
    self.channel = channel
    self.rle_filter_file = rle_filter_file
    self.leptonSelection = leptonSelection
    self.hadTauSelection = hadTauSelection
    if self.hadTauSelection:
        self.hadTauDefinition = self.hadTauSelection.split('|')[0]
        self.hadTauWorkingPoint = self.hadTauSelection.split('|')[1]
    else:
        self.hadTauDefinition = None
        self.hadTauWorkingPoint = None
    self.maxPermutations_branchName = None
    self.integration_choice = integration_choice
    self.jet_cleaning_by_index = jet_cleaning_by_index
    logging.info("Number of integration points: %s" % self.integration_choice)

    if running_method.lower() not in [ "sbatch", "makefile" ]:
        raise ValueError("Invalid running method: %s" % running_method)
    self.running_method = running_method
    self.is_sbatch = False
    self.is_makefile = False
    if self.running_method.lower() == "sbatch":
        self.is_sbatch = True
    else:
        self.is_makefile = True
    self.makefile = os.path.join(self.cfgDir, "Makefile_%s" % self.channel)
    self.num_parallel_jobs = num_parallel_jobs
    self.dry_run = dry_run
    self.use_nonnominal = use_nonnominal
    self.use_home = use_home
    self.pool_id = pool_id if pool_id else uuid.uuid4()

    self.workingDir = os.getcwd()
    logging.info("Working directory is: {workingDir}".format(workingDir = self.workingDir))

    for dirPath in [ self.outputDir, self.cfgDir ]:
        create_if_not_exists(dirPath)
    self.stdout_file_path = os.path.join(self.cfgDir, "stdout_%s.log" % self.channel)
    self.stderr_file_path = os.path.join(self.cfgDir, "stderr_%s.log" % self.channel)
    self.sw_ver_file_cfg = os.path.join(self.cfgDir, "VERSION_%s.log" % self.channel)
    self.sw_ver_file_out = os.path.join(self.outputDir, "VERSION_%s.log" % self.channel)
    self.submission_out = os.path.join(self.cfgDir, "SUBMISSION_%s.log" % self.channel)
    self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out, \
    self.submission_out = get_log_version((
        self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out,
        self.submission_out
    ))
    check_submission_cmd(self.submission_out, submission_cmd)

    self.dirs = {}
    self.samples = samples
    self.cfgFiles_addMEM_modified = {}
    self.shFiles_addMEM_modified = {}
    self.logFiles_addMEM = {}
    self.sbatchFile_addMEM = os.path.join(self.cfgDir, "sbatch_addMEM_%s.py" % self.channel)
    self.inputFiles = {}
    self.outputFiles = {}
    self.hadd_records = {}
    self.filesToClean = []

    del self.samples['sum_events']
    for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
            continue
        process_name = sample_info["process_name_specific"]
        key_dir = getKey(sample_name)
        for dir_type in [ DKEY_NTUPLES, DKEY_FINAL_NTUPLES ]:
            initDict(self.dirs, [ key_dir, dir_type ])
            self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, process_name)
        for dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_HADD, DKEY_HADD_RT ]:
            initDict(self.dirs, [ key_dir, dir_type ])
            self.dirs[key_dir][dir_type] = os.path.join(self.cfgDir, dir_type, self.channel, process_name)

    self.cvmfs_error_log = {}
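# The hadTauSelection string parsed above is expected in the form
# '<definition>|<workingPoint>' (descriptive comment; the value below only
# illustrates the format and is not taken from the original source), e.g.
#
#   hadTauSelection = 'Tight|deepVSjVLoose'
#
# An empty string leaves both hadTauDefinition and hadTauWorkingPoint as None.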
def __init__(self, config_dir, output_dir, output_filename, version, era, channels, dry_run,
             check_output_files, running_method, isDebug, rle_select, with_mem, use_nonnominal,
             hlt_filter, tau_id_wp, tau_id, use_home, systematics_label, use_preselected,
             jet_cleaning, gen_matching, regroup_jerc = False,
             project_dir = os.path.join(os.getenv('CMSSW_BASE'), 'src', 'tthAnalysis', 'HiggsToTauTau'),
             file_pattern = 'tthAnalyzeRun_%s.py', suffix = '', submission_cmd = None, mode = None):
    self.running_method = running_method
    self.dry_run = dry_run
    self.check_output_files = check_output_files
    self.use_home = use_home
    executable_pattern = os.path.join(project_dir, 'test', file_pattern)

    self.config_dir = config_dir
    self.hadd_script_dir_path = os.path.join(self.config_dir, DKEY_SCRIPTS, DKEY_SYNC)
    self.hadd_log_dir_path = os.path.join(self.config_dir, DKEY_LOGS, DKEY_SYNC)
    self.hadd_script_path = os.path.join(self.hadd_script_dir_path, 'hadd_sync.py')
    self.hadd_log_wrapper_path = os.path.join(self.hadd_log_dir_path, 'hadd_sync_wrapper.log')
    self.hadd_log_executable_path = os.path.join(self.hadd_log_dir_path, 'hadd_sync_executable.log')
    self.output_dir = output_dir
    final_output_dir = os.path.join(self.output_dir, DKEY_SYNC)
    self.final_output_file = os.path.join(final_output_dir, output_filename)

    create_if_not_exists(self.config_dir)
    create_if_not_exists(self.output_dir)

    submission_out = os.path.join(self.config_dir, "SUBMISSION_sync.log")
    submission_out, = get_log_version((submission_out,))
    check_submission_cmd(submission_out, submission_cmd)

    systematic_labels = ' '.join(systematics_label)
    common_args = "-v %s -e %s -s %s -y %s " % (version, era, systematic_labels, use_home)
    if jet_cleaning:
        common_args += " -q %s " % jet_cleaning
    if gen_matching:
        common_args += " -g %s " % gen_matching
    additional_args = " -E"
    if self.dry_run:
        additional_args += " -d"
    if check_output_files:
        additional_args += " -C"
    if isDebug:
        additional_args += " -D"
    if rle_select:
        additional_args += " -S '%s'" % rle_select
    if use_nonnominal:
        additional_args += " -O"
    if hlt_filter:
        additional_args += " -H"
    if tau_id:
        additional_args += " -t %s" % tau_id
    if tau_id_wp:
        additional_args += " -w %s" % tau_id_wp
    if self.running_method:
        additional_args += " -R %s" % self.running_method
    if regroup_jerc:
        additional_args += " -G"

    mem_channels = [ '2lss_1tau', '3l', 'hh_bb2l' ]
    cr_channels = [ '3l', '4l' ]
    inclusive_args = '-v %s -e %s' % (version, era)
    if systematic_labels != 'internal':
        inclusive_args += ' -s %s' % systematic_labels
    inclusive_args += additional_args
    common_args += additional_args

    channels_extended = collections.OrderedDict()
    for channel in channels:
        channels_extended[channel] = ''
        if channel in cr_channels:
            channels_extended[channel + 'ctrl'] = ' -c'

    self.channels_to_validate = []
    self.channel_info = {}
    for channel in channels_extended:
        if channel not in [ 'ttWctrl', 'ttZctrl', 'WZctrl', 'ZZctrl' ] and 'inclusive' not in channel:
            self.channels_to_validate.append(channel)

        input_file = os.path.join(final_output_dir, '%s.root' % channel)
        executable_channel = channel
        if channel.replace('ctrl', '') in cr_channels:
            executable_channel = channel.replace('ctrl', '')
        channel_script = executable_pattern % executable_channel
        channel_makefile = os.path.join(self.config_dir, 'Makefile_%s' % channel)
        channel_outlog = os.path.join(self.config_dir, 'stdout_sync_%s.log' % channel)
        channel_errlog = os.path.join(self.config_dir, 'stderr_sync_%s.log' % channel)
        channel_outlog_create = os.path.join(self.config_dir, 'stdout_sync_create_%s.log' % channel)
        channel_errlog_create = os.path.join(self.config_dir, 'stderr_sync_create_%s.log' % channel)
        channel_outlog, channel_errlog, channel_outlog_create, channel_errlog_create = get_log_version((
            channel_outlog, channel_errlog, channel_outlog_create, channel_errlog_create,
        ))

        cmd_args = common_args if 'inclusive' not in channel else inclusive_args
        if 'inclusive' not in channel:
            cmd_args += " -p %s" % use_preselected
        mode_str = ''
        if mode:
            mode_str = '{}_sync'.format(mode)
        elif 'inclusive' not in channel:
            mode_str = 'sync'
        if mode_str and with_mem and channel in mem_channels:
            mode_str = '{}_wMEM'.format(mode_str)
        if mode_str:
            cmd_args += ' -m %s' % mode_str
        cmd_args += channels_extended[channel]

        channel_cmd_create = '%s %s 2>%s 1>%s' % \
            (channel_script, cmd_args, channel_errlog_create, channel_outlog_create)
        channel_cmd_run = '$(MAKE) -j 5 -f %s all 2>%s 1>%s' % (channel_makefile, channel_errlog, channel_outlog)
        channel_cmd_clean = '$(MAKE) -f %s clean' % channel_makefile
        if self.running_method.lower() == "makefile":
            channel_cmd_run = "\n\t".join([
                "mkdir -p {}".format(channel),
                channel_cmd_run.replace('$(MAKE)', '$(MAKE) -C {}'.format(channel)),
                "rm -r {}".format(channel),
            ])
        self.channel_info[input_file] = {
            'create' : channel_cmd_create,
            'run'    : channel_cmd_run,
            'clean'  : channel_cmd_clean,
        }

    self.stdout_file_path = os.path.join(self.config_dir, "stdout_sync.log")
    self.stderr_file_path = os.path.join(self.config_dir, "stderr_sync.log")
    self.sw_ver_file_cfg = os.path.join(self.config_dir, "VERSION_sync.log")
    self.sw_ver_file_out = os.path.join(self.output_dir, "VERSION_sync.log")
    self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out = get_log_version((
        self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out
    ))
    self.makefile_path = os.path.join(self.config_dir, 'Makefile_sync')
    if suffix:
        self.makefile_path += "_{}".format(suffix)
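# Shape of the per-channel entries collected in self.channel_info above
# (descriptive comment; the paths and the '2lss_1tau' key are illustrative only):
#
#   self.channel_info['<outputDir>/sync/2lss_1tau.root'] = {
#       'create' : 'tthAnalyzeRun_2lss_1tau.py <args> 2><errlog_create> 1><outlog_create>',
#       'run'    : '$(MAKE) -j 5 -f <configDir>/Makefile_2lss_1tau all 2><errlog> 1><outlog>',
#       'clean'  : '$(MAKE) -f <configDir>/Makefile_2lss_1tau clean',
#   }
#
# These command strings are presumably consumed when the top-level
# Makefile_sync (self.makefile_path below) is generated.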