def main(args):
    """Write the 2016 training dataset config for one channel to a YAML file.

    The path and branch related argparse values are copied into the config.
    For the channels ``mt``, ``et`` and ``tt`` a ``processes`` section is
    added which stores, per process, the input files relative to
    ``args.base_path``, the expanded cut string, the extracted weight string
    and the training class. For any other channel no ``processes`` section
    is written.

    Args:
        args: Parsed argparse namespace. Reads ``base_path``, ``output_path``,
            ``output_filename``, ``tree_path``, ``event_branch``,
            ``training_weight_branch``, ``era``, ``database``, ``channel``
            and ``output_config``.

    Raises:
        Exception: If ``args.era`` does not contain ``"2016"``.
        KeyError: If an estimation name has no entry in the class mapping.
    """
    # Write argparse arguments to YAML config
    logger.debug("Write argparse arguments to YAML config.")
    output_config = {}
    for key in ("base_path", "output_path", "output_filename", "tree_path",
                "event_branch", "training_weight_branch"):
        output_config[key] = getattr(args, key)

    # Define era
    if "2016" in args.era:
        from shape_producer.estimation_methods_2016 import (
            DataEstimation, HTTEstimation, ggHEstimation, qqHEstimation,
            VHEstimation, ZTTEstimation, ZTTEstimationTT, ZLEstimationMTSM,
            ZLEstimationETSM, ZLEstimationTT, ZJEstimationMT, ZJEstimationET,
            ZJEstimationTT, WEstimationRaw, TTTEstimationMT, TTTEstimationET,
            TTTEstimationTT, TTJEstimationMT, TTJEstimationET,
            TTJEstimationTT, VVEstimation, QCDEstimationMT, QCDEstimationET,
            QCDEstimationTT, ZTTEmbeddedEstimation, TTLEstimationMT,
            TTLEstimationET, TTLEstimationTT, TTTTEstimationMT,
            TTTTEstimationET, EWKWpEstimation, EWKWmEstimation,
            EWKZllEstimation, EWKZnnEstimation)
        from shape_producer.era import Run2016
        era = Run2016(args.database)
    else:
        message = "Era {} is not implemented.".format(args.era)
        logger.fatal(message)
        raise Exception(message)

    # NOTE: Define here the mapping of the process estimations to the
    # training classes. mt and et share one mapping; tt only separates the
    # signal and ztt classes from everything else.
    classes_map_semileptonic = {
        "ggH": "ggh",
        "qqH": "qqh",
        "ZTT": "ztt",
        "EMB": "ztt",
        "ZL": "zll",
        "ZJ": "zll",
        "TTT": "tt",
        "TTL": "tt",
        "TTJ": "tt",
        "W": "w",
        "EWKWp": "w",
        "EWKWm": "w",
        "VV": "misc",
        "EWKZll": "misc",
        "EWKZnn": "misc",
    }
    classes_map_hadronic = {
        "ggH": "ggh",
        "qqH": "qqh",
        "ZTT": "ztt",
        "EMB": "ztt",
        "ZL": "misc",
        "ZJ": "misc",
        "TTT": "misc",
        "TTL": "misc",
        "TTJ": "misc",
        "W": "misc",
        "EWKWp": "misc",
        "EWKWm": "misc",
        "VV": "misc",
        "EWKZll": "misc",
        "EWKZnn": "misc",
    }

    # Channel specific setup: channel object, class mapping and the
    # MC-driven estimation methods in the order they are written out.
    # ZTTEmbeddedEstimation, the TTL estimations and EWKZnnEstimation were
    # commented out in the original lists and stay disabled here.
    if args.channel == "mt":
        channel = MTSM()
        classes_map = classes_map_semileptonic
        estimation_classes = [
            ggHEstimation, qqHEstimation, ZTTEstimation, ZLEstimationMTSM,
            ZJEstimationMT, TTTEstimationMT, TTJEstimationMT, WEstimationRaw,
            EWKWpEstimation, EWKWmEstimation, VVEstimation, EWKZllEstimation,
        ]
    elif args.channel == "et":
        channel = ETSM()
        classes_map = classes_map_semileptonic
        estimation_classes = [
            ggHEstimation, qqHEstimation, ZTTEstimation, ZLEstimationETSM,
            ZJEstimationET, TTTEstimationET, TTJEstimationET, WEstimationRaw,
            EWKWpEstimation, EWKWmEstimation, VVEstimation, EWKZllEstimation,
        ]
    elif args.channel == "tt":
        channel = TTSM()
        classes_map = classes_map_hadronic
        estimation_classes = [
            ggHEstimation, qqHEstimation, ZTTEstimationTT, ZLEstimationTT,
            ZJEstimationTT, TTTEstimationTT, TTJEstimationTT, WEstimationRaw,
            EWKWpEstimation, EWKWmEstimation, VVEstimation, EWKZllEstimation,
        ]
    else:
        # Unknown channels are not an error: the config is written without
        # a `processes` section.
        channel = None

    if channel is not None:
        # Set up `processes` part of config
        output_config["processes"] = {}

        # Additional cuts
        additional_cuts = Cuts()
        logger.warning("Use additional cuts for %s: %s", args.channel,
                       additional_cuts.expand())

        path_prefix = args.base_path + "/"

        def describe(estimation, selection):
            """Return files, cut string and weight string of one process."""
            return {
                "files": [
                    str(f).replace(path_prefix, "")
                    for f in estimation.get_files()
                ],
                "cut_string": (estimation.get_cuts() + selection +
                               additional_cuts).expand(),
                "weight_string": estimation.get_weights().extract(),
            }

        # MC-driven processes
        estimations = [
            estimation_class(era, args.base_path, channel)
            for estimation_class in estimation_classes
        ]
        for estimation in estimations:
            entry = describe(estimation, channel.cuts)
            entry["class"] = classes_map[estimation.name]
            output_config["processes"][estimation.name] = entry

        # Data-driven QCD: taken from data in a modified selection
        estimation = DataEstimation(era, args.base_path, channel)
        estimation.name = "QCD"
        channel_qcd = copy.deepcopy(channel)
        if args.channel == "tt":
            # Replace the `tau_2_iso` cut by "fails tight, passes loose"
            channel_qcd.cuts.remove("tau_2_iso")
            channel_qcd.cuts.add(
                Cut("byTightIsolationMVArun2v1DBoldDMwLT_2<0.5", "tau_2_iso"))
            channel_qcd.cuts.add(
                Cut("byLooseIsolationMVArun2v1DBoldDMwLT_2>0.5",
                    "tau_2_iso_loose"))
            qcd_class = "noniso"
        else:
            # Same sign selection: invert the `os` cut
            channel_qcd.cuts.get("os").invert()
            qcd_class = "ss"
        entry = describe(estimation, channel_qcd.cuts)
        entry["class"] = qcd_class
        output_config["processes"][estimation.name] = entry

    # Write output config; the context manager closes the file handle
    logger.info("Write config to file: {}".format(args.output_config))
    with open(args.output_config, "w") as handle:
        yaml.dump(output_config, handle, default_flow_style=False)
def main(args):
    """Compute the goodness-of-fit binning for all channels and write it out.

    For each of the channels ``em``, ``et``, ``mt`` and ``tt`` the properties
    are collected with ``get_properties``, a chain is built with
    ``build_chain`` including the channel's friend directories, and the 1d
    binning from ``get_1d_binning`` is extended by
    ``add_2d_unrolled_binning``. The result is stored under
    ``config["gof"][channel]`` and dumped as YAML to ``args.output``.

    Args:
        args: Parsed argparse namespace. Reads ``era``, ``datasets``,
            ``variables``, ``directory``, ``output`` and the four
            ``<channel>_friend_directories`` attributes.

    Raises:
        Exception: If ``args.era`` contains neither ``"2016"`` nor ``"2017"``.
    """
    # Define era; the era name is fixed here once instead of being
    # re-derived for every channel.
    if "2016" in args.era:
        from shape_producer.era import Run2016
        era = Run2016(args.datasets)
        eraname = "2016"
    elif "2017" in args.era:
        from shape_producer.era import Run2017
        era = Run2017(args.datasets)
        eraname = "2017"
    else:
        message = "Era {} is not implemented.".format(args.era)
        logger.fatal(message)
        raise Exception(message)

    # Load variables
    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary Python objects and is rejected by recent PyYAML releases;
    # consider yaml.safe_load if the variables file only holds plain data.
    with open(args.variables) as handle:
        variables = yaml.load(handle)["selected_variables"]

    # Define bins and range of binning for variables in enabled channels
    channel_dict = {
        "em": {
            "2016": EMSM2016(),
            "2017": EMSM2017()
        },
        "et": {
            "2016": ETSM2016(),
            "2017": ETSM2017()
        },
        "mt": {
            "2016": MTSM2016(),
            "2017": MTSM2017()
        },
        "tt": {
            "2016": TTSM2016(),
            "2017": TTSM2017()
        },
    }
    friend_directories_dict = {
        "em": args.em_friend_directories,
        "et": args.et_friend_directories,
        "mt": args.mt_friend_directories,
        "tt": args.tt_friend_directories,
    }
    percentiles = [
        1.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 99.0
    ]

    config = {"gof": {}}

    for ch in channel_dict:
        # Get properties
        channel = channel_dict[ch][eraname]
        logger.info("Channel: %s", ch)
        additional_cuts = Cuts()
        logger.warning("Use additional cuts for %s: %s", ch,
                       additional_cuts.expand())
        dict_ = get_properties({}, era, channel, args.directory,
                               additional_cuts)

        # Build chain
        dict_["tree_path"] = "%s_nominal/ntuple" % ch
        chain = build_chain(dict_, friend_directories_dict[ch])

        # The variables file is keyed by the era as an integer
        channel_variables = variables[int(eraname)][ch]

        # Get percentiles and calculate 1d binning
        binning = get_1d_binning(ch, chain, channel_variables, percentiles)

        # Add binning for unrolled 2d distributions
        binning = add_2d_unrolled_binning(channel_variables, binning)

        # Append binning to config
        config["gof"][ch] = binning

    # Write config; the context manager closes the file handle
    logger.info("Write binning config to %s.", args.output)
    with open(args.output, "w") as handle:
        yaml.dump(config, handle)
def main(args):
    """Write the training dataset config for one era and channel to YAML.

    The path and branch related argparse values are copied into the config.
    The ``processes`` section stores, per process, the input files relative
    to ``args.base_path``, the expanded cut string, the extracted weight
    string and the training class. Which estimation methods are used and how
    they map to training classes depends on ``args.channel`` and
    ``args.training_z_estimation_method``. A data-driven ``QCD`` process is
    always appended.

    Args:
        args: Parsed argparse namespace. Reads ``base_path``,
            ``friend_paths``, ``output_path``, ``output_filename``,
            ``tree_path``, ``event_branch``, ``training_weight_branch``,
            ``era``, ``database``, ``channel``,
            ``training_z_estimation_method`` and ``output_config``.

    Raises:
        Exception: If the era is not 2016, 2017 or 2018, or if
            ``args.training_z_estimation_method`` is neither ``"emb"`` nor
            ``"mc"``.
        KeyError: If ``args.channel`` is not one of ``mt``, ``et``, ``tt``,
            ``em``, or an estimation name has no class mapping.
    """
    # Write argparse arguments to YAML config
    logger.debug("Write argparse arguments to YAML config.")
    output_config = {}
    for key in ("base_path", "friend_paths", "output_path", "output_filename",
                "tree_path", "event_branch", "training_weight_branch"):
        output_config[key] = getattr(args, key)

    # Define era. `era_name` is the key used for the channel lookup below, so
    # that any `args.era` accepted by the substring checks also resolves a
    # channel.
    if "2016" in args.era:
        # Not imported for 2016 (disabled in the original import list):
        # QCDEstimation_SStoOS_MTETEM, QCDEstimationTT, EWKWpEstimation,
        # EWKWmEstimation, VHEstimation, HTTEstimation
        from shape_producer.estimation_methods_2016 import (
            DataEstimation, ggHEstimation, qqHEstimation, ZTTEstimation,
            ZLEstimation, ZJEstimation, WEstimation, TTTEstimation,
            TTJEstimation, ZTTEmbeddedEstimation, TTLEstimation,
            EWKZEstimation, VVLEstimation, VVJEstimation, VVEstimation,
            VVTEstimation)
        from shape_producer.era import Run2016
        era = Run2016(args.database)
        era_name = "2016"
    elif "2017" in args.era:
        from shape_producer.estimation_methods_2017 import (
            DataEstimation, ZTTEstimation, ZJEstimation, ZLEstimation,
            TTLEstimation, TTJEstimation, TTTEstimation, VVTEstimation,
            VVJEstimation, VVLEstimation, WEstimation, ggHEstimation,
            qqHEstimation, EWKZEstimation, ZTTEmbeddedEstimation)
        from shape_producer.era import Run2017
        era = Run2017(args.database)
        era_name = "2017"
    elif "2018" in args.era:
        from shape_producer.estimation_methods_2018 import (
            DataEstimation, ZTTEstimation, ZJEstimation, ZLEstimation,
            TTLEstimation, TTJEstimation, TTTEstimation, VVTEstimation,
            VVJEstimation, VVLEstimation, WEstimation, ggHEstimation,
            qqHEstimation, EWKZEstimation, ZTTEmbeddedEstimation)
        from shape_producer.era import Run2018
        era = Run2018(args.database)
        era_name = "2018"
    else:
        message = "Era {} is not implemented.".format(args.era)
        logger.fatal(message)
        raise Exception(message)

    def build_estimations():
        """Return the class mapping and the ordered estimation methods."""
        # Common processes
        classes_map = {"ggH": "ggh", "qqH": "qqh", "EWKZ": "misc"}
        estimations = [
            ggHEstimation("ggH", era, args.base_path, channel),
            qqHEstimation("qqH", era, args.base_path, channel),
            EWKZEstimation(era, args.base_path, channel),
            VVLEstimation(era, args.base_path, channel),
            WEstimation(era, args.base_path, channel),
        ]

        # Embedding vs. MC for the genuine tau-pair processes
        z_method = args.training_z_estimation_method
        if z_method == "emb":
            classes_map["EMB"] = "ztt"
            estimations.append(
                ZTTEmbeddedEstimation(era, args.base_path, channel))
        elif z_method == "mc":
            classes_map["ZTT"] = "ztt"
            estimations.extend([
                ZTTEstimation(era, args.base_path, channel),
                TTTEstimation(era, args.base_path, channel),
                VVTEstimation(era, args.base_path, channel),
            ])
        else:
            message = (
                "No valid training-z-estimation-method! Options are emb, mc. Argument was {}"
                .format(z_method))
            logger.fatal(message)
            raise Exception(message)

        # TTL and ZL are used in every channel
        estimations.extend([
            TTLEstimation(era, args.base_path, channel),
            ZLEstimation(era, args.base_path, channel),
        ])

        if args.channel == "tt":
            # Less data -> fewer categories for tt
            classes_map.update({
                "TTT": "misc",
                "TTL": "misc",
                "TTJ": "misc",
                "ZL": "misc",
                "ZJ": "misc"
            })
            with_jet_fakes = True
        elif args.channel == "em":
            # No TTJ and ZJ for em
            classes_map.update({"TTT": "tt", "TTL": "tt", "ZL": "misc"})
            with_jet_fakes = False
        else:
            classes_map.update({
                "TTT": "tt",
                "TTL": "tt",
                "TTJ": "tt",
                "ZL": "zll",
                "ZJ": "zll"
            })
            with_jet_fakes = True
        if with_jet_fakes:
            estimations.extend([
                ZJEstimation(era, args.base_path, channel),
                TTJEstimation(era, args.base_path, channel),
            ])

        # W: the estimation method is already included above, only the
        # mapping differs between et/mt and the other channels
        classes_map["W"] = "w" if args.channel in ["et", "mt"] else "misc"

        # VV split into VVT, VVL, VVJ: VVL is part of the common processes,
        # VVT is added in the "mc" branch above
        if args.channel == "em":
            classes_map.update({"VVT": "db", "VVL": "db"})
        else:
            classes_map.update({"VVT": "misc", "VVL": "misc", "VVJ": "misc"})
            estimations.append(VVJEstimation(era, args.base_path, channel))

        # QCD class
        classes_map["QCD"] = "noniso" if args.channel == "tt" else "ss"
        return classes_map, estimations

    channelDict = {
        "2016": {
            "mt": MTSM2016(),
            "et": ETSM2016(),
            "tt": TTSM2016(),
            "em": EMSM2016()
        },
        "2017": {
            "mt": MTSM2017(),
            "et": ETSM2017(),
            "tt": TTSM2017(),
            "em": EMSM2017()
        },
        "2018": {
            "mt": MTSM2018(),
            "et": ETSM2018(),
            "tt": TTSM2018(),
            "em": EMSM2018()
        },
    }
    channel = channelDict[era_name][args.channel]

    # Set up `processes` part of config
    output_config["processes"] = {}

    # Additional cuts
    additional_cuts = Cuts()
    logger.warning("Use additional cuts for %s: %s", args.channel,
                   additional_cuts.expand())

    classes_map, estimations = build_estimations()

    # A trailing slash in base_path must not leave a leading "/" in the
    # relative file names
    path_prefix = args.base_path.rstrip("/") + "/"

    # MC and embedding processes
    for estimation in estimations:
        output_config["processes"][estimation.name] = {
            "files": [
                str(f).replace(path_prefix, "")
                for f in estimation.get_files()
            ],
            "cut_string": (estimation.get_cuts() + channel.cuts +
                           additional_cuts).expand(),
            "weight_string": estimation.get_weights().extract(),
            "class": classes_map[estimation.name]
        }

    # Data-driven QCD: taken from data in a modified selection
    estimation = DataEstimation(era, args.base_path, channel)
    estimation.name = "QCD"
    channel_qcd = copy.deepcopy(channel)
    if args.channel != "tt":
        # Same sign selection: invert the `os` (opposite sign) cut
        channel_qcd.cuts.get("os").invert()
    else:
        # Replace the `tau_2_iso` cut by "fails tight, passes loose"
        channel_qcd.cuts.remove("tau_2_iso")
        channel_qcd.cuts.add(
            Cut("byTightIsolationMVArun2017v2DBoldDMwLT2017_2<0.5",
                "tau_2_iso"))
        channel_qcd.cuts.add(
            Cut("byLooseIsolationMVArun2017v2DBoldDMwLT2017_2>0.5",
                "tau_2_iso_loose"))
    output_config["processes"][estimation.name] = {
        "files": [
            str(f).replace(path_prefix, "") for f in estimation.get_files()
        ],
        "cut_string": (estimation.get_cuts() + channel_qcd.cuts +
                       additional_cuts).expand(),
        "weight_string": estimation.get_weights().extract(),
        "class": classes_map[estimation.name]
    }

    # Write output config; the context manager closes the file handle
    logger.info("Write config to file: {}".format(args.output_config))
    with open(args.output_config, "w") as handle:
        yaml.dump(output_config, handle, default_flow_style=False)
def main(args):
    """Write a dataset config for data, ggH125, qqH125 and HWW to YAML.

    The path and branch related argparse values are copied into the config.
    The ``processes`` section stores, per process, the input files relative
    to ``args.base_path``, the expanded cut string and the extracted weight
    string. The directory ``args.output_path`` is created if it does not
    exist before the config is written to ``args.output_config``.

    Args:
        args: Parsed argparse namespace. Reads ``base_path``,
            ``friend_paths``, ``output_path``, ``output_filename``,
            ``tree_path``, ``event_branch``, ``training_weight_branch``,
            ``era``, ``database``, ``channel`` and ``output_config``.

    Raises:
        Exception: If ``args.era`` contains neither ``"2016"`` nor ``"2017"``.
        KeyError: If ``args.channel`` is not one of ``mt``, ``et``, ``tt``,
            ``em``.
    """
    # Write argparse arguments to YAML config
    logger.debug("Write argparse arguments to YAML config.")
    output_config = {}
    for key in ("base_path", "friend_paths", "output_path", "output_filename",
                "tree_path", "event_branch", "training_weight_branch"):
        output_config[key] = getattr(args, key)

    # Define era. DataEstimation is imported in both branches because it is
    # instantiated below regardless of the era. `era_name` is the key used
    # for the channel lookup, so that any `args.era` accepted by the
    # substring checks also resolves a channel.
    if "2016" in args.era:
        from shape_producer.estimation_methods_2016 import (
            ggHEstimation, qqHEstimation, HWWEstimation, DataEstimation)
        from shape_producer.era import Run2016
        era = Run2016(args.database)
        era_name = "2016"
    elif "2017" in args.era:
        from shape_producer.estimation_methods_2017 import (
            ggHEstimation, qqHEstimation, HWWEstimation, DataEstimation)
        from shape_producer.era import Run2017
        era = Run2017(args.database)
        era_name = "2017"
    else:
        message = "Era {} is not implemented.".format(args.era)
        logger.fatal(message)
        raise Exception(message)

    channelDict = {
        "2016": {
            "mt": MTSM2016(),
            "et": ETSM2016(),
            "tt": TTSM2016(),
            "em": EMSM2016()
        },
        "2017": {
            "mt": MTSM2017(),
            "et": ETSM2017(),
            "tt": TTSM2017(),
            "em": EMSM2017()
        },
    }
    channel = channelDict[era_name][args.channel]

    # Set up `processes` part of config
    output_config["processes"] = {}

    # Additional cuts
    additional_cuts = Cuts()
    logger.warning("Use additional cuts for %s: %s", args.channel,
                   additional_cuts.expand())

    estimations = [
        DataEstimation(era, args.base_path, channel),
        ggHEstimation("ggH125", era, args.base_path, channel),
        qqHEstimation("qqH125", era, args.base_path, channel),
        HWWEstimation(era, args.base_path, channel),
    ]

    # A trailing slash in base_path must not leave a leading "/" in the
    # relative file names
    path_prefix = args.base_path.rstrip("/") + "/"
    for estimation in estimations:
        output_config["processes"][estimation.name] = {
            "files": [
                str(f).replace(path_prefix, "")
                for f in estimation.get_files()
            ],
            "cut_string": (estimation.get_cuts() + channel.cuts +
                           additional_cuts).expand(),
            "weight_string": estimation.get_weights().extract(),
        }

    # Write output config; the context manager closes the file handle
    if not os.path.exists(args.output_path):
        os.makedirs(args.output_path)
    logger.info("Write config to file: {}".format(args.output_config))
    with open(args.output_config, "w") as handle:
        yaml.dump(output_config, handle, default_flow_style=False)
def main(args):
    """Compute the 2016 goodness-of-fit binning for et, mt and tt.

    For each enabled channel the properties are collected with
    ``get_properties``, a chain is built with ``build_chain``, and the 1d
    binning from ``get_1d_binning`` is extended by
    ``add_2d_unrolled_binning``. The result is stored under
    ``config["gof"][channel]`` and dumped as YAML to ``args.output``.

    Args:
        args: Parsed argparse namespace. Reads ``era``, ``datasets``,
            ``variables``, ``directory`` and ``output``.

    Raises:
        Exception: If ``args.era`` does not contain ``"2016"``.
    """
    # Define era
    if "2016" in args.era:
        from shape_producer.era import Run2016
        era = Run2016(args.datasets)
    else:
        message = "Era {} is not implemented.".format(args.era)
        logger.fatal(message)
        raise Exception(message)

    # Load variables
    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary Python objects and is rejected by recent PyYAML releases;
    # consider yaml.safe_load if the variables file only holds plain data.
    with open(args.variables) as handle:
        variables = yaml.load(handle)["variables"]

    # Define bins and range of binning for variables in enabled channels.
    # `channels` is the switch for which channels are processed; the
    # processing order is fixed by `channel_classes`.
    channels = ["et", "mt", "tt"]
    channel_classes = [("et", ETSM), ("mt", MTSM), ("tt", TTSM)]
    num_borders = 9
    min_percentile = 1.0
    max_percentile = 99.0

    config = {"gof": {}}

    for name, channel_class in channel_classes:
        if name not in channels:
            continue

        # Get properties
        channel = channel_class()
        logger.info("Channel: %s", name)
        additional_cuts = Cuts()
        logger.warning("Use additional cuts for %s: %s", name,
                       additional_cuts.expand())
        dict_ = get_properties({}, era, channel, args.directory,
                               additional_cuts)

        # Build chain
        dict_["tree_path"] = "%s_nominal/ntuple" % name
        chain = build_chain(dict_)

        # Get percentiles and calculate 1d binning
        binning = get_1d_binning(name, chain, variables, min_percentile,
                                 max_percentile, num_borders)

        # Add binning for unrolled 2d distributions
        binning = add_2d_unrolled_binning(variables, binning)

        # Append binning to config
        config["gof"][name] = binning

    # Write config; the context manager closes the file handle
    logger.info("Write binning config to %s.", args.output)
    with open(args.output, "w") as handle:
        yaml.dump(config, handle)