def extract_nuisance_parameters_from_lhe_file(filename, systematics):
    """
    Extracts the definition of nuisance parameters from the LHE file.

    Parameters
    ----------
    filename : str
        Location of the LHE file. It is handed unchanged to `_untar_and_parse_lhe_file`.

    systematics : dict or None
        Systematics setup. The keys "mur", "muf" and "mu" each map to a comma-separated string of scale factors
        (e.g. "0.5,1.0,2.0"). The key "pdf" maps to a string that is searched for in the lower-cased weight group
        names. All other keys are ignored. If None, no nuisance parameters are extracted.

    Returns
    -------
    nuisance_params : OrderedDict
        Maps nuisance parameter names to two-element lists of weight ids. For a scale systematic the list is
        `[id matching the last listed factor, id matching the first listed factor]` (or `[id, None]` when only
        one factor is given); entries that were not found stay None, and a systematic without any match has no
        entry at all. Every weight in a PDF weight group adds an entry `"pdf_<i>": [weight_id, None]`, where `i`
        is the position of the weight inside its group.

    Raises
    ------
    RuntimeError
        If the header / initrwgt section cannot be found, or if it contains no weight groups.
    ValueError
        Propagated from `float()` if a scale factor string contains a token that is not a number.
    """

    logger.debug("Parsing nuisance parameter setup from LHE file at %s", filename)

    # Nuisance parameters (output)
    nuisance_params = OrderedDict()

    # When no systematics setup is defined
    if systematics is None:
        return nuisance_params

    # Parse scale factors from strings in systematics. The resulting list is aligned with systematics.keys();
    # entries that are not scale variations are stored as None.
    logger.debug("Systematics setup: %s", systematics)
    scale_names = ("mur", "muf", "mu")
    systematics_scales = []
    for key, value in systematics.items():
        if key not in scale_names:
            systematics_scales.append(None)
            continue

        scale_factors = [float(sf) for sf in value.split(",")]
        if not scale_factors:
            raise RuntimeError("Cannot parse scale factor string %s" % value)
        if len(scale_factors) == 1:
            systematics_scales.append((scale_factors[0],))
        else:
            # Only the outermost factors are used: benchmark 0 <-> last factor, benchmark 1 <-> first factor
            systematics_scales.append((scale_factors[-1], scale_factors[0]))

    # Untar and parse LHE file
    root, _ = _untar_and_parse_lhe_file(filename)

    # Find weight groups. Indexing an empty findall() result raises IndexError, which has to be caught here.
    try:
        weight_groups = root.findall("header")[0].findall("initrwgt")[0].findall("weightgroup")
    except (IndexError, KeyError) as e:
        raise RuntimeError("Could not find weight groups in LHE file!\n%s" % e)

    if len(weight_groups) == 0:
        raise RuntimeError("Zero weight groups in LHE file!")

    # What have we already found? One [benchmark 0 done, benchmark 1 done] pair per systematic. Slots that do
    # not have to be filled are marked as done from the start.
    systematics_scale_done = []
    for scales in systematics_scales:
        if scales is None:
            systematics_scale_done.append([True, True])
        elif len(scales) == 1:
            systematics_scale_done.append([False, True])
        else:
            systematics_scale_done.append([False, False])
    systematics_pdf_done = False

    has_scale_systematics = any(name in systematics for name in scale_names)
    dynscale_keys = ("dynscale", "dyn_scale", "DYNSCALE", "DYN_SCALE")

    # Loop over weight groups and weights and identify benchmarks
    for wg in weight_groups:
        try:
            wg_name = wg.attrib["name"]
        except KeyError:
            logger.warning("Weight group does not have name attribute")
            continue
        wg_name_lower = wg_name.lower()

        if "mg_reweighting" in wg_name_lower:
            # Physics reweighting
            logger.debug("Found physics reweighting weight group %s", wg_name)
            continue

        if has_scale_systematics and "scale variation" in wg_name_lower:
            # Found scale variation weight group
            logger.debug("Found scale variation weight group %s", wg_name)

            for weight in wg.findall("weight"):
                try:
                    weight_id = str(weight.attrib["id"])
                    weight_muf = float(weight.attrib["MUF"])
                    weight_mur = float(weight.attrib["MUR"])
                except KeyError:
                    logger.warning("Scale variation weight does not have all expected attributes")
                    continue

                logger.debug(
                    "Found scale variation weight %s / muf = %s, mur = %s", weight_id, weight_muf, weight_mur
                )

                # Let's skip the entries with a varied dynamical scale for now. Every known spelling of the
                # attribute has to be checked, not just "dynscale".
                if any(key in weight.attrib for key in dynscale_keys):
                    continue

                # Matching time!
                for syst_name, syst_scales, syst_done in zip(
                    systematics.keys(), systematics_scales, systematics_scale_done
                ):
                    if syst_name not in scale_names:
                        continue

                    for k in (0, 1):
                        # Checking the done flag first also protects against indexing a one-element tuple
                        if syst_done[k]:
                            continue

                        # "mur" varies only mu_R, "muf" only mu_F, "mu" varies both together
                        target_mur = syst_scales[k] if syst_name in ("mur", "mu") else 1.0
                        target_muf = syst_scales[k] if syst_name in ("muf", "mu") else 1.0

                        if approx_equal(weight_mur, target_mur) and approx_equal(weight_muf, target_muf):
                            nuisance_params.setdefault(syst_name, [None, None])[k] = weight_id
                            # syst_done is the very list stored in systematics_scale_done
                            syst_done[k] = True
                            break

        elif "pdf" in systematics and (
            systematics["pdf"] in wg_name_lower or "pdf" in wg_name_lower or "ct" in wg_name_lower
        ):
            # PDF reweighting
            logger.debug("Found PDF variation weight group %s", wg_name)

            for i, weight in enumerate(wg.findall("weight")):
                try:
                    weight_id = str(weight.attrib["id"])
                    weight_pdf = int(weight.attrib["PDF"])
                except KeyError:
                    logger.warning("PDF weight does not have all expected attributes")
                    continue

                logger.debug("Found PDF weight %s / %s", weight_id, weight_pdf)

                # Add every PDF Hessian direction to nuisance parameters
                nuisance_params["pdf_{}".format(i)] = [weight_id, None]
                systematics_pdf_done = True

        else:
            logger.debug("Found other weight group %s", wg_name)

    # Check that everything was found
    if "pdf" in systematics and not systematics_pdf_done:
        logger.warning(
            "Could not find weights for the PDF uncertainties in LHE file! The most common source of this"
            " error is not having installed LHAPDF with its Python interface. Please make sure that you "
            " have installed this. You can also check the log file produced by MadGraph for a warning"
            " about this. If LHAPDF is correctly installed and you still get this warning, please check"
            " manually whether the LHE file at %s contains weights from PDF variation, and contact"
            " the MadMiner developer team about this. If you continue with the analysis, MadMiner"
            " will disregard PDF uncertainties.",
            filename,
        )

    for syst_name, (done1, done2) in zip(systematics.keys(), systematics_scale_done):
        if not (done1 and done2):
            logger.warning(
                "Did not find benchmarks representing scale variation uncertainty %s in LHE file!", syst_name
            )
            logger.warning(
                "Could not find weights for the scale uncertainty %s in LHE file! The most common source of "
                " this error is not having installed LHAPDF with its Python interface. Please make sure that"
                " you have installed this. You can also check the log file produced by MadGraph for a "
                "warning about this. If LHAPDF is correctly installed and you still get this warning, please"
                " check manually whether the LHE file at %s contains weights from PDF variation, and contact"
                " the MadMiner developer team about this. If you continue with the analysis, MadMiner"
                " will disregard PDF uncertainties.",
                syst_name,
                filename,
            )

    return nuisance_params
def _find_scale_variation_weight_id(weight_groups, mur, muf):
    """
    Returns the id of the first scale variation weight with the requested (mur, muf), or None if there is none.

    Only weight groups whose lower-cased name contains "scale variation" (and not "mg_reweighting") are
    searched. Weights that carry a dynamical scale attribute are ignored.
    """

    dynscale_keys = ("dynscale", "dyn_scale", "DYNSCALE", "DYN_SCALE")

    for wg in weight_groups:
        try:
            wg_name = wg.attrib["name"]
        except KeyError:
            logger.warning("New weight group: does not have name attribute, skipping")
            continue
        logger.debug("New weight group: %s", wg_name)

        wg_name_lower = wg_name.lower()
        if "mg_reweighting" in wg_name_lower or "scale variation" not in wg_name_lower:
            continue
        logger.debug("Weight group identified as scale variation")

        for weight in wg.findall("weight"):
            try:
                weight_id = str(weight.attrib["id"])
                weight_muf = float(weight.attrib["MUF"])
                weight_mur = float(weight.attrib["MUR"])
            except KeyError:
                logger.warning("Scale variation weight does not have all expected attributes")
                continue

            logger.debug("Found scale variation weight %s / muf = %s, mur = %s", weight_id, weight_muf, weight_mur)

            # Let's skip the entries with a varied dynamical scale for now
            if any(key in weight.attrib for key in dynscale_keys):
                continue

            # Matching time! Returning here stops the search across *all* weight groups, so that each scale
            # factor contributes at most one benchmark.
            if approx_equal(weight_mur, mur) and approx_equal(weight_muf, muf):
                return weight_id

    return None


def _extract_nuisance_param_dict(weight_groups, systematics_name, systematics_definition):
    """
    Builds the nuisance parameter definition(s) for one systematic uncertainty.

    Parameters
    ----------
    weight_groups : iterable
        Weight group elements. Each one is expected to provide `attrib` (with a "name" entry) and
        `findall("weight")`, as `xml.etree.ElementTree` elements do.

    systematics_name : str
        Name of the systematic; used as prefix of all nuisance parameter and benchmark names.

    systematics_definition : tuple
        The first entry is the type, one of "norm", "scale" or "pdf".
        For "norm", entry 1 is copied into the output as third element of the definition.
        For "scale", entry 1 is "mu", "mur" or "muf" (which scale is varied) and entry 2 is a comma-separated
        string of scale factors, of which only the last and the first one are used.
        For "pdf", entry 1 is a string that is searched for in the lower-cased weight group names.

    Returns
    -------
    nuisance_dict : dict or OrderedDict
        Maps nuisance parameter names to tuples `((benchmark_name, weight_id), second_benchmark, value)`.
        `second_benchmark` is `(None, None)` unless two scale factors were matched. `value` is None except
        for "norm", where `weight_id` is None instead. For "scale" an empty dict is returned if not every
        requested scale factor was found. For "pdf" the result may be empty.

    Raises
    ------
    RuntimeError
        If the systematics type is unknown.
    ValueError
        Propagated from `float()` if the scale factor string contains a token that is not a number.
    """

    logger.debug("Extracting nuisance parameter information for systematic %s", systematics_name)
    syst_type = systematics_definition[0]

    if syst_type == "norm":
        nuisance_param_name = "{}_nuisance_param_0".format(systematics_name)
        benchmark_name = "{}_benchmark_0".format(nuisance_param_name)
        return {nuisance_param_name: ((benchmark_name, None), (None, None), systematics_definition[1])}

    if syst_type == "scale":
        # Parse scale variations we need to find
        scale_string = systematics_definition[2]
        scale_factors = [float(sf) for sf in scale_string.split(",")]
        if not scale_factors:
            raise RuntimeError("Cannot parse scale factor string %s" % scale_string)
        if len(scale_factors) == 1:
            scale_factors = (scale_factors[0],)
        else:
            # Benchmark 0 <-> last factor, benchmark 1 <-> first factor
            scale_factors = (scale_factors[-1], scale_factors[0])

        vary_muf = systematics_definition[1] in ("mu", "muf")
        vary_mur = systematics_definition[1] in ("mu", "mur")

        # Loop over scale factors and look up one matching weight for each of them
        nuisance_param_definition_parts = []
        for k, scale_factor in enumerate(scale_factors):
            muf = scale_factor if vary_muf else 1.0
            mur = scale_factor if vary_mur else 1.0

            weight_id = _find_scale_variation_weight_id(weight_groups, mur, muf)
            if weight_id is None:
                continue

            benchmark_name = "{}_nuisance_param_0_benchmark_{}".format(systematics_name, k)
            nuisance_param_definition_parts.append((benchmark_name, weight_id))

        if len(nuisance_param_definition_parts) < len(scale_factors):
            logger.warning(
                "Could not find weights for the scale uncertainty %s in LHE file! The most common source of "
                " this error is not having installed LHAPDF with its Python interface. Please make sure that"
                " you have installed this. You can also check the log file produced by MadGraph for a "
                "warning about this. If LHAPDF is correctly installed and you still get this warning, please"
                " check manually whether the LHE file contains weights from PDF variation, and contact"
                " the MadMiner developer team about this. If you continue with the analysis, MadMiner"
                " will disregard PDF uncertainties.",
                systematics_name,
            )
            return {}

        # Output
        nuisance_param_name = "{}_nuisance_param_0".format(systematics_name)
        if len(nuisance_param_definition_parts) > 1:
            second_benchmark = nuisance_param_definition_parts[1]
        else:
            second_benchmark = (None, None)
        return {nuisance_param_name: (nuisance_param_definition_parts[0], second_benchmark, None)}

    if syst_type == "pdf":
        nuisance_dict = OrderedDict()

        # Loop over weight groups and weights and identify benchmarks
        for wg in weight_groups:
            try:
                wg_name = wg.attrib["name"]
            except KeyError:
                logger.warning("New weight group: does not have name attribute, skipping")
                continue
            logger.debug("New weight group: %s", wg_name)

            wg_name_lower = wg_name.lower()
            is_pdf_group = (
                systematics_definition[1] in wg_name_lower or "pdf" in wg_name_lower or "ct" in wg_name_lower
            )
            if "mg_reweighting" in wg_name_lower or not is_pdf_group:
                continue
            logger.debug("Weight group identified as PDF variation")

            for i, weight in enumerate(wg.findall("weight")):
                try:
                    weight_id = str(weight.attrib["id"])
                    weight_pdf = int(weight.attrib["PDF"])
                except KeyError:
                    logger.warning("PDF weight does not have all expected attributes")
                    continue

                if weight_pdf % 1000 == 0:
                    # Central element, not eigenvector of covariance matrix
                    logger.debug("Identifying PDF weight %s / %s as central element", weight_id, weight_pdf)
                    continue

                logger.debug("Found PDF weight %s / %s", weight_id, weight_pdf)

                # Add every PDF Hessian direction to nuisance parameters. The index is the position of the
                # weight within its group, so the numbering has a gap where the central element sits.
                nuisance_param_name = "{}_nuisance_param_{}".format(systematics_name, i)
                benchmark_name = "{}_benchmark_0".format(nuisance_param_name)
                nuisance_dict[nuisance_param_name] = ((benchmark_name, weight_id), (None, None), None)

        # Check that everything was found
        if not nuisance_dict:
            logger.warning(
                "Could not find weights for the PDF uncertainties in LHE file! The most common source of this"
                " error is not having installed LHAPDF with its Python interface. Please make sure that you "
                " have installed this. You can also check the log file produced by MadGraph for a warning"
                " about this. If LHAPDF is correctly installed and you still get this warning, please check"
                " manually whether the LHE file contains weights from PDF variation, and contact"
                " the MadMiner developer team about this. If you continue with the analysis, MadMiner"
                " will disregard PDF uncertainties."
            )

        return nuisance_dict

    raise RuntimeError("Unknown systematics type %s" % syst_type)