def add_shapes(model, obs, proc, uncs, filename, hname, hname_with_systematics,
               include_uncertainties):
    """Replace the one-bin histogram of a process by shapes read from a root file.

    If ``proc == 'DATA'``, the data histogram of channel ``obs`` is set on
    ``model`` instead and nothing else is done.

    Parameters:
      model -- model that is modified in place.
      obs, proc -- channel and process name; both are converted with
          transform_name_to_theta before ``model`` is accessed.
      uncs -- mapping from uncertainty name to a factor. The factor scales the
          log-normal rate effect and is forwarded to ``hf.set_syst_histos``.
      filename -- root file to read; opened files are cached in
          add_shapes_rootfiles under this name.
      hname -- name template of the nominal histogram. '$CHANNEL' and
          '$PROCESS' are substituted.
      hname_with_systematics -- name template of the varied histograms.
          '$CHANNEL', '$PROCESS' and '$SYSTEMATIC' are substituted. If it
          contains '$DIRECTION_plusminus' or '$DIRECTION_updown', these become
          'plus'/'minus' resp. 'up'/'down'; otherwise 'Up'/'Down' is appended
          to the uncertainty name.
      include_uncertainties -- forwarded to ``rf.get_histogram`` for all
          non-data histograms.

    Raises:
      RuntimeError -- if the data or nominal histogram is not found, or if the
          normalisation in the root file differs from the one already in the
          model by more than 1% relative and 1e-4 absolute.
      AssertionError -- if the model has no such process, already has shape
          parameters for it, or its current histogram has more than one bin.
    """
    if filename not in add_shapes_rootfiles:
        add_shapes_rootfiles[filename] = rootfile(filename)
    rf = add_shapes_rootfiles[filename]
    theta_obs = transform_name_to_theta(obs)
    theta_proc = transform_name_to_theta(proc)
    hname = hname.replace('$CHANNEL', obs)
    hname_with_systematics = hname_with_systematics.replace('$CHANNEL', obs)

    if proc == 'DATA':
        # Try 'DATA' first and fall back to 'data_obs' only if that is missing.
        histo = None
        for data_name in ('DATA', 'data_obs'):
            histo = rf.get_histogram(hname.replace('$PROCESS', data_name),
                                     include_uncertainties=False)
            if histo is not None:
                break
        if histo is None:
            raise RuntimeError("did not find data histogram in rootfile")
        model.set_data_histogram(theta_obs, histo, reset_binning=True)
        return

    hf = model.get_histogram_function(theta_obs, theta_proc)
    assert hf is not None, "model has no process '%s' in channel '%s'" % (theta_proc, theta_obs)
    assert len(hf.get_parameters()) == 0, "model has non-trivial shape uncertainty already"
    old_nominal_histogram = hf.get_nominal_histo()
    assert len(old_nominal_histogram[2]) == 1, "expected a counting-only histogram with only one bin"

    hname = hname.replace('$PROCESS', proc)
    hname_with_systematics = hname_with_systematics.replace('$PROCESS', proc)
    nominal_histogram = rf.get_histogram(hname, include_uncertainties=include_uncertainties)
    if nominal_histogram is None:
        # Without this check the code below fails with an opaque TypeError.
        raise RuntimeError("did not find histogram '%s' in rootfile" % hname)

    old_norm = sum(old_nominal_histogram[2])
    nominal_norm = sum(nominal_histogram[2])
    if utils.reldiff(old_norm, nominal_norm) > 0.01 and abs(old_norm - nominal_norm) > 1e-4:
        raise RuntimeError("add_shapes: histogram normalisation given in datacard and from root file differ by more than >1% (and absolute difference is > 1e-4)")
    hf.set_nominal_histo(nominal_histogram, reset_binning=True)
    model.reset_binning(theta_obs, nominal_histogram[0], nominal_histogram[1],
                        len(nominal_histogram[2]))

    for u in uncs:
        # NOTE(review): the converted name is not used below; the raw name `u`
        # is what gets registered. Call kept in case the helper validates the
        # name -- confirm whether `theta_unc` should be used instead.
        theta_unc = transform_name_to_theta(u)
        if '$DIRECTION_' in hname_with_systematics:
            # Derive BOTH names from the same template. Deriving the minus
            # name from the already-substituted plus name leaves nothing to
            # replace and makes both names identical.
            template = hname_with_systematics.replace('$SYSTEMATIC', u)
            hname_plus = template.replace('$DIRECTION_plusminus', 'plus')
            hname_plus = hname_plus.replace('$DIRECTION_updown', 'up')
            hname_minus = template.replace('$DIRECTION_plusminus', 'minus')
            hname_minus = hname_minus.replace('$DIRECTION_updown', 'down')
        else:
            hname_plus = hname_with_systematics.replace('$SYSTEMATIC', u + 'Up')
            hname_minus = hname_with_systematics.replace('$SYSTEMATIC', u + 'Down')
        histo_plus = rf.get_histogram(hname_plus, include_uncertainties=include_uncertainties,
                                      fail_with_exception=True)
        histo_minus = rf.get_histogram(hname_minus, include_uncertainties=include_uncertainties,
                                       fail_with_exception=True)
        # make the rate uncertainty part of the coefficient function, i.e., normalize plus and minus histograms
        # to nominal and add a lognormal uncertainty to the coefficient function:
        plus_norm = sum(histo_plus[2])
        minus_norm = sum(histo_minus[2])
        lambda_plus = math.log(plus_norm / nominal_norm) * uncs[u]
        lambda_minus = -math.log(minus_norm / nominal_norm) * uncs[u]
        model.get_coeff(theta_obs, theta_proc).add_factor(
            'exp', parameter=u, lambda_plus=lambda_plus, lambda_minus=lambda_minus)
        # Scale the bin contents in place so both variations have the nominal norm.
        utils.mul_list(histo_plus[2], nominal_norm / plus_norm)
        utils.mul_list(histo_minus[2], nominal_norm / minus_norm)
        hf.set_syst_histos(u, histo_plus, histo_minus, uncs[u])
        hf.normalize_to_nominal = True
def add_shapes(model, obs, proc, uncs, filename, hname, hname_with_systematics,
               include_uncertainties, searchpaths=['.'], variables={}):
    """Replace the one-bin histogram of a process by shapes read from a root file.

    If ``proc == 'DATA'``, the data histogram of channel ``obs`` is set on
    ``model`` instead and nothing else is done.

    Parameters:
      model -- model that is modified in place.
      obs, proc -- channel and process name; both are converted with
          transform_name_to_theta before ``model`` is accessed.
      uncs -- mapping from uncertainty name to a factor. The factor scales the
          log-normal rate effect and is forwarded to ``hf.set_syst_histos``.
      filename -- root file name, looked up in ``searchpaths``; opened files
          are cached in add_shapes_rootfiles under ``filename``.
      hname -- name template of the nominal histogram. '$CHANNEL', '$PROCESS'
          and every '$<key>' of ``variables`` are substituted.
      hname_with_systematics -- name template of the varied histograms.
          Substituted like ``hname``, plus '$SYSTEMATIC'. If it contains
          '$DIRECTION_plusminus' or '$DIRECTION_updown', these become
          'plus'/'minus' resp. 'up'/'down'; otherwise 'Up'/'Down' is appended
          to the uncertainty name.
      include_uncertainties -- forwarded to ``rf.get_histogram`` for all
          non-data histograms.
      searchpaths -- directories tried in order; the first one containing
          ``filename`` wins. (The default list is never mutated here.)
      variables -- mapping of extra placeholder names (without '$') to their
          string replacement. (The default dict is never mutated here.)

    Raises:
      RuntimeError -- if the file or a histogram is not found, or if the
          normalisation in the root file differs from the one already in the
          model by more than 1% relative and 1e-4 absolute.
      AssertionError -- if the model has no such process, already has shape
          parameters for it, or its current histogram has more than one bin.
    """
    if filename not in add_shapes_rootfiles:
        full_path = None
        for directory in searchpaths:
            candidate = os.path.join(directory, filename)
            if os.path.isfile(candidate):
                full_path = candidate
                break
        if full_path is None:
            raise RuntimeError("did not find file '%s' in the paths %s" % (filename, str(searchpaths)))
        add_shapes_rootfiles[filename] = rootfile(full_path)
    rf = add_shapes_rootfiles[filename]
    theta_obs = transform_name_to_theta(obs)
    theta_proc = transform_name_to_theta(proc)
    hname = hname.replace('$CHANNEL', obs)
    hname_with_systematics = hname_with_systematics.replace('$CHANNEL', obs)
    # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
    for varname, value in variables.items():
        placeholder = '$%s' % varname
        hname = hname.replace(placeholder, value)
        hname_with_systematics = hname_with_systematics.replace(placeholder, value)

    if proc == 'DATA':
        # Try 'DATA' first and fall back to 'data_obs' only if that is missing.
        histo = None
        for data_name in ('DATA', 'data_obs'):
            histo = rf.get_histogram(hname.replace('$PROCESS', data_name),
                                     include_uncertainties=False)
            if histo is not None:
                break
        if histo is None:
            if _verbose:
                print("note: did not find data histogram in %s" % rf.get_filename())
            raise RuntimeError("did not find histo")
        model.set_data_histogram(theta_obs, histo, reset_binning=True)
        return

    hf = model.get_histogram_function(theta_obs, theta_proc)
    assert hf is not None, "model has no process '%s' in channel '%s'" % (theta_proc, theta_obs)
    assert len(hf.get_parameters()) == 0, "model has non-trivial shape uncertainty already"
    old_nominal_histogram = hf.get_nominal_histo()
    assert len(old_nominal_histogram[2]) == 1, "expected a counting-only histogram with only one bin"

    hname = hname.replace('$PROCESS', proc)
    hname_with_systematics = hname_with_systematics.replace('$PROCESS', proc)
    nominal_histogram = rf.get_histogram(hname, include_uncertainties=include_uncertainties)
    if nominal_histogram is None:
        if _verbose:
            print("note: did not find histogram %s in %s" % (hname, rf.get_filename()))
        raise RuntimeError("did not find histo")

    old_norm = old_nominal_histogram.get_value_sum()
    nominal_norm = nominal_histogram.get_value_sum()
    if utils.reldiff(old_norm, nominal_norm) > 0.01 and abs(old_norm - nominal_norm) > 1e-4:
        raise RuntimeError("add_shapes: histogram normalisation given in datacard and from root file differ by more than >1% (and absolute difference is > 1e-4)")
    hf.set_nominal_histo(nominal_histogram, reset_binning=True)
    model.reset_binning(theta_obs, nominal_histogram[0], nominal_histogram[1],
                        len(nominal_histogram[2]))

    for u in uncs:
        # NOTE(review): the converted name is not used below; the raw name `u`
        # is what gets registered. Call kept in case the helper validates the
        # name -- confirm whether `theta_unc` should be used instead.
        theta_unc = transform_name_to_theta(u)
        if '$DIRECTION_' in hname_with_systematics:
            # Derive BOTH names from the same template. Deriving the minus
            # name from the already-substituted plus name leaves nothing to
            # replace and makes both names identical.
            template = hname_with_systematics.replace('$SYSTEMATIC', u)
            hname_plus = template.replace('$DIRECTION_plusminus', 'plus')
            hname_plus = hname_plus.replace('$DIRECTION_updown', 'up')
            hname_minus = template.replace('$DIRECTION_plusminus', 'minus')
            hname_minus = hname_minus.replace('$DIRECTION_updown', 'down')
        else:
            hname_plus = hname_with_systematics.replace('$SYSTEMATIC', u + 'Up')
            hname_minus = hname_with_systematics.replace('$SYSTEMATIC', u + 'Down')
        histo_plus = rf.get_histogram(hname_plus, include_uncertainties=include_uncertainties)
        if histo_plus is None:
            if _verbose:
                print("note: did not find histogram %s in %s" % (hname_plus, rf.get_filename()))
            raise RuntimeError("did not find histo")
        histo_minus = rf.get_histogram(hname_minus, include_uncertainties=include_uncertainties)
        if histo_minus is None:
            if _verbose:
                print("note: did not find histogram %s in %s" % (hname_minus, rf.get_filename()))
            raise RuntimeError("did not find histo")
        # make the rate uncertainty part of the coefficient function, i.e., normalize plus and minus histograms
        # to nominal and add a lognormal uncertainty to the coefficient function:
        plus_norm = histo_plus.get_value_sum()
        minus_norm = histo_minus.get_value_sum()
        lambda_plus = math.log(plus_norm / nominal_norm) * uncs[u]
        lambda_minus = -math.log(minus_norm / nominal_norm) * uncs[u]
        model.get_coeff(theta_obs, theta_proc).add_factor(
            'exp', parameter=u, lambda_plus=lambda_plus, lambda_minus=lambda_minus)
        # scale() returns the rescaled histogram; both variations get the nominal norm.
        histo_plus = histo_plus.scale(nominal_norm / plus_norm)
        histo_minus = histo_minus.scale(nominal_norm / minus_norm)
        hf.set_syst_histos(u, histo_plus, histo_minus, uncs[u])
        hf.normalize_to_nominal = True
def add_shapes(model, obs, proc, uncs, filename, hname, hname_with_systematics,
               include_uncertainties, searchpaths=['.'], variables={},
               rhandling='renormalize-lognormal'):
    """Replace the one-bin histogram of a process by shapes read from a root file.

    If ``proc == 'DATA'``, the data histogram of channel ``obs`` is set on
    ``model`` instead and nothing else is done.

    Parameters:
      model -- model that is modified in place.
      obs, proc -- channel and process name; both are converted with
          transform_name_to_theta before ``model`` is accessed.
      uncs -- mapping from uncertainty name to a factor. The factor is
          forwarded to ``hf.set_syst_histos`` and, for
          'renormalize-lognormal', scales the log-normal rate effect.
      filename -- root file name, looked up in ``searchpaths``; opened files
          are cached in add_shapes_rootfiles under ``filename``.
      hname -- name template of the nominal histogram. '$CHANNEL', '$PROCESS'
          and every '$<key>' of ``variables`` are substituted.
      hname_with_systematics -- name template of the varied histograms.
          Substituted like ``hname``, plus '$SYSTEMATIC'. If it contains
          '$DIRECTION_plusminus' or '$DIRECTION_updown', these become
          'plus'/'minus' resp. 'up'/'down'; otherwise 'Up'/'Down' is appended
          to the uncertainty name.
      include_uncertainties -- forwarded to ``rf.get_histogram`` for all
          non-data histograms.
      searchpaths -- directories tried in order; the first one containing
          ``filename`` wins. (The default list is never mutated here.)
      variables -- mapping of extra placeholder names (without '$') to their
          string replacement. (The default dict is never mutated here.)
      rhandling -- 'renormalize-lognormal': scale the varied histograms to the
          nominal norm and add the rate change as an 'exp' factor to the
          coefficient. 'morph': pass the varied histograms on unchanged.

    Raises:
      NotFoundException -- if the file or a histogram is not found.
      InconsistentDataException -- if the normalisation in the root file
          differs from the one already in the model by more than 1% relative
          and 1e-4 absolute (skipped when the model value is exactly -1.0).
      AssertionError -- for an unknown ``rhandling``, or if the model has no
          such process, already has shape parameters for it, or its current
          histogram has more than one bin.
    """
    assert rhandling in ('renormalize-lognormal', 'morph')
    if filename not in add_shapes_rootfiles:
        full_path = None
        for directory in searchpaths:
            candidate = os.path.join(directory, filename)
            if os.path.isfile(candidate):
                full_path = candidate
                break
        if full_path is None:
            raise NotFoundException("did not find file '%s' in the paths %s" % (filename, str(searchpaths)))
        add_shapes_rootfiles[filename] = rootfile(full_path)
    rf = add_shapes_rootfiles[filename]
    theta_obs = transform_name_to_theta(obs)
    theta_proc = transform_name_to_theta(proc)
    hname = hname.replace('$CHANNEL', obs)
    hname_with_systematics = hname_with_systematics.replace('$CHANNEL', obs)
    # .items() works on both Python 2 and 3 (iteritems is Python 2 only).
    for varname, value in variables.items():
        placeholder = '$%s' % varname
        hname = hname.replace(placeholder, value)
        hname_with_systematics = hname_with_systematics.replace(placeholder, value)

    if proc == 'DATA':
        # Try 'DATA' first and fall back to 'data_obs' only if that is missing.
        histo = None
        for data_name in ('DATA', 'data_obs'):
            histo = rf.get_histogram(hname.replace('$PROCESS', data_name),
                                     include_uncertainties=False)
            if histo is not None:
                break
        if histo is None:
            if _debug:
                print("note: did not find data histogram in %s" % rf.get_filename())
            raise NotFoundException("did not find histo")
        model.set_data_histogram(theta_obs, histo, reset_binning=True)
        return

    hf = model.get_histogram_function(theta_obs, theta_proc)
    assert hf is not None, "model has no process '%s' in channel '%s'" % (theta_proc, theta_obs)
    assert len(hf.get_parameters()) == 0, "model has non-trivial shape uncertainty already"
    old_nominal_histogram = hf.get_nominal_histo()
    assert len(old_nominal_histogram[2]) == 1, "expected a counting-only histogram with only one bin"

    hname = hname.replace('$PROCESS', proc)
    hname_with_systematics = hname_with_systematics.replace('$PROCESS', proc)
    nominal_histogram = rf.get_histogram(hname, include_uncertainties=include_uncertainties)
    if nominal_histogram is None:
        if _debug:
            print("note: did not find histogram %s in %s" % (hname, rf.get_filename()))
        raise NotFoundException("did not find histo")
    nominal_norm = nominal_histogram.get_value_sum()
    if _debug:
        print("norm(%s) = %.3f" % (hname, nominal_norm))

    # check that histogram in rootfile matches definition in datacard (allow deviations up to 1% / 1e-4 absolute):
    old_norm = old_nominal_histogram.get_value_sum()
    nominal_is_zero = False
    if old_norm > 0.0 or nominal_norm > 0.0:
        # A model value of exactly -1.0 disables the consistency check.
        if old_norm != -1.0 and utils.reldiff(old_norm, nominal_norm) > 0.01 and abs(old_norm - nominal_norm) > 1e-4:
            raise InconsistentDataException("add_shapes: histogram normalisation given in datacard and from root file differ by more than 1%% "
                                            "(and absolute difference is > 1e-4) for channel %s, process %s (histogram name '%s')" % (obs, proc, hname))
    else:
        print("WARNING: channel '%s' process '%s': yield is <=0. Process will ALWAYS have 0 contribution; please delete it from the datacard." % (obs, proc))
        nominal_is_zero = True

    # even for nominal_is_zero, make sure to set the histogram to ensure that the binning is correct:
    hf.set_nominal_histo(nominal_histogram, reset_binning=True)
    model.reset_binning(theta_obs, nominal_histogram[0], nominal_histogram[1],
                        len(nominal_histogram[2]))
    if len(uncs) == 0 or nominal_is_zero:
        return

    for u in uncs:
        # NOTE(review): the converted name is not used below; the raw name `u`
        # is what gets registered. Call kept in case the helper validates the
        # name -- confirm whether `theta_unc` should be used instead.
        theta_unc = transform_name_to_theta(u)
        if '$DIRECTION_' in hname_with_systematics:
            # Derive BOTH names from the same template. Deriving the minus
            # name from the already-substituted plus name leaves nothing to
            # replace and makes both names identical.
            template = hname_with_systematics.replace('$SYSTEMATIC', u)
            hname_plus = template.replace('$DIRECTION_plusminus', 'plus')
            hname_plus = hname_plus.replace('$DIRECTION_updown', 'up')
            hname_minus = template.replace('$DIRECTION_plusminus', 'minus')
            hname_minus = hname_minus.replace('$DIRECTION_updown', 'down')
        else:
            hname_plus = hname_with_systematics.replace('$SYSTEMATIC', u + 'Up')
            hname_minus = hname_with_systematics.replace('$SYSTEMATIC', u + 'Down')
        histo_plus = rf.get_histogram(hname_plus, include_uncertainties=include_uncertainties)
        if histo_plus is None:
            if _debug:
                print("note: did not find histogram %s in %s" % (hname_plus, rf.get_filename()))
            raise NotFoundException("did not find histo")
        histo_minus = rf.get_histogram(hname_minus, include_uncertainties=include_uncertainties)
        if histo_minus is None:
            if _debug:
                print("note: did not find histogram %s in %s" % (hname_minus, rf.get_filename()))
            raise NotFoundException("did not find histo")
        plus_norm = histo_plus.get_value_sum()
        minus_norm = histo_minus.get_value_sum()
        if _debug:
            print("norm(%s) = %.3f" % (hname_plus, plus_norm))
            print("norm(%s) = %.3f" % (hname_minus, minus_norm))
        if rhandling == 'renormalize-lognormal':
            # make the rate uncertainty part of the coefficient function, i.e., normalize plus and minus histograms
            # to nominal and add a lognormal uncertainty to the coefficient function:
            lambda_plus = math.log(plus_norm / nominal_norm) * uncs[u]
            lambda_minus = -math.log(minus_norm / nominal_norm) * uncs[u]
            model.get_coeff(theta_obs, theta_proc).add_factor(
                'exp', parameter=u, lambda_plus=lambda_plus, lambda_minus=lambda_minus)
            # scale() returns the rescaled histogram; both variations get the nominal norm.
            histo_plus = histo_plus.scale(nominal_norm / plus_norm)
            histo_minus = histo_minus.scale(nominal_norm / minus_norm)
            hf.set_syst_histos(u, histo_plus, histo_minus, uncs[u])
            hf.normalize_to_nominal = True
        else:
            # 'morph': hand the varied histograms over as read from the file.
            hf.set_syst_histos(u, histo_plus, histo_minus, uncs[u])
def add_shapes(model, obs, proc, uncs, filename, hname, hname_with_systematics,
               include_uncertainties):
    """Replace the one-bin histogram of a process by shapes read from a root file.

    If ``proc == 'DATA'``, the data histogram of channel ``obs`` is set on
    ``model`` instead and nothing else is done.

    Parameters:
      model -- model that is modified in place.
      obs, proc -- channel and process name; both are converted with
          transform_name_to_theta before ``model`` is accessed.
      uncs -- mapping from uncertainty name to a factor. The factor scales the
          log-normal rate effect and is forwarded to ``hf.set_syst_histos``.
      filename -- root file to read; opened files are cached in
          add_shapes_rootfiles under this name.
      hname -- name template of the nominal histogram. '$CHANNEL' and
          '$PROCESS' are substituted.
      hname_with_systematics -- name template of the varied histograms.
          '$CHANNEL', '$PROCESS' and '$SYSTEMATIC' are substituted. If it
          contains '$DIRECTION_plusminus' or '$DIRECTION_updown', these become
          'plus'/'minus' resp. 'up'/'down'; otherwise 'Up'/'Down' is appended
          to the uncertainty name.
      include_uncertainties -- forwarded to ``rf.get_histogram`` for all
          non-data histograms.

    Raises:
      RuntimeError -- if the data or nominal histogram is not found, or if the
          normalisation in the root file differs from the one already in the
          model by more than 1% relative and 1e-4 absolute.
      AssertionError -- if the model has no such process, already has shape
          parameters for it, or its current histogram has more than one bin.
    """
    if filename not in add_shapes_rootfiles:
        add_shapes_rootfiles[filename] = rootfile(filename)
    rf = add_shapes_rootfiles[filename]
    theta_obs = transform_name_to_theta(obs)
    theta_proc = transform_name_to_theta(proc)
    hname = hname.replace('$CHANNEL', obs)
    hname_with_systematics = hname_with_systematics.replace('$CHANNEL', obs)

    if proc == 'DATA':
        # Try 'DATA' first and fall back to 'data_obs' only if that is missing.
        histo = None
        for data_name in ('DATA', 'data_obs'):
            histo = rf.get_histogram(hname.replace('$PROCESS', data_name),
                                     include_uncertainties=False)
            if histo is not None:
                break
        if histo is None:
            raise RuntimeError("did not find data histogram in rootfile")
        model.set_data_histogram(theta_obs, histo, reset_binning=True)
        return

    hf = model.get_histogram_function(theta_obs, theta_proc)
    assert hf is not None, "model has no process '%s' in channel '%s'" % (theta_proc, theta_obs)
    assert len(hf.get_parameters()) == 0, "model has non-trivial shape uncertainty already"
    old_nominal_histogram = hf.get_nominal_histo()
    assert len(old_nominal_histogram[2]) == 1, "expected a counting-only histogram with only one bin"

    hname = hname.replace('$PROCESS', proc)
    hname_with_systematics = hname_with_systematics.replace('$PROCESS', proc)
    nominal_histogram = rf.get_histogram(hname, include_uncertainties=include_uncertainties)
    if nominal_histogram is None:
        # Without this check the code below fails with an opaque TypeError.
        raise RuntimeError("did not find histogram '%s' in rootfile" % hname)

    old_norm = sum(old_nominal_histogram[2])
    nominal_norm = sum(nominal_histogram[2])
    if utils.reldiff(old_norm, nominal_norm) > 0.01 and abs(old_norm - nominal_norm) > 1e-4:
        raise RuntimeError("add_shapes: histogram normalisation given in datacard and from root file differ by more than >1% (and absolute difference is > 1e-4)")
    hf.set_nominal_histo(nominal_histogram, reset_binning=True)
    model.reset_binning(theta_obs, nominal_histogram[0], nominal_histogram[1],
                        len(nominal_histogram[2]))

    for u in uncs:
        # NOTE(review): the converted name is not used below; the raw name `u`
        # is what gets registered. Call kept in case the helper validates the
        # name -- confirm whether `theta_unc` should be used instead.
        theta_unc = transform_name_to_theta(u)
        if '$DIRECTION_' in hname_with_systematics:
            # Derive BOTH names from the same template. Deriving the minus
            # name from the already-substituted plus name leaves nothing to
            # replace and makes both names identical.
            template = hname_with_systematics.replace('$SYSTEMATIC', u)
            hname_plus = template.replace('$DIRECTION_plusminus', 'plus')
            hname_plus = hname_plus.replace('$DIRECTION_updown', 'up')
            hname_minus = template.replace('$DIRECTION_plusminus', 'minus')
            hname_minus = hname_minus.replace('$DIRECTION_updown', 'down')
        else:
            hname_plus = hname_with_systematics.replace('$SYSTEMATIC', u + 'Up')
            hname_minus = hname_with_systematics.replace('$SYSTEMATIC', u + 'Down')
        histo_plus = rf.get_histogram(hname_plus, include_uncertainties=include_uncertainties,
                                      fail_with_exception=True)
        histo_minus = rf.get_histogram(hname_minus, include_uncertainties=include_uncertainties,
                                       fail_with_exception=True)
        # make the rate uncertainty part of the coefficient function, i.e., normalize plus and minus histograms
        # to nominal and add a lognormal uncertainty to the coefficient function:
        plus_norm = sum(histo_plus[2])
        minus_norm = sum(histo_minus[2])
        lambda_plus = math.log(plus_norm / nominal_norm) * uncs[u]
        lambda_minus = -math.log(minus_norm / nominal_norm) * uncs[u]
        model.get_coeff(theta_obs, theta_proc).add_factor(
            'exp', parameter=u, lambda_plus=lambda_plus, lambda_minus=lambda_minus)
        # Scale the bin contents in place so both variations have the nominal norm.
        utils.mul_list(histo_plus[2], nominal_norm / plus_norm)
        utils.mul_list(histo_minus[2], nominal_norm / minus_norm)
        hf.set_syst_histos(u, histo_plus, histo_minus, uncs[u])
        hf.normalize_to_nominal = True