def clf_channels(self, clf, category, region,
                 cuts=None,
                 bins=10,
                 limits=None,
                 mass=None,
                 mode=None,
                 systematics=True,
                 unblind=False,
                 hybrid_data=False,
                 no_signal_fixes=False,
                 uniform=False,
                 mva=False,
                 min_score=None,
                 max_score=None,
                 include_signal=True):
    """
    Build the HistFactory channel of classifier scores for one mass point.

    Parameters
    ----------
    clf, category, region, cuts, mode, systematics :
        Forwarded to ``self.get_scores`` and/or to each sample's
        ``get_histfactory_sample``.
    bins : int or sequence
        An int gives that many bins between ``limits`` (when given) or
        between ``min_score`` and ``max_score``. Anything else is passed
        to ``Hist`` as-is and indexed as a sequence of edges.
    limits : pair or None
        ``(low, high)`` range, only read when ``bins`` is an int.
    mass :
        Mass hypothesis; selects the signal scores and is embedded in the
        channel name.
    unblind : bool, int or float
        ``False`` overwrites the whole data histogram with ``(0, 0)``.
        A non-bool int ``N`` overwrites the slice ``[-(N + 1):]``.
        A float overwrites every bin from the one containing the
        ``efficiency_cut`` score of the summed signal upwards.
        ``True`` leaves the data untouched.
    hybrid_data : bool
        When set (and ``unblind`` is not ``True``), the bins described
        above are filled with the sum of all background and signal
        sample histograms instead.
    no_signal_fixes :
        Forwarded to the signal samples only.
    uniform, mva :
        Forwarded to the samples (``mva`` is not passed for data).
    min_score, max_score :
        Score range; each defaults to the value on the scores object.
        Forwarded to the background samples only.
    include_signal : bool
        When false no signal samples are created.

    Returns
    -------
    tuple
        ``(scores_obj, channel)``: the object returned by
        ``self.get_scores`` and the result of
        ``histfactory.make_channel``.
    """
    log.info("constructing channels")

    # the scores provide the default histogram range
    score_set = self.get_scores(
        clf, category, region,
        cuts=cuts,
        masses=[mass],
        mode=mode,
        systematics=systematics,
        unblind=unblind)
    observed_scores = score_set.data_scores
    background_scores = score_set.bkg_scores
    signal_scores_by_mass = score_set.all_sig_scores
    min_score = score_set.min_score if min_score is None else min_score
    max_score = score_set.max_score if max_score is None else max_score

    if not isinstance(bins, int):
        # explicit bin edges: only warn when scores fall outside of them
        if bins[0] > min_score:
            log.warning(
                "min score is less than first edge (will be underflow)")
        if bins[-1] <= max_score:
            log.warning(
                "max score is greater than or equal to last edge "
                "(will be overflow)")
        binning = Hist(bins, type='D')
    elif limits is None:
        binning = Hist(bins, min_score, max_score, type='D')
    else:
        low, high = limits
        binning = Hist(bins, low, high, type='D')

    def make_sample(source, source_scores, **extra):
        # every sample gets its own clone of the binning, titled and
        # decorated like the sample it belongs to
        template = binning.Clone(title=source.label, **source.hist_decor)
        return source.get_histfactory_sample(
            template, clf, category, region,
            cuts=cuts,
            scores=source_scores,
            systematics=systematics,
            uniform=uniform,
            mva=mva,
            **extra)

    bkg_samples = [
        make_sample(bkg, bkg_values,
                    min_score=min_score, max_score=max_score)
        for bkg, bkg_values in background_scores]

    data_sample = None
    if observed_scores is not None:
        hist_template = binning.Clone(
            title=self.data.label, **self.data.hist_decor)
        data_sample = self.data.get_histfactory_sample(
            hist_template, clf, category, region,
            cuts=cuts,
            scores=observed_scores,
            uniform=uniform)
        # presumably (content, error) of a blinded bin -- TODO confirm
        blinded = (0, 0)
        if unblind is False:
            # blind the full histogram
            data_sample.hist[:] = blinded
        elif isinstance(unblind, int) and unblind is not True:
            # blind the highest N bins
            data_sample.hist[-(unblind + 1):] = blinded
        elif isinstance(unblind, float):
            # blind everything above the requested signal efficiency
            signal_hists = [
                histogram_scores(hist_template, sig_values)
                for _, sig_values in signal_scores_by_mass[mass]]
            max_unblind_score = efficiency_cut(sum(signal_hists), unblind)
            blind_bin = hist_template.FindBin(max_unblind_score)
            data_sample.hist[blind_bin:] = blinded

    # signal HistFactory samples
    sig_samples = []
    if include_signal:
        sig_samples = [
            make_sample(sig, sig_values, no_signal_fixes=no_signal_fixes)
            for sig, sig_values in signal_scores_by_mass[mass]]

    # NOTE(review): this branch dereferences data_sample (and, for a float
    # unblind, blind_bin), both of which are only set when data scores
    # were available above -- confirm callers never request hybrid data
    # without them.
    if hybrid_data and unblind is not True:
        # substitute signal + background for the data in the blind bins
        expected = sum([sample.hist for sample in bkg_samples + sig_samples])
        if unblind is False:
            # whole histogram
            data_sample.hist[:] = expected[:]
        elif isinstance(unblind, int):
            # highest N bins
            first_blind = -(unblind + 1)
            data_sample.hist[first_blind:] = expected[first_blind:]
        elif isinstance(unblind, float):
            data_sample.hist[blind_bin:] = expected[blind_bin:]

    # one channel per mass point
    channel_name = 'hh_{0}_{1}_{2}'.format(
        self.year % 1000, category.name, mass)
    channel = histfactory.make_channel(
        channel_name, bkg_samples + sig_samples, data=data_sample)
    return score_set, channel
def plot_clf(
    background_scores,
    category,
    signal_scores=None,
    signal_scale=1.0,
    data_scores=None,
    name=None,
    draw_histograms=True,
    draw_data=False,
    save_histograms=False,
    hist_template=None,
    bins=10,
    min_score=0,
    max_score=1,
    signal_colors=cm.spring,
    systematics=None,
    unblind=False,
    **kwargs
):
    """
    Histogram classifier scores per sample and optionally draw them.

    Parameters
    ----------
    background_scores : iterable of (sample, scores_dict)
        ``scores_dict`` maps ``"NOMINAL"`` and any systematic term to a
        ``(scores, weight)`` pair.
    category :
        Forwarded to ``draw``.
    signal_scores : iterable of (sample, scores_dict) or None
        Same layout as ``background_scores``.
    signal_scale, signal_colors, systematics :
        Forwarded to ``draw``.
    data_scores : (data, scores) pair or None
        Only used when ``draw_data`` is true and ``unblind`` is not
        ``False``.
    name : str or None
        Suffix appended to the output name ``event_bdt_score``.
    draw_histograms : bool
        When true, ``draw`` is called twice: linear and log y axis.
    draw_data : bool
        Must be true for a data histogram to be built.
    save_histograms :
        Accepted but not read by this function.
    hist_template :
        Histogram cloned for every sample. Built from ``bins`` (and
        ``min_score``/``max_score`` for a bin count) when ``None``.
    bins : int or iterable
        Bin count, or bin edges when it has ``__iter__``.
    min_score, max_score :
        Range used only with a bin count and no ``hist_template``.
    unblind : bool, int or float
        ``False`` suppresses the data histogram. A float keeps only data
        scores below the ``efficiency_cut`` of the summed signal at that
        efficiency. ``True`` or a value ``>= 1`` additionally logs the
        data and model yields.
    **kwargs :
        Forwarded to ``draw``.

    Returns
    -------
    tuple
        ``(bkg_hists, sig_hists, data_hist)``; ``sig_hists`` and
        ``data_hist`` are ``None`` when not built.
    """
    if hist_template is None:
        if hasattr(bins, "__iter__"):
            # variable width bins
            hist_template = Hist(bins)
        else:
            hist_template = Hist(bins, min_score, max_score)

    def _fill_hists(sample_scores):
        # one decorated nominal histogram per sample, carrying a
        # histogram for each systematic term in `.systematics`
        hists = []
        for sample, scores_dict in sample_scores:
            nominal = hist_template.Clone(title=sample.label)
            scores, weight = scores_dict["NOMINAL"]
            fill_hist(nominal, scores, weight)
            nominal.decorate(**sample.hist_decor)
            nominal.systematics = {}
            for sys_term, (sys_scores, sys_weight) in scores_dict.items():
                if sys_term == "NOMINAL":
                    continue
                sys_hist = hist_template.Clone()
                fill_hist(sys_hist, sys_scores, sys_weight)
                nominal.systematics[sys_term] = sys_hist
            hists.append(nominal)
        return hists

    bkg_hists = _fill_hists(background_scores)
    sig_hists = None
    if signal_scores is not None:
        sig_hists = _fill_hists(signal_scores)

    data_hist = None
    if data_scores is not None and draw_data and unblind is not False:
        data, data_scores = data_scores
        # NOTE(review): with a float `unblind` and no signal histograms
        # no cut is applied and all data scores are histogrammed.
        if isinstance(unblind, float) and sig_hists is not None:
            # unblind only up to the requested signal efficiency; the
            # cut must follow `unblind` rather than a fixed value
            cut = efficiency_cut(sum(sig_hists), unblind)
            data_scores = data_scores[data_scores < cut]
        data_hist = hist_template.Clone(title=data.label)
        data_hist.decorate(**data.hist_decor)
        fill_hist(data_hist, data_scores)
        if unblind >= 1 or unblind is True:
            data_total = sum(data_hist)
            model_total = sum(sum(bkg_hists))
            log.info("Data events: %d" % data_total)
            log.info("Model events: %f" % model_total)
            for hist in bkg_hists:
                log.info("{0} {1}".format(hist.GetTitle(), sum(hist)))
            if model_total:
                log.info("Data / Model: %f" % (data_total / model_total))
            else:
                # an empty model must not abort the plotting
                log.warning("Model is empty; skipping Data / Model ratio")

    if draw_histograms:
        output_name = "event_bdt_score"
        if name is not None:
            output_name += "_" + name
        for logy in (False, True):
            draw(
                data=data_hist,
                model=bkg_hists,
                signal=sig_hists,
                signal_scale=signal_scale,
                category=category,
                name="BDT Score",
                output_name=output_name,
                show_ratio=data_hist is not None,
                model_colors=None,
                signal_colors=signal_colors,
                systematics=systematics,
                logy=logy,
                **kwargs
            )
    return bkg_hists, sig_hists, data_hist
def clf_channels(self, clf, category, region,
                 cuts=None,
                 bins=10,
                 limits=None,
                 mass=None,
                 mode=None,
                 systematics=True,
                 unblind=False,
                 hybrid_data=False,
                 no_signal_fixes=False,
                 uniform=False,
                 mva=False):
    """
    Build the HistFactory channel of classifier scores for one mass point.

    Parameters
    ----------
    clf, category, region, cuts, mode, systematics :
        Forwarded to ``self.get_scores`` and/or to each sample's
        ``get_histfactory_sample``.
    bins : int or sequence
        An int gives that many bins between ``limits`` (when given) or
        between the minimum and maximum score of the scores object.
        Anything else is passed to ``Hist`` as-is and indexed as a
        sequence of edges.
    limits : pair or None
        ``(low, high)`` range, only read when ``bins`` is an int.
    mass :
        Mass hypothesis; selects the signal scores and is embedded in the
        channel name.
    unblind : bool, int or float
        ``False`` overwrites the whole data histogram with ``(0, 0)``.
        A non-bool int ``N`` overwrites the slice ``[-(N + 1):]``.
        A float overwrites every bin from the one containing the
        ``efficiency_cut`` score of the summed signal upwards.
        ``True`` leaves the data untouched.
    hybrid_data : bool
        When set (and ``unblind`` is not ``True``), the bins described
        above are filled with the sum of all background and signal
        sample histograms instead.
    no_signal_fixes :
        Forwarded to the signal samples only.
    uniform, mva :
        Forwarded to the samples (``mva`` is not passed for data).

    Returns
    -------
    tuple
        ``(scores_obj, channel)``: the object returned by
        ``self.get_scores`` and the result of
        ``histfactory.make_channel``.
    """
    log.info("constructing channels")

    # the scores provide the default histogram range
    score_set = self.get_scores(
        clf, category, region,
        cuts=cuts,
        masses=[mass],
        mode=mode,
        systematics=systematics,
        unblind=unblind)
    observed_scores = score_set.data_scores
    background_scores = score_set.bkg_scores
    signal_scores_by_mass = score_set.all_sig_scores
    min_score = score_set.min_score
    max_score = score_set.max_score

    if not isinstance(bins, int):
        # explicit bin edges: only warn when scores fall outside of them
        if bins[0] > min_score:
            log.warning(
                "min score is less than first edge (will be underflow)")
        if bins[-1] <= max_score:
            log.warning(
                "max score is greater than or equal to last edge "
                "(will be overflow)")
        binning = Hist(bins, type='D')
    elif limits is None:
        binning = Hist(bins, min_score, max_score, type='D')
    else:
        low, high = limits
        binning = Hist(bins, low, high, type='D')

    def make_sample(source, source_scores, **extra):
        # every sample gets its own clone of the binning, titled and
        # decorated like the sample it belongs to
        template = binning.Clone(title=source.label, **source.hist_decor)
        return source.get_histfactory_sample(
            template, clf, category, region,
            cuts=cuts,
            scores=source_scores,
            systematics=systematics,
            uniform=uniform,
            mva=mva,
            **extra)

    bkg_samples = [
        make_sample(bkg, bkg_values)
        for bkg, bkg_values in background_scores]

    data_sample = None
    if observed_scores is not None:
        hist_template = binning.Clone(
            title=self.data.label, **self.data.hist_decor)
        data_sample = self.data.get_histfactory_sample(
            hist_template, clf, category, region,
            cuts=cuts,
            scores=observed_scores,
            uniform=uniform)
        # presumably (content, error) of a blinded bin -- TODO confirm
        blinded = (0, 0)
        if unblind is False:
            # blind the full histogram
            data_sample.hist[:] = blinded
        elif isinstance(unblind, int) and unblind is not True:
            # blind the highest N bins
            data_sample.hist[-(unblind + 1):] = blinded
        elif isinstance(unblind, float):
            # blind everything above the requested signal efficiency
            signal_hists = [
                histogram_scores(hist_template, sig_values)
                for _, sig_values in signal_scores_by_mass[mass]]
            max_unblind_score = efficiency_cut(sum(signal_hists), unblind)
            blind_bin = hist_template.FindBin(max_unblind_score)
            data_sample.hist[blind_bin:] = blinded

    # signal HistFactory samples
    sig_samples = [
        make_sample(sig, sig_values, no_signal_fixes=no_signal_fixes)
        for sig, sig_values in signal_scores_by_mass[mass]]

    # NOTE(review): this branch dereferences data_sample (and, for a float
    # unblind, blind_bin), both of which are only set when data scores
    # were available above -- confirm callers never request hybrid data
    # without them.
    if hybrid_data and unblind is not True:
        # substitute signal + background for the data in the blind bins
        expected = sum([sample.hist for sample in bkg_samples + sig_samples])
        if unblind is False:
            # whole histogram
            data_sample.hist[:] = expected[:]
        elif isinstance(unblind, int):
            # highest N bins
            first_blind = -(unblind + 1)
            data_sample.hist[first_blind:] = expected[first_blind:]
        elif isinstance(unblind, float):
            data_sample.hist[blind_bin:] = expected[blind_bin:]

    # one channel per mass point
    channel_name = 'hh_{0}_{1}_{2}'.format(
        self.year % 1000, category.name, mass)
    channel = histfactory.make_channel(
        channel_name, bkg_samples + sig_samples, data=data_sample)
    return score_set, channel
def plot_clf(background_scores,
             category,
             signal_scores=None,
             signal_scale=1.,
             data_scores=None,
             name=None,
             draw_histograms=True,
             draw_data=False,
             save_histograms=False,
             hist_template=None,
             bins=10,
             min_score=0,
             max_score=1,
             signal_colors=cm.spring,
             systematics=None,
             unblind=False,
             **kwargs):
    """
    Histogram classifier scores per sample and optionally draw them.

    Parameters
    ----------
    background_scores : iterable of (sample, scores_dict)
        ``scores_dict`` maps ``'NOMINAL'`` and any systematic term to a
        ``(scores, weight)`` pair.
    category :
        Forwarded to ``draw``.
    signal_scores : iterable of (sample, scores_dict) or None
        Same layout as ``background_scores``.
    signal_scale, signal_colors, systematics :
        Forwarded to ``draw``.
    data_scores : (data, scores) pair or None
        Only used when ``draw_data`` is true and ``unblind`` is not
        ``False``.
    name : str or None
        Suffix appended to the output name ``event_bdt_score``.
    draw_histograms : bool
        When true, ``draw`` is called twice: linear and log y axis.
    draw_data : bool
        Must be true for a data histogram to be built.
    save_histograms :
        Accepted but not read by this function.
    hist_template :
        Histogram cloned for every sample. Built from ``bins`` (and
        ``min_score``/``max_score`` for a bin count) when ``None``.
    bins : int or iterable
        Bin count, or bin edges when it has ``__iter__``.
    min_score, max_score :
        Range used only with a bin count and no ``hist_template``.
    unblind : bool, int or float
        ``False`` suppresses the data histogram. A float keeps only data
        scores below the ``efficiency_cut`` of the summed signal at that
        efficiency. ``True`` or a value ``>= 1`` additionally logs the
        data and model yields.
    **kwargs :
        Forwarded to ``draw``.

    Returns
    -------
    tuple
        ``(bkg_hists, sig_hists, data_hist)``; ``sig_hists`` and
        ``data_hist`` are ``None`` when not built.
    """
    if hist_template is None:
        if hasattr(bins, '__iter__'):
            # variable width bins
            hist_template = Hist(bins)
        else:
            hist_template = Hist(bins, min_score, max_score)

    def _fill_hists(sample_scores):
        # one decorated nominal histogram per sample, carrying a
        # histogram for each systematic term in `.systematics`
        hists = []
        for sample, scores_dict in sample_scores:
            nominal = hist_template.Clone(title=sample.label)
            scores, weight = scores_dict['NOMINAL']
            fill_hist(nominal, scores, weight)
            nominal.decorate(**sample.hist_decor)
            nominal.systematics = {}
            for sys_term, (sys_scores, sys_weight) in scores_dict.items():
                if sys_term == 'NOMINAL':
                    continue
                sys_hist = hist_template.Clone()
                fill_hist(sys_hist, sys_scores, sys_weight)
                nominal.systematics[sys_term] = sys_hist
            hists.append(nominal)
        return hists

    bkg_hists = _fill_hists(background_scores)
    sig_hists = None
    if signal_scores is not None:
        sig_hists = _fill_hists(signal_scores)

    data_hist = None
    if data_scores is not None and draw_data and unblind is not False:
        data, data_scores = data_scores
        # NOTE(review): with a float `unblind` and no signal histograms
        # no cut is applied and all data scores are histogrammed.
        if isinstance(unblind, float) and sig_hists is not None:
            # unblind only up to the requested signal efficiency; the
            # cut must follow `unblind` rather than a fixed value
            cut = efficiency_cut(sum(sig_hists), unblind)
            data_scores = data_scores[data_scores < cut]
        data_hist = hist_template.Clone(title=data.label)
        data_hist.decorate(**data.hist_decor)
        fill_hist(data_hist, data_scores)
        if unblind >= 1 or unblind is True:
            data_total = sum(data_hist)
            model_total = sum(sum(bkg_hists))
            log.info("Data events: %d" % data_total)
            log.info("Model events: %f" % model_total)
            for hist in bkg_hists:
                log.info("{0} {1}".format(hist.GetTitle(), sum(hist)))
            if model_total:
                log.info("Data / Model: %f" % (data_total / model_total))
            else:
                # an empty model must not abort the plotting
                log.warning("Model is empty; skipping Data / Model ratio")

    if draw_histograms:
        output_name = 'event_bdt_score'
        if name is not None:
            output_name += '_' + name
        for logy in (False, True):
            draw(data=data_hist,
                 model=bkg_hists,
                 signal=sig_hists,
                 signal_scale=signal_scale,
                 category=category,
                 name="BDT Score",
                 output_name=output_name,
                 show_ratio=data_hist is not None,
                 model_colors=None,
                 signal_colors=signal_colors,
                 systematics=systematics,
                 logy=logy,
                 **kwargs)
    return bkg_hists, sig_hists, data_hist