def _normalize(self, cutflows):
    """Scale every stored histogram by a ratio of cutflow counts, once.

    For each histogram, the owning process is looked up and the
    histogram is scaled by the process' count in the last cut of its
    cutflow divided by its count in the third-to-last cut.  A zero
    denominator results in a scale factor of zero.  Calling this method
    again after the first time does nothing.

    :param cutflows: mapping from cutflow name (``proc.cutflow``) to a
        sequence of cuts, each indexable by process.
    """
    if self.__normalized:
        return
    self.__normalized = True

    for key, histogram in self.__hists.items():
        shifted = key.endswith('Up') or key.endswith('Down')
        if shifted:
            # Keys of shifted histograms carry a "_CMS..." suffix after
            # the process name; strip it to find the process.
            basename, _ = key.rsplit('_CMS', 1)
        else:
            basename = key
        proc = Process.get(basename)

        logging.debug("normalizing histogram {0}, process {1}".format(self.__name, proc))

        flow = cutflows[proc.cutflow]
        denominator = float(flow[-3][proc])
        if denominator == 0.:
            weight = 0.
        else:
            weight = flow[-1][proc] / denominator
        histogram.Scale(weight)
def _add_legend(self, config, factor):
    """Build the legend for the configured backgrounds and signals.

    Every background gets a filled box, followed by one box for the
    background uncertainty.  Signals are drawn as lines on a new row;
    if `factor` differs from 1, it is appended to the signal label.

    :param config: mapping with ``'backgrounds'`` and ``'signals'``
        lists of single-entry ``{process: color}`` dictionaries.
    :param factor: scale factor shown in the signal labels.
    :returns: the populated `Legend`.
    """
    legend = Legend(0.05, 3, 0.08)

    for entry in config['backgrounds']:
        name, color = entry.items()[0]
        fill_color = self._eval(color)
        legend.draw_box(1001, fill_color, Process.get(name).fullname)
    legend.draw_box(3654, r.kBlack, "Bkg. err.", True)
    # TODO add collisions

    legend.new_row()
    for entry in config['signals']:
        name, color = entry.items()[0]
        text = Process.get(name).fullname
        if factor != 1:
            text = text + " (#times {0:.1f})".format(factor)
        legend.draw_line(2, self._eval(color), text)

    return legend
def cutflow(cuts, procs, relative=False, f=sys.stdout):
    """Write a table of event counts per cut and process to `f`.

    Every entry of `procs` is expanded via ``Process.expand`` and the
    counts of the resulting sub-processes are summed into one column.

    :param cuts: sequence of cuts; each is indexable by process and its
        unicode representation is used as the row label.
    :param procs: process names, used as column headers.
    :param relative: if true, every row but the first is shown as the
        fraction of the preceding row.  A preceding count of zero gives
        a fraction of zero instead of raising `ZeroDivisionError`.
    :param f: file-like object to write the table to.
    """
    expanded_proc = [Process.expand(proc) for proc in procs]
    cutdata = [[sum(float(cut[p]) for p in ps) for ps in expanded_proc] for cut in cuts]

    if relative:
        # Walk from the last row backwards so that each division still
        # sees the absolute counts of the preceding row.
        for i in xrange(1, len(cutdata)):
            cutdata[-i] = [
                (float(b) / a if a != 0 else 0.)
                for a, b in zip(cutdata[-(i + 1)], cutdata[-i])
            ]

    namelength = max(len(unicode(cut)) for cut in cuts)

    # Columns are as wide as the process name or the formatted count of
    # the first cut, whichever is longer.
    fieldlengths = []
    for proc, subprocs in zip(procs, expanded_proc):
        val = sum(cuts[0][p] for p in subprocs)
        length = max(len(proc), len("{:.2f}".format(float(val))))
        fieldlengths.append(length)

    header = u"{{:{0}}}".format(namelength) \
        + u"".join(u" {{:{0}}}".format(fl) for fl in fieldlengths) \
        + u"\n"
    row_format = u"{{:{0}}}".format(namelength) \
        + "".join(" {{:{0}.2f}}".format(fl) for fl in fieldlengths) \
        + "\n"

    f.write(header.format("Cut", *procs))
    f.write("-" * namelength + "".join(" " + "-" * fl for fl in fieldlengths) + "\n")
    for cut, data in zip(cuts, cutdata):
        f.write(row_format.format(cut, *data))
def get_event_count(cls, f, proc, category, fmt, unweighed):
    """Return the event count of `proc` from the ``'Events'`` plot.

    The plot is cleared and re-read from `f` for the sub-processes of
    `proc` before the count is extracted.

    :param f: source passed on to the plot's ``read``.
    :param proc: process to count events for.
    :param category: category passed on to ``read``.
    :param fmt: histogram name format passed on to ``read``.
    :param unweighed: if true, return the number of histogram entries,
        otherwise the content of bin 1.
    """
    events = cls.__plots['Events']
    events.clear()
    events.read(f, category, Process.expand(proc), fmt=fmt)

    histogram = events._get_histogram(proc)
    if unweighed:
        return histogram.GetEntries()
    return histogram.GetBinContent(1)
def cutflow(cuts, processes, relative=False, weighed=False, f=sys.stdout):
    """Print a cutflow table for the processes present in the cuts.

    Every entry of `processes` is expanded via ``Process.expand``;
    sub-processes unknown to the first cut are dropped, and processes
    left without any sub-process are omitted from the table.

    :param cuts: sequence of cuts, each indexable by process.
    :param processes: process names to show as columns.
    :param relative: if true, show every row but the first as the
        fraction of the preceding row (zero if that row is empty).
    :param weighed: if true, drop the trailing `StaticCut` rows and
        rescale the rows from index 3 onwards by the ratio of the final
        row to the last row that is kept.
    :param f: file-like object handed to ``print_cuts``.
    """
    expanded_proc = []
    procs = []
    for proc in processes:
        subs = [p for p in Process.expand(proc) if str(p) in cuts[0].processes()]
        if len(subs) > 0:
            expanded_proc.append(subs)
            procs.append(proc)

    cutdata = [[sum(float(cut[p]) for p in ps) for ps in expanded_proc] for cut in cuts]

    if weighed and cutdata:
        # `n` ends up as the number of trailing StaticCut rows (or the
        # last index, if every cut is static).
        for n, c in enumerate(reversed(cuts)):
            if not isinstance(c, StaticCut):
                break
        ratios = [a / (b if b != 0 else 1) for a, b in zip(cutdata[-1], cutdata[-(n + 1)])]
        # Without trailing static cuts there is nothing to strip; note
        # that `cutdata[:-0]` would discard every row.
        if n > 0:
            cutdata = cutdata[:-n]
        for i in xrange(3, len(cutdata)):
            cutdata[i] = [a * b for a, b in zip(cutdata[i], ratios)]

    if relative:
        # Walk from the last row backwards so that each division still
        # sees the absolute counts of the preceding row.
        for i in xrange(1, len(cutdata)):
            cutdata[-i] = [
                (float(b) / a if a != 0 else 0)
                for a, b in zip(cutdata[-(i + 1)], cutdata[-i])
            ]

    # The last argument is 5 for relative tables and 2 otherwise --
    # presumably the number of decimals; confirm against print_cuts.
    print_cuts(cuts, procs, cutdata, expanded_proc, "Cut", f, 5 if relative else 2)
def cutflow(cuts, procs, relative=False, f=sys.stdout):
    """Write a table of event counts per cut and process to `f`.

    Every entry of `procs` is expanded via ``Process.expand`` and the
    counts of the resulting sub-processes are summed into one column.

    :param cuts: sequence of cuts; each is indexable by process and its
        unicode representation is used as the row label.
    :param procs: process names, used as column headers.
    :param relative: if true, every row but the first is shown as the
        fraction of the preceding row.  A preceding count of zero gives
        a fraction of zero instead of raising `ZeroDivisionError`.
    :param f: file-like object to write the table to.
    """
    expanded_proc = [Process.expand(proc) for proc in procs]
    cutdata = [[sum(float(cut[p]) for p in ps) for ps in expanded_proc] for cut in cuts]

    if relative:
        # Walk from the last row backwards so that each division still
        # sees the absolute counts of the preceding row.
        for i in xrange(1, len(cutdata)):
            cutdata[-i] = [
                (float(b) / a if a != 0 else 0.)
                for a, b in zip(cutdata[-(i + 1)], cutdata[-i])
            ]

    namelength = max(len(unicode(cut)) for cut in cuts)

    # Columns are as wide as the process name or the formatted count of
    # the first cut, whichever is longer.
    fieldlengths = []
    for proc, subprocs in zip(procs, expanded_proc):
        val = sum(cuts[0][p] for p in subprocs)
        length = max(len(proc), len("{:.2f}".format(float(val))))
        fieldlengths.append(length)

    header = u"{{:{0}}}".format(namelength) \
        + u"".join(u" {{:{0}}}".format(fl) for fl in fieldlengths) \
        + u"\n"
    row_format = u"{{:{0}}}".format(namelength) \
        + "".join(" {{:{0}.2f}}".format(fl) for fl in fieldlengths) \
        + "\n"

    f.write(header.format("Cut", *procs))
    f.write("-" * namelength + "".join(" " + "-" * fl for fl in fieldlengths) + "\n")
    for cut, data in zip(cuts, cutdata):
        f.write(row_format.format(cut, *data))
def _get_histogram(self, process, systematic=None):
    """Return a new histogram for `process`, optionally shifted.

    For a `BasicProcess`, the stored histogram is cloned; for ``TH1``
    histograms the overflow bin is added to the last bin (content and
    error in quadrature).  Systematics starting with ``Relative`` are
    not looked up but applied as a scale factor derived from
    ``proc.relativesys()``, clipped at zero.

    Any other process is assembled from its sub-processes: the first
    available histogram is cloned and the remaining ones are added with
    weight ``proc.factor``.

    :param process: a `Process` instance or a process name.
    :param systematic: name of the shifted histogram to use, or `None`.
    :raises KeyError: if a histogram is not stored, or a composite
        process yields no histogram at all.
    """
    if isinstance(process, Process):
        proc, process = process, str(process)
    else:
        proc = Process.get(process)

    if not isinstance(proc, BasicProcess):
        combined = None
        for sub in proc.subprocesses:
            part = self._get_histogram(sub, systematic)
            if combined:
                combined.Add(part, proc.factor)
            else:
                combined = part.Clone()
        if not combined:
            raise KeyError(process)
        return combined

    scale = 1.
    if systematic and systematic.startswith('Relative'):
        relative = proc.relativesys()
        direction = 1. if systematic.endswith('Up') else -1.
        scale = max(0, 1 + relative * direction)
        # Relative shifts have no histogram of their own.
        systematic = None

    key = process + ('_' + systematic if systematic else '')
    result = self.__hists[key].Clone()

    if result.ClassName().startswith('TH1'):
        last = result.GetNbinsX()
        overflow = last + 1
        # Compute both values before modifying the histogram.
        error = math.sqrt(result.GetBinError(last) ** 2 + result.GetBinError(overflow) ** 2)
        content = result.GetBinContent(last) + result.GetBinContent(overflow)
        result.SetBinContent(last, content)
        result.SetBinError(last, error)

    if scale != 1.:
        result.Scale(scale)
    return result
def _read_samples(fn, entries, variables):
    """Read events and per-event weights for a list of configured processes.

    :param fn: path of the ROOT file holding one tree per process.
    :param entries: list of ``{process: weight}`` dictionaries, where
        `weight` is either a constant number or the name of a branch.
    :param variables: branch names to read as input variables.
    :returns: ``(data, weights)`` arrays concatenated over all expanded
        processes, or ``(None, None)`` if nothing was read.
    """
    from ttH.TauRoast.processing import Process

    data = []
    weights = []
    for cfg in entries:
        for proc, weight in cfg.items():
            for p in Process.expand(proc):
                logging.debug('reading {}'.format(p))
                d = rec2array(root2array(fn, str(p), variables))
                if isinstance(weight, (int, float)):
                    # Force a float array: an integer constant must not
                    # truncate the cross-section scaling below.
                    w = np.full(len(d), weight, dtype=float)
                else:
                    w = rec2array(root2array(fn, str(p), [weight])).ravel()
                # Not in place, so integer weight branches are promoted;
                # float() guards against Python 2 integer division.
                w = w * (p.cross_section / float(p.events))
                data.append(d)
                weights.append(w)

    if not data:
        return None, None
    return np.concatenate(data), np.concatenate(weights)


def read_inputs(config, setup):
    """Read signal and background events and weights from the ntuple.

    Events are read from ``ntuple.root`` in the ``indir`` of `config`
    (falling back to ``outdir``).  Every event weight is scaled by the
    cross section over the event count of its process, and background
    weights are renormalized such that their sum equals the sum of the
    signal weights.

    :param config: mapping with ``'outdir'`` and optionally ``'indir'``.
    :param setup: mapping with ``'signals'``, ``'backgrounds'`` and
        ``'variables'``.
    :returns: ``(signal, signal_weights, background, background_weights)``.
    """
    fn = os.path.join(config.get("indir", config["outdir"]), "ntuple.root")

    signal, signal_weights = _read_samples(fn, setup['signals'], setup['variables'])
    background, background_weights = _read_samples(fn, setup['backgrounds'], setup['variables'])

    factor = np.sum(signal_weights) / np.sum(background_weights)
    logging.info("renormalizing background events by factor {}".format(factor))
    background_weights *= factor

    return signal, signal_weights, background, background_weights
def add_mva(args, config):
    """Run ``add_mva`` on every process used for plots or limits.

    Each process from the ``'plot'`` and ``'limits'`` configuration is
    expanded via ``Process.expand`` and handled once.  Without
    ``args.systematics`` only the nominal ``'NA'`` variation is used;
    otherwise the distinct systematics configured for the cutflow of
    the process.

    :param args: parsed command line arguments (uses ``systematics``).
    :param config: analysis configuration mapping.
    """
    ntuple = os.path.join(config["outdir"], "ntuple.root")

    requested = config['plot'] + config['limits']
    for proc in set(sum((Process.expand(p) for p in requested), [])):
        if args.systematics:
            weights = config.get(proc.cutflow + ' weights')
            configured = config.get(proc.cutflow + ' systematics', [])
            shifts = set(name for name, _ in expand_systematics(configured, weights))
        else:
            shifts = ['NA']

        for shift in shifts:
            logging.info("using systematics: " + shift)
            proc.add_mva(config, ntuple, shift)
def _add_legend(self, factor):
    """Create the legend for the processes present in this plot.

    Backgrounds and signals are taken from ``self._plotconfig``; entries
    whose process is not listed as present are skipped.

    :param factor: signal scale factor; appended to the signal labels
        if it differs from 1.
    :returns: the populated `Legend`.
    """
    # NOTE(review): the nesting of the two loops and of the final
    # `new_row()` relative to the `if` statements should be confirmed.
    legend = Legend(0.05, 4, 0.03)
    # The data marker is only drawn when backgrounds are shown as well.
    if len(self.__backgrounds_present) > 0:
        legend.draw_marker(20, r.kBlack, "Data")
    for cfg in self._plotconfig['backgrounds']:
        # Start from a default fill style (1001 -- presumably ROOT's
        # solid fill) that the configuration may override.
        props = {'SetFillStyle': 1001}
        props.update(cfg)
        # Everything but the process name is treated as a style property.
        bkg = props.pop('process')
        if bkg not in self.__backgrounds_present:
            continue
        legend.draw_box({k: self._eval(v) for (k, v) in props.items()}, Process.get(bkg).fullname, centerline=False)
    # Signals go on a row of their own.
    if len(self.__signals_present) > 0:
        legend.new_row()
    for cfg in self._plotconfig['signals']:
        # Signal entries are single-item {process: color} dictionaries.
        sig, color = cfg.items()[0]
        if sig not in self.__signals_present:
            continue
        label = Process.get(sig).fullname
        if factor != 1:
            label += " (#times {0:.1f})".format(factor)
        legend.draw_line(2, self._eval(color), label)
    legend.new_row()
    return legend
def write(self, file, cutflows, category, systematics=None, procs=None, fmt="{p}_{c}_{v}"):
    """Write histograms of the plot to `file`.

    Use the specified `category` and normalize histograms via the
    cutflows passed by `cutflows`.  Can be limited to processes
    specified in `procs`.  Optionally, use `systematics`, and write
    with the format specified in `fmt`, where `p` is the process name,
    `c` the category, and `v` the limit name of the current plot.
    Systematics always get appended to the format string (with an
    underscore).

    Histograms that are not available for a process or variation are
    skipped.

    :param file: ROOT file to write to.
    :param cutflows: cutflows used to normalize the histograms.
    :param category: category name used in the histogram name.
    :param systematics: iterable of systematic names; the nominal
        histogram is always written.
    :param procs: processes to write; defaults to every process with a
        stored nominal histogram.
    :param fmt: format string for the histogram name.
    """
    self._normalize(cutflows)

    if systematics is None:
        systematics = []
    # Accept any iterable, and always include the nominal variation.
    systematics = set(systematics) | set(['NA'])

    uncertainties = []
    for systematic in systematics:
        if systematic == 'NA':
            uncertainties.append((None, ''))
        else:
            uncertainties.append((systematic + 'Up', '_{}Up'.format(systematic)))
            uncertainties.append((systematic + 'Down', '_{}Down'.format(systematic)))

    if procs is None:
        procs = [Process.get(k) for k in self.__hists
                 if (not k.endswith('Up')) and (not k.endswith('Down'))]
    else:
        procs = [Process.get(p) for p in procs]

    for proc in procs:
        for uncertainty, suffix in uncertainties:
            histname = fmt.format(p=proc.limitname, v=self.__limitname, c=category)
            histname += suffix
            logging.debug("writing histogram {0}".format(histname))
            # Only the lookup may fail for a missing histogram; errors
            # raised while writing must not be swallowed.
            try:
                hist = self._get_histogram(proc, uncertainty)
            except KeyError:
                logging.debug("no histogram available for {0}, skipping".format(histname))
                continue
            file.WriteObject(hist, histname, "WriteDelete")
def normalize(cuts, lumi, limit=None):
    """Append dataset- and luminosity-normalized counts to `cuts`.

    The cuts named "dataset processed", "dataset event weights",
    "ntuple" and "ntuple analyzed" (case-insensitive) are used to
    derive the normalization.  Two `StaticCut` entries, "Dataset norm"
    and "Luminosity norm", are computed from the counts of the last cut
    and appended to `cuts` in place.  Processes whose name starts with
    "collisions" or "fakes" are copied unchanged.

    :param cuts: list of cuts, modified in place.
    :param lumi: luminosity to scale simulated processes to.
    :param limit: event limit; if set and reached by the analyzed event
        count, counts are scaled up for the partially analyzed dataset.
    """
    weights = processed = ntuplized = analyzed = None
    for cut in cuts:
        label = str(cut).lower()
        if label == "dataset processed":
            processed = cut
        elif label == "dataset event weights":
            weights = cut
        elif label == "ntuple":
            ntuplized = cut
        elif label == "ntuple analyzed":
            analyzed = cut
        elif processed and weights and ntuplized and analyzed:
            # All bookkeeping cuts found; no need to look further.
            break

    dsetnorm = StaticCut("Dataset norm")
    luminorm = StaticCut("Luminosity norm")

    final = cuts[-1]
    for proc in final.processes():
        name = str(proc)
        if name.startswith("collisions") or name.startswith("fakes"):
            dsetnorm[proc] = final[proc]
            luminorm[proc] = final[proc]
            continue

        p = Process.get(proc)
        scale = processed[proc] / float(weights[proc])

        fraction = 1.
        if ntuplized[proc] == 0 or analyzed[proc] == 0:
            logging.warning("0 event count for {}".format(proc))
        elif limit and not analyzed[proc] < limit:
            fraction = analyzed[proc] / float(ntuplized[proc])
            logging.warning("scaling {} by {} to compensate for partially analyzed dataset".format(proc, 1. / fraction))

        dsetnorm[proc] = final[proc] * scale / fraction
        luminorm[proc] = final[proc] * scale / fraction * lumi * p.cross_section / float(p.events)

    cuts.append(dsetnorm)
    cuts.append(luminorm)
def analyze(args, config):
    """Analyze all processes needed for plots and limits; save the cutflows.

    With ``args.reuse``, previously saved cutflows are loaded and
    processes already contained in them are skipped; otherwise an
    existing ``ntuple.root`` in the output directory is removed and the
    cutflows are set up from scratch.  Afterwards, the cutflows are
    concatenated, normalized, and saved.

    :param args: parsed command line arguments (uses ``reuse``,
        ``systematics`` and ``debug_cuts``).
    :param config: analysis configuration mapping.
    """
    ntuple = os.path.join(config["outdir"], "ntuple.root")

    if not args.reuse:
        if os.path.exists(ntuple):
            os.unlink(ntuple)
        cutflows = setup_cuts(config)
    else:
        cutflows = split_cuts(load_cutflows(config))

    requested = config['plot'] + config['limits']
    for proc in set(sum((Process.expand(p) for p in requested), [])):
        if args.systematics:
            weights = config.get(proc.cutflow + ' weights')
            systematics = config.get(proc.cutflow + ' systematics', [])
            shifts = [s for s, w in expand_systematics(systematics, weights)]
        else:
            shifts = ['NA']

        for shift in shifts:
            key = proc.cutflow if shift == 'NA' else proc.cutflow + '_' + shift
            counts, cuts, weights = cutflows[key]

            # Skip processes already present in reused cutflows.
            if len(counts) > 0 and str(proc) in counts[0].processes():
                continue

            logging.info("using systematics: " + shift)

            # Process specific cuts are put in front of the shared ones.
            local_cuts = list(cuts)
            for cfg in proc.additional_cuts:
                local_cuts.insert(0, Cut(*cfg))

            proc.analyze(config, ntuple, counts, local_cuts, weights, shift, args.debug_cuts)

    merged = Cutflows()
    for name, (counts, cuts, weights) in cutflows.items():
        combined = counts + cuts + weights
        normalize(combined, config["lumi"], config.get("event limit"))
        merged[name] = combined
    merged.save(config)
def fill(args, config):
    """Fill all plots per category and write them to ROOT files.

    Loads and normalizes the saved cutflows, then, for every category,
    resets the plots, fills them for each atomic process (and each
    systematic variation, if ``args.systematics`` is set), and writes
    them to ``plots.root``; plots listed under ``"discriminants"`` are
    additionally written to ``limits.root`` for the limit processes.

    :param args: parsed command line arguments (uses ``systematics``
        and ``essential``).
    :param config: analysis configuration mapping.
    """
    cutflows = load_cutflows(config)
    # NOTE(review): `normalize` appends to each cut list in place --
    # confirm that the loaded cutflows are not already normalized.
    for name, cuts in cutflows.items():
        normalize(cuts, config["lumi"], config.get("event limit"))

    categories, definitions = get_categories(config)

    # Atomic processes are filled; the processes as named in the
    # configuration are what gets written out.
    atomic_processes = set(sum(map(Process.expand, config['plot'] + config['limits']), []))
    limit_processes = config["limits"]
    all_processes = set(Process.get(n) for n in limit_processes + config['plot']) | atomic_processes

    # Histogram names are built from the limit names, so duplicates
    # would overwrite each other.
    if len(all_processes) != len(set([p.limitname for p in all_processes])):
        logging.error("the limit names of the processes are not unique and will lead to collisions!")

    # Keep the cutflow next to the output when reading from elsewhere.
    if 'indir' in config and config['indir'] != config['outdir']:
        shutil.copy(
            os.path.join(config['indir'], 'cutflow.pkl'),
            os.path.join(config['outdir'], 'cutflow.pkl')
        )

    fn = os.path.join(config.get("indir", config["outdir"]), "ntuple.root")
    # `forest` is not referenced again until the final `del`;
    # presumably the plots access it implicitly -- TODO confirm.
    forest = Forest(fn)

    for category, definition in zip(categories, definitions):
        logging.info("filling category: " + category)

        Plot.reset()

        for proc in atomic_processes:
            logging.info("filling process: " + str(proc))

            weights = config.get(proc.cutflow + ' weights')
            systematics = config.get(proc.cutflow + ' systematics', [])
            uncertainties = [('NA', weights)]
            if args.systematics:
                uncertainties = expand_systematics(systematics, weights)
            for n, (systematic, weights) in enumerate(uncertainties):
                logging.info("using systematics: " + systematic)
                logging.info("using weights: " + ", ".join(weights))
                for p in Plot.plots():
                    # Non-essential plots are only filled for the first
                    # variation, and not at all with `args.essential`.
                    if (not args.essential and n == 0) or p.essential():
                        p.fill(proc, systematic, weights, definition)

        # Collect the distinct systematics of all filled processes.
        uncertainties = None
        if args.systematics:
            uncertainties = list(set(sum((config.get(p.cutflow + ' systematics', []) for p in atomic_processes), [])))

        logging.info("writing out plots for category: " + category)
        fn = os.path.join(config["outdir"], "plots.root")
        with open_rootfile(fn) as f:
            for p in Plot.plots():
                p.write(f, cutflows, category, uncertainties, procs=all_processes, fmt=config["histformat"])

        # Only discriminants are written for the limit processes.
        discriminants = config.get("discriminants", [])
        fn = os.path.join(config["outdir"], "limits.root")
        with open_rootfile(fn) as f:
            for p in Plot.plots():
                if p.limitname in discriminants:
                    p.write(f, cutflows, category, uncertainties, procs=limit_processes, fmt=config["histformat"])

        # Report the ten fastest and ten slowest plots.
        timing = sorted(Plot.plots(), key=lambda p: p._time)
        for p in timing[:10] + timing[-10:]:
            logging.debug("plot filling time for {0}: {1}".format(p, p._time))
        logging.info("done filling category: " + category)
    del forest
def _get_histogram(self, process):
    """Return the summed histogram of all sub-processes of `process`.

    The histogram of the first expanded process is cloned and the
    remaining ones are added to the clone; stored histograms are left
    untouched.

    :param process: process to expand via ``Process.expand``.
    :returns: a new histogram holding the sum.
    """
    expanded = Process.expand(process)
    total = self.__hists[expanded[0]].Clone()
    for sub in expanded[1:]:
        total.Add(self.__hists[sub])
    return total