def cuts(self, category=None, **kwargs):
    """Return the selection for this sample, optionally within *category*.

    With a category the starting point is
    ``category.cuts & category.common_cuts``; without one it is an empty
    ``Cut``. The sample's own ``_cuts`` are ANDed in when they are truthy.
    Extra keyword arguments are accepted but not used.
    """
    if category is None:
        selection = Cut('')
    else:
        selection = category.cuts & category.common_cuts
    if self._cuts:
        selection &= Cut(self._cuts)
    return selection
def cuts(self, category=None, **kwargs):
    """Return a copy of the sample cuts combined with optional extras.

    The category selection (``category.get_cuts(**kwargs)``) is ANDed in
    when a category is given, followed by the trigger-matching requirement
    when ``self.trigger`` is truthy.
    """
    selection = Cut(self._cuts)
    extra_terms = []
    if category is not None:
        extra_terms.append(category.get_cuts(**kwargs))
    if self.trigger:
        extra_terms.append(Cut('hlt_matched_to_offline == 1'))
    for term in extra_terms:
        selection &= term
    return selection
class Category(object):
    """Base class for analysis categories; subclasses override the attributes."""

    __metaclass__ = CategoryMeta

    # common attrs for all categories. Override in subclasses
    analysis_control = False
    is_control = False
    # category used for normalization
    norm_category = None
    qcd_shape_region = 'nOS'  # no track cut
    target_region = 'OS_ISOL'
    cuts = Cut()
    common_cuts = Cut()
    # imported inside the class body, so ``samples`` is also a class attribute
    from .. import samples
    # by default train with all modes (sliced so the list is a copy)
    train_signal_modes = samples.Higgs.MODES[:]
    plot_label = None

    @classmethod
    def get_cuts(cls, year, deta_cut=True):
        """Return ``cuts & common_cuts`` plus any cut registered for *year*.

        ``deta_cut`` is accepted but not used here.
        """
        selection = cls.cuts & cls.common_cuts
        if not hasattr(cls, 'year_cuts') or year not in cls.year_cuts:
            return selection
        selection &= cls.year_cuts[year]
        return selection

    @classmethod
    def get_parent(cls):
        """Return the first base class for control categories, else the class."""
        return cls.__bases__[0] if cls.is_control else cls

    @classmethod
    def control(cls, name):
        """Look up the control category registered under *name* in ``controls``."""
        return cls.controls[name]
def __init__(self,
             low=DEFAULT_LOW_MASS,
             high=DEFAULT_HIGH_MASS,
             high_sideband_in_control=True,
             mass_window_signal_region=False,
             train_signal_region=False,
             low_cutoff=None):
    """Build the control, signal and training region cuts on ``MMC_MASS``.

    low, high: edges of the mass window (formatted with ``%d``).
    high_sideband_in_control: also put ``MMC_MASS > high`` in the control
        region (asserts ``high > low``).
    mass_window_signal_region: make the signal region the negation of the
        control region instead of leaving it unrestricted.
    train_signal_region: train on the signal region instead of everything.
    low_cutoff: optional lower bound applied to the low sideband.
    """
    # control region is low and high mass sidebands
    control = Cut('%s < %d' % (MMC_MASS, low))
    if low_cutoff is not None:
        control &= Cut('%s > %d' % (MMC_MASS, low_cutoff))
    if high_sideband_in_control:
        assert high > low
        control |= Cut('%s > %d' % (MMC_MASS, high))
    self.__control_region = control

    # signal region: negation of the control region, or unrestricted
    signal = -control if mass_window_signal_region else Cut()
    self.__signal_region = signal

    # train region: the signal region only, or everything
    self.__train_region = signal if train_signal_region else Cut()

    log.info("control region: %s" % self.__control_region)
    log.info("signal region: %s" % self.__signal_region)
    log.info("train region: %s" % self.__train_region)
def train(self, mode='gg', level='reco', **kwargs):
    """
    Register the training inputs and run the BRT training.

    The signal samples are chosen from the module-level ``args.train_mode``
    ('Z', 'mix', or any other value, which is passed on as the Higgs mode).
    The ``mode`` argument is accepted but not read here. ``level`` is
    forwarded to the ``Higgs`` constructors and ``kwargs`` to
    ``self.book_brt``.
    """
    log.info("Training BRT")
    self.set_variables()

    train_mode = args.train_mode
    if train_mode == 'Z':
        arrays = [
            Higgs(tree_name='Tree', mode=train_mode, level=level,
                  masses=[90], suffix='_train'),
        ]
    elif train_mode == 'mix':
        arrays = [
            Higgs(tree_name='Tree', mode=production_mode, level=level,
                  masses=Higgs.MASSES, suffix='_train')
            for production_mode in ("gg", "VBF")
        ]
    else:
        arrays = [
            Higgs(tree_name='Tree', mode=train_mode, level=level,
                  masses=Higgs.MASSES, suffix='_train'),
        ]
    # flatten the per-array component lists into one list of samples
    training_samples = sum([array.components for array in arrays], [])

    # 'SplitMode=Random' is deliberately left out of the options
    options = ':'.join([
        'nTrain_Regression=0',
        'nTest_Regression=1',
        'NormMode=NumEvents',
        '!V',
    ])

    entry_counts = []
    for sample in training_samples:
        rfile = get_file(
            ntuple_path=sample.ntuple_path + relative_path,
            file_name=None,
            student=sample.student,
            hdf=False,
            suffix='_train',
            force_reopen=False)
        tree = rfile[sample.tree_name]
        n_entries = tree.GetEntries()
        entry_counts.append(n_entries)
        ## weight input samples by the inverse of their number of entries
        self.AddRegressionTree(tree, 1. / n_entries)

    #### be careful: preselection cuts must be applied after adding
    #### the regression trees
    preselection = (Cut("ditau_tau1_matched_isHadTau==1") &
                    Cut("ditau_tau0_matched_isHadTau==1"))
    self.PrepareTrainingAndTestTree(preselection, options)
    self.AddRegressionTarget('parent_m')
    # Could reweight samples
    # self.AddWeightExpression("my_expression")

    # Actual training
    self.book_brt(**kwargs)
    self.TrainAllMethods()
def cut_systematics(self):
    """Return the parent's cut systematics, adding 'ISOL' unless year is 2011."""
    systematics = super(Embedded_Ztautau, self).cut_systematics()
    if self.year != 2011:
        # isolation treatment in 2012 is different
        # MUON ISOLATION
        isolation = {}
        isolation['UP'] = Cut('(embedding_isolation == 2)')
        isolation['DOWN'] = Cut()
        isolation['NOMINAL'] = Cut('(embedding_isolation >= 1)')
        systematics.update({'ISOL': isolation})
    return systematics
def efficiency(sample, selection, prong, category):
    """Return the weighted fraction of events whose BDT score passes *selection*.

    The denominator is ``sample.events`` for ``tau1_numTrack == prong`` within
    *category*. The numerator sums the weights of events from ``sample.iter``
    whose ``tau1_BDTJetScore`` exceeds ``selection.Eval(pt)``.
    """
    category = category.replace(
        'tau_numberOfVertices', 'number_of_good_vertices')
    #total = sample.events(Cut('trueTau1_nProng==%d' % prong) & category)
    total = sample.events(Cut('tau1_numTrack==%d' % prong) & category)
    iter_cut = Cut('tau1_numTrack==%d' % prong) & category
    passing = sum(
        (weight for weight, event in sample.iter(iter_cut)
         if event.tau1_BDTJetScore > selection.Eval(event.tau1_fourvect.Pt())),
        0.)
    return passing / total
def __init__(self,
             cuts=None,
             ntuple_path=NTUPLE_PATH,
             file_name=None,
             student=DEFAULT_STUDENT,
             suffix='train',
             level=None,
             tree_name="NOMINAL",
             name='Sample',
             label='Sample',
             weight_field=None,
             **hist_decor):
    """Store the sample configuration.

    ``cuts`` defaults to an empty ``Cut``. The remaining keyword arguments
    are kept as histogram decoration, with 'fillstyle' defaulting to
    'solid'. ``level`` is accepted but not stored here.
    """
    self._cuts = Cut() if cuts is None else cuts

    # where the ntuples live and how to read them
    self.ntuple_path = ntuple_path
    self.file_name = file_name
    self.student = student
    self.suffix = suffix
    self.tree_name = tree_name

    # presentation and weighting
    self.name = name
    self.label = label
    self.weight_field = weight_field
    hist_decor.setdefault('fillstyle', 'solid')
    self.hist_decor = hist_decor
def __init__(self,
             cuts=None,
             ntuple_path=NTUPLE_PATH,
             student=DEFAULT_STUDENT,
             tree_name=DEFAULT_TREE,
             name='Sample',
             label='Sample',
             trigger=False,
             weight_field=None,
             **hist_decor):
    """Store the sample configuration and log the weight field(s).

    ``cuts`` defaults to an empty ``Cut``. The remaining keyword arguments
    are kept as histogram decoration, with 'fillstyle' defaulting to
    'solid'.
    """
    self._cuts = Cut() if cuts is None else cuts

    # where the ntuples live and how to read them
    self.ntuple_path = ntuple_path
    self.student = student
    self.tree_name = tree_name

    # presentation, trigger requirement and weighting
    self.name = name
    self.label = label
    self.trigger = trigger
    self.weight_field = weight_field
    log.info('{0}: weights are {1}'.format(self.name, weight_field))

    hist_decor.setdefault('fillstyle', 'solid')
    self.hist_decor = hist_decor
class Category(object):
    """Minimal category base: a pair of cuts and an optional plot label."""

    __metaclass__ = CategoryMeta

    cuts = Cut()
    common_cuts = Cut()
    plot_label = None

    @classmethod
    def get_cuts(cls):
        """Return the category cuts ANDed with the common cuts."""
        return cls.cuts & cls.common_cuts

    @classmethod
    def get_parent(cls):
        """Return the class itself."""
        return cls
def train(self, mode='gg', **kwargs):
    """
    Register the Higgs training trees and run the BRT training.

    mode: production mode passed to the ``Higgs`` sample array.
    kwargs: forwarded to ``self.book_brt``.

    The preselection is applied only after all regression trees have been
    added. NOTE(review): TMVA appears to attach a class-less cut only to
    the classes that already exist when ``PrepareTrainingAndTestTree`` is
    called, so calling it before ``AddRegressionTree`` would silently drop
    the ``hadhad==1`` requirement -- confirm against the TMVA version in use.
    """
    self.set_variables()
    higgs_array = Higgs(mode=mode, masses=Higgs.MASSES, suffix='_train')

    # add every input tree first so the regression class exists
    for s in higgs_array.components:
        rfile = get_file(s.ntuple_path, s.student, suffix=s.suffix)
        tree = rfile[s.tree_name]
        self.AddRegressionTree(tree)

    # 'nTrain_Regression=0' / 'nTest_Regression=1' are deliberately not set
    params = ':'.join([
        'SplitMode=Random',
        'NormMode=NumEvents',
        '!V',
    ])

    # preselection cuts must be applied after adding the regression trees
    cut = Cut('hadhad==1')
    self.PrepareTrainingAndTestTree(cut, params)
    self.AddRegressionTarget('resonance_m')
    # Could reweight samples
    # self.AddWeightExpression("my_expression")

    # Actual training
    self.book_brt(**kwargs)
    self.TrainAllMethods()
class Category_Preselection(Category):
    """Preselection category: PRESELECTION plus the MET centrality cut."""

    name = 'preselection'
    label = '#tau_{had}#tau_{had} Preselection'
    # MET_CENTRALITY is a format template, filled here with pi / 4
    common_cuts = PRESELECTION & Cut(MET_CENTRALITY.format(pi / 4))
def get_hist_array(self, field_hist_template, category=None, cuts=None,
                   multi_proc=False):
    """
    Fill one histogram per field and return them keyed by field.

    field_hist_template: mapping of field expression to histogram template.
    category: optional category, forwarded to ``self.cuts``.
    cuts: optional extra selection ANDed into the sample selection.
    multi_proc: draw the fields through ``FuncWorker``/``run_pool``
        instead of filling them one by one with ``self.fill_hist``.

    The selection always starts from ``self.cuts(category)``, so the
    sample's own cuts are applied even when no category is given.
    ``weight_field`` may be a single expression or a list/tuple of
    expressions; each one multiplies the selection.
    """
    sel = self.cuts(category)
    if cuts is not None:
        sel &= cuts

    weights = self.weight_field
    if weights is not None:
        if not isinstance(weights, (list, tuple)):
            weights = [weights]
        for w in weights:
            sel *= w

    field_hists = {}
    if multi_proc:
        keys = list(field_hist_template)
        workers = [
            FuncWorker(self.draw_helper, field_hist_template[key], key, sel)
            for key in keys
        ]
        run_pool(workers, n_jobs=-1)
        for key, worker in zip(keys, workers):
            field_hists[key] = asrootpy(worker.output)
    else:
        for key, hist in field_hist_template.items():
            field_hists[key] = self.fill_hist(hist, key, sel)
    return field_hists
class Category_Cuts_VBF_LowDR(Category_Preselection):
    """Cut-based VBF category with dR < 1.5 and resonance pT > 140000."""

    name = 'cuts_vbf_lowdr'
    norm_category = Category_Preselection
    jk_number = 7

    # presentation
    label = '#tau_{had}#tau_{had} CB VBF High-p_{T}^{H}'
    latex = '\\textbf{VBF High-$p_T^{H}$}'
    color = 'red'
    linestyle = 'dotted'

    # selection
    cuts = (
        CUTS_VBF_CUTBASED
        & Cut('dR_tau1_tau2 < 1.5')
        & Cut('resonance_pt > 140000'))
    cuts_truth = (
        CUTS_TRUE_VBF_CUTBASED
        & Cut('true_resonance_pt>140000'))

    # limit binning per year
    limitbins = {
        2011: [0, 64, 80, 92, 104, 116, 132, INF],
        # previous 2012: [0, 64, 80, 92, 104, 116, 132, 176, INF]
        # alternative 2012: [0, 60, 80, 100, 120, 180, INF]
        2012: [0, 60, 80, 100, 120, 150, INF],  # - new binning
    }
class Category_VBF_DEta_Control(Category_Preselection):
    """Control category: CUTS_VBF_CR with dEta_jets > 2.0."""

    name = 'vbf_deta_control'
    plot_label = 'Multijet CR'
    is_control = True

    common_cuts = Category_Preselection.common_cuts
    cuts = CUTS_VBF_CR & Cut('dEta_jets > 2.0')

    #norm_category = Category_Preselection_DEta_Control
    norm_category = Category_Preselection
def events(self, category=None, cuts=None, weighted=False):
    """Draw the constant '1' into a one-bin histogram under the selection.

    The selection is a copy of the sample cuts ANDed with the category
    selection and *cuts* when given. It is multiplied by ``weight_field``
    when *weighted* is true and a weight field is set. Returns whatever
    ``self.draw_helper`` returns.
    """
    selection = Cut(self._cuts)
    extra_terms = []
    if category is not None:
        extra_terms.append(self.cuts(category))
    if cuts is not None:
        extra_terms.append(cuts)
    for term in extra_terms:
        selection &= term

    apply_weight = weighted and self.weight_field is not None
    if apply_weight:
        selection *= self.weight_field

    counter = Hist(1, 0.5, 1.5)
    return self.draw_helper(counter, '1', selection)
class Category_Cuts_Boosted_Loose(Category_Preselection):
    """Cut-based boosted category: VBF veto, dR > 1.5 or resonance pT < 140000."""

    name = 'cuts_boosted_loose'
    norm_category = Category_Preselection
    jk_number = 5

    # presentation
    label = '#tau_{had}#tau_{had} CB Boosted Low-p_{T}^{H}'
    latex = '\\textbf{Boosted Low-$p_T^{H}$}'
    color = 'blue'
    linestyle = 'dotted'

    # selection
    cuts = (
        (-CUTS_VBF_CUTBASED)
        & CUTS_BOOSTED_CUTBASED
        & (Cut('dR_tau1_tau2 > 1.5') | Cut('resonance_pt<140000')))
    cuts_truth = (
        CUTS_TRUE_BOOSTED
        & Cut('true_resonance_pt<140000'))

    # limit binning per year
    limitbins = {
        # old 2011 binning:
        # [0,80,84,88,92,96,100,104,108,112,116,120,124,128,132,136,140,156,200,INF]
        2011: [0, 80, 88, 96, 104, 112, 120, 128, 140, 156, INF],
        # old 2012 binning:
        # [0,64,80,84,88,92,96,100,104,108,112,116,120,124,128,132,136,140,148,156,176,INF]
        # merge of the old:
        # [0, 64, 80, 88, 96, 104, 112, 120, 128, 136, 148, 176, INF]
        # alternative merge of the old:
        # [0, 64, 96, 104, 112, 120, 128, 136, 148, 176, INF]
        # test obs pval:
        # [0, 70, 100, 110, 123, 136, 150, 200, INF]
        2012: [0, 70, 100, 110, 125, 150, 200, INF],  # - new binning
    }
class Category_Cuts_VBF_HighDR_Loose(Category_Preselection):
    """Cut-based VBF category below the mass_jet1_jet2 vs dEta_jets line."""

    name = 'cuts_vbf_highdr_loose'
    norm_category = Category_Preselection
    jk_number = 8

    # presentation
    label = '#tau_{had}#tau_{had} CB VBF Low-p_{T}^{H} Loose'
    latex = '\\textbf{VBF Low-$p_T^{H}$ Loose}'
    color = 'red'
    linestyle = 'dashed'

    # selection
    cuts = (
        CUTS_VBF_CUTBASED
        & (Cut('dR_tau1_tau2 > 1.5') | Cut('resonance_pt < 140000'))
        & Cut('mass_jet1_jet2 < (-250000 * dEta_jets + 1550000)'))
    cuts_truth = (
        CUTS_TRUE_VBF_CUTBASED
        & Cut('true_resonance_pt<140000')
        & Cut('true_mass_jet1_jet2_no_overlap < (-250000 * true_dEta_jet1_jet2_no_overlap + 1550000)'))

    # old binning: [0, 64, 80, 92, 104, 116, 132, 152, 176, INF]
    # merging of old: [0, 64, 80, 92, 104, 116, 152, INF]
    limitbins = [0, 50, 70, 85, 100, 120, 150, INF]  # - new binning
def train_and_test_MVA(name, signal_tree, background_tree, output_file_name,
                       n_sig, n_bgk):
    """Train, test and evaluate a TMVA likelihood classifier.

    name: job name given to the TMVA factory.
    signal_tree, background_tree: input trees for the two classes.
    output_file_name: ROOT file (re)created to hold the TMVA output.
    n_sig, n_bgk: number of signal / background events used for both the
        training and the test split.

    The input variables come from the module-level ``variables`` sequence.
    The output file is closed even if TMVA raises.
    """
    outfile = root_open(output_file_name, 'recreate')
    try:
        factory = TMVA.Factory(
            name, outfile, "!V:!Silent:Color:DrawProgressBar")
        for var in variables:
            factory.AddVariable(var, 'F')

        factory.AddSignalTree(signal_tree)
        # use the tree passed in, not a module-level `bkg_tree`
        factory.AddBackgroundTree(background_tree)

        # passes selection (currently marked as all variables are defined).
        cut = Cut('absolute_eta > 0') & Cut('angle_bl > 0') & Cut('M3 > 0')

        training_options = (
            "nTrain_Signal=%d:nTrain_Background=%d:"
            "nTest_Signal=%d:nTest_Background=%d:!V" % (
                n_sig, n_bgk, n_sig, n_bgk))
        # same preselection for signal and background
        factory.PrepareTrainingAndTestTree(cut, cut, training_options)

        # other candidate methods:
        # PDE - RS method (PDERS)
        # K-Nearest Neighbour classifier (KNN)
        # Linear discriminant (LD)
        factory.BookMethod(
            TMVA.Types.kLikelihood, "Likelihood", "!V:NAvEvtPerBin=50")
        # factory.BookMethod( TMVA.Types.kMLP, "MLP", "!V:NCycles=50:HiddenLayers=10,10:TestRate=5" )
        # factory.BookMethod( TMVA.Types.kBDT, "BDT", "!V:BoostType=Grad:nCuts=20:NNodesMax=5" );

        # Train MVAs using the set of training events
        factory.TrainAllMethods()
        # ---- Evaluate all MVAs using the set of test events
        factory.TestAllMethods()
        # ----- Evaluate and compare performance of all configured MVAs
        factory.EvaluateAllMethods()
    finally:
        outfile.close()

    # NOTE(review): `bkg_file` is not a parameter of this function; this
    # relies on a module-level name that is not visible here -- confirm it
    # exists, or let the caller close the background file.
    bkg_file.close()
class Category_Cuts_Boosted_Tight(Category_Preselection):
    """Cut-based boosted category: VBF veto, dR < 1.5 and resonance pT > 140000."""

    name = 'cuts_boosted_tight'
    norm_category = Category_Preselection
    jk_number = 6

    # presentation
    label = '#tau_{had}#tau_{had} CB Boosted High-p_{T}^{H}'
    latex = '\\textbf{Boosted High-$p_T^{H}$}'
    color = 'blue'
    linestyle = 'verylongdashdot'

    # selection
    cuts = (
        (-CUTS_VBF_CUTBASED)
        & CUTS_BOOSTED_CUTBASED
        & (Cut('dR_tau1_tau2 < 1.5') & Cut('resonance_pt>140000')))
    cuts_truth = (
        CUTS_TRUE_BOOSTED
        & Cut('true_resonance_pt>140000'))

    # limit binning per year
    limitbins = {
        # old 2011 binning:
        # [0,64,72,76,80,84,88,92,96,100,104,108,112,116,120,124,128,132,140,INF]
        2011: [
            0, 64, 72, 80, 88, 96, 104, 112, 120, 128, 140, 156, 176, INF
        ],
        # old 2012 binning:
        # [0,64,72,76,80,84,88,92,96,100,104,108,112,116,120,124,128,132,140,156,176,INF]
        # merging of old:
        # [0, 64, 72, 80, 88, 96, 104, 112, 120, 128, 140, 156, 176, INF]
        2012: [
            0, 60, 68, 76, 84, 92, 100, 110, 120, 130, 140, 150, 175, INF
        ],  # - new binning
    }
class Category_Cuts_VBF_HighDR_Tight(Category_Preselection):
    """Cut-based VBF category above the mass_jet1_jet2 vs dEta_jets line."""

    name = 'cuts_vbf_highdr_tight'
    norm_category = Category_Preselection
    jk_number = 9

    # presentation
    label = '#tau_{had}#tau_{had} CB VBF Low-p_{T}^{H} Tight'
    latex = '\\textbf{VBF Low-$p_T^{H}$ Tight}'
    color = 'red'
    linestyle = 'verylongdash'

    # selection
    cuts = (
        CUTS_VBF_CUTBASED
        & (Cut('dR_tau1_tau2 > 1.5') | Cut('resonance_pt < 140000'))
        & Cut('mass_jet1_jet2 > (-250000 * dEta_jets + 1550000)'))
    cuts_truth = (
        CUTS_TRUE_VBF_CUTBASED
        & Cut('true_resonance_pt<140000')
        & Cut('true_mass_jet1_jet2_no_overlap > (-250000 * true_dEta_jet1_jet2_no_overlap + 1550000)'))

    # old binning: [0, 80, 92, 104, 116, 132, 152, INF]
    # merging of old: [0, 80, 104, 132, INF]
    # test postfit pval: [0, 70, 100, 115, 135, 150, INF]
    limitbins = [0, 70, 100, 125, 150, INF]  # - new binning
def efficiency_validation(sample, selection, prong, category):
    """Return the weighted fraction of events passing a named working point.

    sample: object providing ``events(cut)`` and ``iter(cut)``.
    selection: one of 'loose', 'medium' or 'tight'.
    prong: required value of ``tau1_numTrack``.
    category: cut restricting the events considered.

    Raises ValueError for an unknown working point. The check runs before
    any event is read, so it no longer depends on the sample being
    non-empty.
    """
    working_point_flags = {
        'loose': 'tau1_JetBDTSigLoose',
        'medium': 'tau1_JetBDTSigMedium',
        'tight': 'tau1_JetBDTSigTight',
    }
    try:
        flag_name = working_point_flags[selection]
    except (KeyError, TypeError):
        raise ValueError("invalid working point: %s" % (selection,))

    category = category.replace(
        'tau_numberOfVertices', 'number_of_good_vertices')
    #total = sample.events(Cut('trueTau1_nProng==%d' % prong) & category)
    cut = Cut('tau1_numTrack==%d' % prong) & category
    total = sample.events(cut)

    passing = 0.
    for weight, event in sample.iter(cut):
        if getattr(event, flag_name) == 1:
            passing += weight
    return passing / total
def efficiency_uncertainty(sample, selection, prong, category):
    """Return the (high, low) efficiencies using the shifted BDT scores.

    For each event ``uncertainty(...)`` yields a high and a low score. Each
    is compared to ``selection.Eval(pt)`` and the passing weights are
    divided by ``sample.events`` for the same selection.
    """
    category = category.replace(
        'tau_numberOfVertices', 'number_of_good_vertices')
    #total = sample.events(Cut('trueTau1_nProng==%d' % prong) & category)
    total = sample.events(Cut('tau1_numTrack==%d' % prong) & category)
    iter_cut = Cut('tau1_numTrack==%d' % prong) & category

    n_high = 0.
    n_low = 0.
    for weight, event in sample.iter(iter_cut):
        pt = event.tau1_fourvect.Pt()
        score_up, score_down = uncertainty(
            event.tau1_BDTJetScore,
            pt,
            event.tau1_numTrack,
            event.number_of_good_vertices)
        threshold = selection.Eval(pt)
        if score_up > threshold:
            n_high += weight
        if score_down > threshold:
            n_low += weight
    return n_high / total, n_low / total
class Category_VBF(Category_Preselection):
    """VBF category: CUTS_VBF with dEta_jets > 2.0, trained on VBF signal only."""

    name = 'vbf'
    label = '#tau_{had}#tau_{had} VBF'
    latex = '\\textbf{VBF}'
    color = 'red'
    linestyle = 'dotted'
    jk_number = 6
    common_cuts = Category_Preselection.common_cuts
    cuts = (
        CUTS_VBF
        & Cut('dEta_jets > 2.0'))
    cuts_truth = CUTS_TRUE_VBF
    features = features_vbf
    # train with only VBF mode
    # NOTE(review): the base Category declares `train_signal_modes`; the
    # original `signal_train_modes` spelling never overrode it, so this
    # category was still configured with all modes -- confirm with consumers.
    train_signal_modes = ['VBF']
    # previous spelling, kept as an alias for any code that reads it
    signal_train_modes = train_signal_modes
    norm_category = Category_Preselection
    controls = {'deta': Category_VBF_DEta_Control}
def events(self, category=None, cuts=None, weighted=False,
           force_reopen=False):
    """Draw the constant '1' into a one-bin histogram under the selection.

    The selection is a copy of the sample cuts ANDed with the category
    selection and *cuts* when given. When *weighted* is true and a weight
    field is set, the selection is multiplied by it (or by each entry of a
    list/tuple). ``force_reopen`` is passed on to ``self.draw_helper``,
    whose result is returned.
    """
    selection = Cut(self._cuts)
    extra_terms = []
    if category is not None:
        extra_terms.append(self.cuts(category))
    if cuts is not None:
        extra_terms.append(cuts)
    for term in extra_terms:
        selection &= term

    if weighted and self.weight_field is not None:
        if isinstance(self.weight_field, (list, tuple)):
            weights = self.weight_field
        else:
            weights = [self.weight_field]
        for w in weights:
            selection *= w

    counter = Hist(1, 0.5, 1.5)
    return self.draw_helper(
        counter, '1', selection, force_reopen=force_reopen)
def train_region(self):
    """Return a new ``Cut`` built from the stored training-region cut."""
    # hand out a copy so callers cannot modify the stored cut
    region_copy = Cut(self.__train_region)
    return region_copy
def signal_region(self):
    """Return a new ``Cut`` built from the stored signal-region cut."""
    # hand out a copy so callers cannot modify the stored cut
    region_copy = Cut(self.__signal_region)
    return region_copy
def control_region(self):
    """Return a new ``Cut`` built from the stored control-region cut."""
    # hand out a copy so callers cannot modify the stored cut
    region_copy = Cut(self.__control_region)
    return region_copy
from rootpy.tree import Cut

__all__ = [
    'get_trigger',
]

# hadhad channel triggers
TRIG_HH_1 = Cut(
    'HLT_tau35_medium1_tracktwo_tau25_medium1_tracktwo_L1TAU20IM_2TAU12IM == 1'
)
TRIG_HH_2 = Cut(
    'HLT_tau35_loose1_tracktwo_tau25_loose1_tracktwo_L1TAU20IM_2TAU12IM == 1')
# only the first hadhad trigger is currently used
TRIG_HH = TRIG_HH_1  #| TRIG_HH_2

# lephad channel triggers: either of the two
TRIG_LH_1 = Cut('HLT_mu26_imedium == 1')
TRIG_LH_2 = Cut('HLT_e28_lhtight_iloose == 1')
TRIG_LH = TRIG_LH_1 | TRIG_LH_2


def get_trigger(channel='hadhad'):
    """Return the trigger cut for *channel* ('hadhad' or 'lephad').

    Raises RuntimeError for any other channel name.
    """
    if channel == 'hadhad':
        return TRIG_HH
    if channel == 'lephad':
        return TRIG_LH
    raise RuntimeError('wrong channel name')
def _project(tree, var, selection='', weight=1.0, bins=None,
             includeover=False):
    """Draw *var* from *tree* into a new histogram and return it.

    tree: object providing ``Draw(expression, selection)``.
    var: ``'x'`` for a 1D histogram or ``'x:y'`` for a 2D one (x first,
        unlike ROOT's own convention).
    selection: cut expression; it is converted with ``str``.
    weight: multiplied into the selection when truthy and different from 1.
    bins: for 1D a ``(nbins, low, high)`` tuple or a list of bin edges;
        for 2D a 6-tuple or a list that is unpacked into ``Hist2D``.
    includeover: for 1D histograms, fold the overflow bin into the last bin.

    Raises AssertionError when *var* or *bins* has an unsupported form.
    """
    n_colons = var.count(':')
    h = None
    if n_colons == 0:
        ## Hist (1D)
        assert bins, 'bins are required'
        if isinstance(bins, tuple):
            assert len(bins) == 3
            h = Hist(*bins)
        elif isinstance(bins, list):
            h = Hist(bins)
        else:
            assert False, 'bins must be a tuple or a list'
    elif n_colons == 1:
        ## Hist2D
        ## like rootpy, we use a convention where var=x:y, unlike ROOT
        varx, vary = var.split(':')
        var = ':'.join([vary, varx])
        assert bins, 'bins are required'
        if isinstance(bins, tuple):
            assert len(bins) == 6
            h = Hist2D(*bins)
        elif isinstance(bins, list):
            ## TODO: support variable bins for Hist2D
            h = Hist2D(*bins)
        else:
            assert False, 'bins must be a tuple or a list'
    else:
        assert False, 'var must name one or two dimensions'
    assert h

    ## add the weight to the selection via a TCut
    weighted_selection = str(selection)
    if weight and weight != 1.0:
        weighted_selection = str(Cut(weighted_selection) * weight)

    tree.Draw('%s>>%s' % (var, h.GetName()), weighted_selection)

    if h:
        h.SetDirectory(0)

    if n_colons == 0 and includeover:
        ## include overflow for Hist (1D)
        nbins = h.GetNbinsX()
        c1 = h.GetBinContent(nbins)
        c2 = h.GetBinContent(nbins + 1)
        e1 = h.GetBinError(nbins)
        e2 = h.GetBinError(nbins + 1)
        h.SetBinContent(nbins, c1 + c2)
        # the two bins are summed, so their errors add in quadrature
        h.SetBinError(nbins, math.sqrt(e1 * e1 + e2 * e2))
        h.SetBinContent(nbins + 1, 0.0)
        h.SetBinError(nbins + 1, 0.0)
    return h