def setUp(self):
    """Prepare the ROOT session and open the input ntuple for each test.

    Reads ``self.infile`` (set elsewhere on the test case — presumably a
    class attribute or fixture; confirm against the enclosing TestCase)
    and exposes:
      - self.tfile:   the opened TFile
      - self.ttree:   the "ntupler/tree" TTree inside it
      - self.nevents: number of entries in that tree
    """
    # Batch mode: suppress any graphics windows during the test run.
    gROOT.SetBatch(True)
    # Macro path is anchored at the CMSSW checkout so the loader header resolves.
    gROOT.SetMacroPath(gSystem.Getenv("CMSSW_BASE")+"/src/SLHCL1TrackTriggerSimulations/AMSimulation")
    # Trailing '+' asks ACLiC to (re)compile the header into a shared library.
    gROOT.LoadMacro("python/test/loader.h+")
    self.tfile = TFile.Open(self.infile)
    # NOTE(review): no check that the file/tree opened successfully; a bad
    # path surfaces as an AttributeError on the next line.
    self.ttree = self.tfile.Get("ntupler/tree")
    self.nevents = self.ttree.GetEntries()
def atlas_style():
    """Load and activate the official ATLAS plotting style.

    The style macro directory is taken from the ``PATH_AtlasStyle``
    environment variable; when unset (or empty) a hard-coded cluster
    path is used as fallback.
    """
    # `or` (not os.getenv's default argument) so that an *empty* value of
    # PATH_AtlasStyle also falls back, matching the original truthiness check.
    PATH_AtlasStyle = os.getenv("PATH_AtlasStyle") or \
        "/atlas/data18a/wyusheng/atlas/analysis/tools/AtlasPlotStyle/atlasstyle-00-03-05/"
    gROOT.SetMacroPath(PATH_AtlasStyle)
    # Trailing '+' compiles the macro with ACLiC on first use.
    gROOT.LoadMacro("AtlasStyle.C+")
    gROOT.ProcessLine("SetAtlasStyle()")
def __init__(self, **kwargs):
    """ This initialization function sets up the TMVA macro path, and
    sets up variables for the TMVA reader. These variables are defined
    as single entry Python arrays. One has to use these arrays as
    TMVA/ROOT works by reference. This means that adding variables to
    the TMVA reader amounts to adding pointers to the TMVA reader. We
    have to then iterate (not very numpy) through our variable arrays,
    setting the reader variable arrays (and evaluating as we go).

    kwargs:
     - var_names: the names of the variables used in the BDT
       application (default 10 variables)
     - tmva_dir: the installation location of TMVA macros
       (/home/dean/software/tmva/TMVA-v4.2.0/test)
    """
    var_names = kwargs.get('var_names', [
        'peaks', 'mean_peaks', 'integral', 'integral_over_peaks', 'max',
        'mean', 'max_over_mean', 'std_dev_peaks', 'entropy', 'ps_integral'
    ])
    tmvadir = kwargs.get("tmva_dir",
                         "/home/dean/software/tmva/TMVA-v4.2.0/test")
    # Setting up the TMVA macro path.  Catch Exception (not a bare
    # `except:`) so SystemExit/KeyboardInterrupt still propagate, and
    # report what actually went wrong before bailing out.
    try:
        macro = os.path.join(tmvadir, "TMVAlogon.C")
        loadmacro = os.path.join(tmvadir, "TMVAGui.C")
        gROOT.SetMacroPath(tmvadir)
        gROOT.Macro(macro)
        gROOT.LoadMacro(loadmacro)
    except Exception as err:
        print("Couldn't successfully establish TMVA macro path")
        print(err)
        sys.exit()
    self.reader = TMVA.Reader("!Color")
    # One single-entry float array per variable: TMVA reads these by
    # reference, so writing arr[0] later updates the reader's inputs.
    # (Comprehension replaces the old Python-2-only `xrange` loop.)
    variables = [array('f', [0]) for _ in var_names]
    for name, var in zip(var_names, variables):
        self.reader.AddVariable(name, var)
    self.variables = variables
def plot(self):
    """Generate (optionally) and plot AmpGen D -> Ks pi pi Dalitz distributions.

    Reads its configuration entirely from instance attributes set elsewhere
    (nEvents, name, draw1D, draw2D, opt, output, generate, imgtype,
    EventType — confirm against the class constructor).  When
    ``self.generate`` is true an external ``Generator`` binary is invoked to
    produce the ROOT file; otherwise ``self.opt`` is taken as the path of an
    existing file.  Plots are saved under ``self.output`` and finally tarred.
    """
    gROOT.SetBatch(1)
    N = self.nEvents
    name = self.name
    draw1D = self.draw1D
    draw2D = self.draw2D
    opt = self.opt
    outFolder = self.output
    generate = self.generate
    imgtype = self.imgtype
    EventType = self.EventType
    print(N)
    print(name[1:])
    print(opt)

    # ROOT TLatex snippets for axis/title labels.
    GeVc2 = "GeV ^{2}/#it{c} ^{4}"
    kspip = "s(K_{ S}^{ 0} #pi^{+})"
    kspim = "s(K_{ S}^{ 0} #pi^{-})"
    pippim = "s(#pi^{+} #pi^{-})"
    titles = {
        "gArg": "Phase of A(%s, %s)" % (kspip, kspim),
        "gAbs": "Magnitude of A(%s, %s)" % (kspip, kspim),
        "s01": "%s distribution" % (kspip),
        "s02": "%s distribution" % (kspim),
        # FIX: was "%s distibution" % (kspim) — typo, and s12 is the
        # pi+pi- pair everywhere else (see xtitles/ytitles below).
        "s12": "%s distribution" % (pippim),
        "s01_vs_s02": "Dalitz plot of %s vs %s" % (kspip, kspim),
        "s01_vs_s12": "Dalitz plot of %s vs %s" % (kspip, pippim),
        "s02_vs_s12": "Dalitz plot of %s vs %s" % (kspim, pippim)
    }
    xtitles = {
        "gArg": "%s / (%s)" % (kspip, GeVc2),
        "gAbs": "%s / (%s)" % (kspip, GeVc2),
        "s01": "%s / (%s)" % (kspip, GeVc2),
        "s02": "%s / (%s)" % (kspim, GeVc2),
        "s12": "%s / (%s)" % (pippim, GeVc2),
        "s01_vs_s02": "%s / (%s)" % (kspip, GeVc2),
        "s01_vs_s12": "%s / (%s)" % (kspip, GeVc2),
        "s02_vs_s12": "%s / (%s)" % (kspim, GeVc2)
    }
    ytitles = {
        "gArg": "%s / (%s)" % (kspim, GeVc2),
        "gAbs": "%s / (%s)" % (kspim, GeVc2),
        "s01": "%s / (%s)" % (kspip, GeVc2),
        "s02": "%s / (%s)" % (kspim, GeVc2),
        "s12": "%s / (%s)" % (pippim, GeVc2),
        "s01_vs_s02": "%s / (%s)" % (kspim, GeVc2),
        "s01_vs_s12": "%s / (%s)" % (pippim, GeVc2),
        "s02_vs_s12": "%s / (%s)" % (pippim, GeVc2)
    }
    title = ""  # NOTE(review): assigned below for gArg/gAbs/s01_vs_s02 but never used.
    mymacroPath = os.environ['AMPGEN'] + "/kspipi/macro/"
    gROOT.SetMacroPath(os.pathsep.join([gROOT.GetMacroPath(), mymacroPath]))
    #gROOT.LoadMacro("lhcbStyle.C")
    #gROOT.SetStyle("Plain")
    print(outFolder)
    # FIX: replaces os.system("mkdir -p %s") — no shell involved, and
    # exist_ok=True keeps the old `mkdir -p` semantics.
    os.makedirs(outFolder, exist_ok=True)
    out = f"{outFolder}/Generated_Output.root"
    if (generate):
        # External AmpGen generator binary produces the ROOT file.
        os.system(
            "Generator --nEvents %i --Output %s --EventType '%s' %s"
            % (N, out, EventType, opt))
    else:
        # Without generation, self.opt is interpreted as the input file path.
        out = f"{self.opt}"
    f = TFile.Open("%s" % (out))
    if (name == "gArg" or name == "gAbs" or name == "s01_vs_s02"):
        title = name[1:] + "(A(m^{2}_{K_{S}^{0}#pi^{+}},m^{2}_{K_{S}^{0}#pi^{-}})); m^{2}_{K_{S}^{0}#pi^{+}}(GeV); m^{2}_{K_{S}^{0}#pi^{-}} (GeV)"
    if (name == "all"):
        names = [
            "s01", "s02", "s12", "s01_vs_s02", "s01_vs_s12", "s02_vs_s12",
            "gArg", "gAbs"
        ]
        for iname in names:
            s = f.Get(iname)
            obj = makePlot(s)
            obj.GetXaxis().SetTitle(xtitles[iname])
            obj.GetXaxis().SetTitleOffset(1.1)
            draw = draw1D
            c = TCanvas("c_%s" % (iname), iname, 1500, 1000)
            c.cd()
            obj.SetTitle(titles[iname])
            if (type(obj) != TH1D):
                # 2D-style object: give it a y-axis title and the 2D draw option.
                print(ytitles[iname])
                obj.GetYaxis().SetTitle(ytitles[iname])
                draw = draw2D
                if (type(obj) == TGraph2D):
                    print("test")
                    print(ytitles[iname])
                    #obj.SetMarkerSize(1)
                    #obj.SetMarkerStyle(2)
                    obj.SetTitle(titles[iname] + ";" + xtitles[iname] + ";"
                                 + ytitles[iname])
                    obj.GetXaxis().SetTitleOffset(4.4)
                    gPad.SetRightMargin(0.15)
                    #palette = TPaletteAxos()
                    obj.GetYaxis().SetTitleOffset(4.4)
                    obj.SetMargin(0.15)
                    # gStyle.SetMarkerSize(1)
                    # gStyle.SetMarkerStyle(1)
                    set_palette()
                    gStyle.SetPalette(55)
                    gPad.SetLeftMargin(0.1)
                    # Look straight down onto the graph: a 2D Dalitz view.
                    gPad.SetTheta(90)
                    gPad.SetPhi(0)
                    gPad.Update()
            obj.Draw(draw)
            c.SaveAs("%s/%s.%s" % (outFolder, iname, imgtype))
    else:
        s = f.Get(name)
        obj = makePlot(s)
        # obj.GetXaxis().SetTitle(xtitles[name])
        # obj.GetXaxis().SetTitleOffset(0)
        c = TCanvas("c_%s" % (name), name, 1500, 1000)
        c.cd()
        gStyle.SetPalette(100)
        gStyle.SetPadLeftMargin(0)
        if (type(obj) == TH1D or type(obj) == RooPlot):
            obj.SetTitle("%s ; %s" % (titles[name], xtitles[name]))
            # obj.GetXaxis().SetTitleOffset(500)
            draw = draw1D
        else:
            draw = draw2D
            #obj.SetMarkerSize(1)
            #obj.SetMarkerStyle(2)
            obj.SetTitle("%s; %s; %s"
                         % (titles[name], xtitles[name], ytitles[name]))
            obj.SetTitleOffset(1.25)
            print(ytitles[name])
            #obj.GetYaxis().SetTitle(ytitles[name])
        obj.Draw(draw)
        c.SaveAs("%s/%s.%s" % (outFolder, name, imgtype))
    # Bundle the EPS output and convert to PNG for quick browsing.
    cmd = f"cd {self.output} ; rm -rf *.tar.gz ;tar cvfz plots.tar.gz *.eps; mogrify -format png *.eps; cd -"
    print(cmd)
    os.system(cmd)
def main():
    """TMVA application driver (Python 2 / PyROOT).

    Parses command-line options, books the requested MVA methods from
    their weight files, loops over the signal tree of a toy example
    file, fills one output histogram per method, and writes everything
    to TMVApp.root.
    """
    try:
        # Retrive command line options
        shortopts = "m:i:t:o:vh?"
        longopts = [
            "methods=", "inputfile=", "inputtrees=", "outputfile=",
            "verbose", "help", "usage"
        ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)
    except getopt.GetoptError:
        # Print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    # Defaults come from module-level constants (defined elsewhere in this file).
    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    verbose = False

    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            # NOTE(review): strings are immutable — this strip() result is
            # discarded, so leading/trailing spaces in `a` survive.
            a.strip()
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVApplication: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut, TH1F, TStopwatch

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    gROOT.SetMacroPath("../macros/")
    gROOT.Macro("../macros/TMVAlogon.C")

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Create the Reader object
    reader = TMVA.Reader("!Color")

    # Create a set of variables and declare them to the reader
    # - the variable names must corresponds in name and type to
    # those given in the weight file(s) that you use
    # what to do ???
    # Single-entry arrays: TMVA holds pointers into these, so updating
    # var*[0] inside the event loop updates the reader's inputs.
    var1 = array('f', [0])
    var2 = array('f', [0])
    var3 = array('f', [0])
    var4 = array('f', [0])
    reader.AddVariable("var1+var2", var1)
    reader.AddVariable("var1-var2", var2)
    reader.AddVariable("var3", var3)
    reader.AddVariable("var4", var4)

    # book the MVA methods
    # NOTE(review): `dir` shadows the builtin of the same name.
    dir = "weights/"
    prefix = "TMVAnalysis_"
    for m in mlist:
        reader.BookMVA(m + " method", dir + prefix + m + ".weights.txt")

    #######################################################################
    # For an example how to apply your own plugin method, please see
    # TMVA/macros/TMVApplication.C
    #######################################################################

    # Book output histograms
    nbin = 80
    histList = []
    for m in mlist:
        histList.append(TH1F(m, m, nbin, -3, 3))

    # Book example histogram for probability (the other methods would be done similarly)
    if "Fisher" in mlist:
        probHistFi = TH1F("PROBA_MVA_Fisher", "PROBA_MVA_Fisher", nbin, 0, 1)
        rarityHistFi = TH1F("RARITY_MVA_Fisher", "RARITY_MVA_Fisher", nbin, 0, 1)

    # Prepare input tree (this must be replaced by your data source)
    # in this example, there is a toy tree with signal and one with background events
    # we'll later on use only the "signal" events for the test in this example.
    #
    fname = "./tmva_example.root"
    print "--- Accessing data file: %s" % fname
    # NOTE(review): `input` shadows the builtin of the same name.
    input = TFile.Open(fname)
    if not input:
        print "ERROR: could not open data file: %s" % fname
        sys.exit(1)

    #
    # Prepare the analysis tree
    # - here the variable names have to corresponds to your tree
    # - you can use the same variables as above which is slightly faster,
    # but of course you can use different ones and copy the values inside the event loop
    #
    print "--- Select signal sample"
    theTree = input.Get("TreeS")
    userVar1 = array('f', [0])
    userVar2 = array('f', [0])
    theTree.SetBranchAddress("var1", userVar1)
    theTree.SetBranchAddress("var2", userVar2)
    theTree.SetBranchAddress("var3", var3)
    theTree.SetBranchAddress("var4", var4)

    # Efficiency calculator for cut method
    nSelCuts = 0
    effS = 0.7

    # Process the events
    print "--- Processing: %i events" % theTree.GetEntries()
    sw = TStopwatch()
    sw.Start()
    for ievt in range(theTree.GetEntries()):
        if ievt % 1000 == 0:
            print "--- ... Processing event: %i" % ievt
        # Fill event in memory
        theTree.GetEntry(ievt)
        # Compute MVA input variables
        var1[0] = userVar1[0] + userVar2[0]
        var2[0] = userVar1[0] - userVar2[0]
        # Return the MVAs and fill to histograms
        if "CutsGA" in mlist:
            passed = reader.EvaluateMVA("CutsGA method", effS)
            if passed:
                nSelCuts = nSelCuts + 1
        # Fill histograms with MVA outputs
        for h in histList:
            h.Fill(reader.EvaluateMVA(h.GetName() + " method"))
        # Retrieve probability instead of MVA output
        if "Fisher" in mlist:
            probHistFi.Fill(reader.GetProba("Fisher method"))
            rarityHistFi.Fill(reader.GetRarity("Fisher method"))

    # Get elapsed time
    sw.Stop()
    # NOTE(review): TStopwatch.Print() prints its own report; the value
    # interpolated here is its return value, not the timing text — confirm.
    print "--- End of event loop: %s" % sw.Print()

    # Return computed efficeincies
    if "CutsGA" in mlist:
        eff = float(nSelCuts) / theTree.GetEntries()
        # Binomial error on the efficiency.
        deff = math.sqrt(eff * (1.0 - eff) / theTree.GetEntries())
        print "--- Signal efficiency for Cuts method : %.5g +- %.5g (required was: %.5g)" % (
            eff, deff, effS)

        # Test: retrieve cuts for particular signal efficiency
        # NOTE(review): the stock TMVA example uses reader.FindCutsMVA(...);
        # confirm FindMVA exists in the ROOT/TMVA version in use.
        mcuts = reader.FindMVA("CutsGA method")
        cutsMin = array('d', [0, 0, 0, 0])
        cutsMax = array('d', [0, 0, 0, 0])
        mcuts.GetCuts(0.7, cutsMin, cutsMax)
        print "--- -------------------------------------------------------------"
        print "--- Retrieve cut values for signal efficiency of 0.7 from Reader"
        for ivar in range(4):
            print "... Cut: %.5g < %s <= %.5g" % (
                cutsMin[ivar], reader.GetVarName(ivar), cutsMax[ivar])
        print "--- -------------------------------------------------------------"

    #
    # write histograms
    #
    target = TFile("TMVApp.root", "RECREATE")
    for h in histList:
        h.Write()

    # Write also probability hists
    if "Fisher" in mlist:
        probHistFi.Write()
        rarityHistFi.Write()
    target.Close()

    print "--- Created root file: \"TMVApp.root\" containing the MVA output histograms"
    print "==> TMVApplication is done!"
def main():
    """TMVA classification training driver (Python 2 / PyROOT).

    Parses options (mass point, category, input trees, ...), books two
    BDT classifiers (AdaBoost and gradient boost) on photon kinematic
    variables, then trains/tests/evaluates and writes the output file.
    """
    try:
        # retrive command line options
        shortopts = "m:M:C:B:i:t:o:vh?"
        opts, args = getopt.getopt(sys.argv[1:], shortopts)
    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    # Defaults come from module-level constants (defined elsewhere in this file).
    infname = DEFAULT_INFNAME
    methods = DEFAULT_METHODS
    mass = DEFAULT_MASS
    cat = DEFAULT_CAT
    outfname = DEFAULT_OUTFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    # NOTE(review): bkg_method is parsed from -B below but never used here.
    bkg_method = DEFAULT_BACKGROUND
    width = DEFAULT_WIDTH
    verbose = False

    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-M", "--mass"):
            mass = int(a)
        elif o in ("-C", "--cat"):
            cat = int(a)
        elif o in ("-B", "--background"):
            bkg_method = int(a)
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            # NOTE(review): strip() result discarded (strings are immutable).
            a.strip()
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # NOTE(review): width_str is left unbound if DEFAULT_WIDTH is neither
    # 0.02 nor 0.07 — the input.Get(...) calls below would then raise
    # NameError.  Presumably DEFAULT_WIDTH is always one of the two.
    if (width == 0.02):
        width_str = "_2pt"
    elif (width == 0.07):
        width_str = "_7pt"
    mass_str = "_" + str(mass)
    cat_str = "_" + str(cat)
    if cat < 0:
        cat_str = "_all"
    outfname = outfname + mass_str + cat_str + ".root"
    #treeNameSig = treeNameSig + mass_str
    #treeNameBkg = treeNameBkg + mass_str

    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library)
    # load also GUI
    gROOT.SetMacroPath("./")
    #gROOT.Macro ( "./TMVAlogon.C" )
    #gROOT.LoadMacro ( "./TMVAGui.C" )

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for
    # more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"
    )

    # Set verbosity
    factory.SetVerbose(verbose)

    # Define the input variables that shall be used for the classifier training
    factory.AddVariable("pho1_ptOverM", "P_{T}^{lead} / M_{H}", "", 'F')
    factory.AddVariable("pho2_ptOverM", "P_{T}^{sublead} / M_{H}", "", 'F')
    factory.AddVariable("pho1_eta", "#eta^{lead}", "", 'F')
    factory.AddVariable("pho2_eta", "#eta^{sublead}", "", 'F')
    factory.AddVariable("d_phi", "#Delta #phi", "rad", 'F')
    #should this be cos delta Phi
    factory.AddVariable("H_ptOverM", "P_{T}^{Higgs}/M_{H}", "", 'F')
    factory.AddVariable("H_eta", "#eta^{Higgs}", "", 'F')
    # NOTE(review): the next two (and deltaMOverM below) pass 'F' in the
    # *unit* slot, unlike the four-argument calls above — confirm intended.
    factory.AddVariable("sigmaMOverM", "#sigmaM_{cor} / M", 'F')
    factory.AddVariable("sigmaMOverM_wrongVtx", "#sigmaM_{wrong} / M", 'F')
    factory.AddVariable("vtx_prob", "P_{vertex}", "", 'F')
    #factory.AddVariable( "cos_theta_star","cos(#theta)*", "", 'F' );
    #factory.AddVariable( "max_eta","max(#eta^{lead},#eta^{sub.})", "", 'F' );
    #factory.AddVariable( "min_r9","min(r9^{lead},r9^{sub.})", "", 'F' );
    factory.AddVariable("deltaMOverM", "#DeltaM / M_{Hypth}.", 'F')

    # NOTE(review): `input` shadows the builtin of the same name.
    input = TFile.Open(infname)

    # Get the signal and background trees for training
    signal_train = input.Get(treeNameSig + "_train" + mass_str + ".0")
    signal_test = input.Get(treeNameSig + "_test" + mass_str + ".0")
    background_train = input.Get(treeNameBkg + "_train" + width_str + mass_str + ".0")
    background_test = input.Get(treeNameBkg + "_test" + width_str + mass_str + ".0")

    # Global event weights (see below for setting event-wise weights)
    signalWeight = 1.0
    backgroundWeight = 1.0

    # ====== register trees ====================================================
    factory.AddSignalTree(signal_train, signalWeight, "train")
    factory.AddBackgroundTree(background_train, backgroundWeight, "train")
    factory.AddSignalTree(signal_test, signalWeight, "test")
    factory.AddBackgroundTree(background_test, backgroundWeight, "test")

    # Set individual event weights (the variables must exist in the original
    # TTree)
    factory.SetBackgroundWeightExpression("wt")
    factory.SetSignalWeightExpression("wt")

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    mycut = TCut("fabs(deltaMOverM)<=" + str(width))

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    factory.PrepareTrainingAndTestTree(
        mycut, mycut,
        "nTrain_Signal=0:nTrain_Background=0:NormMode=NumEvents:!V")

    # Boosted Decision Trees
    # NEW PARAMETERS
    factory.BookMethod(
        TMVA.Types.kBDT, "BDT_ada" + mass_str + cat_str,
        "!H:!V:NTrees=400:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.05:SeparationType=GiniIndex:nCuts=50:PruneMethod=NoPruning"
    )
    factory.BookMethod(
        TMVA.Types.kBDT, "BDT_grad" + mass_str + cat_str,
        "!H:!V:NTrees=500:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=50:NNodesMax=5"
    )
    #test

    # --------------------------------------------------------------------------------------------------
    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    #factory.OptimizeAllMethods()
    factory.TrainAllMethods()
    # Test MVAs
    factory.TestAllMethods()
    # Evaluate MVAs
    factory.EvaluateAllMethods()
    # Save the output.
    outputFile.Close()

    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"
def main(): try: # retrive command line options shortopts = "m:p:M:C:B:i:t:T:o:vh?" opts, args = getopt.getopt(sys.argv[1:], shortopts) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infname = DEFAULT_INFNAME methods = DEFAULT_METHODS mass = DEFAULT_MASS cat = DEFAULT_CAT phil = DEFAULT_PHIL outfname = DEFAULT_OUTFNAME treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG bkg_method = DEFAULT_BACKGROUND width = DEFAULT_WIDTH verbose = False test = False testType = DEFAULT_TEST_TYPE methTest = False testMethod = DEFAULT_TEST_METHOD for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-m", "--methods"): methods = a elif o in ("-M", "--mass"): mass = int(a) elif o in ("-C", "--cat"): cat = int(a) elif o in ("-p", "--philosophy"): phil = a elif o in ("-B", "--background"): bkg_method = int(a) elif o in ("-i", "--inputfile"): infname = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-T", "--test"): test = True temp = a.split('_') if len(temp) == 1: testType = temp[0] if testType != "ada" or testType != "grad": print "ERROR: testType must be ada or grad not", testType elif len(temp) - temp.count('') == 2: methTest = True testType = temp[0] testMethod = temp[1] checkTestType(testType, testMethod) else: print "ERROR: need to give one or two test options" print temp sys.exit(1) elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit(' ') trees.sort() trees.reverse() if len(trees) - trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True if (width == 0.02): width_str = "_2pt" elif (width == 0.07): width_str = "_7pt" mass_str = "_" + str("%3.1f" % mass) cat_str = "_" + str(cat) if cat < 0: cat_str = "_all" if test: if methTest: outfname = "TMVAStuff/" + outfname + 
"_" + phil + cat_str + "_test_" + testType + "_" + testMethod + ".root" else: outfname = "TMVAStuff/" + outfname + "_" + phil + cat_str + "_test_" + testType + ".root" else: outfname = "TMVAStuff/" + outfname + "_" + phil + cat_str + ".root" #treeNameSig = treeNameSig + mass_str #treeNameBkg = treeNameBkg + mass_str # Print methods mlist = methods.replace(' ', ',').split(',') print "=== TMVAClassification: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." sys.exit(1) # Logon not automatically loaded through PyROOT (logon loads TMVA library) # load also GUI gROOT.SetMacroPath("/vols/cms03/mk1009/h2g/MVA/tmvaMacros/") gROOT.Macro("/vols/cms03/mk1009/h2g/MVA/tmvaMacros/TMVAlogon.C") gROOT.LoadMacro("/vols/cms03/mk1009/h2g/MVA/tmvaMacros/TMVAGui.C") # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile(outfname, 'RECREATE') # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for # more factory options) # All TMVA output can be suppressed by removing the "!" 
(not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose(verbose) factory.AddVariable("bdtoutput", "BDT Output", 'F') factory.AddVariable("deltaMOverM", "#DeltaM / M_{Hypth}.", 'F') input = TFile.Open(infname) # Get the signal and background trees for training signal_train = input.Get(treeNameSig + "_train" + mass_str) signal_test = input.Get(treeNameSig + "_test" + mass_str) background_train = input.Get(treeNameBkg + "_train" + width_str + mass_str) background_test = input.Get(treeNameBkg + "_test" + width_str + mass_str) # Global event weights (see below for setting event-wise weights) signalWeight = 1.0 backgroundWeight = 1.0 # ====== register trees ==================================================== factory.AddSignalTree(signal_train, signalWeight, "train") factory.AddBackgroundTree(background_train, backgroundWeight, "train") factory.AddSignalTree(signal_test, signalWeight, "test") factory.AddBackgroundTree(background_test, backgroundWeight, "test") # Set individual event weights (the variables must exist in the original # TTree) factory.SetBackgroundWeightExpression("wt") factory.SetSignalWeightExpression("wt") # Apply additional cuts on the signal and background sample. 
# example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) mycut = TCut("fabs(deltaMOverM)<=" + str(width) + " && bdtoutput > -0.5") # # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing factory.PrepareTrainingAndTestTree( mycut, mycut, "nTrain_Signal=0:nTrain_Background=0:NormMode=NumEvents:!V") # Boosted Decision Trees # NEW PARAMETERS if (not test): # Likelihood factory.BookMethod( TMVA.Types.kLikelihood, "Likelihood" + phil, "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ) factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodD" + phil, "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ) #factory.BookMethod( TMVA.Types.kPDERS, "MultiLikelihood"+phil,"!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ); # BDT factory.BookMethod( TMVA.Types.kBDT, "BDTada" + phil, "!H:!V:NTrees=200:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=1.0:SeparationType=GiniIndex:nCuts=50:PruneMethod=NoPruning" ) factory.BookMethod( TMVA.Types.kBDT, "BDTgrad" + phil, "!H:!V:NTrees=200:MaxDepth=3:BoostType=Grad:Shrinkage=0.5:UseBaggedGrad:GradBaggingFraction=1.0:SeparationType=GiniIndex:nCuts=50:NNodesMax=10" ) else: #test # BDT ada if testType == "ada": #if testMethod=="nTrees": for nTrees in [10, 50, 100, 200, 500]: for depth in [2, 3]: factory.BookMethod( TMVA.Types.kBDT, "BDT_ada" + str(phil) + "_" + str(nTrees) + "t_" + str(depth) + "d", "!H:!V:NTrees=" + str(nTrees) + ":nEventsMin=150:MaxDepth=" + str(depth) + ":BoostType=AdaBoost:AdaBoostBeta=1:SeparationType=GiniIndex:nCuts=50:PruneMethod=NoPruning" ) # if testMethod=="depth": # for depth in [2,3]: # factory.BookMethod( TMVA.Types.kBDT, 
"BDT_ada"+str(phil)+"_200t_"+str(depth)+"d_0.05b_50c","!H:!V:NTrees=200:nEventsMin=150:MaxDepth="+str(depth)+":BoostType=AdaBoost:AdaBoostBeta=0.05:SeparationType=GiniIndex:nCuts=50:PruneMethod=NoPruning") #if testMethod=="nCuts": # for nCuts in [5,10,20,50,100,200]: # factory.BookMethod( TMVA.Types.kBDT, "BDT_ada"+str(phil)+"_200t_50d_0.05b_"+str(nCuts)+"c","!H:!V:NTrees=200:nEventsMin=150:MaxDepth=50:BoostType=AdaBoost:AdaBoostBeta=0.05:SeparationType=GiniIndex:nCuts="+str(nCuts)+":PruneMethod=NoPruning") #if testMethod=="beta": # for beta in [0.05,0.5,1.]: # factory.BookMethod( TMVA.Types.kBDT, "BDT_ada"+str(phil)+"_200t_50d_"+str(beta)+"b_50c","!H:!V:NTrees=200:nEventsMin=150:MaxDepth=50:BoostType=AdaBoost:AdaBoostBeta="+str(beta)+":SeparationType=GiniIndex:nCuts=50:PruneMethod=NoPruning") # BDT grad if testType == "grad": if testMethod == "nTrees": for nTrees in [10, 50, 100, 200, 500]: for depth in [2, 3]: for shrinkage in [0.05, 0.5, 1.]: factory.BookMethod( TMVA.Types.kBDT, "BDT_grad" + str(phil) + "_" + str(nTrees) + "t_" + str(depth) + "d_" + str(shrinkage) + "s", "!H:!V:NTrees=" + str(nTrees) + ":MaxDepth=" + str(depth) + ":BoostType=Grad:Shrinkage=" + str(shrinkage) + ":UseBaggedGrad:GradBaggingFraction=1:SeparationType=GiniIndex:nCuts=50:NNodesMax=10" ) #if testMethod=="depth": # for depth in [2,3]: # factory.BookMethod( TMVA.Types.kBDT, "BDT_ada"+str(phil)+"_200t_"+str(depth)+"d_0.05b_50c","!H:!V:NTrees=200:nEventsMin=150:MaxDepth="+str(depth)+":BoostType=AdaBoost:AdaBoostBeta=0.05:SeparationType=GiniIndex:nCuts=50:PruneMethod=NoPruning") #if testMethod=="shrinkage": # for shrinkage in [0.05,0.1,0.5,1.]: # factory.BookMethod( TMVA.Types.kBDT, "BDT_grad"+str(phil)+"_200t_"+str(shrinkage)+"s_1gb_50c_10nm","!H:!V:NTrees=200:BoostType=Grad:Shrinkage="+str(shrinkage)+":UseBaggedGrad:GradBaggingFraction=1:SeparationType=GiniIndex:nCuts=50:NNodesMax=10") #if testMethod=="bagFrac": # for bagFrac in [0.05,0.1,0.5,1.]: # factory.BookMethod( TMVA.Types.kBDT, 
"BDT_grad"+str(phil)+"_200t_1s_"+str(bagFrac)+"gb_50c_10nm","!H:!V:NTrees=200:BoostType=Grad:Shrinkage=1:UseBaggedGrad:GradBaggingFraction="+str(bagFrac)+":SeparationType=GiniIndex:nCuts=50:NNodesMax=10") #if testMethod=="nCuts": # for nCuts in [5,10,20,50,100,200]: # factory.BookMethod( TMVA.Types.kBDT, "BDT_grad"+str(phil)+"_200t_1s_1gb_"+str(nCuts)+"c_10nm","!H:!V:NTrees=200:BoostType=Grad:Shrinkage=1:UseBaggedGrad:GradBaggingFraction=1:SeparationType=GiniIndex:nCuts="+str(nCuts)+":NNodesMax=10") #if testMethod=="nNM": # for nNM in [10,100,500,1000,10000]: # factory.BookMethod( TMVA.Types.kBDT, "BDT_grad"+str(phil)+"_200t_1s_1gb_50c_"+str(nNM)+"nm","!H:!V:NTrees=200:BoostType=Grad:Shrinkage=1:UseBaggedGrad:GradBaggingFraction=1:SeparationType=GiniIndex:nCuts=50:NNodesMax"+str(nNM)) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs #factory.OptimizeAllMethods() factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. outputFile.Close() print "=== wrote root file %s\n" % outfname print "=== TMVAClassification is done!\n"
def main():
    """Train and evaluate TMVA classifiers for the lepton+photon wino search.

    Command line options (short/long):
      -m/--methods     comma- or space-separated list of MVA methods to book
      -o/--outputfile  output ROOT file name
      -l/--lepton      'electron' or 'muon' (selects input ntuples and scales)
      -s/--signal      signal point: '600_200', '600_500', '1000_200', '1500_300'
      -v/--verbose     verbose TMVA factory output
      -?/-h/--help/--usage  print usage and exit

    Side effects: reads the per-process ntuples from /data3/jmitrevs/...,
    writes the TMVA output file, and finally opens the TMVAGui and blocks in
    gApplication.Run().  Python 2 / old (pre-DataLoader) TMVA API.
    """
    try:
        # retrieve command line options
        shortopts = "m:o:l:s:vh?"
        longopts = ["methods=", "outputfile=", "lepton=", "signal=", "verbose", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )
    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    # Defaults come from module-level constants (defined elsewhere in this file).
    lepton = DEFAULT_LEPTON
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    verbose = False
    signal = DEFAULT_SIGNAL

    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-s", "--signal"):
            signal = a
        elif o in ("-v", "--verbose"):
            verbose = True
        elif o in ("-l", "--lepton"):
            if a == "electron":
                lepton = ELECTRON
            elif a == "muon":
                lepton = MUON
            else:
                print "*** Lepton must be 'electron' or 'muon ****"
                sys.exit(1)

    # Print methods (accepts either spaces or commas as separators)
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    # (version codes 332288..332543 correspond to the broken 5.18 series)
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    gROOT.SetMacroPath( "./" )
    gROOT.Macro( "./TMVAlogon.C" )
    gROOT.LoadMacro( "./TMVAGui.C" )

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile( outfname, 'RECREATE' )

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory( "TMVAClassification", outputFile,
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )

    # Set verbosity
    factory.SetVerbose( verbose )

    # let's open the input files -- one merged ntuple per physics process,
    # with the file set chosen by the lepton channel
    if lepton == ELECTRON:
        print "Lepton is ELECTRON."
        path = "/data3/jmitrevs/lepphoton/elphoton_ntuple2/mergedFiles/"
        wino_600_200FileName = path + "wino_600_200_el.root"
        wino_600_500FileName = path + "wino_600_500_el.root"
        wino_1000_200FileName = path + "wino_1000_200_el.root"
        wino_1500_300FileName = path + "wino_1500_300_el.root"
        WlepnuFileName_Np0 = path + "Wenu_Np0.root"
        WlepnuFileName_Np1 = path + "Wenu_Np1.root"
        WlepnuFileName_Np2 = path + "Wenu_Np2.root"
        WlepnuFileName_Np3 = path + "Wenu_Np3.root"
        WlepnuFileName_Np4 = path + "Wenu_Np4.root"
        WlepnuFileName_Np5 = path + "Wenu_Np5.root"
        ZleplepFileName_Np0 = path + "Zee_Np0.root"
        ZleplepFileName_Np1 = path + "Zee_Np1.root"
        ZleplepFileName_Np2 = path + "Zee_Np2.root"
        ZleplepFileName_Np3 = path + "Zee_Np3.root"
        ZleplepFileName_Np4 = path + "Zee_Np4.root"
        ZleplepFileName_Np5 = path + "Zee_Np5.root"
        st_tchan_lepnuFileName = path + "st_tchan_enu.root"
        st_schan_lepnuFileName = path + "st_schan_enu.root"
        ZleplepgammaFileName = path + "Zeegamma.root"
    elif lepton == MUON:
        print "Lepton is MUON."
        path = "/data3/jmitrevs/lepphoton/muphoton2/mergedFiles/"
        wino_600_200FileName = path + "wino_600_200_mu.root"
        wino_600_500FileName = path + "wino_600_500_mu.root"
        wino_1000_200FileName = path + "wino_1000_200_mu.root"
        wino_1500_300FileName = path + "wino_1500_300_mu.root"
        WlepnuFileName_Np0 = path + "Wmunu_Np0.root"
        WlepnuFileName_Np1 = path + "Wmunu_Np1.root"
        WlepnuFileName_Np2 = path + "Wmunu_Np2.root"
        WlepnuFileName_Np3 = path + "Wmunu_Np3.root"
        WlepnuFileName_Np4 = path + "Wmunu_Np4.root"
        WlepnuFileName_Np5 = path + "Wmunu_Np5.root"
        ZleplepFileName_Np0 = path + "Zmumu_Np0.root"
        ZleplepFileName_Np1 = path + "Zmumu_Np1.root"
        ZleplepFileName_Np2 = path + "Zmumu_Np2.root"
        ZleplepFileName_Np3 = path + "Zmumu_Np3.root"
        ZleplepFileName_Np4 = path + "Zmumu_Np4.root"
        ZleplepFileName_Np5 = path + "Zmumu_Np5.root"
        st_tchan_lepnuFileName = path + "st_tchan_munu.root"
        st_schan_lepnuFileName = path + "st_schan_munu.root"
        ZleplepgammaFileName = path + "Zmumugamma.root"
    else:
        raise ValueError("Lepton has to be ELECTRON or MUON.")

    # Lepton-independent backgrounds (same file names under either path)
    WtaunuFileName_Np0 = path + "Wtaunu_Np0.root"
    WtaunuFileName_Np1 = path + "Wtaunu_Np1.root"
    WtaunuFileName_Np2 = path + "Wtaunu_Np2.root"
    WtaunuFileName_Np3 = path + "Wtaunu_Np3.root"
    WtaunuFileName_Np4 = path + "Wtaunu_Np4.root"
    WtaunuFileName_Np5 = path + "Wtaunu_Np5.root"
    ZtautauFileName_Np0 = path + "Ztautau_Np0.root"
    ZtautauFileName_Np1 = path + "Ztautau_Np1.root"
    ZtautauFileName_Np2 = path + "Ztautau_Np2.root"
    ZtautauFileName_Np3 = path + "Ztautau_Np3.root"
    ZtautauFileName_Np4 = path + "Ztautau_Np4.root"
    ZtautauFileName_Np5 = path + "Ztautau_Np5.root"
    st_tchan_taunuFileName = path + "st_tchan_taunu.root"
    st_schan_taunuFileName = path + "st_schan_taunu.root"
    st_WtFileName = path + "st_Wt.root"
    WgammaFileName_Np0 = path + "Wgamma_Np0.root"
    WgammaFileName_Np1 = path + "Wgamma_Np1.root"
    WgammaFileName_Np2 = path + "Wgamma_Np2.root"
    WgammaFileName_Np3 = path + "Wgamma_Np3.root"
    WgammaFileName_Np4 = path + "Wgamma_Np4.root"
    WgammaFileName_Np5 = path + "Wgamma_Np5.root"
    ttbarFileName = path + "ttbar.root"
    WWFileName = path + "WW.root"
    WZFileName = path + "WZ.root"
    ZZFileName = path + "ZZ.root"
    ZtautaugammaFileName = path + "Ztautaugamma.root"

    ###########################################
    # Open all input files.  NOTE(review): no IsZombie/open-failure checks --
    # a missing file surfaces later as a crash when Get() returns null.
    wino_600_200File = TFile(wino_600_200FileName)
    wino_600_500File = TFile(wino_600_500FileName)
    wino_1000_200File = TFile(wino_1000_200FileName)
    wino_1500_300File = TFile(wino_1500_300FileName)
    WlepnuFile_Np0 = TFile(WlepnuFileName_Np0)
    WlepnuFile_Np1 = TFile(WlepnuFileName_Np1)
    WlepnuFile_Np2 = TFile(WlepnuFileName_Np2)
    WlepnuFile_Np3 = TFile(WlepnuFileName_Np3)
    WlepnuFile_Np4 = TFile(WlepnuFileName_Np4)
    WlepnuFile_Np5 = TFile(WlepnuFileName_Np5)
    WtaunuFile_Np0 = TFile(WtaunuFileName_Np0)
    WtaunuFile_Np1 = TFile(WtaunuFileName_Np1)
    WtaunuFile_Np2 = TFile(WtaunuFileName_Np2)
    WtaunuFile_Np3 = TFile(WtaunuFileName_Np3)
    WtaunuFile_Np4 = TFile(WtaunuFileName_Np4)
    WtaunuFile_Np5 = TFile(WtaunuFileName_Np5)
    ZleplepFile_Np0 = TFile(ZleplepFileName_Np0)
    ZleplepFile_Np1 = TFile(ZleplepFileName_Np1)
    ZleplepFile_Np2 = TFile(ZleplepFileName_Np2)
    ZleplepFile_Np3 = TFile(ZleplepFileName_Np3)
    ZleplepFile_Np4 = TFile(ZleplepFileName_Np4)
    ZleplepFile_Np5 = TFile(ZleplepFileName_Np5)
    ZtautauFile_Np0 = TFile(ZtautauFileName_Np0)
    ZtautauFile_Np1 = TFile(ZtautauFileName_Np1)
    ZtautauFile_Np2 = TFile(ZtautauFileName_Np2)
    ZtautauFile_Np3 = TFile(ZtautauFileName_Np3)
    ZtautauFile_Np4 = TFile(ZtautauFileName_Np4)
    ZtautauFile_Np5 = TFile(ZtautauFileName_Np5)
    WgammaFile_Np0 = TFile(WgammaFileName_Np0)
    WgammaFile_Np1 = TFile(WgammaFileName_Np1)
    WgammaFile_Np2 = TFile(WgammaFileName_Np2)
    WgammaFile_Np3 = TFile(WgammaFileName_Np3)
    WgammaFile_Np4 = TFile(WgammaFileName_Np4)
    WgammaFile_Np5 = TFile(WgammaFileName_Np5)
    ttbarFile = TFile(ttbarFileName)
    st_tchan_lepnuFile = TFile(st_tchan_lepnuFileName)
    st_tchan_taunuFile = TFile(st_tchan_taunuFileName)
    st_schan_lepnuFile = TFile(st_schan_lepnuFileName)
    st_schan_taunuFile = TFile(st_schan_taunuFileName)
    st_WtFile = TFile(st_WtFileName)
    WWFile = TFile(WWFileName)
    WZFile = TFile(WZFileName)
    ZZFile = TFile(ZZFileName)
    ZleplepgammaFile = TFile(ZleplepgammaFileName)
    ZtautaugammaFile = TFile(ZtautaugammaFileName)

    ###########################################
    # Fetch the analysis tree ("GammaLepton") from each file
    wino_600_200Tree = wino_600_200File.Get("GammaLepton")
    wino_600_500Tree = wino_600_500File.Get("GammaLepton")
    wino_1000_200Tree = wino_1000_200File.Get("GammaLepton")
    wino_1500_300Tree = wino_1500_300File.Get("GammaLepton")
    WlepnuTree_Np0 = WlepnuFile_Np0.Get("GammaLepton")
    WlepnuTree_Np1 = WlepnuFile_Np1.Get("GammaLepton")
    WlepnuTree_Np2 = WlepnuFile_Np2.Get("GammaLepton")
    WlepnuTree_Np3 = WlepnuFile_Np3.Get("GammaLepton")
    WlepnuTree_Np4 = WlepnuFile_Np4.Get("GammaLepton")
    WlepnuTree_Np5 = WlepnuFile_Np5.Get("GammaLepton")
    WtaunuTree_Np0 = WtaunuFile_Np0.Get("GammaLepton")
    WtaunuTree_Np1 = WtaunuFile_Np1.Get("GammaLepton")
    WtaunuTree_Np2 = WtaunuFile_Np2.Get("GammaLepton")
    WtaunuTree_Np3 = WtaunuFile_Np3.Get("GammaLepton")
    WtaunuTree_Np4 = WtaunuFile_Np4.Get("GammaLepton")
    WtaunuTree_Np5 = WtaunuFile_Np5.Get("GammaLepton")
    ZleplepTree_Np0 = ZleplepFile_Np0.Get("GammaLepton")
    ZleplepTree_Np1 = ZleplepFile_Np1.Get("GammaLepton")
    ZleplepTree_Np2 = ZleplepFile_Np2.Get("GammaLepton")
    ZleplepTree_Np3 = ZleplepFile_Np3.Get("GammaLepton")
    ZleplepTree_Np4 = ZleplepFile_Np4.Get("GammaLepton")
    ZleplepTree_Np5 = ZleplepFile_Np5.Get("GammaLepton")
    ZtautauTree_Np0 = ZtautauFile_Np0.Get("GammaLepton")
    ZtautauTree_Np1 = ZtautauFile_Np1.Get("GammaLepton")
    ZtautauTree_Np2 = ZtautauFile_Np2.Get("GammaLepton")
    ZtautauTree_Np3 = ZtautauFile_Np3.Get("GammaLepton")
    ZtautauTree_Np4 = ZtautauFile_Np4.Get("GammaLepton")
    ZtautauTree_Np5 = ZtautauFile_Np5.Get("GammaLepton")
    WgammaTree_Np0 = WgammaFile_Np0.Get("GammaLepton")
    WgammaTree_Np1 = WgammaFile_Np1.Get("GammaLepton")
    WgammaTree_Np2 = WgammaFile_Np2.Get("GammaLepton")
    WgammaTree_Np3 = WgammaFile_Np3.Get("GammaLepton")
    WgammaTree_Np4 = WgammaFile_Np4.Get("GammaLepton")
    WgammaTree_Np5 = WgammaFile_Np5.Get("GammaLepton")
    ttbarTree = ttbarFile.Get("GammaLepton")
    st_tchan_lepnuTree = st_tchan_lepnuFile.Get("GammaLepton")
    st_tchan_taunuTree = st_tchan_taunuFile.Get("GammaLepton")
    st_schan_lepnuTree = st_schan_lepnuFile.Get("GammaLepton")
    st_schan_taunuTree = st_schan_taunuFile.Get("GammaLepton")
    st_WtTree = st_WtFile.Get("GammaLepton")
    WWTree = WWFile.Get("GammaLepton")
    WZTree = WZFile.Get("GammaLepton")
    ZZTree = ZZFile.Get("GammaLepton")
    ZleplepgammaTree = ZleplepgammaFile.Get("GammaLepton")
    ZtautaugammaTree = ZtautaugammaFile.Get("GammaLepton")

    ##############################
    # and now the weights
    # Per-tree global event weights (cross-section * luminosity / N_generated,
    # presumably -- TODO confirm normalization convention against the ntuple
    # production).  Two superseded weight tables (an older common table and an
    # "ntuple_pt25" variant) were previously kept here as commented-out
    # assignments; only the per-lepton tables below are live.
    if lepton == ELECTRON:
        wino_600_200_scale = 0.291875
        wino_600_500_scale = 2.69352
        wino_1000_200_scale = 4.1325
        wino_1500_300_scale = 0.0093269
        wino_1000_100_scale = 69.5
        wino_800_700_scale = 0.2328
        Wlepnu_Np0_scale = 12.0052623622
        Wlepnu_Np1_scale = 3.13076456857
        Wlepnu_Np2_scale = 0.60296853897
        Wlepnu_Np3_scale = 0.603183318846
        Wlepnu_Np4_scale = 0.62088
        Wlepnu_Np5_scale = 0.600008571551
        Wtaunu_Np0_scale = 12.1457006649
        Wtaunu_Np1_scale = 3.12868868923
        Wtaunu_Np2_scale = 0.602359552172
        Wtaunu_Np3_scale = 0.602586672951
        Wtaunu_Np4_scale = 0.62088496708
        Wtaunu_Np5_scale = 0.638769230769
        Zleplep_Np0_scale = 0.631361988532
        Zleplep_Np1_scale = 0.629541167757
        Zleplep_Np2_scale = 0.625618828688
        Zleplep_Np3_scale = 0.634090909091
        Zleplep_Np4_scale = 0.6
        Zleplep_Np5_scale = 0.51875
        Ztautau_Np0_scale = 0.631228327261
        Ztautau_Np1_scale = 0.631347664299
        Ztautau_Np2_scale = 0.622916409433
        Ztautau_Np3_scale = 0.640077378243
        Ztautau_Np4_scale = 0.581269375646
        Ztautau_Np5_scale = 0.48125
        Wgamma_Np0_scale = 0.0132834003639
        Wgamma_Np1_scale = 0.0651816146862
        Wgamma_Np2_scale = 0.144613309091
        Wgamma_Np3_scale = 0.144307893333
        Wgamma_Np4_scale = 0.13811616
        Wgamma_Np5_scale = 0.12653952
        ttbar_scale = 0.0384505023442
        st_tchan_lepnu_scale = 0.200916540624
        st_tchan_taunu_scale = 0.201132004918
        st_Wt_scale = 0.0916407781992
        WW_scale = 0.0342151663714
        WZ_scale = 0.110873818259
        ZZ_scale = 0.0252773011092
        Zleplepgamma_scale = 0.963
        Ztautaugamma_scale = 0.941960800016
        gamma_Np1_scale = 4.17064063358
        gamma_Np2_scale = 3.35244054801
        gamma_Np3_scale = 1.36994217452
        gamma_Np4_scale = 1.41464077802
        gamma_Np5_scale = 1.23661096137
    elif lepton == MUON:
        # NOTE(review): unlike the ELECTRON branch, this branch defines only
        # wino_600_200_scale; selecting -s 600_500/1000_200/1500_300 with the
        # muon channel would raise a NameError below -- confirm intended.
        wino_600_200_scale = 0.291875
        Wlepnu_Np0_scale = 11.9925371604
        Wlepnu_Np1_scale = 3.13058966
        Wlepnu_Np2_scale = 0.601616497017
        Wlepnu_Np3_scale = 0.605913424797
        Wlepnu_Np4_scale = 0.606001176701
        Wlepnu_Np5_scale = 0.593142857143
        Wtaunu_Np0_scale = 12.1457006649
        Wtaunu_Np1_scale = 3.12868868923
        Wtaunu_Np2_scale = 0.602359552172
        Wtaunu_Np3_scale = 0.602586672951
        Wtaunu_Np4_scale = 0.62088496708
        Wtaunu_Np5_scale = 0.638769230769
        Zleplep_Np0_scale = 0.631664271554
        Zleplep_Np1_scale = 0.628327597475
        Zleplep_Np2_scale = 0.62551337696
        Zleplep_Np3_scale = 0.635795454545
        Zleplep_Np4_scale = 0.572916666667
        Zleplep_Np5_scale = 0.48125
        Ztautau_Np0_scale = 0.631228327261
        Ztautau_Np1_scale = 0.631347664299
        Ztautau_Np2_scale = 0.622916409433
        Ztautau_Np3_scale = 0.640077378243
        Ztautau_Np4_scale = 0.581269375646
        Ztautau_Np5_scale = 0.48125
        Wgamma_Np0_scale = 0.0132834003639
        Wgamma_Np1_scale = 0.0651816146862
        Wgamma_Np2_scale = 0.144613309091
        Wgamma_Np3_scale = 0.144307893333
        Wgamma_Np4_scale = 0.13811616
        Wgamma_Np5_scale = 0.12653952
        ttbar_scale = 0.0384505023442
        st_tchan_lepnu_scale = 0.201919368378
        st_tchan_taunu_scale = 0.201132004918
        st_Wt_scale = 0.0916407781992
        WW_scale = 0.0342151663714
        WZ_scale = 0.110873818259
        ZZ_scale = 0.0252773011092
        Zleplepgamma_scale = 0.963963963964
        Ztautaugamma_scale = 0.941960800016
        gamma_Np1_scale = 4.08704733658
        gamma_Np2_scale = 3.35244054801
        gamma_Np3_scale = 1.36994217452
        gamma_Np4_scale = 1.41464077802
        gamma_Np5_scale = 1.23661096137

    # Register the requested signal tree with its global weight
    if signal == "600_200":
        factory.AddSignalTree(wino_600_200Tree, wino_600_200_scale)
    elif signal == "600_500":
        factory.AddSignalTree(wino_600_500Tree, wino_600_500_scale)
    elif signal == "1000_200":
        factory.AddSignalTree(wino_1000_200Tree, wino_1000_200_scale)
    elif signal == "1500_300":
        factory.AddSignalTree(wino_1500_300Tree, wino_1500_300_scale)
    else:
        print "*** signal designation not supported: %s ****" % signal
        sys.exit(1)

    # Register background trees.  Commented-out lines are deliberate
    # exclusions kept for reference (Wtaunu_Np0, Z+jets, s-channel single top).
    factory.AddBackgroundTree(WlepnuTree_Np0, Wlepnu_Np0_scale)
    factory.AddBackgroundTree(WlepnuTree_Np1, Wlepnu_Np1_scale)
    factory.AddBackgroundTree(WlepnuTree_Np2, Wlepnu_Np2_scale)
    factory.AddBackgroundTree(WlepnuTree_Np3, Wlepnu_Np3_scale)
    factory.AddBackgroundTree(WlepnuTree_Np4, Wlepnu_Np4_scale)
    factory.AddBackgroundTree(WlepnuTree_Np5, Wlepnu_Np5_scale)
    #factory.AddBackgroundTree(WtaunuTree_Np0, Wtaunu_Np0_scale)
    factory.AddBackgroundTree(WtaunuTree_Np1, Wtaunu_Np1_scale)
    factory.AddBackgroundTree(WtaunuTree_Np2, Wtaunu_Np2_scale)
    factory.AddBackgroundTree(WtaunuTree_Np3, Wtaunu_Np3_scale)
    factory.AddBackgroundTree(WtaunuTree_Np4, Wtaunu_Np4_scale)
    factory.AddBackgroundTree(WtaunuTree_Np5, Wtaunu_Np5_scale)
    # factory.AddBackgroundTree(ZleplepTree_Np0, Zleplep_Np0_scale)
    # factory.AddBackgroundTree(ZleplepTree_Np1, Zleplep_Np1_scale)
    # factory.AddBackgroundTree(ZleplepTree_Np2, Zleplep_Np2_scale)
    # factory.AddBackgroundTree(ZleplepTree_Np3, Zleplep_Np3_scale)
    # factory.AddBackgroundTree(ZleplepTree_Np4, Zleplep_Np4_scale)
    # factory.AddBackgroundTree(ZleplepTree_Np5, Zleplep_Np5_scale)
    # factory.AddBackgroundTree(ZtautauTree_Np0, Ztautau_Np0_scale)
    # factory.AddBackgroundTree(ZtautauTree_Np1, Ztautau_Np1_scale)
    # factory.AddBackgroundTree(ZtautauTree_Np2, Ztautau_Np2_scale)
    # factory.AddBackgroundTree(ZtautauTree_Np3, Ztautau_Np3_scale)
    # factory.AddBackgroundTree(ZtautauTree_Np4, Ztautau_Np4_scale)
    # factory.AddBackgroundTree(ZtautauTree_Np5, Ztautau_Np5_scale)
    factory.AddBackgroundTree(WgammaTree_Np0, Wgamma_Np0_scale)
    factory.AddBackgroundTree(WgammaTree_Np1, Wgamma_Np1_scale)
    factory.AddBackgroundTree(WgammaTree_Np2, Wgamma_Np2_scale)
    factory.AddBackgroundTree(WgammaTree_Np3, Wgamma_Np3_scale)
    factory.AddBackgroundTree(WgammaTree_Np4, Wgamma_Np4_scale)
    factory.AddBackgroundTree(WgammaTree_Np5, Wgamma_Np5_scale)
    factory.AddBackgroundTree(ttbarTree, ttbar_scale)
    factory.AddBackgroundTree(st_tchan_lepnuTree, st_tchan_lepnu_scale)
    factory.AddBackgroundTree(st_tchan_taunuTree, st_tchan_taunu_scale)
    # factory.AddBackgroundTree(st_schan_lepnuTree, st_schan_lepnu_scale)
    # factory.AddBackgroundTree(st_schan_taunuTree, st_schan_taunu_scale)
    factory.AddBackgroundTree(st_WtTree, st_Wt_scale)
    factory.AddBackgroundTree(WWTree, WW_scale)
    factory.AddBackgroundTree(WZTree, WZ_scale)
    factory.AddBackgroundTree(ZZTree, ZZ_scale)
    factory.AddBackgroundTree(ZleplepgammaTree, Zleplepgamma_scale)
    factory.AddBackgroundTree(ZtautaugammaTree, Ztautaugamma_scale)

    # Training variables.
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    factory.AddVariable( "MET := sqrt(Metx*Metx+Mety*Mety)", 'F' )
    # factory.AddVariable( "HT", 'F' )
    factory.AddVariable( "PhotonPt[0]", 'F' )
    #factory.AddVariable( "ElectronPt[0]", 'F' )
    # Transverse mass branch name depends on the lepton channel
    if lepton == ELECTRON:
        factory.AddVariable( "mTel", 'F' )
    else:
        factory.AddVariable( "mTmu", 'F' )
    #factory.AddVariable( "abs(PhotonEta[0])", 'F' )
    #factory.AddVariable( "abs(ElectronEta[0])", 'F' )

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    # Here: require a photon-lepton separation dR > 0.7 (computed in eta-phi,
    # without phi wrap-around handling) on both signal and background.
    #mycutSig = TCut( "abs(PhotonEta[0]) < 2.01 && abs(ElectronEta[0]) < 2.01" )
    if lepton == ELECTRON:
        mycutSig = TCut( "sqrt((PhotonEta[0]-ElectronEta[0])*(PhotonEta[0]-ElectronEta[0]) + (PhotonPhi[0]-ElectronPhi[0])*(PhotonPhi[0]-ElectronPhi[0])) > 0.7")
    else:
        mycutSig = TCut( "sqrt((PhotonEta[0]-MuonEta[0])*(PhotonEta[0]-MuonEta[0]) + (PhotonPhi[0]-MuonPhi[0])*(PhotonPhi[0]-MuonPhi[0])) > 0.7")
    #mycutSig = TCut( "PhotonPt[0] > 85000" )
    mycutBkg = mycutSig

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    # --------------------------------------------------------------------------------------------------
    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "Cuts",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" )
    if "CutsD" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsD",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" )
    if "CutsPCA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsPCA",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" )
    # if "CutsGA" in mlist:
    #     factory.BookMethod( TMVA.Types.kCuts, "CutsGA",
    #                         "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" )
    # if "CutsSA" in mlist:
    #     factory.BookMethod( TMVA.Types.kCuts, "CutsSA",
    #                         "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" )

    # --------------------------------------------------------------------------------------------------
    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    factory.TrainAllMethods()

    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"

    # open the GUI for the result macros
    gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname )

    # keep the ROOT thread running (blocks until the GUI is closed)
    gApplication.Run()
# --- Imports ---------------------------------------------------------------- # from ROOT import gROOT, gSystem, TFile, TGraphAsymmErrors, TH1D, TF1, TString, TH2D, TGraph, Double, TSpline3, TVector import ctamacropy from os.path import * # compile upon every load: gROOT.SetMacroPath("{0:s}".format(dirname(globals()['ctamacropy'].__file__))) gROOT.LoadMacro("makeCTAspec_v6_pyROOT.C+") #gSystem.AddIncludePath("{0:s}".format(dirname(globals()['cta'].__file__))) #gSystem.AddLinkedLibs("{0:s}".format(join(dirname(globals()['cta'].__file__),"makeCTAspec_v6_pyROOT_C.so"))) gSystem.Load("makeCTAspec_v6_pyROOT_C.so") from ROOT import makeCTAspec from ctypes import POINTER, c_float, c_double, c_int from array import array from ctamacropy import spectra from ctamacropy import convertroot2py as cr2py from copy import deepcopy import numpy as np # ---------------------------------------------------------------------------- # class CTAObsSim(object): def __init__(self, irf, **kwargs): """ Init the observation simulation class Parameters ---------- irf: str full path to CTA IRF root file kwargs
def main(): try: # retrive command line options shortopts = "m:i:t:o:vh?" longopts = [ "methods=", "inputfile=", "inputtrees=", "outputfile=", "verbose", "help", "usage" ] opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infname = DEFAULT_INFNAME treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG outfname = DEFAULT_OUTFNAME methods = DEFAULT_METHODS verbose = False for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-m", "--methods"): methods = a elif o in ("-i", "--inputfile"): infname = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit(' ') trees.sort() trees.reverse() if len(trees) - trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True # Print methods mlist = methods.replace(' ', ',').split(',') print "=== TMVAClassification: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." 
sys.exit(1) # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI gROOT.SetMacroPath("./") #gROOT.Macro ( "./TMVAlogon.C" ) #gROOT.LoadMacro ( "./TMVAGui.C" ) # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile("TMVA.root", "RECREATE") #outputFile = TFile( outfname, 'RECREATE' ) # Create dataloader dataloader = TMVA.DataLoader("dataset") # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" (not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose(verbose) # If you wish to modify default settings # (please check "src/Config.h" to see all available global options) # gConfig().GetVariablePlotting()).fTimesRMS = 8.0 # gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory" # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" # [all types of expressions that can also be parsed by TTree::Draw( "expression" )] # You can add so-called "Spectator variables", which are not used in the MVA training, # but will appear in the final "TestTree" produced by TMVA. 
This TestTree will contain the # input variables, the response values of all trained MVAs, and the spectator variables # Read input data # if gSystem.AccessPathName( infname ) != 0: gSystem.Exec( "wget http://root.cern.ch/files/" + infname ) file1 = TFile.Open("~/Test/Testy/out_sig.root") signal = file1.Get("ntuple") signalWeight = 1.0 dataloader.AddSignalTree(signal, signalWeight) file2 = TFile.Open("~/Test/Testy/out_bkg.root") background = file2.Get("ntuple") backgroundWeight = 1.0 dataloader.AddBackgroundTree(background, backgroundWeight) # Get the signal and background trees for training dataloader.AddVariable("dist", "Vardist", "units", 'F') dataloader.AddVariable("mu_likep", "Varmup", "units", 'F') dataloader.AddVariable("mu_likem", "Varmum", "units", 'F') dataloader.AddVariable("DeltPhi", "VarDel", "units", 'F') dataloader.AddSpectator("minv", "Varminv", 'F') # Global event weights (see below for setting event-wise weights) # ====== register trees ==================================================== # # the following method is the prefered one: # you can add an arbitrary number of signal or background trees # To give different trees for training and testing, do as follows: # factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ) # factory.AddSignalTree( signalTestTree, signalTestWeight, "Test" ) # Use the following code instead of the above two or four lines to add signal and background # training and test events "by hand" # NOTE that in this case one should not give expressions (such as "var1+var2") in the input # variable definition, but simply compute the expression before adding the event # # # --- begin ---------------------------------------------------------- # # ... 
*** please lookup code in TMVA/macros/TMVAClassification.C *** # # # --- end ------------------------------------------------------------ # # ====== end of register trees ============================================== # Set individual event weights (the variables must exist in the original TTree) # for signal : factory.SetSignalWeightExpression ("weight1*weight2"); # for background: factory.SetBackgroundWeightExpression("weight1*weight2"); #dataloader.SetBackgroundWeightExpression( "weight" ) # Apply additional cuts on the signal and background sample. # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) mycutSig = TCut("") mycutBkg = TCut("") # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples dataloader.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ) # -------------------------------------------------------------------------------------------------- # ---- Book MVA methods # # please lookup the various method configuration options in the corresponding cxx files, eg: # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html # it is possible to preset ranges in the option string in which the cut optimisation should be done: # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable # Cut optimisation if "Cuts" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart") if "CutsD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ) if "CutsPCA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsPCA", 
"!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ) if "CutsGA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ) if "CutsSA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ) # Likelihood ("naive Bayes estimator") if "Likelihood" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ) # Decorrelated likelihood if "LikelihoodD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ) # PCA-transformed likelihood if "LikelihoodPCA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) # Use a kernel density estimator to approximate the PDFs if "LikelihoodKDE" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) # Use a variable-dependent mix of splines and kernel density estimator if "LikelihoodMIX" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodMIX", 
"!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) # Test the multi-dimensional probability density estimator # here are the options strings for the MinMax and RMS methods, respectively: # "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); # "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if "PDERS" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ) if "PDERSD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ) if "PDERSPCA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ) # Multi-dimensional likelihood estimator using self-adapting phase-space binning if "PDEFoam" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ) if "PDEFoamBoost" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ) # K-Nearest Neighbour classifier (KNN) if "KNN" in mlist: factory.BookMethod( dataloader, TMVA.Types.kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ) # 
H-Matrix (chi2-squared) method if "HMatrix" in mlist: factory.BookMethod(dataloader, TMVA.Types.kHMatrix, "HMatrix", "!H:!V") # Linear discriminant (same as Fisher discriminant) if "LD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher discriminant (same as LD) if "Fisher" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher with Gauss-transformed input variables if "FisherG" in mlist: factory.BookMethod(dataloader, TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss") # Composite classifier: ensemble (tree) of boosted Fisher classifiers if "BoostedFisher" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" ) # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if "FDA_MC" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ) if "FDA_GA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ) if "FDA_SA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ) if "FDA_MT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MT", 
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ) if "FDA_GAMT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ) if "FDA_MCMT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ) # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if "MLP" in mlist: factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ) if "MLPBFGS" in mlist: factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ) if "MLPBNN" in mlist: factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators # CF(Clermont-Ferrand)ANN if "CFMlpANN" in mlist: factory.BookMethod(dataloader, TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ) # n_cycles:#nodes:#nodes:... # Tmlp(Root)ANN if "TMlpANN" in mlist: factory.BookMethod( dataloader, TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ) # n_cycles:#nodes:#nodes:... 
# Support Vector Machine if "SVM" in mlist: factory.BookMethod(dataloader, TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm") # Boosted Decision Trees if "BDTG" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTG", "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" ) if "BDT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" ) if "BDTB" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" ) if "BDTD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTD", "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" ) # RuleFit -- TMVA implementation of Friedman's method if "RuleFit" in mlist: factory.BookMethod( dataloader, TMVA.Types.kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ) from subprocess import call from os.path import isfile from keras.models import Sequential from keras.layers.core import Dense, Activation from keras.regularizers import l2 from keras.optimizers import SGD TMVA.Tools.Instance() TMVA.PyMethodBase.PyInitialize() model = Sequential() model.add(Dense(64, activation='relu', W_regularizer=l2(1e-5), input_dim=4)) model.add(Dense(2, activation='softmax')) # Set loss and optimizer model.compile(loss='categorical_crossentropy', optimizer=SGD(lr=0.01), metrics=[ 'accuracy', ]) # Store model to file model.save('model.h5') model.summary() # Book methods if "Keras" in mlist: factory.BookMethod(dataloader, TMVA.Types.kFisher, "Fisher", 
"!H:!V:Fisher:VarTransform=D,G") factory.BookMethod( dataloader, TMVA.Types.kPyKeras, "PyKeras", "H:!V:VarTransform=D,G:FilenameModel=model.h5:NumEpochs=20:BatchSize=32" ) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. outputFile.Close() print "=== wrote root file %s\n" % outfname print "=== TMVAClassification is done!\n"
#!/usr/bin/python import ROOT as rt from ROOT import gPad, gROOT, gStyle, TFile, gSystem from ROOT import TF1 import sys sys.path.append('../') gROOT.SetMacroPath("../") import plot_utils as ut from parameter_descriptor import parameter_descriptor as pdesc #_____________________________________________________________________________ def fit_vtx_z(): #gaussian fit to vertex z-position datamc = False #true - data, false - mc if datamc: vbin = 4. else: vbin = 2 vmax = 120. mmin = 1.5 mmax = 5. if datamc: fit_lo = -30. fit_hi = 35. else:
def main():
    """Train a TMVA BDT classifier for one Z' mass point.

    Command-line driven (see ``shortopts``/``longopts``): reads the method
    list, input file (interpreted as the mass point), number of trees and
    output file from ``sys.argv``, registers the signal tree and every
    background tree listed in the project-level ``varsList`` module, books
    a BDT, trains/tests/evaluates it, and finally renames the produced
    weight file so it is tagged with the mass point.

    Relies on module-level names defined elsewhere in this file:
    ``getopt``, ``sys``, ``os``, ``usage`` and the ``DEFAULT_*`` constants,
    plus the imported ``varsList`` configuration module.
    """
    try:
        # retrieve command line options
        shortopts = "m:i:n:t:o:vh?"
        longopts = ["methods=", "inputfile=", "nTrees=", "inputtrees=",
                    "outputfile=", "verbose", "help", "usage"]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)
    except getopt.GetoptError:
        # print help information and exit:
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    # Defaults, possibly overridden by the command line below.
    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    nTrees = DEFAULT_NTREES
    verbose = False

    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-n", "--nTrees"):
            nTrees = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            # BUGFIX: str.strip() returns a new string; the original call
            # discarded the result, leaving surrounding whitespace in place.
            a = a.strip()
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print("ERROR: need to give two trees (each one for signal and background)")
                print(trees)
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print("=== TMVAClassification: use method(s)...")
    for m in mlist:
        if m.strip() != '':
            print("=== - <%s>" % m.strip())

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print("*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA")
        print("*** does not run properly (function calls with enums in the argument are ignored).")
        print("*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),")
        print("*** or use another ROOT version (e.g., ROOT 5.19).")
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    gROOT.SetMacroPath("./")
    gROOT.Macro("./TMVAlogon.C")
    gROOT.LoadMacro("./TMVAGui.C")

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C
    # for more factory options).  All TMVA output can be suppressed by
    # removing the "!" (not) in front of the "Silent" argument.
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification")

    # Set verbosity
    factory.SetVerbose(verbose)

    # Define the input variables used for the classifier training; the list
    # comes from the project-level varsList configuration module.  Variable
    # expressions parsed like TTree::Draw (e.g. "3*var1/var2") are allowed.
    varList = varsList.varList
    for iVar in varList:
        factory.AddVariable(iVar, 'F')
    #factory.AddVariable( "NBTags",'I' )

    print("here")
    # The -i argument is interpreted as the signal mass point; the actual
    # signal file name is assembled from the varsList configuration.
    massPoint = infname
    preFix = varsList.preFix
    infname = "ZPrime_%s_all_SYNC_%s_noIso%s" % (massPoint, varsList.fs, varsList.tail)
    iFileSig = TFile.Open(preFix + infname)
    sigChain = iFileSig.Get("eventTree_train")
    factory.AddSignalTree(sigChain)

    # Register every background tree listed in varsList.bkg, skipping files
    # whose tree is empty.  The TFile objects are kept in bkg_list so they
    # stay alive (and their trees readable) for the duration of training.
    bkg_list = []
    bkg_trees_list = []
    for i in range(len(varsList.bkg)):
        bkg_list.append(TFile.Open(preFix + varsList.bkg[i][1]))
        print(preFix + varsList.bkg[i][1])
        bkg_trees_list.append(bkg_list[i].Get("eventTree"))
        bkg_trees_list[i].GetEntry(0)
        if bkg_trees_list[i].GetEntries() == 0:
            continue
        factory.AddBackgroundTree(bkg_trees_list[i], 1)
        print("%s:\t\t%.2f" % (varsList.bkg[i][0], bkg_trees_list[i].GetEntries()))

    signalWeight = 1  #0.0159/sigChain.GetEntries() #xs (pb)

    # ====== register trees ====================================================
    # Per-event weights; the branch must exist in the original TTrees.
    factory.SetSignalWeightExpression('weightWithPU')
    factory.SetBackgroundWeightExpression('weightWithPU')

    # Apply additional cuts on the signal and background samples (none here).
    mycutSig = TCut("")
    mycutBkg = TCut("")

    # Copy the relevant variables into new slim trees used for TMVA
    # training and testing; "SplitMode=Block" keeps the input ordering.
    factory.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=1:nTest_Background=1:SplitMode=Block:NormMode=NumEvents:!V")
    # "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    # ---- Book MVA methods: only a BDT is booked here; nTrees comes from -n.
    bdtSetting = '!H:!V:NTrees=%s' % nTrees
    bdtSetting += ':MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=100'
    if "BDT" in mlist:
        factory.BookMethod(TMVA.Types.kBDT, "BDT", bdtSetting)

    # ---- Train, test and evaluate all booked MVAs.
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    # Tag the produced BDT weight file with the trained mass point.
    ChangeWeightName = 'mv %s/weights/TMVAClassification_BDT.weights.xml %s/weights/TMVAClassification_BDT.weights_both_%s.xml' % (
        os.getcwd(), os.getcwd(), massPoint)
    os.system(ChangeWeightName)
def main():
    """Run the TMVAnalysis training example.

    Parses command-line options for methods/input/output, registers four
    example variables and the signal/background trees from the input file,
    books every method requested via ``-m``, then trains, tests and
    evaluates them, finally opening the TMVA GUI on the result file and
    keeping the ROOT application thread alive.

    Relies on module-level names defined elsewhere in this file:
    ``getopt``, ``sys``, ``usage`` and the ``DEFAULT_*`` constants.
    """
    try:
        # retrieve command line options
        shortopts = "m:i:t:o:vh?"
        longopts = ["methods=", "inputfile=", "inputtrees=", "outputfile=",
                    "verbose", "help", "usage"]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)
    except getopt.GetoptError:
        # print help information and exit:
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    # Defaults, possibly overridden by the command line below.
    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    verbose = False

    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            # BUGFIX: str.strip() returns a new string; the original call
            # discarded the result, leaving surrounding whitespace in place.
            a = a.strip()
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print("ERROR: need to give two trees (each one for signal and background)")
                print(trees)
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print("=== TMVAnalysis: use method(s)...")
    for m in mlist:
        if m.strip() != '':
            print("=== - <%s>" % m.strip())

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print("*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA")
        print("*** does not run properly (function calls with enums in the argument are ignored).")
        print("*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),")
        print("*** or use another ROOT version (e.g., ROOT 5.19).")
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    gROOT.SetMacroPath("../macros/")
    gROOT.Macro("../macros/TMVAlogon.C")
    gROOT.LoadMacro("../macros/TMVAGui.C")

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAnalysis.C for
    # more factory options).  All TMVA output can be suppressed by removing
    # the "!" (not) in front of the "Silent" argument in the option string.
    factory = TMVA.Factory("TMVAnalysis", outputFile, "!V:!Silent:Color")

    # Set verbosity
    factory.SetVerbose(verbose)

    # Define the input variables used for the classifier training.
    # Variable expressions parsed like TTree::Draw are allowed.
    factory.AddVariable("var1+var2", 'F')
    factory.AddVariable("var1-var2", 'F')
    factory.AddVariable("var3", 'F')
    factory.AddVariable("var4", 'F')

    # Read input data.  Renamed from "input" to avoid shadowing the builtin.
    if not gSystem.AccessPathName(infname):
        infile = TFile(infname)
    else:
        print("ERROR: could not access data file %s\n" % infname)
        # BUGFIX: abort here; the original fell through and crashed with a
        # NameError on the next line instead of exiting cleanly.
        sys.exit(1)

    # Get the signal and background trees for training
    signal = infile.Get(treeNameSig)
    background = infile.Get(treeNameBkg)

    # Global event weights (see below for setting event-wise weights)
    signalWeight = 1.0
    backgroundWeight = 1.0

    # ====== register trees ====================================================
    # You can add an arbitrary number of signal or background trees.
    factory.AddSignalTree(signal, signalWeight)
    factory.AddBackgroundTree(background, backgroundWeight)

    # Apply additional cuts on the signal and background samples (none here).
    mycutSig = TCut("")
    mycutBkg = TCut("")

    # Copy the relevant variables into new slim trees used for TMVA
    # training and testing; inputs are shuffled before the split.
    factory.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "NSigTrain=3000:NBkgTrain=3000:SplitMode=Random:NormMode=NumEvents:!V")

    # ---- Book MVA methods (each one only if requested via -m) ----

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod(TMVA.Types.kCuts, "Cuts",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart")
    if "CutsD" in mlist:
        factory.BookMethod(TMVA.Types.kCuts, "CutsD",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate")
    if "CutsPCA" in mlist:
        factory.BookMethod(TMVA.Types.kCuts, "CutsPCA",
                           "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA")
    if "CutsGA" in mlist:
        factory.BookMethod(TMVA.Types.kCuts, "CutsGA",
                           "H:!V:FitMethod=GA:EffSel:Steps=30:Cycles=3:PopSize=100:SC_steps=10:SC_rate=5:SC_factor=0.95:VarProp=FSmart")
    if "CutsSA" in mlist:
        factory.BookMethod(TMVA.Types.kCuts, "CutsSA",
                           "H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemperature=IncreasingAdaptive:InitialTemperature=1e+6:MinTemperature=1e-6:Eps=1e-10:UseDefaultScale")

    # Likelihood
    if "Likelihood" in mlist:
        factory.BookMethod(TMVA.Types.kLikelihood, "Likelihood",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=10:NSmoothBkg[0]=10:NSmoothBkg[1]=10:NSmooth=10:NAvEvtPerBin=50")
    # test the decorrelated likelihood
    if "LikelihoodD" in mlist:
        factory.BookMethod(TMVA.Types.kLikelihood, "LikelihoodD",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=10:NSmoothBkg[0]=10:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate")
    if "LikelihoodPCA" in mlist:
        factory.BookMethod(TMVA.Types.kLikelihood, "LikelihoodPCA",
                           "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=10:NSmoothBkg[0]=10:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA")
    # test the new kernel density estimator
    if "LikelihoodKDE" in mlist:
        factory.BookMethod(TMVA.Types.kLikelihood, "LikelihoodKDE",
                           "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50")
    # test the mixed splines and kernel density estimator (depending on which variable)
    if "LikelihoodMIX" in mlist:
        factory.BookMethod(TMVA.Types.kLikelihood, "LikelihoodMIX",
                           "!H:!V:!TransformOutput:PDFInterpol[0]=KDE:PDFInterpol[1]=KDE:PDFInterpol[2]=Spline2:PDFInterpol[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50")

    # PDE - RS method
    if "PDERS" in mlist:
        factory.BookMethod(TMVA.Types.kPDERS, "PDERS",
                           "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600")
    # And the options strings for the MinMax and RMS methods, respectively:
    # "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3"
    # "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3"
    if "PDERSkNN" in mlist:
        factory.BookMethod(TMVA.Types.kPDERS, "PDERSkNN",
                           "!H:!V:VolumeRangeMode=kNN:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600")
    if "PDERSD" in mlist:
        factory.BookMethod(TMVA.Types.kPDERS, "PDERSD",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate")
    if "PDERSPCA" in mlist:
        factory.BookMethod(TMVA.Types.kPDERS, "PDERSPCA",
                           "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA")

    # K-Nearest Neighbour classifier (KNN)
    if "KNN" in mlist:
        factory.BookMethod(TMVA.Types.kKNN, "KNN",
                           "nkNN=400:TreeOptDepth=6:ScaleFrac=0.8:!UseKernel:!Trim")

    # H-Matrix (chi2-squared) method
    if "HMatrix" in mlist:
        factory.BookMethod(TMVA.Types.kHMatrix, "HMatrix", "!H:!V")

    # Fisher discriminant
    if "Fisher" in mlist:
        factory.BookMethod(TMVA.Types.kFisher, "Fisher",
                           "H:!V:!Normalise:CreateMVAPdfs:Fisher:NbinsMVAPdf=50:NsmoothMVAPdf=1")

    # Function discrimination analysis (FDA) -- test of various fitters -
    # the recommended one is Minuit (or GA or SA)
    if "FDA_MC" in mlist:
        factory.BookMethod(TMVA.Types.kFDA, "FDA_MC",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1")
    if "FDA_GA" in mlist:
        factory.BookMethod(TMVA.Types.kFDA, "FDA_GA",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=100:Cycles=3:Steps=20:Trim=True:SaveBestGen=0")
    if "FDA_MT" in mlist:
        factory.BookMethod(TMVA.Types.kFDA, "FDA_MT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch")
    if "FDA_GAMT" in mlist:
        factory.BookMethod(TMVA.Types.kFDA, "FDA_GAMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim")
    if "FDA_MCMT" in mlist:
        factory.BookMethod(TMVA.Types.kFDA, "FDA_MCMT",
                           "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20")

    # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if "MLP" in mlist:
        factory.BookMethod(TMVA.Types.kMLP, "MLP",
                           "H:!V:!Normalise:NeuronType=tanh:NCycles=200:HiddenLayers=N+1,N:TestRate=5")

    # CF(Clermont-Ferrand)ANN
    if "CFMlpANN" in mlist:
        factory.BookMethod(TMVA.Types.kCFMlpANN, "CFMlpANN",
                           "!H:!V:NCycles=500:HiddenLayers=N+1,N")  # n_cycles:#nodes:#nodes:...

    # Tmlp(Root)ANN
    if "TMlpANN" in mlist:
        factory.BookMethod(TMVA.Types.kTMlpANN, "TMlpANN",
                           "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3")  # n_cycles:#nodes:#nodes:..

    # Support Vector Machines using three different Kernel types (Gauss, polynomial and linear)
    if "SVM_Gauss" in mlist:
        factory.BookMethod(TMVA.Types.kSVM, "SVM_Gauss",
                           "Sigma=2:C=1:Tol=0.001:Kernel=Gauss")
    if "SVM_Poly" in mlist:
        factory.BookMethod(TMVA.Types.kSVM, "SVM_Poly",
                           "Order=4:Theta=1:C=0.1:Tol=0.001:Kernel=Polynomial")
    if "SVM_Lin" in mlist:
        factory.BookMethod(TMVA.Types.kSVM, "SVM_Lin",
                           "!H:!V:Kernel=Linear:C=1:Tol=0.001")

    # Boosted Decision Trees (second one with decorrelation)
    if "BDT" in mlist:
        factory.BookMethod(TMVA.Types.kBDT, "BDT",
                           "!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=1.5")
    if "BDTD" in mlist:
        factory.BookMethod(TMVA.Types.kBDT, "BDTD",
                           "!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=2.5:VarTransform=Decorrelate")

    # RuleFit -- TMVA implementation of Friedman's method
    if "RuleFit" in mlist:
        factory.BookMethod(TMVA.Types.kRuleFit, "RuleFit",
                           "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02")
    # Friedman's RuleFit method, implementation by J. Friedman
    if "RuleFitJF" in mlist:
        factory.BookMethod(TMVA.Types.kRuleFit, "RuleFitJF",
                           "!V:RuleFitModule=RFFriedman:Model=ModRuleLinear:GDStep=0.01:GDNSteps=10000:GDErrScale=1.1:RFNendnodes=4")

    # ---- Train, test and evaluate all booked MVAs.
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()
    print("=== wrote root file %s\n" % outfname)
    print("=== TMVAnalysis is done!\n")

    # open the GUI for the result macros
    gROOT.ProcessLine("TMVAGui(\"%s\")" % outfname)

    # keep the ROOT thread running
    gApplication.Run()
def main():
    """Command-line driver for a TMVAClassification training run.

    Parses options (methods, input/output file, input trees, verbosity),
    checks the ROOT version, loads the TMVA logon/GUI macros, creates the
    TMVA factory and defines the hard-coded list of input variables.

    NOTE(review): this definition appears truncated in this copy of the
    file -- it ends right after defining ``varList``; confirm against the
    original source.  Relies on module-level names: ``getopt``, ``sys``,
    ``usage`` and the ``DEFAULT_*`` constants.
    """
    try:
        # retrieve command line options
        shortopts = "m:i:t:o:vh?"
        longopts = ["methods=", "inputfile=", "inputtrees=", "outputfile=",
                    "verbose", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )
    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    # Defaults, possibly overridden by the command line below.
    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    verbose = False

    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            # NOTE(review): str.strip() returns a new string; this call
            # discards the result, so ``a`` keeps any surrounding
            # whitespace -- looks like a bug, verify intent.
            a.strip()
            trees = a.rsplit( ' ' )
            trees.sort()
            trees.reverse()
            # After sort+reverse, exactly two non-empty names are required:
            # signal tree first, then background tree.
            if len(trees)-trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # Print methods
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    gROOT.SetMacroPath( "./" )
    gROOT.Macro      ( "./TMVAlogon.C" )
    gROOT.LoadMacro  ( "./TMVAGui.C" )

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile( outfname, 'RECREATE' )

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory( "TMVAClassification", outputFile,
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )

    # Set verbosity
    factory.SetVerbose( verbose )

    # Hard-coded list of input variables for the classifier training
    # (branch names in the input trees).
    #varList = ['svMass', 'mJJ', 'met/fMass','pZ - pZV']
    varList = ['svMass', 'fMass', 'dRTauTau', 'dRJJ', 'svPt', 'dRhh', 'met',
               'mJJReg', 'metTau1DPhi', 'metTau2DPhi', 'metJ1DPhi', 'metJ2DPhi',
               'metTauPairDPhi', 'metSvTauPairDPhi', 'metJetPairDPhi',]
def main():
    """Run a TMVA classification training driven by argparse options.

    Books the requested MVA methods on a signal and a background TTree,
    trains/tests/evaluates them, optionally opens the TMVA GUI when run
    interactively, and returns locals() so the caller can inspect
    intermediate objects (factory, trees, weight-file paths, ...).

    Works with both ROOT 5 (factory-based data loading) and ROOT 6
    (TMVA.DataLoader), selected at runtime by probing the factory API.
    """
    # Default settings for command line arguments.
    DEFAULT_OUTFNAME = "TMVAXi2.root"
    DEFAULT_INFNAME = "MC_Xic0_2015_filtered.root"
    DEFAULT_TREESIG = "DecayTree"
    DEFAULT_TREEBKG = "DecayTree"
    DEFAULT_METHODS = "Cuts,CutsD,CutsPCA,CutsGA,CutsSA,Likelihood,LikelihoodD,LikelihoodPCA,LikelihoodKDE,LikelihoodMIX,PDERS,PDERSD,PDERSPCA,PDEFoam,PDEFoamBoost,KNN,LD,Fisher,FisherG,BoostedFisher,HMatrix,FDA_GA,FDA_SA,FDA_MC,FDA_MT,FDA_GAMT,FDA_MCMT,MLP,MLPBFGS,MLPBNN,CFMlpANN,TMlpANN,SVM,BDT,BDTD,BDTG,BDTB,RuleFit"

    import argparse
    argparser = argparse.ArgumentParser()
    # NOTE: --methods, --variables and --spectators are eval()'d below, so
    # their values must be Python literal source text (list/tuple syntax).
    argparser.add_argument("-m", "--methods", default=repr(DEFAULT_METHODS.split(',')))
    argparser.add_argument("-o", "--outputfile", default=DEFAULT_OUTFNAME)
    argparser.add_argument('--variables')
    argparser.add_argument('-s', '--spectators', default='()')
    argparser.add_argument('--signalfile', default=DEFAULT_INFNAME)
    argparser.add_argument('--signaltree', default=DEFAULT_TREESIG)
    argparser.add_argument('--signalsel', default='')
    argparser.add_argument('--signalweight', default='')
    argparser.add_argument('--bkgfile', default=DEFAULT_INFNAME)
    argparser.add_argument('--bkgtree', default=DEFAULT_TREEBKG)
    argparser.add_argument('--bkgsel', default='')
    argparser.add_argument('--bkgweight', default='')
    argparser.add_argument('--factoryname', default="TMVAClassification")
    argparser.add_argument('-v', '--verbose', action='store_true', default=False)
    argparser.add_argument('--weightsdir', default='weights')
    argparser.add_argument('--datasetname', default='dataset')
    args = argparser.parse_args()

    # Redirect TMVA's weight-file output directory.
    weightsdir = args.weightsdir
    ROOT.TMVA.Config.Instance().GetIONames().fWeightFileDir = weightsdir

    # Print methods.
    # NOTE(review): eval() of a command-line string -- acceptable for a
    # private analysis script, but unsafe on untrusted input.
    mlist = eval(args.methods)
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes.
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # Check ROOT version, give alarm if 5.18 (known PyROOT/TMVA
    # incompatibility: enum arguments are ignored in function calls).
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (the logon macro loads
    # the TMVA library); only run it when the macro is actually present.
    if os.path.exists('./TMVAlogon.C'):
        gROOT.Macro("./TMVAlogon.C")

    # Import TMVA classes from ROOT.
    from ROOT import TMVA

    # Output file.
    outputFile = TFile(args.outputfile, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C
    # for more factory options).  All TMVA output can be suppressed by
    # removing the "!" (not) in front of the "Silent" argument in the
    # option string.
    factory = TMVA.Factory( args.factoryname, outputFile,
                            "!V:!Silent:Color:!DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )

    # Set verbosity.
    factory.SetVerbose(args.verbose)

    # If you wish to modify default settings
    # (please check "src/Config.h" to see all available global options):
    #    gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    #    gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"

    # Define the input variables that shall be used for the classifier
    # training.  Note that you may also use variable expressions, such as
    # "3*var1/var2*abs(var3)" [all types of expressions that can also be
    # parsed by TTree::Draw( "expression" )].

    # For ROOT v6 compatibility: in ROOT 6 variables/trees are attached to
    # a TMVA.DataLoader instead of the factory itself.  Probe for the
    # old-style API rather than parsing the version string.
    root6 = not hasattr(factory, 'AddVariable')
    if root6:
        dataloader = ROOT.TMVA.DataLoader(args.datasetname)
    else:
        dataloader = factory

    # Each entry of --variables is either a bare name/expression or a full
    # argument tuple for AddVariable (e.g. (expression, title, unit, type)).
    for var in eval(args.variables):
        if not isinstance(var, (tuple, list)):
            var = (var, )
        try:
            dataloader.AddVariable(*var)
        except:
            print 'Failed to call dataloader.AddVariable with args', var
            raise
    # dataloader.AddVariable( "myvar1 := var1+var2", 'F' )
    # dataloader.AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' )
    # dataloader.AddVariable( "var3", "Variable 3", "units", 'F' )
    # dataloader.AddVariable( "var4", "Variable 4", "units", 'F' )

    # You can add so-called "Spectator variables", which are not used in
    # the MVA training, but will appear in the final "TestTree" produced by
    # TMVA.  This TestTree will contain the input variables, the response
    # values of all trained MVAs, and the spectator variables.
    for var in eval(args.spectators):
        if not isinstance(var, (tuple, list)):
            var = (var, )
        try:
            dataloader.AddSpectator(*var)
        except:
            print 'Failed to call dataloader.AddSpectator with args', var
            raise
    # dataloader.AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' )
    # dataloader.AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' )

    # Read input data.
    # if gSystem.AccessPathName( infname ) != 0: gSystem.Exec( "wget http://root.cern.ch/files/" + infname )
    # input = TFile.Open( infname )
    # # Get the signal and background trees for training
    # signal = input.Get( treeNameSig )
    # background = input.Get( treeNameBkg )
    signalfile = TFile.Open(args.signalfile)
    if signalfile.IsZombie():
        raise OSError("Couldn't find signal file " + repr(args.signalfile))
    signal = signalfile.Get(args.signaltree)
    if not signal:
        raise ValueError("Couldn't find signal TTree " + repr(args.signaltree)
                         + " in file " + repr(args.signalfile))
    bkgfile = TFile.Open(args.bkgfile)
    if bkgfile.IsZombie():
        raise OSError("Couldn't find bkg file " + repr(args.bkgfile))
    background = bkgfile.Get(args.bkgtree)
    if not background:
        raise ValueError("Couldn't find bkg TTree " + repr(args.bkgtree)
                         + " in file " + repr(args.bkgfile))

    # Global event weights (see below for setting event-wise weights).
    signalWeight = 1.0
    backgroundWeight = 1.0

    # ====== register trees ====================================================
    # The following method is the preferred one: you can add an arbitrary
    # number of signal or background trees.
    dataloader.AddSignalTree(signal, signalWeight)
    dataloader.AddBackgroundTree(background, backgroundWeight)

    # To give different trees for training and testing, do as follows:
    #    dataloader.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    dataloader.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )
    #
    # Use the following code instead of the above two or four lines to add
    # signal and background training and test events "by hand".
    # NOTE that in this case one should not give expressions (such as
    # "var1+var2") in the input variable definition, but simply compute the
    # expression before adding the event.
    #
    #     # --- begin ----------------------------------------------------------
    #     # ... *** please lookup code in TMVA/macros/TMVAClassification.C ***
    #     # --- end ------------------------------------------------------------
    #
    # ====== end of register trees ==============================================

    # Set individual event weights (the variables must exist in the
    # original TTree):
    #    for signal    : dataloader.SetSignalWeightExpression    ("weight1*weight2")
    #    for background: dataloader.SetBackgroundWeightExpression("weight1*weight2")
    if args.signalweight:
        dataloader.SetSignalWeightExpression(args.signalweight)
    if args.bkgweight:
        dataloader.SetBackgroundWeightExpression(args.bkgweight)

    # Apply additional cuts on the signal and background samples.
    # Example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    mycutSig = TCut(args.signalsel)
    mycutBkg = TCut(args.bkgsel)

    # Here, the relevant variables are copied over in new, slim trees that
    # are used for TMVA training and testing.
    # "SplitMode=Random" means that the input events are randomly shuffled
    # before splitting them into training and test samples.
    dataloader.PrepareTrainingAndTestTree( mycutSig, mycutBkg,
                                           "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    # --------------------------------------------------------------------------
    # ---- Book MVA methods
    #
    # Please lookup the various method configuration options in the
    # corresponding cxx files, eg: src/MethodCuts.cxx, etc, or here:
    # http://tmva.sourceforge.net/optionRef.html
    # It is possible to preset ranges in the option string in which the cut
    # optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third
    # input variable.

    if root6:
        # Bit of an ugly hack, but does the job: ROOT 6's BookMethod takes
        # the DataLoader as its first argument, so wrap the bound method to
        # inject it and keep the ROOT 5 call sites below unchanged.
        factory._BookMethod = factory.BookMethod
        # Don't know why 'self' isn't passed here?
        def BookMethod(*args):
            factory._BookMethod(dataloader, *args)
        factory.BookMethod = BookMethod

    # Cut optimisation.
    if "Cuts" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "Cuts",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart")
    if "CutsD" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsD",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" )
    if "CutsPCA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsPCA",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" )
    if "CutsGA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsGA",
                            "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" )
    if "CutsSA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsSA",
                            "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" )

    # Likelihood ("naive Bayes estimator").
    if "Likelihood" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "Likelihood",
                            "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" )
    # Decorrelated likelihood.
    if "LikelihoodD" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodD",
                            "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" )
    # PCA-transformed likelihood.
    if "LikelihoodPCA" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodPCA",
                            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" )
    # Use a kernel density estimator to approximate the PDFs.
    if "LikelihoodKDE" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodKDE",
                            "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" )
    # Use a variable-dependent mix of splines and kernel density estimator.
    if "LikelihoodMIX" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodMIX",
                            "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" )

    # Test the multi-dimensional probability density estimator.
    # Here are the options strings for the MinMax and RMS methods,
    # respectively:
    #    "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
    #    "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
    if "PDERS" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERS",
                            "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" )
    if "PDERSD" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERSD",
                            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" )
    if "PDERSPCA" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERSPCA",
                            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" )

    # Multi-dimensional likelihood estimator using self-adapting
    # phase-space binning.
    if "PDEFoam" in mlist:
        factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoam",
                            "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" )
    if "PDEFoamBoost" in mlist:
        factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoamBoost",
                            "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" )

    # K-Nearest Neighbour classifier (KNN).
    if "KNN" in mlist:
        factory.BookMethod( TMVA.Types.kKNN, "KNN",
                            "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" )

    # H-Matrix (chi2-squared) method.
    if "HMatrix" in mlist:
        factory.BookMethod(TMVA.Types.kHMatrix, "HMatrix", "!H:!V")

    # Linear discriminant (same as Fisher discriminant).
    if "LD" in mlist:
        factory.BookMethod( TMVA.Types.kLD, "LD",
                            "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )
    # Fisher discriminant (same as LD).
    if "Fisher" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "Fisher",
                            "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )
    # Fisher with Gauss-transformed input variables.
    if "FisherG" in mlist:
        factory.BookMethod(TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss")
    # Composite classifier: ensemble (tree) of boosted Fisher classifiers.
    if "BoostedFisher" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "BoostedFisher",
                            "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" )

    # Function discrimination analysis (FDA) -- test of various fitters;
    # the recommended one is Minuit (or GA or SA).
    if "FDA_MC" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MC",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" )
    if "FDA_GA" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_GA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" )
    if "FDA_SA" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_SA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" )
    if "FDA_MT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" )
    if "FDA_GAMT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_GAMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" )
    if "FDA_MCMT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MCMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" )

    # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer
    # Perceptrons.
    if "MLP" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLP",
                            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" )
    if "MLPBFGS" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLPBFGS",
                            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" )
    if "MLPBNN" in mlist:
        # BFGS training with bayesian regulators.
        factory.BookMethod( TMVA.Types.kMLP, "MLPBNN",
                            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" )

    # CF(Clermont-Ferrand)ANN.
    if "CFMlpANN" in mlist:
        factory.BookMethod(TMVA.Types.kCFMlpANN, "CFMlpANN",
                           "!H:!V:NCycles=2000:HiddenLayers=N+1,N" )  # n_cycles:#nodes:#nodes:...
    # Tmlp(Root)ANN.
    if "TMlpANN" in mlist:
        factory.BookMethod( TMVA.Types.kTMlpANN, "TMlpANN",
                            "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" )  # n_cycles:#nodes:#nodes:...

    # Support Vector Machine.
    if "SVM" in mlist:
        factory.BookMethod(TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm")

    # Boosted Decision Trees.
    if "BDTG" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTG",
                            "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:MaxDepth=2" )
    if "BDT" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDT",
                            "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" )
    if "BDTB" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTB",
                            "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" )
    if "BDTD" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTD",
                            "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" )

    # RuleFit -- TMVA implementation of Friedman's method.
    if "RuleFit" in mlist:
        factory.BookMethod( TMVA.Types.kRuleFit, "RuleFit",
                            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" )

    # --------------------------------------------------------------------------
    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs.
    factory.TrainAllMethods()

    # Test MVAs.
    factory.TestAllMethods()

    # Evaluate MVAs.
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    print "=== wrote root file %s\n" % outputFile.GetName()
    print "=== TMVAClassification is done!\n"

    # Open the GUI for the result macros (only when running interactively).
    if not ROOT.gROOT.IsBatch():
        if hasattr(TMVA, 'TMVAGui'):
            # Newer ROOT: the GUI is callable directly from the TMVA module.
            TMVA.TMVAGui(outputFile.GetName())
            raw_input('Hit enter to quit.')
        elif 'ROOTSYS' in os.environ:
            # Older ROOT: load the TMVAGui.C macro from the ROOT tutorials.
            tmvaguipath = os.path.join(os.environ['ROOTSYS'], 'tutorials', 'tmva')
            if os.path.exists(os.path.join(tmvaguipath, 'TMVAGui.C')):
                gROOT.SetMacroPath(tmvaguipath)
                gROOT.LoadMacro("TMVAGui.C")
                try:
                    gROOT.ProcessLine("TMVAGui(\"%s\")" % outputFile.GetName())
                    raw_input('Hit enter to quit.')
                except RuntimeError:
                    print "Couldn't run TMVAGui!"

    # Paths of the per-method weight and standalone-class files written by
    # the factory (exposed to the caller via the returned locals()).
    outputfilename = outputFile.GetName()
    weightsfiles = dict(
        (m, os.path.join(weightsdir, args.factoryname + '_' + m + '.weights.xml'))
        for m in mlist)
    classfiles = dict(
        (m, os.path.join(weightsdir, args.factoryname + '_' + m + '.class.C'))
        for m in mlist)

    # Keep the ROOT thread running (this makes the function hang).
    #gApplication.Run()

    # TMVA disables unused branches when copying the trees, then doesn't
    # change them back -- re-enable everything for the caller.
    background.SetBranchStatus('*', 1)
    signal.SetBranchStatus('*', 1)
    if 'signalfile' in locals():
        signalfile.Close()
    if 'bkgfile' in locals():
        bkgfile.Close()
    return locals()
def main(): try: # retrive command line options shortopts = "w:m:i:j:f:g:t:o:a:vgh?" longopts = ["weight_fold=", "methods=", "inputfilesig=", "inputfilebkg=", "friendinputfilesig=", "friendinputfilebkg=", "inputtrees=", "outputfile=", "verbose", "gui", "help", "usage"] opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts ) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infnameSig = DEFAULT_INFNAMESIG infnameBkg = DEFAULT_INFNAMEBKG friendfnameSig = DEFAULT_FRIENDNAMESIG friendfnameBkg = DEFAULT_FRIENDNAMEBKG treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG outfname = DEFAULT_OUTFNAME methods = DEFAULT_METHODS weight_fold = "weights" verbose = False gui = False addedcuts = "" for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-w", "--weight_fold"): weight_fold = a elif o in ("-m", "--methods"): methods = a elif o in ("-i", "--inputfilesig"): infnameSig = a elif o in ("-j", "--inputfilebkg"): infnameBkg = a elif o in ("-f", "--friendinputfilesig"): friendfnameSig = a elif o in ("-g", "--friendinputfilebkg"): friendfnameBkg = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-a", "--addedcuts"): addedcuts = a elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit( ' ' ) trees.sort() trees.reverse() if len(trees)-trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True elif o in ("-g", "--gui"): gui = True # Print methods mlist = methods.replace(' ',',').split(',') print "=== TMVAClassification: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Print the file print "Using file " + infnameSig + " for signal..." print "Using file " + infnameBkg + " for background..." 
# Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 print "ROOT version is " + str(gROOT.GetVersionCode()) if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." sys.exit(1) # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI gROOT.SetMacroPath( "./" ) ## SO I TAKE DEFAULT FORM ROOT# gROOT.Macro ( "./TMVAlogon.C" ) #! gROOT.LoadMacro ( "./TMVAGui.C" ) # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile( outfname, 'RECREATE' ) # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" 
(not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose( verbose ) # If you wish to modify default settings # (please check "src/Config.h" to see all available global options) # gConfig().GetVariablePlotting()).fTimesRMS = 8.0 (TMVA.gConfig().GetIONames()).fWeightFileDir = weight_fold; # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" # [all types of expressions that can also be parsed by TTree::Draw( "expression" )] factory.AddVariable( "dR_l1l2", "dR_l1l2", "", 'F' ) factory.AddVariable( "dR_b1b2", "dR_b1b2", "", 'F' ) factory.AddVariable( "dR_bl", "dR_bl", "", 'F' ) factory.AddVariable( "dR_l1l2b1b2", "dR_l1l2b1b2", "", 'F' ) factory.AddVariable( "MINdR_bl", "MINdR_bl", "", 'F' ) factory.AddVariable( "dphi_l1l2b1b2", "dphi_l1l2b1b2", "", 'F' ) factory.AddVariable( "mass_l1l2", "mass_l1l2", "", 'F' ) factory.AddVariable( "mass_b1b2", "mass_b1b2", "", 'F' ) factory.AddVariable( "mass_trans", "mass_trans", "", 'F' ) factory.AddVariable( "MT2", "MT2", "", 'F' ) factory.AddVariable( "pt_b1b2", "pt_b1b2", "", 'F' ) #factory.AddVariable( "MMC_h2mass_MaxBin", "MMC_h2mass_MaxBin", "", 'F' ) #factory.AddVariable( "MMC_h2mass_RMS", "MMC_h2mass_RMS", "", 'F' ) #factory.AddVariable( "MMC_h2mass_prob", "MMC_h2mass_prob", "", 'F' ) # You can add so-called "Spectator variables", which are not used in the MVA training, # but will appear in the final "TestTree" produced by TMVA. 
This TestTree will contain the # input variables, the response values of all trained MVAs, and the spectator variables # factory.AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' ) # factory.AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' ) # Read input data if gSystem.AccessPathName( infnameSig ) != 0 or gSystem.AccessPathName( infnameBkg ): gSystem.Exec( "wget http://root.cern.ch/files/" + infname ) inputSig = TFile.Open( infnameSig ) inputBkg = TFile.Open( infnameBkg ) # Get the signal and background trees for training signal = inputSig.Get( treeNameSig ) background = inputBkg.Get( treeNameBkg ) ##signal.AddFriend( "eleIDdir/isoT1 = eleIDdir/T1", friendfnameSig ) ##background.AddFriend( "eleIDdir/isoT1 = eleIDdir/T1", friendfnameBkg ) # Global event weights (see below for setting event-wise weights) signalWeight = 1. backgroundWeight = 1. #I don't think there's a general answer to this. The safest 'default' #is to use the envent weight such that you have equal amounts of signal #and background #for the training, otherwise for example: if you look for a rare #signal and you use the weight to scale the number of events according #to the expected ratio of signal and background #according to the luminosity... the classifier sees hardly any signal #events and "thinks" .. Oh I just classify everything background and do #a good job! # #One can try to 'optimize' the training a bit more in either 'high #purity' or 'high efficiency' by choosing different weights, but as I #said, there's no fixed rule. You'd have #to 'try' and see if you get better restults by playing with the weights. 
# ====== register trees ==================================================== # # the following method is the prefered one: # you can add an arbitrary number of signal or background trees factory.AddSignalTree ( signal, signalWeight ) factory.AddBackgroundTree( background, backgroundWeight ) # To give different trees for training and testing, do as follows: # factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ) # factory.AddSignalTree( signalTestTree, signalTestWeight, "Test" ) # Use the following code instead of the above two or four lines to add signal and background # training and test events "by hand" # NOTE that in this case one should not give expressions (such as "var1+var2") in the input # variable definition, but simply compute the expression before adding the event # # # --- begin ---------------------------------------------------------- # # ... *** please lookup code in TMVA/macros/TMVAClassification.C *** # # # --- end ------------------------------------------------------------ # # ====== end of register trees ============================================== # Set individual event weights (the variables must exist in the original TTree) # for signal : factory.SetSignalWeightExpression ("weight1*weight2"); # for background: factory.SetBackgroundWeightExpression("weight1*weight2"); # Apply additional cuts on the signal and background sample. 
# example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) #mycutSig = TCut( "nu1and2_diBaxis_t>-900 && met_diBaxis_t>-900&& hasb1jet && hasb2jet && hasMET && hasGenMET && hasdRljet && hastwomuons" ) mycutSig = TCut( addedcuts ) #mycutBkg = TCut( "event_n%2!=0 && " + addedcuts ) mycutBkg = TCut( addedcuts ) #mycutBkg = TCut( "nu1and2_diBaxis_t>-900 && met_diBaxis_t>-900&& hasb1jet && hasb2jet && hasMET && hasGenMET && hasdRljet && hastwomuons" ) print mycutSig # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ) # -------------------------------------------------------------------------------------------------- # ---- Book MVA methods # # please lookup the various method configuration options in the corresponding cxx files, eg: # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html # it is possible to preset ranges in the option string in which the cut optimisation should be done: # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable # Cut optimisation if "Cuts" in mlist: factory.BookMethod( TMVA.Types.kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:VarProp[0]=FMax:VarProp[1]=FMin" ) if "CutsD" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ) if "CutsPCA" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ) if "CutsGA" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsGA", "H:!V:FitMethod=GA:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95:VarProp[0]=FMin:VarProp[1]=FMax" ) if "CutsSA" 
in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ) # Likelihood ("naive Bayes estimator") if "Likelihood" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ) # Decorrelated likelihood if "LikelihoodD" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ) # PCA-transformed likelihood if "LikelihoodPCA" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) # Use a kernel density estimator to approximate the PDFs if "LikelihoodKDE" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) # Use a variable-dependent mix of splines and kernel density estimator if "LikelihoodMIX" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) # Test the multi-dimensional probability density estimator # here are the options strings for the MinMax and RMS methods, respectively: # "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); # "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if "PDERS" in mlist: factory.BookMethod( TMVA.Types.kPDERS, 
"PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ) if "PDERSD" in mlist: factory.BookMethod( TMVA.Types.kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ) if "PDERSPCA" in mlist: factory.BookMethod( TMVA.Types.kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ) # Multi-dimensional likelihood estimator using self-adapting phase-space binning if "PDEFoam" in mlist: factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ) if "PDEFoamBoost" in mlist: factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ) # K-Nearest Neighbour classifier (KNN) if "KNN" in mlist: factory.BookMethod( TMVA.Types.kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ) # H-Matrix (chi2-squared) method if "HMatrix" in mlist: factory.BookMethod( TMVA.Types.kHMatrix, "HMatrix", "!H:!V" ) # Linear discriminant (same as Fisher discriminant) if "LD" in mlist: factory.BookMethod( TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher discriminant (same as LD) if "Fisher" in mlist: factory.BookMethod( TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher with Gauss-transformed input variables if "FisherG" in mlist: factory.BookMethod( TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss" ) # Composite classifier: ensemble (tree) of 
boosted Fisher classifiers if "BoostedFisher" in mlist: factory.BookMethod( TMVA.Types.kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" ) # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if "FDA_MC" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if "FDA_GA" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if "FDA_SA" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if "FDA_MT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if "FDA_GAMT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if "FDA_MCMT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if "MLP" in mlist: 
factory.BookMethod( TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ) if "MLPBFGS" in mlist: factory.BookMethod( TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ) if "MLPBNN" in mlist: factory.BookMethod( TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators # CF(Clermont-Ferrand)ANN if "CFMlpANN" in mlist: factory.BookMethod( TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ) # n_cycles:#nodes:#nodes:... # Tmlp(Root)ANN if "TMlpANN" in mlist: factory.BookMethod( TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ) #n_cycles:#nodes:#nodes:... # Support Vector Machine if "SVM" in mlist: factory.BookMethod( TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ) # Boosted Decision Trees if "BDTG" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" ) if "BDT" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDT", "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ) if "BDTB" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ) if "BDTD" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ) # RuleFit -- TMVA implementation of Friedman's method if "RuleFit" in mlist: factory.BookMethod( TMVA.Types.kRuleFit, "RuleFit", 
"H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. outputFile.Close() print "=== wrote root file %s\n" % outfname print "=== TMVAClassification is done!\n" # open the GUI for the result macros if( gui ): gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname ) # keep the ROOT thread running gApplication.Run()
def main(): try: # retrive command line options shortopts = "m:i:t:o:vh?" longopts = [ "methods=", "inputfile=", "inputtrees=", "outputfile=", "verbose", "help", "usage" ] opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infname = DEFAULT_INFNAME treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG outfname = DEFAULT_OUTFNAME methods = DEFAULT_METHODS verbose = False for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-m", "--methods"): methods = a elif o in ("-i", "--inputfile"): infname = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit(' ') trees.sort() trees.reverse() if len(trees) - trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True # Print methods mlist = methods.replace(' ', ',').split(',') print "=== TMVAClassification: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." 
sys.exit(1) # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI gROOT.SetMacroPath("./") gROOT.Macro("./TMVAlogon.C") gROOT.LoadMacro("./TMVAGui.C") # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile(outfname, 'RECREATE') # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" (not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose(verbose) # If you wish to modify default settings # (please check "src/Config.h" to see all available global options) # gConfig().GetVariablePlotting()).fTimesRMS = 8.0 # gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory" # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" # [all types of expressions that can also be parsed by TTree::Draw( "expression" )] #varList = ['svMass', 'mJJ', 'met/fMass','pZ - pZV'] # varList = ['svMass', 'fMass', 'dRTauTau', 'dRJJ', 'svPt', 'dRhh', 'met', 'mJJReg', # 'metTau1DPhi', 'metTau2DPhi', 'metJ1DPhi', 'metJ2DPhi', 'metTauPairDPhi', 'metSvTauPairDPhi', 'metJetPairDPhi','CSVJ1', 'CSVJ2'] # varList = ['svMass', 'dRTauTau', 'svPt', 'dRhh', 'met', 'mJJReg', # 'metTau1DPhi', 'metTau2DPhi', 'metJ2DPhi', 'metJetPairDPhi','CSVJ1', 'CSVJ2'] varList = [ 'svMass', 'dRTauTau', 'dRJJ', 'svPt', 'dRhh', 'met', 'mJJReg', 'metTau1DPhi', 'metTau2DPhi', 'metJ1DPhi', 'metJ2DPhi', 'metTauPairDPhi', 'metSvTauPairDPhi', 'metJetPairDPhi', 'CSVJ1', 'CSVJ2' ] for iVar in varList: factory.AddVariable(iVar, 'F') #factory.AddVariable( "NBTags",'I' ) # You can add so-called "Spectator variables", which are not used in the MVA training, # but will appear in the 
final "TestTree" produced by TMVA. This TestTree will contain the # input variables, the response values of all trained MVAs, and the spectator variables # factory.AddSpectator( "fMass") # factory.AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' ) # Read input data # sigChain = r.TChain("ttTreeFinal/eventTree") # bkg1Chain = r.TChain("ttTreeFinal/eventTree") # bkg2Chain = r.TChain("ttTreeFinal/eventTree") # Get the signal and background trees for training iFileSig = TFile.Open("/scratch/zmao/relaxed_regression/%s" % (infname)) iFileBkg = TFile.Open( "/scratch/zmao/relaxed_regression/trainSample_relaxedsamebTag.root") sigChain = iFileSig.Get("eventTree") bkgChain = iFileBkg.Get("eventTree") # Global event weights (see below for setting event-wise weights) signalWeight = 1 #0.0159/sigChain.GetEntries() #xs (pb) # ====== register trees ==================================================== # # the following method is the prefered one: # you can add an arbitrary number of signal or background trees factory.AddSignalTree(sigChain, signalWeight) factory.AddBackgroundTree(bkgChain, 1) factory.SetSignalWeightExpression('triggerEff') # To give different trees for training and testing, do as follows: # factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ) # factory.AddSignalTree( signalTestTree, signalTestWeight, "Test" ) # Use the following code instead of the above two or four lines to add signal and background # training and test events "by hand" # NOTE that in this case one should not give expressions (such as "var1+var2") in the input # variable definition, but simply compute the expression before adding the event # # # --- begin ---------------------------------------------------------- # # ... 
*** please lookup code in TMVA/macros/TMVAClassification.C *** # # # --- end ------------------------------------------------------------ # # ====== end of register trees ============================================== # Set individual event weights (the variables must exist in the original TTree) # for signal : factory.SetSignalWeightExpression ("weight1*weight2"); # for background: factory.SetBackgroundWeightExpression("weight1*weight2"); #factory.SetBackgroundWeightExpression( "weight" ) # Apply additional cuts on the signal and background sample. # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) mycutSig = TCut( "iso1<1.5 && iso2<1.5 && CSVJ1 > 0.679 && CSVJ2 > 0.244 && abs(eta1)<2.1 && abs(eta2)<2.1 && charge1 + charge2 == 0" ) mycutBkg = TCut("") # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ) # -------------------------------------------------------------------------------------------------- # ---- Book MVA methods # # please lookup the various method configuration options in the corresponding cxx files, eg: # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html # it is possible to preset ranges in the option string in which the cut optimisation should be done: # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable # Cut optimisation # Fisher discriminant (same as LD) # if "Fisher" in mlist: #factory.BookMethod( TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # factory.BookMethod( TMVA.Types.kFisher, "Fisher") if "BDT" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDT", 
"!H:!V:NTrees=150:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=-1" ) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. outputFile.Close() print "=== wrote root file %s\n" % outfname print "=== TMVAClassification is done!\n" # open the GUI for the result macros # gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname ) ChangeWeightName = 'mv /afs/hep.wisc.edu/home/zmao/CMSSW_5_3_15/src/TMVA-v4.2.0/test/weights/TMVAClassification_BDT.weights.xml /afs/hep.wisc.edu/home/zmao/CMSSW_5_3_15/src/TMVA-v4.2.0/test/weights/TMVAClassification_BDT.weights_QCD_%i.xml' % len( varList) os.system(ChangeWeightName)
def main(): try: # retrive command line options shortopts = "m:i:t:o:vh?" longopts = [ "methods=", "inputfile=", "inputtrees=", "outputfile=", "verbose", "help", "usage" ] opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infname = DEFAULT_INFNAME treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG outfname = DEFAULT_OUTFNAME methods = DEFAULT_METHODS verbose = False for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-m", "--methods"): methods = a elif o in ("-i", "--inputfile"): infname = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit(' ') trees.sort() trees.reverse() if len(trees) - trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True # Print methods mlist = methods.replace(' ', ',').split(',') print "=== TMVAClassification: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." 
sys.exit(1) # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI gROOT.SetMacroPath("./") gROOT.Macro("./TMVAlogon.C") gROOT.LoadMacro("./TMVAGui.C") # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile(outfname, 'RECREATE') # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" (not) in # front of the "Silent" argument in the option string # factory = TMVA.Factory( "TMVAClassification", outputFile, # "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ) jobname = DEFAULT_OUTFNAME factory = TMVA.Factory( jobname.replace(".root", ""), outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification" ) # pascal # Set verbosity factory.SetVerbose(verbose) # Adjust variables if old sample is used if IsOLD: SPECTATORS.remove("JisPU") SPECTATORS.remove("JisHS") # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" theCat1Vars = "" theCat2Vars = "" theCat3Vars = "" for var in VARIABLES: factory.AddVariable(var, 'F') theCat1Vars += var + ":" theCat2Vars += var + ":" theCat3Vars += var + ":" theCat1Vars = theCat1Vars.rstrip(":") theCat2Vars = theCat2Vars.rstrip(":") theCat3Vars = theCat3Vars.rstrip(":") # You can add so-called "Spectator variables", which are not used in the MVA training, for spect in SPECTATORS: factory.AddSpectator(spect, spect) # Apply additional cuts on the signal and background sample. 
mycutSig = "" mycutBkg = TCut(SELECTION + "&&JisPU") if doJTruthMatchPt10Cut: mycutSig = TCut(SELECTION + "&&JisHS && Jtruthpt>10") else: mycutSig = TCut(SELECTION + "&&JisHS") cat1cuts = TCut("Jpt >20 && Jpt <30") cat2cuts = TCut("Jpt >30 && Jpt <40") cat3cuts = TCut("Jpt >40 && Jpt <50") # open file input = TFile.Open(infname) # Get the signal and background trees for training signal = input.Get(treeNameSig) background = input.Get(treeNameBkg) # Global event weights (see below for setting event-wise weights) signalWeight = 1.0 backgroundWeight = 1.0 # ====== register trees ==================================================== factory.AddSignalTree(signal, signalWeight) factory.AddBackgroundTree(background, backgroundWeight) # To give different trees for training and testing, do as follows: # factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ) # factory.AddSignalTree( signalTestTree, signalTestWeight, "Test" ) # Set individual event weights (the variables must exist in the original TTree) # for signal : factory.SetSignalWeightExpression ("weight1*weight2"); # for background: factory.SetBackgroundWeightExpression("weight1*weight2"); # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples TrainingAndTestTreeStr= "nTrain_Signal="+str(DEFAULT_NEVENTS_TRAIN_S)+\ ":nTrain_Background="+str(DEFAULT_NEVENTS_TRAIN_B)+\ ":nTest_Signal="+str(DEFAULT_NEVENTS_TEST_S)+\ ":nTest_Background="+str(DEFAULT_NEVENTS_TEST_B)+\ ":SplitMode=Random:NormMode=EqualNumEvents:!V" factory.PrepareTrainingAndTestTree(mycutSig, mycutBkg, TrainingAndTestTreeStr) # -------------------------------------------------------------------------------------------------- # ---- Book MVA methods # # multidim likelihood --- kNN if "kNN100" in mlist: factory.BookMethod( TMVA.Types.kKNN, "KNN100", 
"!V:H:nkNN=100:ScaleFrac=0.8:UseKernel=F:UseWeight=F:Trim=False:BalanceDepth=6" ) if "kNN100trim" in mlist: factory.BookMethod( TMVA.Types.kKNN, "KNN100trim", "!V:H:nkNN=100:ScaleFrac=0.8:UseKernel=F:UseWeight=F:Trim=True:BalanceDepth=6" ) if "likelihood" in mlist: factory.BookMethod(TMVA.Types.kLikelihood, "Likelihood", "H:!V:") if "BDT" in mlist: BDToptions = "!H:NTrees=850:nEventsMin=150:MaxDepth=5:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VerbosityLevel=Error" factory.BookMethod(TMVA.Types.kBDT, "BDT", BDToptions) # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. outputFile.Close() print "=== wrote root file %s\n" % outfname print "=== TMVAClassification is done!\n" # open the GUI for the result macros gROOT.ProcessLine("TMVAGui(\"%s\")" % outfname) # keep the ROOT thread running gApplication.Run()
def main():
    """Apply previously trained TMVA BDT weights to IceCube-style HDF5 files.

    Reads Level4b*.hdf files from the data directory, evaluates the booked
    MVAs per event, writes the scores back into cloned nodes of each HDF5
    file, and dumps the output histograms to a ROOT file.
    """
    try:
        # Retrive command line options
        shortopts = "m:i:o:d:vh?"
        longopts = [
            "methods=", "inputfile=", "outputfile=", "datatype=", "verbose",
            "help", "usage"
        ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)
    except getopt.GetoptError:
        # Print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    methods = DEFAULT_METHODS
    directory = DEFAULT_DATA
    verbose = False

    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-d", "--datatype"):
            directory = a
        elif o in ("-v", "--verbose"):
            verbose = True

    # Print methods
    # take leading and trailing white space out
    methods = methods.strip(" ")
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVApplication: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut, TH1F, TStopwatch
    print("ROOT classes successfully imported!\n")  # DCS 17/06/2016

    # check ROOT version, give alarm if 5.18 (PyROOT enum-argument bug
    # breaks TMVA in that release)
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA
    # library); load also the GUI macro.
    # NOTE(review): tmvadir is hard-coded to one user's home directory.
    tmvadir = "/home/dean/software/tmva/TMVA-v4.2.0/test"
    macro = os.path.join(tmvadir, "TMVAlogon.C")
    loadmacro = os.path.join(tmvadir, "TMVAGui.C")
    gROOT.SetMacroPath(tmvadir)
    gROOT.Macro(macro)
    gROOT.LoadMacro(loadmacro)
    print("ROOT macro path loaded correctly!\n")

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Create the Reader object and register the ten input variables as
    # single-entry float arrays (TMVA reads them by reference).
    reader = TMVA.Reader("!Color")
    var1 = array('f', [0])
    var2 = array('f', [0])
    var3 = array('f', [0])
    var4 = array('f', [0])
    var5 = array('f', [0])
    var6 = array('f', [0])
    var7 = array('f', [0])
    var8 = array('f', [0])
    var9 = array('f', [0])
    var10 = array('f', [0])
    variables = [var1, var2, var3, var4, var5, var6, var7, var8, var9, var10]
    var_names = [
        'peaks', 'mean_peaks', 'integral', 'integral_over_peaks', 'max',
        'mean', 'max_over_mean', 'std_dev_peaks', 'entropy', 'ps_integral'
    ]
    #variables = [var1, var2, var3, var4]
    #var_names = ['var1', 'var2', 'var3', 'var4']
    for name, var in zip(var_names, variables):
        reader.AddVariable(name, var)
    print("Variables successfully loaded!\n")

    # Book the MVA methods from every .xml weight file in the weight
    # directory, under the names BDT_0, BDT_1, ...
    # NOTE(review): the histograms below are evaluated via
    # EvaluateMVA(name + " method") using names taken from mlist, which
    # does not match the "BDT_{i}" names booked here — confirm the
    # intended naming scheme.
    weight_dir = "/home/dean/capstone/TMVA-classifier/weights/"
    weights = [f for f in os.listdir(weight_dir) if ".xml" in f]
    for i, f in enumerate(weights):
        reader.BookMVA("BDT_{}".format(i),
                       os.path.join(weight_dir, f))  #only care about BDT
    # reader.BookMVA("BDT","weights/TMVAClassification_BDT.weights.xml")

    # Book output histograms, one per requested method.
    nbin = 100
    histList = []
    for m in mlist:
        histList.append(TH1F(m, m, nbin, -3, 3))
    # NOTE(review): this fills each histogram once BEFORE any event data is
    # loaded (all reader variables are still zero) — looks unintentional.
    for h in histList:
        h.Fill(reader.EvaluateMVA(h.GetName() + " method"))

    # Book example histograms for probability/rarity (Fisher only).
    if "Fisher" in mlist:
        probHistFi = TH1F("PROBA_MVA_Fisher", "PROBA_MVA_Fisher", nbin, 0, 1)
        rarityHistFi = TH1F("RARITY_MVA_Fisher", "RARITY_MVA_Fisher", nbin, 0,
                            1)

    # Collect the input HDF5 files.
    filelist = glob(directory + "/" + "Level4b*.hdf")
    print 30 * "#"
    print "the filelist, ", filelist
    print 30 * "--"

    for file in filelist:
        try:
            startfile = tables.openFile(file, "a")
            #DELETE BDTs if they exist
            for var in startfile.root._v_children.keys():
                if re.match("BDT_", var):
                    startfile.removeNode("/", var)
                    startfile.removeNode("/__I3Index__", var)
            #NOW CLONE THE NODE
            # One output node per histogram, cloned from an existing table
            # so the schema/index structure matches.
            for name in histList:
                startfile.copyNode("/__I3Index__/StdDCOGLaunches",
                                   "/__I3Index__", str(name.GetName()))
                startfile.copyNode("/StdDCOGLaunches", "/",
                                   str(name.GetName()))
            startfile.close()

            # Re-open read-only and derive the per-event input features.
            h5 = tables.openFile(file, 'r')
            mcog_over_t = numpy.divide(h5.root.MDCOGLaunches.cols.value[:],\
                h5.root.eventinfo_ALLOfflinePulseSeriesReco.cols.length[:]/1000.)
            q_over_t = numpy.divide(numpy.log10(h5.root.eventinfo_ALLOfflinePulseSeriesReco.cols.tot_charge[:]),\
                h5.root.eventinfo_ALLOfflinePulseSeriesReco.cols.length[:]/1000.)
            ncluster_over_t = numpy.divide(h5.root.Nclusters.cols.value[:],\
                h5.root.eventinfo_ALLOfflinePulseSeriesReco.cols.length[:]/1000.)
            nsmt8_over_string = numpy.divide(h5.root.NSMT8TRIGGER.cols.value[:],\
                h5.root.eventinfo_ALLOfflinePulseSeriesReco.cols.nstrings[:])
            s1 = array('f', h5.root.Nclusters.cols.value[:])
            s2 = array('f', q_over_t)
            s3 = array('f', mcog_over_t[:])
            s4 = array('f', ncluster_over_t[:])
            s5 = array('f', nsmt8_over_string[:])
            s6 = array('f', h5.root.MedianCluster.cols.value[:])
            h5.close()

            # One classifier score per (method, event).
            result = numpy.zeros((len(histList), len(s1)),
                                 numpy.dtype([('Classifier', numpy.double)]))

            for ievt in range(len(s1)):
                #if ievt%1000 == 0:
                #    print "--- ... Processing event: %i" % ievt
                # Fill event in memory: set the MVA input variables.
                # NOTE(review): only var1..var6 are filled although ten
                # variables were registered with the reader — confirm the
                # remaining four are intentionally left at zero.
                var1[0] = s1[ievt]
                var2[0] = s2[ievt]
                var3[0] = s3[ievt]
                var4[0] = s4[ievt]
                var5[0] = s5[ievt]
                var6[0] = s6[ievt]
                # Fill histograms with MVA outputs
                for j, h in enumerate(histList):
                    h.Fill(reader.EvaluateMVA(h.GetName() + " method"))
                    result[j][ievt]["Classifier"] = reader.EvaluateMVA(
                        h.GetName() + " method")
                # Write this event's scores back into the cloned nodes.
                # NOTE(review): loop nesting reconstructed from mangled
                # source — re-opening the file per event is very slow;
                # confirm against the original, and consider hoisting the
                # open/close out of the event loop.
                endfile = tables.openFile(file, 'a')
                for k, name in enumerate(histList):
                    modifiedNode = endfile.getNode("/", str(name.GetName()))
                    modifiedNode.cols.value[ievt] = result[k][ievt][
                        "Classifier"]
                endfile.close()

            # NOTE(review): time.clock() is CPU time, not wall time, and is
            # deprecated in later Pythons.
            print time.strftime('Elapsed time - %H:%M:%S',
                                time.gmtime(time.clock()))
            #sanity check of the mva values writen in the hdf files
            #ifile=tables.openFile(file,'r')
            # if len(ifile.root.BDT_400_20.cols.BDT) != len(ifile.root.MPEFit.cols.Zenith):
            #     ifile.close()
            #     print "Something wrong with file: ", k, j+1
            #exit()
            #os.system("rm "+"/data/icecube01/users/redlpete/IC59L2/TableIOL3/H5FilesIncludingScores/H5%0.2d%0.2d.hd5"%(k,j+1))
            # ifile.close()
        except ImportError as exce:
            # NOTE(review): k and j are only bound if the inner loops above
            # ran; if the exception fires earlier this print raises
            # NameError.  Catching ImportError for a missing file also
            # looks wrong (PyTables raises IOError) — confirm.
            print "file does not exist", k, j + 1
            print exce
            exit()

    # Append the scores to a summary HDF5 table.
    # NOTE(review): "result" here is whatever the LAST file's loop left
    # behind, and result[n] is a whole per-method row rather than a scalar
    # score — verify this is the intended content of the table.
    ifile = tables.openFile("test.h5", mode='a')

    class Score(IsDescription):
        score = Float64Col()

    group = ifile.createGroup("/", 'Background', 'Scoreinfo')
    table = ifile.createTable(group, 'score', Score, "Example")
    particle = table.row
    for n in range(len(result)):
        particle['score'] = result[n]
        particle.append()

    # NOTE(review): "sw" (a TStopwatch?) is never created in this function —
    # this line raises NameError as written; confirm where sw comes from.
    print "--- End of event loop: %s" % sw.Print()

    # Write the MVA output histograms to a ROOT file.
    target = TFile("TMVApp1.root", "RECREATE")
    for h in histList:
        h.Write()
    target.Close()
    print "--- Created root file: \"TMVApp.root\" containing the MVA output histograms"
    print "==> TMVApplication is done!"
def main(): try: # retrive command line options shortopts = "m:i:t:o:vh?" longopts = [ "methods=", "inputfile=", "inputtrees=", "outputfile=", "verbose", "help", "usage" ] opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infname = DEFAULT_INFNAME treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG outfname = DEFAULT_OUTFNAME methods = DEFAULT_METHODS verbose = False for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-m", "--methods"): methods = a elif o in ("-i", "--inputfile"): infname = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit(' ') trees.sort() trees.reverse() if len(trees) - trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True # Print methods mlist = methods.replace(' ', ',').split(',') print "=== TMVAClassification: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." 
sys.exit(1) # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI gROOT.SetMacroPath("./") gROOT.Macro("./TMVAlogon.C") gROOT.LoadMacro("./TMVAGui.C") # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile(outfname, 'RECREATE') # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" (not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose(verbose) # If you wish to modify default settings # (please check "src/Config.h" to see all available global options) # gConfig().GetVariablePlotting()).fTimesRMS = 8.0 # gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory" # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" # [all types of expressions that can also be parsed by TTree::Draw( "expression" )] varList = varsList.varList for iVar in varList: factory.AddVariable(iVar, 'F') #factory.AddVariable( "NBTags",'I' ) # You can add so-called "Spectator variables", which are not used in the MVA training, # but will appear in the final "TestTree" produced by TMVA. 
This TestTree will contain the # input variables, the response values of all trained MVAs, and the spectator variables factory.AddSpectator("sampleName2") factory.AddSpectator("EVENT") factory.AddSpectator("svMass") factory.AddSpectator("met") factory.AddSpectator("fMass") factory.AddSpectator("CSVJ2") factory.AddSpectator("chi2KinFit") factory.AddSpectator("mJJ") factory.AddSpectator("category") factory.AddSpectator("triggerEff") factory.AddSpectator("initEvents") factory.AddSpectator("xs") # factory.AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' ) # Read input data # sigChain = r.TChain("ttTreeFinal/eventTree") # bkg1Chain = r.TChain("ttTreeFinal/eventTree") # bkg2Chain = r.TChain("ttTreeFinal/eventTree") # Get the signal and background trees for training # tool.addFiles(ch=sigChain, dirName="/hdfs/store/user/zmao/H2hh260_3-SUB-TT", knownEventNumber=0, maxFileNumber=-1) # tool.addFiles(ch=bkg1Chain, dirName="/hdfs/store/user/zmao/tt_3-SUB-TT", knownEventNumber=0, maxFileNumber=-1) # tool.addFiles(ch=bkg2Chain, dirName="/hdfs/store/user/zmao/ZZ_3-SUB-TT", knownEventNumber=0, maxFileNumber=-1) massPoint = infname Lumi = varsList.Lumi preFix = varsList.preFix infname = "H2hh%s_all_tightopposite%s3rdLepVeto.root" % (massPoint, varsList.region) iFileSig = TFile.Open(preFix + infname) sigChain = iFileSig.Get("eventTree") signalWeight = 1 factory.AddSignalTree(sigChain, signalWeight) bkg_list = [] bkg_trees_list = [] hist_list = [] weightsList = [] for i in range(len(varsList.bkg)): bkg_list.append(TFile.Open(preFix + varsList.bkg[i][1])) bkg_trees_list.append(bkg_list[i].Get("eventTree")) hist_list.append(bkg_list[i].Get('preselection')) bkg_trees_list[i].GetEntry(0) weightsList.append( (bkg_trees_list[i].xs) / hist_list[i].GetBinContent(1)) if bkg_trees_list[i].GetEntries() == 0: continue if varsList.bkg[i][0] != 'QCD': factory.AddBackgroundTree(bkg_trees_list[i], weightsList[i] * Lumi * 1000) print "%s:\t\t%.2f" % (varsList.bkg[i][0], 
bkg_trees_list[i].GetEntries() * weightsList[i] * Lumi * 1000) print "%s:\t\t%.2f" % (varsList.bkg[i][0], bkg_trees_list[i].GetEntries()) else: factory.AddBackgroundTree( bkg_trees_list[i], weightsList[i] * hist_list[i].GetBinContent(1)) print "%s:\t\t%.2f" % ( varsList.bkg[i][0], bkg_trees_list[i].GetEntries() * weightsList[i] * hist_list[i].GetBinContent(1)) # iFileBkg1 = TFile.Open(location+"TMVARegApp_tt_eff_all%s_tightoppositebTag.root" %postName) # iFileBkg2 = TFile.Open(location+"TMVARegApp_ZZ_eff_all%s_tightoppositebTag.root" %postName) # iFileBkg3 = TFile.Open(location+"TMVARegApp_tt_semi_eff_all%s_tightoppositebTag.root" %postName) # iFileBkg4 = TFile.Open(location+"TMVARegApp_DY2JetsToLL_all_tightoppositebTag.root") # iFileBkg5 = TFile.Open(location+"TMVARegApp_DY3JetsToLL_all_tightoppositebTag.root") # # iFileBkg6 = TFile.Open(location+"TMVARegApp_W1JetsToLNu_eff2_all_tightoppositebTag.root") # # iFileBkg7 = TFile.Open(location+"TMVARegApp_W2JetsToLNu_eff2_all_tightoppositebTag.root") # iFileBkg8 = TFile.Open(location+"TMVARegApp_W3JetsToLNu_all_tightoppositebTag.root") # # iFileBkg9 = TFile.Open(location+"TMVARegApp_WZJetsTo2L2Q_eff_all_tightoppositebTag.root") # iFileBkg = TFile.Open(location+"TMVARegApp_dataTotal_all%s_relaxedsamebTag.root" %postName) # bkg1Chain = iFileBkg1.Get("eventTree") # bkg2Chain = iFileBkg2.Get("eventTree") # bkg3Chain = iFileBkg3.Get("eventTree") # bkg4Chain = iFileBkg4.Get("eventTree") # bkg5Chain = iFileBkg5.Get("eventTree") # bkg6Chain = iFileBkg6.Get("eventTree") # bkg7Chain = iFileBkg7.Get("eventTree") # bkg8Chain = iFileBkg8.Get("eventTree") # bkg9Chain = iFileBkg9.Get("eventTree") # bkgChain = iFileBkg.Get("eventTree") # Global event weights (see below for setting event-wise weights) signalWeight = 1 #0.0159/sigChain.GetEntries() #xs (pb) # tmpHist1 = iFileBkg1.Get('preselection') # ttWeight = 26.2/tmpHist1.GetBinContent(1) # tmpHist2 = iFileBkg2.Get('preselection') # ZZWeight = 2.5/tmpHist2.GetBinContent(1) # 
tmpHist3 = iFileBkg3.Get('preselection') # tt_semiWeight = 109.3/tmpHist3.GetBinContent(1) # tmpHist4 = iFileBkg4.Get('preselection') # DY2JetsWeight = 181/tmpHist4.GetBinContent(1) # tmpHist5 = iFileBkg5.Get('preselection') # DY3JetsWeight = 51.1/tmpHist5.GetBinContent(1) # tmpHist6 = iFileBkg6.Get('preselection') # W1JetsToLNu = 5400/tmpHist6.GetBinContent(1) # tmpHist7 = iFileBkg7.Get('preselection') # W2JetsToLNu = 1750/tmpHist7.GetBinContent(1) # tmpHist8 = iFileBkg8.Get('preselection') # W3JetsToLNu = 519/tmpHist8.GetBinContent(1) # tmpHist9 = iFileBkg9.Get('preselection') # WZJetsTo2L2Q = 2.207/tmpHist9.GetBinContent(1) # print "tt:\t\t%.2f" %(bkg1Chain.GetEntries()*ttWeight*Lumi*1000) # print "ZZ:\t\t%.2f" %(bkg2Chain.GetEntries()*ZZWeight*Lumi*1000) # print "tt semi:\t%.2f" %(bkg3Chain.GetEntries()*tt_semiWeight*Lumi*1000) # print "DY2:\t\t%.2f" %(bkg4Chain.GetEntries()*DY2JetsWeight*Lumi*1000) # print "DY3:\t\t%.2f" %(bkg5Chain.GetEntries()*DY3JetsWeight*Lumi*1000) # print "WJ1:\t\t%.2f" %(bkg6Chain.GetEntries()*W1JetsToLNu*Lumi*1000) # print "WJ2:\t\t%.2f" %(bkg7Chain.GetEntries()*W2JetsToLNu*Lumi*1000) # print "WJ3:\t\t%.2f" %(bkg8Chain.GetEntries()*W3JetsToLNu*Lumi*1000) # print "WZJ:\t\t%.2f" %(bkg9Chain.GetEntries()*WZJetsTo2L2Q*Lumi*1000) # print "QCD:\t\t%.2f" %(bkgChain.GetEntries()*0.05) # ====== register trees ==================================================== # # the following method is the prefered one: # you can add an arbitrary number of signal or background trees # factory.AddBackgroundTree( bkgChain, 0.05) # factory.AddBackgroundTree( bkg1Chain, ttWeight*Lumi*1000) # factory.AddBackgroundTree( bkg2Chain, ZZWeight*Lumi*1000) # factory.AddBackgroundTree( bkg3Chain, tt_semiWeight*Lumi*1000) # factory.AddBackgroundTree( bkg4Chain, DY2JetsWeight*Lumi*1000) # factory.AddBackgroundTree( bkg5Chain, DY3JetsWeight*Lumi*1000) # factory.AddBackgroundTree( bkg6Chain, W1JetsToLNu*Lumi*1000) # factory.AddBackgroundTree( bkg7Chain, 
W2JetsToLNu*Lumi*1000) # factory.AddBackgroundTree( bkg8Chain, W3JetsToLNu*Lumi*1000) # factory.AddBackgroundTree( bkg9Chain, WZJetsTo2L2Q*Lumi*1000) factory.SetSignalWeightExpression('triggerEff') factory.SetBackgroundWeightExpression('triggerEff') # To give different trees for training and testing, do as follows: # factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ) # factory.AddSignalTree( signalTestTree, signalTestWeight, "Test" ) # Use the following code instead of the above two or four lines to add signal and background # training and test events "by hand" # NOTE that in this case one should not give expressions (such as "var1+var2") in the input # variable definition, but simply compute the expression before adding the event # # # --- begin ---------------------------------------------------------- # # ... *** please lookup code in TMVA/macros/TMVAClassification.C *** # # # --- end ------------------------------------------------------------ # # ====== end of register trees ============================================== # Set individual event weights (the variables must exist in the original TTree) # for signal : factory.SetSignalWeightExpression ("weight1*weight2"); # for background: factory.SetBackgroundWeightExpression("weight1*weight2"); #factory.SetBackgroundWeightExpression( "weight" ) # Apply additional cuts on the signal and background sample. 
# example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) mycutSig = TCut("chi2KinFit > -10") mycutBkg = TCut("chi2KinFit > -10") # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ) # -------------------------------------------------------------------------------------------------- # ---- Book MVA methods # # please lookup the various method configuration options in the corresponding cxx files, eg: # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html # it is possible to preset ranges in the option string in which the cut optimisation should be done: # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable # Cut optimisation # Fisher discriminant (same as LD) # if "Fisher" in mlist: #factory.BookMethod( TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # factory.BookMethod( TMVA.Types.kFisher, "Fisher") if "BDT" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDT", "!H:!V:NTrees=150:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=100" ) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. 
outputFile.Close() print "=== wrote root file %s\n" % outfname print "=== TMVAClassification is done!\n" # open the GUI for the result macros # gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname ) ChangeWeightName = 'mv /nfs_scratch/zmao/test/CMSSW_5_3_15/src/TMVA-v4.2.0/test/weights/TMVAClassification_BDT.weights.xml /nfs_scratch/zmao/test/CMSSW_5_3_15/src/TMVA-v4.2.0/test/weights/TMVAClassification_BDT.weights_both_%s.xml' % massPoint os.system(ChangeWeightName)