def Evaluate(outdir):
    """Train and evaluate a TMVA RuleFit classifier for one working directory.

    Reads the variable list from ``outdir/inputvars.txt`` (one expression per
    line) and the signal/background event lists from ``outdir/signals.txt`` /
    ``outdir/backgrounds.txt``; writes ``outdir/tmva.log``, ``outdir/tmva.root``
    and the weight files under ``outdir/weights``.

    NOTE(review): this permanently redirects sys.stdout to the log file and
    never restores it — presumably intentional so all TMVA chatter is captured;
    confirm no caller needs stdout afterwards.
    """
    # Capture all subsequent stdout (TMVA is very verbose) in a log file.
    sys.stdout = open(outdir + '/tmva.log', 'w')

    # Output file
    output = TFile(outdir + '/tmva.root', 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C
    # for more factory options). All TMVA output can be suppressed by removing
    # the "!" (not) in front of the "Silent" argument in the option string.
    factory = TMVA.Factory("TMVARuleFit", output, "!V:!Silent:Color")

    # Set the variables used for the analysis.
    # Fixes vs. original: renamed `input` (shadowed the builtin), close the
    # file via `with`, and strip only a trailing newline — the old
    # `variable[:-1]` dropped a real character when the last line had no '\n'.
    with open(outdir + '/inputvars.txt') as varfile:
        for variable in varfile:
            factory.AddVariable(variable.rstrip('\n'), 'F')

    # Set the weight directory
    TMVA.gConfig().GetIONames().fWeightFileDir = outdir + "/weights"

    # Limit the creation of correlation plots
    TMVA.gConfig().GetVariablePlotting().fMaxNumOfAllowedVariablesForScatterPlots = 20

    # Set the input files with signal and background events
    factory.SetInputTrees(outdir + '/signals.txt', outdir + '/backgrounds.txt')

    # No additional selection on either sample.
    cutsig = TCut('')
    cutbkg = TCut('')
    factory.PrepareTrainingAndTestTree(
        cutsig, cutbkg, "SplitMode=Random:NormMode=NumEvents:!V")

    factory.BookMethod(
        TMVA.Types.kRuleFit, "RuleFit",
        "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.00001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02")

    # Train MVAs
    factory.TrainAllMethods()
    # Test MVAs
    factory.TestAllMethods()
    # Evaluate MVAs
    factory.EvaluateAllMethods()
    # Save the output.
    output.Close()
def fit(self, X, y, X_test=None, y_test=None, weights=None,
        weights_test=None, signal_label=None, **kwargs):
    """Exercise root_numpy's TMVA event-loading and train ``self.method``.

    Loads (X, y[, weights]) as training events and (X_test, y_test) as test
    events — falling back to the training arrays when no test set is given —
    then books and trains the configured method. Extra ``kwargs`` become
    TMVA option-string entries (True -> "opt", False -> "!opt", else
    "opt=value"). Also asserts that malformed inputs raise the expected
    exceptions.
    """
    # (re)configure settings since deleting a previous Factory resets all
    # this. This is poor design, TMVA.
    config = TMVA.gConfig()
    config.GetIONames().fWeightFileDir = self.tmpdir
    config.SetSilent(True)
    config.SetDrawProgressBar(False)
    self.factory.DeleteAllMethods()

    extra_kwargs = dict()
    if self.task == "Regression":
        func = rnp.tmva.add_regression_events
    else:
        func = rnp.tmva.add_classification_events
        extra_kwargs["signal_label"] = signal_label

    # test exceptions
    # FIX: use floor division — `y.shape[0] / 2` is a float on Python 3 and
    # raises TypeError at slice time, before func is even called.
    assert_raises(TypeError, func, object(), X, y)
    assert_raises(ValueError, func, self.factory, X, y[:y.shape[0] // 2])
    if weights is not None:
        assert_raises(ValueError, func, self.factory, X, y,
                      weights=weights[:weights.shape[0] // 2])
        assert_raises(ValueError, func, self.factory, X, y,
                      weights=weights[:, np.newaxis])
    assert_raises(ValueError, func, self.factory, [[[1, 2]]], [1])
    assert_raises(ValueError, func, self.factory, [[1, 2]], [[[1]]])

    func(self.factory, X, y, weights=weights, **extra_kwargs)
    if X_test is None:
        # Reuse the training sample as the test sample.
        X_test = X
        y_test = y
        weights_test = weights
    func(self.factory, X_test, y_test, weights=weights_test,
         test=True, **extra_kwargs)

    self.factory.PrepareTrainingAndTestTree(TCut("1"), "NormMode=EqualNumEvents")

    # Translate keyword arguments into a TMVA option string.
    options = []
    for param, value in kwargs.items():
        if value is True:
            options.append(param)
        elif value is False:
            options.append("!{0}".format(param))
        else:
            options.append("{0}={1}".format(param, value))
    options = ":".join(options)

    self.factory.BookMethod(self.method, self.method, options)
    self.factory.TrainAllMethods()
def main(): try: # retrive command line options shortopts = "a:o:r:vh?" longopts = ["analysis=","outputfile=", "regression=", "verbose", "help", "usage"] opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts ) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) _outfname = OUTFNAME _analysis = ANALYSIS verbose = False _regression = REGRESSION for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-o", "--outputfile"): _outfname = a elif o in ("-a", "--analysis"): _analysis = a elif o in ("-r", "--regression"): _regression = True elif o in ("-v", "--verbose"): verbose = True # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." sys.exit(1) # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile( _outfname, 'RECREATE' ) # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" 
(not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose( verbose ) # If you wish to modify default settings # (please check "src/Config.h" to see all available global options) # gConfig().GetVariablePlotting()).fTimesRMS = 8.0 # gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory" TMVA.gConfig().GetIONames().fWeightFileDir = "weights_" + _analysis # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" # [all types of expressions that can also be parsed by TTree::Draw( "expression" )] if _analysis == "Dijet": if not _regression: factory.AddVariable("H_mass := H.mass", 'F'); factory.AddVariable("H_pt :=H.pt", 'F'); factory.AddVariable("hJet_pt1 := hJet_pt[0]", 'F') factory.AddVariable("hJet_pt2 := hJet_pt[1]", 'F') else: factory.AddVariable("HCorr_mass := newHiggsMass", 'F'); factory.AddVariable("HCorr_pt := newHiggsPt", 'F'); factory.AddVariable("hJ1Corr_pt := hJet_genPtReg0", 'F'); factory.AddVariable("hJ2Corr_pt := hJet_genPtReg1", 'F'); factory.AddVariable("V_pt :=V.pt", 'F'); factory.AddVariable("H_dR := H.dR", 'F'); factory.AddVariable("hJ12_MaxCsv := max(hJet_csv[0],hJet_csv[1])", 'F'); factory.AddVariable("hJ12_MinCsv := min(hJet_csv[0],hJet_csv[1])", 'F'); factory.AddVariable("HV_dPhi := HVdPhi", 'F'); factory.AddVariable("H_dEta := H.dEta", 'F'); factory.AddVariable("NAddJet:=Sum$(aJet_pt>20 && abs(aJet_eta)<4.5)", 'I' ); factory.AddVariable("dPull := deltaPullAngle", 'F'); # You can add so-called "Spectator variables", which are not used in the MVA training, # but will appear in the final "TestTree" produced by TMVA. 
This TestTree will contain the # input variables, the response values of all trained MVAs, and the spectator variables #factory.AddSpectator("hJet_pt1 := hJet_pt[0]", 'F'); #factory.AddSpectator("hJet_pt2 := hJet_pt[1]", 'F'); elif _analysis == "Subjet": if not _regression: factory.AddVariable("H_mass := FatH.filteredmass", 'F'); factory.AddVariable("H_pt := FatH.filteredpt", 'F'); factory.AddVariable("SJ1_pt := fathFilterJets_pt[0]", 'F'); factory.AddVariable("SJ2_pt := fathFilterJets_pt[1]", 'F'); factory.AddVariable("SJ3_pt := Alt$(fathFilterJets_pt[2],0)", 'F'); else: factory.AddVariable("HCorr_mass := newfatHiggsMass", 'F'); factory.AddVariable("HCorr_pt := newfatHiggsPt", 'F'); factory.AddVariable("SJ1Corr_pt := fathFilterJets_genPtReg0", 'F'); factory.AddVariable("SJ2Corr_pt := fathFilterJetsx_genPtReg1", 'F'); factory.AddVariable("SJ3_pt := Alt$(fathFilterJets_pt[2],0)", 'F'); #change later factory.AddVariable("V_pt := V.pt", 'F'); factory.AddVariable("HV_dPhi := " +\ "FatH.filteredphi - V.phi > pi ? " +\ "abs(FatH.filteredphi - V.phi - 2*pi) : " +\ "FatH.filteredphi - V.phi < -pi ? " +\ "abs(FatH.filteredphi - V.phi + 2*pi) : " +\ "abs(FatH.filteredphi - V.phi)", 'F' ) factory.AddVariable("SJ1_csv := fathFilterJets_csv[0]", 'F'); factory.AddVariable("SJ2_csv := fathFilterJets_csv[1]", 'F'); factory.AddVariable("SJ3_csv := Alt$(fathFilterJets_csv[2],0)", 'F'); factory.AddVariable("SJ12_dEta := " +\ "nfathFilterJets < 2 ? 0 : " +\ "fabs(fathFilterJets_eta[0] - fathFilterJets_eta[1] )", 'F'); factory.AddVariable("SJ13_dEta := " +\ "nfathFilterJets < 3 ? 0 : " +\ "abs( fathFilterJets_eta[0] - Alt$(fathFilterJets_eta[2],0))", 'F'); factory.AddVariable("SJ12_dPhi := " +\ "nfathFilterJets < 2 ? 0 : " +\ "fathFilterJets_phi[0] - fathFilterJets_phi[1] > pi ? " +\ "abs( fathFilterJets_phi[0] - fathFilterJets_phi[1] - 2*pi) : " +\ "fathFilterJets_phi[0] - fathFilterJets_phi[1] < -pi ? 
" +\ "abs( fathFilterJets_phi[0] - fathFilterJets_phi[1] + 2*pi) : " +\ "abs( fathFilterJets_phi[0] - fathFilterJets_phi[1])", 'F'); factory.AddVariable("SJ13_dPhi := " +\ "nfathFilterJets < 3 ? 0 : " +\ "fathFilterJets_phi[0] - Alt$(fathFilterJets_phi[2],0) > pi ? " +\ "abs(fathFilterJets_phi[0] - " +\ "Alt$(fathFilterJets_phi[2],0) - 2*pi) : " +\ "fathFilterJets_phi[0] - Alt$(fathFilterJets_phi[2],0) < -pi ? " +\ "abs(fathFilterJets_phi[0] - " +\ "Alt$(fathFilterJets_phi[2],0) + 2*pi) : " +\ "abs(fathFilterJets_phi[0] - Alt$(fathFilterJets_phi[2],0))", 'F'); factory.AddVariable("SJ12_dR := " +\ "nfathFilterJets < 2 ? 0 : " +\ "deltaR(fathFilterJets_eta[0],fathFilterJets_phi[0],fathFilterJets_eta[1],fathFilterJets_phi[1])", 'F'); factory.AddVariable("SJ13_dR := nfathFilterJets < 3 ? 0 : " +\ "deltaR(fathFilterJets_eta[0],fathFilterJets_phi[0],Alt$(fathFilterJets_eta[2],0),Alt$(fathFilterJets_phi[2],0))", 'F'); factory.AddVariable("NAddJet:= " +\ "nfathFilterJets < 2 ? 0 : " +\ "Sum$(aJet_pt>20 && abs(aJet_eta)<4.5 && deltaR(fathFilterJets_eta[0],fathFilterJets_phi[0],aJet_eta,aJet_phi)>0.3 && deltaR(fathFilterJets_eta[1],fathFilterJets_phi[1],aJet_eta,aJet_phi)>0.3)+Sum$(hJet_pt>20 && abs(hJet_eta)<4.5 && deltaR(fathFilterJets_eta[0],fathFilterJets_phi[0],hJet_eta,hJet_phi)>0.3 && deltaR(fathFilterJets_eta[1],fathFilterJets_phi[1],hJet_eta,hJet_phi)>0.3)", 'I' ); else: print "Problem specifying analysis. Please choose Dijet or Subjet." 
sys.exit(1) ## Get the Signal and Background trees for Sample in SAMPLES.keys(): SampleInfo=SAMPLES[Sample] SampleType=SampleInfo[0] # signal or background infile=os.path.join(INPUTDIR,SampleInfo[1]) xs=SampleInfo[2] ## get number of step 1 events f=TFile.Open(infile) h = f.Get("Count") nEVT=int(h.GetBinContent(1)) wt =xs/(nEVT) print Sample,": ",infile print "XS:nEVT:wt: ", xs,nEVT,wt theTree = f.Get( TREE ) if SampleType == "S": factory.AddSignalTree ( theTree, wt ) elif SampleType == "B": factory.AddBackgroundTree( theTree, wt ) else: print "Trouble extracting SampleType for this sample" sys.exit(1) # table10 AN-2011/430 if _analysis == "Dijet": cutString=\ "Vtype == 0" + " && " +\ "vLepton_pt[0]>20." + " && " +\ "H.HiggsFlag > 0" + " && " +\ "V.mass > 75.0" + " && " +\ "V.mass < 105.0" + " && " +\ "V.pt > 100.0" + " && " +\ "max(hJet_csv[0],hJet_csv[1]) > 0.244" + " && " +\ "min(hJet_csv[0],hJet_csv[1]) > 0.244" + " && " if not _regression: cutString += \ "hJet_pt[0] > 20.0" + " && " +\ "hJet_pt[1] > 20.0" + " && " +\ "H.mass > 80.0" + " && " +\ "H.mass < 150.0" else: cutString += \ "hJet_genPtReg0 > 20.0" + " && " +\ "hJet_genPtReg0 > 20.0" + " && " +\ "newHiggsMass > 80.0 && newHiggsMass < 150.0" elif _analysis == "Subjet": cutString=\ "Vtype == 0" + " && " +\ "vLepton_pt[0]>20." 
+ " && " +\ "FatH.FatHiggsFlag > 0" + " && " +\ "V.mass > 75.0" + " && " +\ "V.mass < 105.0" + " && " +\ "V.pt > 100.0" + " && " +\ "nfathFilterJets >= 2" + " && " +\ "max(fathFilterJets_csv[0],fathFilterJets_csv[1]) > 0.244" + " && " +\ "min(fathFilterJets_csv[0],fathFilterJets_csv[1]) > 0.244" + " && " if not _regression: cutString += \ "fathFilterJets_pt[0] > 20.0" + " && " +\ "fathFilterJets_pt[1] > 20.0" + " && " +\ "FatH.filteredmass > 80.0" + " && " +\ "FatH.filteredmass < 150.0" else: cutString += \ "fathFilterJets_genPtReg0 > 20.0" + " && " +\ "fathFilterJets_genPtReg0 > 20.0" + " && " +\ "newfatHiggsMass > 80.0 && newfatHiggsMass < 150.0" else: print "Problem specifying analysis. Please choose Dijet or Subjet." sys.exit(1) print cutString mycutSig = TCut( cutString ) mycutBkg = TCut( cutString ) # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples prepareOptions="nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=None:!V" #prepareOptions="SplitMode=Random:!V" factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg, prepareOptions) bdtOptions = \ "!H" + ":" +\ "!V" + ":" +\ "NTrees=850" + ":" +\ "nEventsMin=150" + ":" +\ "MaxDepth=3" + ":" +\ "BoostType=AdaBoost" + ":" +\ "AdaBoostBeta=0.3" + ":" +\ "SeparationType=GiniIndex" + ":" +\ "nCuts=20" + ":" +\ "PruneMethod=NoPruning" # "PruneMethod=CostComplexity" # print bdtOptions factory.BookMethod( TMVA.Types.kBDT, "BDT", bdtOptions) # Train MVAs factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. outputFile.Close() print "=== wrote root file %s\n" % _outfname print "=== TMVAClassification is done!\n"
# --- MVA application setup: build one TMVA reader per trained BDT and open
# --- the input trees to evaluate. (Python 2 script; `fileStr`, `BASEDIR`,
# --- `filename`, `varlist_train` and `branches` are defined elsewhere in the
# --- file — not visible here.)
branches_miniTree = {}
# NOTE(review): `branches_miniTree` is initialised here but the loop below
# fills `branches` — confirm `branches` is declared earlier and this is not a
# naming mix-up.
print "Initializing TMVA..."
# Setup TMVA
TMVA.Tools.Instance()
#TMVA.PyMethodBase.PyInitialize()

# Six trained classifiers: A/B/C categories x (3 global / 2 global+tracker).
mvaReaderList = [
    'readerA_3glb', 'readerB_3glb', 'readerC_3glb',
    'readerA_2glbTrk', 'readerB_2glbTrk', 'readerC_2glbTrk'
]
mvaReaders = {}
mvaWeights = {}
print "Setting up reader..."
for reader in mvaReaderList:
    mvaReaders[reader] = TMVA.Reader('Color:!Silent')
    tmpstr = 'TMVAClassification_2016vars_' + fileStr(reader)
    # Path to the post-training weights XML for this reader
    # (weights.xml file produced by training; placed under CommonFiles).
    mvaWeights[reader] = TString(BASEDIR + tmpstr + '/weights/' + tmpstr + '_BDT.weights.xml')

# Open input file and set branch addresses
rootfile = TFile(filename, 'READ')
rootfile.ls()
tree_bkg = rootfile.Get('TreeB')
tree_ds = rootfile.Get('TreeS_Ds')
tree_bu = rootfile.Get('TreeS_Bu')
tree_bd = rootfile.Get('TreeS_Bd')
# One single-float buffer per training variable; -999 marks "unset".
for branchName in varlist_train:
    branches[branchName] = array('f', [-999])
# --- TMVA BDT classification training script: loads signal/background trees
# --- from ML_train.root, uses every branch of the signal tree as an input
# --- variable, and trains an AdaBoost BDT. (Script may continue beyond this
# --- excerpt — e.g. TestAllMethods/EvaluateAllMethods.)
from ROOT import TMVA, TFile, TTree, TCut
import ROOT

# Setup TMVA
TMVA.Tools.Instance()
TMVA.PyMethodBase.PyInitialize()

output = TFile.Open('Classification.root', 'RECREATE')
factory = TMVA.Factory('TMVAClassification', output,
                       '!V:!Silent:Color:DrawProgressBar:Transformations=D,G:AnalysisType=Classification')

data = TFile.Open('ML_train.root')
signal = data.Get('signal_train')  # naming things "signal" breaks ROOT
background = data.Get('background_train')

dataloader = TMVA.DataLoader('dataset')
# Use every branch of the signal tree as an MVA input; n counts them
# (kept for use later in the script).
n = 0
for branch in signal.GetListOfBranches():
    dataloader.AddVariable(branch.GetName())  # loading data
    n += 1

dataloader.AddSignalTree(signal, 1.0)      # second arg is the tree weight
dataloader.AddBackgroundTree(background, 1.0)

# Controlling training populations; the numbers were chosen to keep
# nTrain_Signal/Total_Train = n_Test_Signal/Total_Test (equal ratios).
dataloader.PrepareTrainingAndTestTree(TCut(''),
                                      'nTrain_Signal=25000:nTrain_Background=6716:SplitMode=Random:NormMode=NumEvents:!V')
# Alternative: control the testing populations instead:
#'nTest_Signal=2000:nTest_Background=2000:SplitMode=Random:NormMode=NumEvents:!V')

factory.BookMethod(dataloader, TMVA.Types.kBDT, "BDT",
                   "MaxDepth=5:NTrees=1600:BoostType=AdaBoost:AdaBoostBeta=0.5")

# Run training, test and evaluation
factory.TrainAllMethods()
def process_event(i_final_histo_name_, i_input_signal_file_name_, i_input_bkg_file_name_):
    """Train a TMVA BDT separating one signal sample from three ee->qq(...)
    background samples.

    i_final_histo_name_: path of the output ROOT file to create.
    i_input_signal_file_name_: signal input file (tree "MVATrainingVariables").
    i_input_bkg_file_name_: sequence of three background file paths
        (ee->qq, ee->qqqq, ee->qqqqqq), same tree name.
    Returns None; all results are written to the output file.

    NOTE(review): both `root.TMVA...` and bare `TMVA`/`TCut` are used below —
    presumably both `import ROOT as root` and `from ROOT import TMVA, TCut`
    exist at module level; confirm.
    """
    print "at start of process event"
    root.TMVA.Tools.Instance()
    #root.TMVA.PyMethodBase.PyInitialize()
    _output_training_name_ = i_final_histo_name_
    _input_signal_name_ = i_input_signal_file_name_
    _input_bkg_ee_qq_name_ = i_input_bkg_file_name_[0]
    _input_bkg_ee_qqqq_name_ = i_input_bkg_file_name_[1]
    _input_bkg_ee_qqqqqq_name_ = i_input_bkg_file_name_[2]

    fileout = root.TFile(_output_training_name_, "RECREATE")
    input_file_signal_ = root.TFile.Open(_input_signal_name_)
    input_file_bkg_ee_qq_ = root.TFile.Open(_input_bkg_ee_qq_name_)
    input_file_bkg_ee_qqqq_ = root.TFile.Open(_input_bkg_ee_qqqq_name_)
    input_file_bkg_ee_qqqqqq_ = root.TFile.Open(_input_bkg_ee_qqqqqq_name_)
    tree_sig = input_file_signal_.Get("MVATrainingVariables")
    tree_bkg_ee_qq = input_file_bkg_ee_qq_.Get("MVATrainingVariables")
    tree_bkg_ee_qqqq = input_file_bkg_ee_qqqq_.Get("MVATrainingVariables")
    tree_bkg_ee_qqqqqq = input_file_bkg_ee_qqqqqq_.Get("MVATrainingVariables")

    # Factory flags: V = verbose (typically off); Silent = no TMVA output
    # (default false); Transformations can be I;D;P;U;G,D for identity,
    # decorrelation, PCA, uniform and Gaussianisation followed by
    # decorrelation; AnalysisType can be Classification, Regression,
    # Multiclass or Auto (default Auto).
    factory = TMVA.Factory("TMVAClassification", fileout,
                           ":".join([
                               "!V",
                               "!Silent",
                               "Color",
                               "DrawProgressBar",
                               "Transformations=I;D;P;G,D",
                               "AnalysisType=Classification"
                           ]))
    dataloader = TMVA.DataLoader('dataset')
    # Per-event weight taken from the "eventWeight" branch for all samples.
    dataloader.SetWeightExpression("eventWeight")
    dataloader.SetSignalWeightExpression("eventWeight")
    dataloader.SetBackgroundWeightExpression("eventWeight")

    # Training variables (AddVariable) vs. bookkeeping-only spectators
    # (AddSpectator, not used in the fit).
    dataloader.AddVariable("jet1_mass", 'F')
    dataloader.AddVariable("jet2_mass", 'F')
    dataloader.AddVariable("jet1_theta", 'F')
    dataloader.AddVariable("jet2_theta", 'F')
    dataloader.AddVariable("jet1_D2_beta1", 'F')
    dataloader.AddVariable("jet2_D2_beta1", 'F')
    dataloader.AddVariable("jet1_BTag_rfj_BTagMax", 'F')
    dataloader.AddSpectator("jet1_CTag_rfj_CTagMax", 'F')
    #dataloader.AddVariable("jet1_BTag_rfj_BTagMax", 'F')
    dataloader.AddVariable("jet1_C2_beta1", 'F')
    dataloader.AddVariable("jet2_C2_beta1", 'F')
    dataloader.AddVariable("jet1_tau21", 'F')
    dataloader.AddVariable("jet2_tau21", 'F')
    dataloader.AddSpectator("costheta1_for_Atheta1", 'F')
    dataloader.AddSpectator("costheta2_for_Atheta1theta2", 'F')
    dataloader.AddSpectator("phi_for_Aphis", 'F')
    dataloader.AddVariable("reco_y32", 'F')
    dataloader.AddSpectator("reco_y43", 'F')
    dataloader.AddSpectator("dphi_j1j2", 'F')
    dataloader.AddSpectator("jet1_d21", 'F')
    dataloader.AddSpectator("jet1_d32", 'F')
    dataloader.AddSpectator("jet1_d43", 'F')
    dataloader.AddSpectator("jet2_d21", 'F')
    dataloader.AddSpectator("jet2_d32", 'F')
    dataloader.AddSpectator("jet2_d43", 'F')
    dataloader.AddVariable("jet1_C3_beta1", 'F')
    dataloader.AddVariable("jet2_C3_beta1", 'F')
    dataloader.AddSpectator("jet1_N2_beta1", 'F')
    dataloader.AddSpectator("jet2_N2_beta1", 'F')
    dataloader.AddSpectator("jet1_N3_beta1", 'F')
    dataloader.AddSpectator("jet2_N3_beta1", 'F')

    def_weight = 1
    dataloader.AddSignalTree(tree_sig, def_weight, "Training and Testing")
    dataloader.AddBackgroundTree(tree_bkg_ee_qq, def_weight, "Training and Testing")
    dataloader.AddBackgroundTree(tree_bkg_ee_qqqq, def_weight, "Training and Testing")
    dataloader.AddBackgroundTree(tree_bkg_ee_qqqqqq, def_weight, "Training and Testing")

    # Earlier BDT configuration kept for reference (TMVA defaults noted):
    #method = factory.BookMethod(ROOT.TMVA.Types.kBDT, "BDT",
    #                            ":".join([
    #                                "!H",
    #                                "!V",
    #                                "NTrees=850",  # default 800
    #                                "nEventsMin=150",  # default is 0
    #                                "MaxDepth=3",  # default
    #                                "BoostType=AdaBoost",  # default
    #                                "AdaBoostBeta=0.5",  # default
    #                                "SeparationType=GiniIndex",  # default, e.g. SDivSqrtSPlusB
    #                                "nCuts=20",  # default
    #                                "PruneMethod=NoPruning",  # default
    #                            ]))
    #factory.TrainAllMethods()
    #factory.TestAllMethods()
    #factory.EvaluateAllMethods()

    # No extra selection on either sample.
    cut_S = TCut("")
    cut_B = TCut("")
    dataloader.PrepareTrainingAndTestTree(
        cut_S, cut_B,
        ":".join([
            "V",
            "nTrain_Signal=0",
            "nTrain_Background=0",
            "SplitMode=Random",
            "NormMode=NumEvents",
            #"NormMode=None",
        ]))
    factory.BookMethod(
        dataloader, root.TMVA.Types.kBDT, "BDT",
        ":".join([
            "!H",
            "V",
            "NTrees=300",
            "MaxDepth=3",
            #"BoostType=Grad",
            #"Shrinkage=1.00",
            "BoostType=AdaBoost",
            "AdaBoostBeta=0.20",
            #"SeparationType=GiniIndexWithLaplace",
            "SeparationType=GiniIndex",
            "nCuts=-1",
            "PruneMethod=NoPruning",
            #"SkipNormalization",
        ]))
    #factory.WriteDataInformation()
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()
    fileout.Close()
    return None
## This tutorial shows how to apply a trained model to new data (regression). ## ## \macro_code ## ## \date 2017 ## \author TMVA Team from ROOT import TMVA, TFile, TString from array import array from subprocess import call from os.path import isfile # Setup TMVA TMVA.Tools.Instance() TMVA.PyMethodBase.PyInitialize() reader = TMVA.Reader("Color:!Silent") # Load data if not isfile('tmva_reg_example.root'): call(['curl', '-O', 'http://root.cern.ch/files/tmva_reg_example.root']) data = TFile.Open('tmva_reg_example.root') tree = data.Get('TreeR') branches = {} for branch in tree.GetListOfBranches(): branchName = branch.GetName() branches[branchName] = array('f', [-999]) tree.SetBranchAddress(branchName, branches[branchName]) if branchName != 'fvalue': reader.AddVariable(branchName, branches[branchName])
def RunTraining(dataset, optimization):
    """Train GGF-vs-VBF HH->4b "GGFKiller" BDT(s) for one year and optimization.

    dataset: '2016', '2017', '2018', or anything else for all years combined.
    optimization: 'SM' uses the C2V=1 VBF sample as signal, anything else the
    C2V=2 (BSM) sample. Writes GGFKiller_<dataset>_<optimization>.root and the
    trained weights under the 'GGFKiller' dataloader directory.
    """
    # Setup TMVA
    TMVA.PyMethodBase.PyInitialize()
    output = TFile.Open('GGFKiller_%s_%s.root' % (dataset, optimization), 'RECREATE')
    factory = TMVA.Factory(
        'TMVAClassification%s%s' % (dataset, optimization), output,
        '!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification')

    # Locate the per-year skimmed ntuples.
    file_GGF_HH_2016 = "../FullNtuples/New2016/SKIM_GluGluToHHTo4B_node_SM_13TeV-madgraph_PM.root"
    file_VBF_HH_2016 = "../FullNtuples/New2016/SKIM_VBFHHTo4B_CV_1_C2V_1_C3_1_13TeV-madgraph_PM.root"
    file_VBF2_HH_2016 = "../FullNtuples/New2016/SKIM_VBFHHTo4B_CV_1_C2V_2_C3_1_13TeV-madgraph_PM.root"
    file_GGF_HH_2017 = "../FullNtuples/New2017/SKIM_GluGluToHHTo4B_node_SM_13TeV-madgraph_correctedcfg_PM.root"
    file_VBF_HH_2017 = "../FullNtuples/New2017/SKIM_VBFHHTo4B_CV_1_C2V_1_C3_1_13TeV-madgraph_PM.root"
    file_VBF2_HH_2017 = "../FullNtuples/New2017/SKIM_VBFHHTo4B_CV_1_C2V_2_C3_1_13TeV-madgraph_PM.root"
    file_GGF_HH_2018 = "../FullNtuples/New2018/SKIM_GluGluToHHTo4B_node_SM_TuneCP5_PSWeights_13TeV-madgraph-pythia8_PM.root"
    file_VBF_HH_2018 = "../FullNtuples/New2018/SKIM_VBFHHTo4B_CV_1_C2V_1_C3_1_TuneCP5_PSWeights_13TeV-madgraph-pythia8_PM.root"
    file_VBF2_HH_2018 = "../FullNtuples/New2018/SKIM_VBFHHTo4B_CV_1_C2V_2_C3_1_TuneCP5_PSWeights_13TeV-madgraph-pythia8_PM.root"

    # Add the data files: signal = VBF HH, background = GGF HH.
    ch_sig = TChain("bbbbTree")
    ch_bkg = TChain("bbbbTree")
    if optimization == "SM":
        seed = 2020
        if dataset == '2016':
            ch_sig.AddFile(file_VBF_HH_2016)
            ch_bkg.AddFile(file_GGF_HH_2016)
        elif dataset == '2017':
            ch_sig.AddFile(file_VBF_HH_2017)
            ch_bkg.AddFile(file_GGF_HH_2017)
        elif dataset == '2018':
            ch_sig.AddFile(file_VBF_HH_2018)
            ch_bkg.AddFile(file_GGF_HH_2018)
        else:
            # Combined-years training.
            ch_sig.AddFile(file_VBF_HH_2016)
            ch_bkg.AddFile(file_GGF_HH_2016)
            ch_sig.AddFile(file_VBF_HH_2017)
            ch_bkg.AddFile(file_GGF_HH_2017)
            ch_sig.AddFile(file_VBF_HH_2018)
            ch_bkg.AddFile(file_GGF_HH_2018)
    else:
        if dataset == '2016':
            seed = 2017
            ch_sig.AddFile(file_VBF2_HH_2016)
            ch_bkg.AddFile(file_GGF_HH_2016)
        elif dataset == '2017':
            seed = 2020
            ch_sig.AddFile(file_VBF2_HH_2017)
            ch_bkg.AddFile(file_GGF_HH_2017)
        elif dataset == '2018':
            seed = 2021
            ch_sig.AddFile(file_VBF2_HH_2018)
            ch_bkg.AddFile(file_GGF_HH_2018)
        else:
            # FIX: `seed` was never assigned in this branch, so the
            # PrepareTrainingAndTestTree call below raised NameError for the
            # combined-years BSM training. Use the same default as SM.
            seed = 2020
            ch_sig.AddFile(file_VBF2_HH_2016)
            ch_bkg.AddFile(file_GGF_HH_2016)
            ch_sig.AddFile(file_VBF2_HH_2017)
            ch_bkg.AddFile(file_GGF_HH_2017)
            ch_sig.AddFile(file_VBF2_HH_2018)
            ch_bkg.AddFile(file_GGF_HH_2018)

    # Load data to TMVA. The SM and BSM trainings use the same input
    # variables, so the previously duplicated per-branch lists are merged.
    dataloader = TMVA.DataLoader('GGFKiller')
    dataloader.AddVariable("H1_pt")
    dataloader.AddVariable("H2_pt")
    dataloader.AddVariable("JJ_j1_pt")
    dataloader.AddVariable("JJ_j2_pt")
    dataloader.AddVariable("abs_JJ_eta:=abs(JJ_eta)")
    dataloader.AddVariable("h1h2_deltaR")
    dataloader.AddVariable("h1j1_deltaR")
    dataloader.AddVariable("h1j2_deltaR")
    dataloader.AddVariable("h2j1_deltaR")
    dataloader.AddVariable("h2j2_deltaR")
    #dataloader.AddVariable("abs_j1etaj2eta:=abs(j1etaj2eta)")
    dataloader.AddVariable("abs_costh_JJ_j1_vbfcm:=abs(costh_JJ_j1_vbfcm)")
    dataloader.AddVariable("abs_costh_JJ_j2_vbfcm:=abs(costh_JJ_j2_vbfcm)")
    #dataloader.AddVariable("j1j2_deltaEta")
    dataloader.AddVariable("JJ_m")

    # Use half of the VBF-tagged events of each chain for training.
    trainingsamplefraction = 0.50
    nTrain_Signal = int(ch_sig.GetEntries('VBFEvent==1') * trainingsamplefraction)
    nTrain_Background = int(ch_bkg.GetEntries('VBFEvent==1') * trainingsamplefraction)
    print("[INFO] ML TRAINING STARTING . . .")
    print("[INFO] Signal/Background Training Fraction is %f" % trainingsamplefraction)

    dataloader.AddSignalTree(ch_sig, 1.0)
    dataloader.AddBackgroundTree(ch_bkg, 1.0)
    # Per-event weight: cross-section times pre-selection efficiency.
    dataloader.SetSignalWeightExpression('XS*preVBFSelEff')
    dataloader.SetBackgroundWeightExpression('XS*preVBFSelEff')
    dataloader.PrepareTrainingAndTestTree(
        TCut('VBFEvent==1'),
        'nTrain_Signal=%i:nTrain_Background=%i:SplitMode=Random:!V:SplitSeed=%i'
        % (nTrain_Signal, nTrain_Background, seed))

    print("[INFO] Boosted Decision Tree Training Starting . . .")
    # Hyper-parameter grids; a single point per year is kept from earlier scans.
    if optimization == 'SM':
        # best option FOR SM
        if dataset == '2016':
            nTrees = [200]
            nCuts = [200]
            nDepth = [2]
        elif dataset == '2017':
            nTrees = [250]
            nCuts = [200]
            nDepth = [2]
        else:
            nTrees = [200]
            nCuts = [200]
            nDepth = [2]
    else:
        # best option for BSM
        if dataset == '2016':
            nTrees = [200]
            nCuts = [200]
            nDepth = [2]
        elif dataset == '2017':
            nTrees = [300]
            nCuts = [350]
            nDepth = [2]
        elif dataset == '2018':
            nTrees = [200]
            nCuts = [200]
            nDepth = [2]
        else:
            nTrees = [160]
            nCuts = [160]
            nDepth = [3]

    # Book one gradient-boosted BDT per grid point.
    for i in nTrees:
        for j in nCuts:
            for k in nDepth:
                factory.BookMethod(
                    dataloader, TMVA.Types.kBDT, 'BDT_%i_%i_%i' % (i, j, k),
                    '!H:!V:NTrees=%i:MinNodeSize=2.5:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=%i:MaxDepth=%i'
                    % (i, j, k))

    # Run training, test and evaluation
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()
    # Flush and close the output file (was previously left open).
    output.Close()
def main():  # runs the program
    """Variable-importance driver: train a Keras DNN via TMVA's PyKeras on
    the subset of varsList variables selected by the binary seed (-s).

    Options: -o/--outputfile, -w/--where (lpc|brux), -y/--year (2017|2018),
    -s/--seed (bitmask over the DNN variable list), -v/--verbose.
    Relies on module-level globals defined elsewhere in the file: varsList,
    weightStrS/weightStrB, cutStrS/cutStrB, checkRootVer, usage, and the
    Keras names Sequential/Dense/BatchNormalization/Adam.
    """
    try:  # retrieve command line options
        shortopts = "o:w:y:v:s:h?"  # possible command line options
        longopts = [
            "outputfile=", "where=", "year=", "verbose", "seed=", "help",
            "usage"
        ]
        # associates command line inputs to variables
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)
    except getopt.GetoptError:  # output error if command line argument invalid
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    # Option table: [short flag, long flag, variable name, default value].
    # NOTE(review): stored in a numpy *string* array, so every value
    # (including True and 2017) becomes text and must be re-parsed below.
    myArgs = np.array([  # Stores the command line arguments
        ['-o', '--outputfile', 'outfname', DEFAULT_OUTFNAME],
        ['-v', '--verbose', 'verbose', True],
        ['-w', '--where', 'where', "lpc"],
        ['-y', '--year', 'year', 2017],
        ['-s', '--seed', 'SeedN', DEFAULT_SEED],
    ])
    for opt, arg in opts:
        if opt in myArgs[:, 0]:
            index = np.where(
                myArgs[:, 0] == opt)[0][0]  # np.where returns a tuple of arrays
            myArgs[
                index, 3] = arg  # override the variables with the command line argument
        elif opt in myArgs[:, 1]:
            index = np.where(myArgs[:, 1] == opt)[0][0]
            myArgs[index, 3] = arg
        if opt in ("-?", "-h", "--help", "--usage"):  # provides command line help
            usage()
            sys.exit(0)

    # Initialize some variables after reading in arguments
    SeedN_index = np.where(myArgs[:, 2] == 'SeedN')[0][0]
    outfname_index = np.where(myArgs[:, 2] == 'outfname')[0][0]
    verbose_index = np.where(myArgs[:, 2] == 'verbose')[0][0]
    where_index = np.where(myArgs[:, 2] == 'where')[0][0]
    year_index = np.where(myArgs[:, 2] == 'year')[0][0]
    seed = myArgs[SeedN_index, 3]
    where = myArgs[where_index, 3]
    year = int(myArgs[year_index, 3])
    varList = varsList.varList["DNN"]
    var_length = len(varList)
    # Seed interpreted as a bitmask: bit i == '1' selects varList[i].
    # NOTE(review): `long` is Python 2 only — this function fails on Python 3.
    str_xbitset = '{:0{}b}'.format(long(myArgs[SeedN_index, 3]), var_length)
    nVars = str_xbitset.count('1')
    outf_key = "DNN_" + str(nVars) + "vars"
    myArgs[outfname_index, 3] = "dataset/weights/TMVA_" + outf_key + ".root"
    print("Seed: {}".format(str_xbitset))
    outputfile = TFile(myArgs[outfname_index, 3], 'RECREATE')
    checkRootVer()  # check that ROOT version is correct

    ######################################################
    #                    T M V A                         #
    ######################################################
    # Declare some containers
    sig_list = []
    sig_trees_list = []
    bkg_list = []
    bkg_trees_list = []
    hist_list = []
    weightsList = []

    # Pick the input directory for the chosen site/year.
    # NOTE(review): where=="brux" with a year other than 2017/2018 leaves
    # inputDir unset — confirm callers only pass those two years.
    if where == "brux":
        if year == 2017:
            inputDir = varsList.inputDirBRUX2017
        elif year == 2018:
            inputDir = varsList.inputDirBRUX2018
    else:
        inputDir = varsList.inputDirCondor

    # Set up TMVA
    ROOT.TMVA.Tools.Instance()
    ROOT.TMVA.PyMethodBase.PyInitialize()
    fClassifier = TMVA.Factory(
        'VariableImportance',
        '!V:!ROC:Silent:!Color:!DrawProgressBar:Transformations=I;:AnalysisType=Classification'
    )
    # NOTE(review): bool() of a non-empty numpy string is always True, so this
    # is effectively always verbose regardless of -v — confirm intent.
    fClassifier.SetVerbose(bool(myArgs[verbose_index, 3]))

    # One dataloader directory per variable subset (named by the bitmask).
    loader = TMVA.DataLoader("dataset/" + str_xbitset)
    for indx, var in enumerate(varList):
        if (str_xbitset[indx] == '1'):
            # NJets is an integer count; everything else is a float.
            if var[0] == "NJets_MultiLepCalc":
                loader.AddVariable(var[0], var[1], var[2], "I")
            else:
                loader.AddVariable(var[0], var[1], var[2], "F")

    # add signals to loader (files kept in sig_list so trees stay valid)
    if year == 2017:
        for i in range(len(varsList.sig2017_0)):
            sig_list.append(TFile.Open(inputDir + varsList.sig2017_0[i]))
            sig_trees_list.append(sig_list[i].Get("ljmet"))
            sig_trees_list[i].GetEntry(0)
            loader.AddSignalTree(sig_trees_list[i])
    elif year == 2018:
        for i in range(len(varsList.sig2018_0)):
            sig_list.append(TFile.Open(inputDir + varsList.sig2018_0[i]))
            sig_trees_list.append(sig_list[i].Get("ljmet"))
            sig_trees_list[i].GetEntry(0)
            loader.AddSignalTree(sig_trees_list[i])

    # add backgrounds to loader, skipping empty trees
    if year == 2017:
        for i in range(len(varsList.bkg2017_0)):
            bkg_list.append(TFile.Open(inputDir + varsList.bkg2017_0[i]))
            bkg_trees_list.append(bkg_list[i].Get("ljmet"))
            bkg_trees_list[i].GetEntry(0)
            if bkg_trees_list[i].GetEntries() == 0:
                continue
            loader.AddBackgroundTree(bkg_trees_list[i])
    elif year == 2018:
        for i in range(len(varsList.bkg2018_0)):
            bkg_list.append(TFile.Open(inputDir + varsList.bkg2018_0[i]))
            bkg_trees_list.append(bkg_list[i].Get("ljmet"))
            bkg_trees_list[i].GetEntry(0)
            if bkg_trees_list[i].GetEntries() == 0:
                continue
            loader.AddBackgroundTree(bkg_trees_list[i])

    # set signal and background weights (expressions defined at module level)
    loader.SetSignalWeightExpression(weightStrS)
    loader.SetBackgroundWeightExpression(weightStrB)
    # set cut thresholds for signal and background
    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)

    # 0 means "use all remaining events" for each split.
    NSIG = 0
    NSIG_TEST = 0
    NBKG = 0
    NBKG_TEST = 0
    loader.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=" + str(NSIG) + \
        ":nTrain_Background=" + str(NBKG) + \
        ":nTest_Signal=" + str(NSIG_TEST) + \
        ":nTest_Background=" + str(NBKG_TEST) + \
        ":SplitMode=Random:NormMode=NumEvents:!V"
    )

    #####################################################
    #              K E R A S   D N N                    #
    #####################################################
    # Build the Keras model and save it to disk; TMVA's PyKeras loads it by
    # file name (FilenameModel).
    model_name = "TTTT_TMVA_model.h5"
    model = Sequential()
    model.add(
        Dense(100, input_dim=nVars, kernel_initializer="glorot_normal",
              activation="relu"))
    for i in range(2):
        model.add(BatchNormalization())
        model.add(
            Dense(100, kernel_initializer="glorot_normal", activation="relu"))
    model.add(Dense(2, activation="sigmoid"))
    model.compile(loss="categorical_crossentropy", optimizer=Adam(),
                  metrics=["accuracy"])
    model.save(model_name)
    model.summary()

    ######################################################
    #                    T M V A                         #
    ######################################################
    # the trained model has to be specified in this string
    kerasSetting = "!H:!V:VarTransform=G:FilenameModel=" + model_name + \
                   ":NumEpochs=15:BatchSize=512"

    # run the classifier
    fClassifier.BookMethod(loader, TMVA.Types.kPyKeras, "PyKeras", kerasSetting)
    # Redirect weight files to a per-bitmask directory.
    (TMVA.gConfig().GetIONames()
     ).fWeightFileDir = str_xbitset + "/weights/" + outf_key
    #print("New weight file directory: {}".format((TMVA.gConfig().GetIONames()).fWeightFileDir))
    fClassifier.TrainAllMethods()
    fClassifier.TestAllMethods()
    fClassifier.EvaluateAllMethods()
    SROC = fClassifier.GetROCIntegral("dataset/" + str_xbitset, "PyKeras")
    print("ROC-integral: {}".format(SROC))
    fClassifier.DeleteAllMethods()
    fClassifier.fMethodsMap.clear()
    outputfile.Close()
import ROOT, array import ROOT, array from ROOT import TFile, TH1F, TGraph, TCanvas, TLegend, TTree, TList from ROOT import TMVA, TMath import sys print sys.argv process = sys.argv[1] reader = TMVA.Reader("!V") nJet = array.array('f',[0]) mindr_lep1_jet = array.array('f',[0]) mindr_lep2_jet = array.array('f',[0]) mindr_lep3_jet = array.array('f',[0]) avg_dr_jet = array.array('f',[0]) lep1_abs_eta = array.array('f',[0]) lep2_abs_eta = array.array('f',[0]) lep3_abs_eta = array.array('f',[0]) max_lep_eta = array.array('f',[0]) lep1_conePt = array.array('f',[0]) lep2_conePt = array.array('f',[0]) lep3_conePt = array.array('f',[0]) mindr_tau_jet = array.array('f',[0]) ptmiss = array.array('f',[0]) mT_lep1 = array.array('f',[0]) mT_lep2 = array.array('f',[0]) mT_lep3 = array.array('f',[0]) htmiss = array.array('f',[0]) dr_leps = array.array('f',[0]) tau_pt = array.array('f',[0]) tau_abs_eta = array.array('f',[0])
# Declare Factory from ROOT import TMVA, TFile, TTree, TCut, TString # Declare Variables in DataLoader TMVA.Tools.Instance() inputFile = TFile.Open( "https://github.com/iml-wg/tmvatutorials/raw/master/inputdata.root") outputFile = TFile.Open("TMVAOutputDNN.root", "RECREATE") factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:!DrawProgressBar:AnalysisType=Classification") # Declare Variables in DataLoader loader = TMVA.DataLoader("dataset_dnn") loader.AddVariable("var1") loader.AddVariable("var2") loader.AddVariable("var3") loader.AddVariable("var4") loader.AddVariable("var5 := var1-var3") loader.AddVariable("var6 := var1+var2") # Setup Dataset(s) tsignal = inputFile.Get("Sig") tbackground = inputFile.Get("Bkg") loader.AddSignalTree(tsignal) loader.AddBackgroundTree(tbackground) loader.PrepareTrainingAndTestTree(
## \macro_code ## ## \author Lailin XU from ROOT import TMVA, TFile, TTree, TCut, TH1F, TCanvas, gROOT, TLegend from subprocess import call from os.path import isfile from array import array gROOT.SetStyle("ATLAS") # Setup TMVA TMVA.Tools.Instance() # Reader. One reader for each application. reader = TMVA.Reader("Color:!Silent") reader_S = TMVA.Reader("Color:!Silent") reader_B = TMVA.Reader("Color:!Silent") # Inputs # ============= # Load data # An unknown sample trfile = "Zp2TeV_ttbar.root" data = TFile.Open(trfile) tree = data.Get('tree') # Known signal trfile_S = "Zp1TeV_ttbar.root" data_S = TFile.Open(trfile_S) tree_S = data_S.Get('tree')
def RunTraining(dataset, optimization):
    """Train gradient-boosted BDTs (GGF QCD killer) for one data-taking year.

    dataset      -- '2016', '2017', '2018' or '20172018'; any other value
                    falls back to the 2016 samples.
    optimization -- 'SM' selects one hyper-parameter table, anything else the
                    other; both tables are keyed on `dataset`.

    Relies on module-level imports (TMVA, TFile, TChain, TCut) supplied
    elsewhere in the file.  Output goes to GGFQCDKiller_BR_<dataset>_<opt>.root.
    """
    # Setup TMVA (PyROOT method support must be initialised before booking).
    TMVA.PyMethodBase.PyInitialize()
    output = TFile.Open('GGFQCDKiller_BR_%s_%s.root'%(dataset,optimization), 'RECREATE')
    factory = TMVA.Factory('TMVAClassification%s%s'%(dataset,optimization), output,
                           '!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification')

    # --- Input ntuples (skimmed simulation) per year --------------------------
    # Load 2016 SIMULATION
    file_GGF_HH_2016 = "../FullNtuples/New2016/SKIM_GluGluToHHTo4B_node_SM_13TeV-madgraph_BR.root"
    file_QCD_MODEL_2016 = "../FullNtuples/New2016/SKIM_BKG_MODEL_BR.root"
    # Load 2017 SIMULATION
    file_GGF_HH_2017 = "../FullNtuples/New2017/SKIM_GluGluToHHTo4B_node_SM_13TeV-madgraph_correctedcfg_BR.root"
    file_QCD_MODEL_2017 = "../FullNtuples/New2017/SKIM_BKG_MODEL_BR.root"
    # Load 2018 SIMULATION
    file_GGF_HH_2018 = "../FullNtuples/New2018/SKIM_GluGluToHHTo4B_node_SM_TuneCP5_PSWeights_13TeV-madgraph-pythia8_BR.root"
    file_QCD_MODEL_2018 = "../FullNtuples/New2018/SKIM_BKG_MODEL_BR.root"

    # Combine the per-year files into signal/background chains on "bbbbTree".
    ch_sig = TChain("bbbbTree")
    ch_bkg = TChain("bbbbTree")
    # Add files for the requested year; unknown values default to 2016.
    if dataset=='2016':
        ch_sig.AddFile(file_GGF_HH_2016)
        ch_bkg.AddFile(file_QCD_MODEL_2016)
    elif dataset=='2017':
        ch_sig.AddFile(file_GGF_HH_2017)
        ch_bkg.AddFile(file_QCD_MODEL_2017)
    elif dataset=='2018':
        ch_sig.AddFile(file_GGF_HH_2018)
        ch_bkg.AddFile(file_QCD_MODEL_2018)
    else:
        ch_sig.AddFile(file_GGF_HH_2016)
        ch_bkg.AddFile(file_QCD_MODEL_2016)

    # --- Training variables ---------------------------------------------------
    dataloader = TMVA.DataLoader('GGFQCDKiller')
    dataloader.AddVariable("H1_pt")
    dataloader.AddVariable("H2_pt")
    dataloader.AddVariable("H1_m")
    dataloader.AddVariable("H2_m")
    dataloader.AddVariable("HH_m")
    dataloader.AddVariable("h1h2_deltaEta")
    dataloader.AddVariable("H1_bb_deltaR")
    dataloader.AddVariable("H2_bb_deltaR")
    # Derived variable: absolute cosine of the helicity-like angle.
    dataloader.AddVariable("abs_costh_HH_b1_cm:=abs(costh_HH_b1_ggfcm)")
    dataloader.AddVariable("HH_btag_b3_bscore")
    dataloader.AddVariable("HH_btag_b3_bres")

    trainingsamplefraction = 0.50 #next,also try 0.5 0.6
    # Number of training events = fraction of the events passing the signal
    # region + MVA-weight selection; the remainder is used for testing.
    nTrain_Signal = int(ch_sig.GetEntries('GGFSignalRegion==1 && (Weight_MVA==0 || Weight_MVA==1)')*trainingsamplefraction)
    nTrain_Background = int(ch_bkg.GetEntries('GGFSignalRegion==1 && (Weight_MVA==0 || Weight_MVA==1)')*trainingsamplefraction)
    print("[INFO] ML TRAINING STARTING . . .")
    print("[INFO] Signal/Background Training Fraction is %f"%trainingsamplefraction)
    dataloader.AddSignalTree(ch_sig, 1.0)
    dataloader.AddBackgroundTree(ch_bkg, 1.0)
    dataloader.SetBackgroundWeightExpression('Weight_AnaGGF')

    # --- Hyper-parameter tables (seed / NTrees / nCuts / MaxDepth) ------------
    # Values were tuned per year; lists allow scanning several configurations.
    # best option FOR SM
    if optimization=='SM':
        if dataset=='2016':
            seed=2019
            nTrees =[350]
            nCuts = [200]
            nDepth = [2]
        elif dataset=='2017':
            seed=2019
            nTrees =[300]
            nCuts = [300]
            nDepth = [2]
        elif dataset=='2018':
            seed=2020
            nTrees =[250]
            nCuts = [250]
            nDepth = [2]
        elif dataset=='20172018':
            seed=2020
            nTrees =[250]
            nCuts = [250]
            nDepth = [2]
        else:
            seed=2020
            nTrees =[200]
            nCuts = [200]
            nDepth = [2]
    else:
        if dataset=='2016':
            seed=2019
            nTrees =[300]
            nCuts = [300]
            nDepth = [2]
        elif dataset=='2017':
            seed=2020
            nTrees =[300]
            nCuts = [300]
            nDepth = [2]
        elif dataset=='2018':
            seed=2021
            nTrees =[300]
            nCuts = [300]
            nDepth = [2]
        elif dataset=='20172018':
            seed=2020
            nTrees =[250,300,350]
            nCuts = [250,300,350]
            nDepth = [2]
        else:
            seed=2020
            nTrees =[200]
            nCuts = [200]
            nDepth = [2]

    dataloader.PrepareTrainingAndTestTree(TCut('GGFSignalRegion==1 && (Weight_MVA==0 || Weight_MVA==1)'),'nTrain_Signal=%i:nTrain_Background=%i:SplitMode=Random:!V:SplitSeed=%i'%(nTrain_Signal,nTrain_Background,seed))

    # Book one gradient-boosted BDT per (NTrees, nCuts, MaxDepth) combination.
    for i in nTrees:
        for j in nCuts:
            for k in nDepth:
                factory.BookMethod(dataloader, TMVA.Types.kBDT, 'BDT3_%i_%i_%i'%(i,j,k),
                                   '!H:!V:NTrees=%i:MinNodeSize=2.5:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=%i:MaxDepth=%i'%(i,j,k))

    # Run training, test and evaluation
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()
def main():
    """Train a TMVA multiclass Keras DNN separating ttH / ttV / ttJets.

    Python 2 script driven by optparse.  Reads a JSON file mapping variable
    names to metadata, builds a Keras Sequential model, books it through
    TMVA's PyKeras interface and runs training/testing/evaluation.  Depends
    on module-level imports (optparse, json, os, array, TMVA, TFile, TCut,
    Sequential, Dense, SGD) supplied elsewhere in the file.
    """
    usage = 'usage: %prog [options]'
    parser = optparse.OptionParser(usage)
    # Input samples.
    parser.add_option('-s', '--signal_sample', dest='input_file_name_signal', help='signal sample path', default='samples/DiLepTR_ttH_bInclude.root', type='string')
    parser.add_option('-x', '--bckg1_sample', dest='input_file_name_ttJets', help='background sample 1 path', default='samples/DiLepTR_ttJets_bInclude.root', type='string')
    parser.add_option('-y', '--bckg2_sample', dest='input_file_name_ttV', help='background sample 2 path', default='samples/DiLepTR_ttV_bInclude.root', type='string')
    # Network / training hyper-parameters.
    parser.add_option('-a', '--activation', dest='activation_function', help='activation function', default='relu', type='string')
    parser.add_option('-l', '--hidden_layers', dest='number_of_hidden_layers', help='number of hidden layers', default='2', type='int')
    parser.add_option('-t', '--var_transform', dest='var_transform_name', help='transformation used on input variables', default='None', type='string')
    parser.add_option('-j', '--json', dest='json', help='json file with list of variables', default=None, type='string')
    parser.add_option('-r', '--learning_rate', dest='learning_rate', help='learning rate', default=0.01, type='float')
    # NOTE(review): num_epochs is declared type='string' despite the int
    # default; it is only ever interpolated into option strings below.
    parser.add_option('-n', '--num_epochs', dest='num_epochs', help='number of epochs', default=10, type='string')
    (opt, args) = parser.parse_args()

    number_of_hidden_layers = opt.number_of_hidden_layers
    activation_function = opt.activation_function
    var_transform_name = opt.var_transform_name
    num_epochs = opt.num_epochs
    # NOTE(review): file handle is never closed; raises if -j was omitted.
    jsonFile = open(opt.json,'r')
    new_variable_list = json.load(jsonFile,encoding='utf-8').items()
    learning_rate = opt.learning_rate
    layer_nodes = 40

    # Setup TMVA interface to use Keras
    TMVA.Tools.Instance()
    TMVA.PyMethodBase.PyInitialize()

    # Comma-separated transform lists become '+'-joined for use in names.
    if ',' in var_transform_name:
        var_transform_name_list = var_transform_name.split(',')
        new_var_transform_name = '+'.join(var_transform_name_list)
        print 'new_var_transform_name: ', new_var_transform_name
    else:
        print 'var_transform_name = ', var_transform_name
        new_var_transform_name = var_transform_name
        print 'new_var_transform_name: ' , new_var_transform_name

    # Count the input variables listed in the JSON file.
    num_inputs = 0
    for key, value in new_variable_list:
        num_inputs = num_inputs + 1
    print 'num inputs = ' , str(num_inputs)

    # Directory name encodes the full configuration for bookkeeping.
    classifier_parent_dir = 'V8-DNN_%sVars_%sHLs_%s_%s-VarTrans_%s-learnRate_%s-epochs-%s-nodes' % (str(num_inputs),str(number_of_hidden_layers),activation_function,new_var_transform_name,str(learning_rate),num_epochs,str(layer_nodes))
    classifier_samples_dir = classifier_parent_dir+"/outputs"
    if not os.path.exists(classifier_samples_dir):
        os.makedirs(classifier_samples_dir)
    output_file_name = '%s/%s.root'%(classifier_samples_dir,classifier_parent_dir)
    output_file = TFile.Open(output_file_name,'RECREATE')

    # 'AnalysisType' is where one defines what kind of analysis you're doing e.g. multiclass, Classification ....
    # VarTransform: Decorrelation, PCA-transformation, Gaussianisation, Normalisation (for all classes if none is specified).
    # When transformation is specified in factory object, the transformation is only used for informative purposes (not used for classifier inputs).
    # Distributions can be found in output to see how variables would look if transformed.
    factory_name = 'Factory_%s' % (classifier_parent_dir)
    factory_string = '!V:!Silent:Color:DrawProgressBar:Transformations=%s:AnalysisType=multiclass' % var_transform_name
    factory = TMVA.Factory(factory_name, output_file,factory_string)

    # Load data
    input_file_name_signal = opt.input_file_name_signal
    data_signal = TFile.Open(input_file_name_signal)
    signal = data_signal.Get('syncTree')
    input_file_name_ttJets = opt.input_file_name_ttJets
    data_bckg_ttJets = TFile.Open(input_file_name_ttJets)
    background_ttJets = data_bckg_ttJets.Get('syncTree')
    input_file_name_ttV = opt.input_file_name_ttV
    data_bckg_ttV = TFile.Open(input_file_name_ttV)
    background_ttV = data_bckg_ttV.Get('syncTree')

    # Declare a dataloader interface
    dataloader_name = classifier_parent_dir
    dataloader = TMVA.DataLoader(dataloader_name)

    # Can add selection cuts via:
    # dataloader.AddTree(background_ttJets, 'Background_1', 'myvar > cutBarrelOnly && myEventTypeVar=1', backgroundWeight)

    ### Global event weights ###
    signalWeight = 1.
    backgroundWeight0 = 1.
    backgroundWeight1 = 1.
    # One named class per tree -> multiclass training.
    dataloader.AddTree(signal, 'ttH', signalWeight)
    dataloader.AddTree(background_ttV, 'ttV', backgroundWeight0)
    dataloader.AddTree(background_ttJets, 'ttJets', backgroundWeight1)

    branches = {}
    for key, value in new_variable_list:
        dataloader.AddVariable(str(key))
        branches[key] = array('f', [-999])
        print 'variable: ', key
        branchName = ''
        #if 'hadTop_BDT' in key:
        #    branchName = 'hadTop_BDT'
        #elif 'Hj1_BDT' in key:
        #    branchName = 'Hj1_BDT'
        #else:
        #    branchName = key
        branchName = key

    dataloader.AddSpectator('nEvent','F')

    # Nominal event weight:
    # event weight = puWgtNom * trigWgtNom * lepSelEffNom * genWgt * xsecWgt (* 0 or 1 depending on if it passes event selection)
    dataloader.SetWeightExpression("EventWeight", "ttH")
    dataloader.SetWeightExpression("EventWeight", "ttV")
    dataloader.SetWeightExpression("EventWeight", "ttJets")

    # NormMode: Overall renormalisation of event-by-event weights used in training.
    # "NumEvents" = average weight of 1 per event, independantly renormalised for signal and background.
    # "EqualNumEvents" = average weight of 1 per signal event, sum of weights in background equal to sum of weights for signal.
    #dataloader.PrepareTrainingAndTestTree(TCut(''), 'V:NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:NormMode=EqualNumEvents')
    dataloader.PrepareTrainingAndTestTree(TCut(''), 'V:SplitMode=Random:NormMode=EqualNumEvents')

    # Generate model:
    model = Sequential()

    # Add layers to DNN
    '''
    Dense:
    # Number of nodes
    init= # Initialisation
    activation= # Activation
    input_dim= # Shape of inputs (Number of inputs). Argument only needed for first layer.
    '''
    # first hidden layer
    model.add(Dense(layer_nodes, init='glorot_normal', activation=activation_function, input_dim=len(new_variable_list)))
    # Randomly set a fraction rate of input units (defined by argument) to 0 at each update during training (helps prevent overfitting).
    #model.add(Dropout(0.2))
    # Hidden layers
    for x in xrange(number_of_hidden_layers):
        model.add(Dense(layer_nodes, activation=activation_function))
    # Output layer
    # softmax ensures output values are in range 0-1. Can be used as predicted probabilities.
    # 'softmax' activation function used in final layer so that the outputs represent probabilities (output is normalised to 1).
    model.add(Dense(3, activation='softmax'))

    # Set loss and optimizer
    # categorical_crossentropy = optimisation algorithm with logarithmic loss function
    # binary_crossentropy
    model.compile(loss='categorical_crossentropy', optimizer=SGD(lr=learning_rate), metrics=['accuracy',])

    # Store model in file
    model.save('model.h5')
    model.summary()

    # Book methods
    # Choose classifier and define hyperparameters e.g number of epochs, model filename (as chosen above) etc.
    # VarTransform: Decorrelate, PCA, Gauss, Norm, None.
    # Transformations used in booking are used for actual training.
    logs_dir = classifier_parent_dir+'/logs'
    #factory_string_bookMethod = 'H:!V:VarTransform=%s:FilenameModel=model.h5:NumEpochs=%s:BatchSize=100:Tensorboard=%s' % (var_transform_name, num_epochs, logs_dir)
    factory_string_bookMethod = 'H:!V:VarTransform=%s:FilenameModel=model.h5:NumEpochs=%s:BatchSize=100' % (var_transform_name, num_epochs)
    factory.BookMethod(dataloader, TMVA.Types.kPyKeras, "DNN", factory_string_bookMethod)

    # Run training, testing and evaluation
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()
def main(): try: # retrive command line options shortopts = "m:i:n:d:k:l:t:o:vh?" longopts = [ "methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=", "varListKey=", "inputtrees=", "outputfile=", "verbose", "help", "usage" ] opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infname = DEFAULT_INFNAME treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG outfname = DEFAULT_OUTFNAME methods = DEFAULT_METHODS nTrees = DEFAULT_NTREES mDepth = DEFAULT_MDEPTH varListKey = DEFAULT_VARLISTKEY verbose = True for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-m", "--methods"): methods = a elif o in ("-d", "--maxDepth"): mDepth = a elif o in ("-l", "--varListKey"): varListKey = a elif o in ("-i", "--inputfile"): infname = a elif o in ("-n", "--nTrees"): nTrees = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit(' ') trees.sort() trees.reverse() if len(trees) - trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True varList = varsList.varList[varListKey] nVars = str(len(varList)) + 'vars' Note = methods + '_' + varListKey + '_' + nVars + '_mDepth' + mDepth outfname = "dataset/weights/TMVA_" + Note + ".root" # Print methods mlist = methods.replace(' ', ',').split(',') print "=== TMVAClassification: use method(s)..." 
for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." sys.exit(1) # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile(outfname, 'RECREATE') # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" (not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;:AnalysisType=Classification" ) loader = TMVA.DataLoader("dataset") # Set verbosity # factory.SetVerbose( verbose ) # If you wish to modify default settings # (please check "src/Config.h" to see all available global options) # gConfig().GetVariablePlotting()).fTimesRMS = 8.0 (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/" + Note # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" # [all types of expressions that can also be parsed by TTree::Draw( "expression" )] for iVar in varList: if iVar[0] == 'NJets_JetSubCalc': loader.AddVariable(iVar[0], iVar[1], iVar[2], 'I') else: loader.AddVariable(iVar[0], iVar[1], iVar[2], 'F') # You can add so-called "Spectator variables", which are not used in the MVA training, # but will appear in the final "TestTree" produced by TMVA. 
This TestTree will contain the # input variables, the response values of all trained MVAs, and the spectator variables inputDir = varsList.inputDir infname = "TTTT_TuneCP5_13TeV-amcatnlo-pythia8_hadd.root" iFileSig = TFile.Open(inputDir + infname) sigChain = iFileSig.Get("ljmet") loader.AddSignalTree(sigChain) bkg_list = [] bkg_trees_list = [] hist_list = [] weightsList = [] bkgList = varsList.bkg for i in range(len(bkgList)): bkg_list.append(TFile.Open(inputDir + bkgList[i])) print inputDir + bkgList[i] bkg_trees_list.append(bkg_list[i].Get("ljmet")) bkg_trees_list[i].GetEntry(0) if bkg_trees_list[i].GetEntries() == 0: continue loader.AddBackgroundTree(bkg_trees_list[i], 1) signalWeight = 1 #0.0159/sigChain.GetEntries() #xs (pb) # ====== register trees ==================================================== # To give different trees for training and testing, do as follows: # loader.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ) # loader.AddSignalTree( signalTestTree, signalTestWeight, "Test" ) # Use the following code instead of the above two or four lines to add signal and background # training and test events "by hand" # NOTE that in this case one should not give expressions (such as "var1+var2") in the input # variable definition, but simply compute the expression before adding the event # # # --- begin ---------------------------------------------------------- # # ... 
*** please lookup code in TMVA/macros/TMVAClassification.C *** # # # --- end ------------------------------------------------------------ # # ====== end of register trees ============================================== # Set individual event weights (the variables must exist in the original TTree) # for signal : loader.SetSignalWeightExpression ("weight1*weight2"); # for background: loader.SetBackgroundWeightExpression("weight1*weight2"); loader.SetSignalWeightExpression(weightStrS) loader.SetBackgroundWeightExpression(weightStrB) # Apply additional cuts on the signal and background sample. # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) mycutSig = TCut(cutStrS) mycutBkg = TCut(cutStrB) # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples loader.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ) # -------------------------------------------------------------------------------------------------- # ---- Book MVA methods # # please lookup the various method configuration options in the corresponding cxx files, eg: # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html # it is possible to preset ranges in the option string in which the cut optimisation should be done: # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable # Cut optimisation # bdtSetting for "BDT" bdtSetting = '!H:!V:NTrees=%s:MaxDepth=%s' % (nTrees, mDepth) bdtSetting += ':MinNodeSize=2.5%:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20' bdtSetting += ':IgnoreNegWeightsInTraining=True' # bdtSetting for "BDTMitFisher" bdtFSetting = '!H:!V:NTrees=%s' % nTrees bdtFSetting += 
':MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20' bdtFSetting += ':IgnoreNegWeightsInTraining=True' # bdtSetting for "BDTG" bdtGSetting = '!H:!V:NTrees=%s:MaxDepth=%s' % (nTrees, mDepth) bdtGSetting += ':MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20' bdtGSetting += ':Pray' #Pray takes into account the effect of negative bins in BDTG #bdtGSetting += ':IgnoreNegWeightsInTraining=True' # bdtSetting for "BDTB" bdtBSetting = '!H:!V:NTrees=%s' % nTrees bdtBSetting += ':MinNodeSize=2.5%:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20' bdtBSetting += ':IgnoreNegWeightsInTraining=True' # bdtSetting for "BDTD" bdtDSetting = '!H:!V:NTrees=%s' % nTrees bdtDSetting += ':MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate' bdtDSetting += ':IgnoreNegWeightsInTraining=True' #Note also that explicitly setting *nEventsMin* so far OVERWRITES the option recomeded ^[[0m #BOOKING AN ALGORITHM # if methods=="BDT": factory.BookMethod( TMVA.Types.kBDT, "BDT",bdtSetting) if methods == "BDT": factory.BookMethod(loader, TMVA.Types.kBDT, "BDT", bdtSetting) if methods == "BDTG": factory.BookMethod(TMVA.Types.kBDT, "BDTG", bdtGSetting) if methods == "BDTMitFisher": factory.BookMethod(TMVA.Types.kBDT, "BDTMitFisher", bdtFSetting) if methods == "BDTB": factory.BookMethod(TMVA.Types.kBDT, "BDTB", bdtBSetting) if methods == "BDTD": factory.BookMethod(TMVA.Types.kBDT, "BDTD", bdtDSetting) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the loader to train, test, and evaluate the MVAs. # Train MVAs print "train all method" factory.TrainAllMethods() print "test all method" # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. 
outputFile.Close() # save plots: os.chdir('dataset/weights/' + Note) if not gROOT.IsBatch(): TMVA.TMVAGui(outfname) print "DONE"
# For every (period, channel) combination, build a fresh TMVA reader for the
# combined VBF BDT and bind one float buffer per input variable.
# (Loop body may continue beyond this chunk of the file.)
for period, channel in itertools.product(periods, channels):
    # -999 is the sentinel/default value for unset inputs.
    dijetDEta = array.array('f', [-999])
    dijetDPhi = array.array('f', [-999])
    llgJJDPhi = array.array('f', [-999])
    jPhotonDRMin = array.array('f', [-999])
    ptt = array.array('f', [-999])
    jetOnePt = array.array('f', [-999])
    jetTwoPt = array.array('f', [-999])
    kin_bdt = array.array('f', [-999])
    vbfPtBalance = array.array('f', [-999])
    photonZepp = array.array('f', [-999])
    kin_bdt_james = array.array('f', [-999])

    # Trained weights; variables must be added in the training order.
    vbfWeightsFile = 'trained_bdts/vbf_bdt_combined_ming-yan_current.xml'
    # `t` is presumably the TMVA module imported under that alias — TODO confirm.
    vbf_reader = t.Reader("!Color:Silent")
    vbf_reader.AddVariable('dijetDEta', dijetDEta)
    vbf_reader.AddVariable('dijetDPhi', dijetDPhi)
    vbf_reader.AddVariable('llgJJDPhi', llgJJDPhi)
    vbf_reader.AddVariable('jPhotonDRMin', jPhotonDRMin)
    vbf_reader.AddVariable('ptt', ptt)
    vbf_reader.AddVariable('jetOnePt', jetOnePt)
    vbf_reader.AddVariable('jetTwoPt', jetTwoPt)
    vbf_reader.AddVariable('kin_bdt', kin_bdt)
    vbf_reader.AddVariable('vbfPtBalance', vbfPtBalance)
    vbf_reader.AddVariable('photonZepp', photonZepp)
    vbf_reader.BookMVA('BDT method', vbfWeightsFile)
    #vbfWeightsFileJames = 'trained_bdts/vbf_bdt_combined_james_current_half_signal_BDT.weights.xml'
def main(o, args): # Import TMVA classes from ROOT from ROOT import TMVA, TFile, TCut print o # Output file outputFile = TFile(o.outfile % {"label": o.label}, 'RECREATE') atype = "Classification" if hasattr(o, "type"): atype = str(o.type) factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=%s" % atype) # Set verbosity factory.SetVerbose(o.verbose) TMVA.Config.Instance().GetIONames().fWeightFileDir = o.weightsdir # variables if type(o.variables) == str: o.variables = [ v.lstrip().rstrip() for v in o.variables.split(":") if v != "" ] allvars = "" for v in o.variables: factory.AddVariable(str(v)) if allvars != "": allvars += ":" allvars += v.split(":=")[0].lstrip(" ").rstrip(" ") print "variables %s" % allvars print o.spectators for s in o.spectators: if not s in o.variables: factory.AddSpectator(str(s)) # categories and sub categories categories = [] subcategories = [] if hasattr(o, "subcategories") and len(o.subcategories) > 0: subcategories = o.subcategories[0] for sc in o.subcategories[1:]: subcategories = map( lambda x: (TCut(x[0][0]) * TCut(x[1][0]), "%s_%s" % (x[0][1], x[1][1])), itertools.product(subcategories, sc)) for cut, name, vars in o.categories: myvars = allvars if vars != "": for v in vars.split(":"): myvars = myvars.replace(v, "").replace("::", ":") myvars = myvars.rstrip(":") vars = str(myvars) print vars if len(subcategories) > 0: for subcut, subname in subcategories: if subname == "": subname = subname.replace(" ", "").replace( ">", "_gt_").replace("<", "_lt_").replace( "=", "_eq_").replace("&", "_and_") fullname = "%s_%s" % (name, subname) categories.append( (TCut(cut) * TCut(subcut), str(fullname), vars)) else: categories.append((TCut(cut), str(name), vars)) # load tree selection = TCut(o.selection) for evclass, info in o.classes.iteritems(): samples = info["samples"] for name, weight, cut, ttype in samples: tcut = TCut(cut) * selection 
factory.AddTree(mkChain(getListOfFiles(o.indir, o.files), name), str(evclass), float(weight), tcut, int(ttype)) # weights if "weight" in info: weight = info["weight"] factory.AddSpectator(str("%s_wei := %s" % (evclass, weight))) factory.SetWeightExpression(str(weight), str(evclass)) else: factory.SetWeightExpression("1.", str(evclass)) # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples factory.PrepareTrainingAndTestTree( TCut(""), "SplitMode=Random:NormMode=NumEvents:!V") # -------------------------------------------------------------------------------------------------- # Fisher discriminant (same as LD) defaultSettings = { "BDT": "!H:!V:!CreateMVAPdfs:BoostType=Grad:UseBaggedGrad" ":GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" ":Shrinkage=0.3:NTrees=1000", "Cuts": "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" } if "FisherD" in o.methods: mname = "FisherD%s" % o.label fcats = factory.BookMethod(TMVA.Types.kCategory, mname) for cut, name, vars in categories: print "booking sub-category classifier : %s %s %s" % (cut, name, vars) fcats.AddMethod(cut, vars, TMVA.Types.kFisher, "%s_%s" % (mname, name), "!H:!V:Fisher:!CreateMVAPdfs:VarTransform=D") if "Fisher" in o.methods: mname = "Fisher%s" % o.label fcats = factory.BookMethod(TMVA.Types.kCategory, mname) for cut, name, vars in categories: print "booking sub-category classifier : %s %s %s" % (cut, name, vars) fcats.AddMethod(cut, vars, TMVA.Types.kFisher, "%s_%s" % (mname, name), "!H:!V:Fisher:!CreateMVAPdfs") if "Likelihood" in o.methods: mname = "Likelihood%s" % o.label fcats = factory.BookMethod(TMVA.Types.kCategory, mname) for cut, name, vars in categories: print "booking sub-category classifier : %s %s %s" % (cut, name, vars) fcats.AddMethod( cut, vars, TMVA.Types.kLikelihood, "%s_%s" % (mname, name), 
"!H:!V:!CreateMVAPdfs:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=150" ) if "LikelihoodD" in o.methods: mname = "LikelihoodD%s" % o.label fcats = factory.BookMethod(TMVA.Types.kCategory, mname) for cut, name, vars in categories: print "booking sub-category classifier : %s %s %s" % (cut, name, vars) fcats.AddMethod( cut, vars, TMVA.Types.kLikelihood, "%s_%s" % (mname, name), "!H:!V:!CreateMVAPdfs:!TransformOutput:VarTransform=D:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=150" ) if "BDT" in o.methods: mname = "BDT%s" % o.label settings = defaultSettings["BDT"] if hasattr(o, "settings") and "BDT" in o.settings: settings = str(o.settings["BDT"]) if len(categories) == 0: cats = factory.BookMethod(TMVA.Types.kBDT, mname, settings) else: cats = factory.BookMethod(TMVA.Types.kCategory, mname) for cut, name, vars in categories: print "booking sub-category classifier : %s %s %s" % ( cut, name, vars) cats.AddMethod(cut, vars, TMVA.Types.kBDT, "%s_%s" % (mname, name), settings) if "Cuts" in o.methods: mname = "Cuts%s" % o.label settings = defaultSettings["Cuts"] if hasattr(o, "settings") and "Cuts" in o.settings: settings = str(o.settings["Cuts"]) if len(categories) == 0: cats = factory.BookMethod(TMVA.Types.kCuts, mname, settings) else: cats = factory.BookMethod(TMVA.Types.kCategory, mname) for cut, name, vars in categories: print "booking sub-category classifier : %s %s %s" % ( cut, name, vars) cats.AddMethod(cut, vars, TMVA.Types.kCuts, "%s_%s" % (mname, name), settings) # ---- Now you can tell the factory to train, test, and evaluate the MVAs. if o.optimize: print "Optimizing?" factory.OptimizeAllMethods() factory.TrainAllMethods() factory.TestAllMethods() factory.EvaluateAllMethods() # Save the output. outputFile.Close()
def main():
    """Train TMVA classifiers (DNN, BDT, MLP) on the hazel signal/background trees.

    Reads the hard-coded smear0 signal and background ntuples, books the
    methods on the "dataset0" DataLoader, runs training / testing /
    evaluation, and saves the ROC curve as a PDF.  Depends on the
    module-level ``options`` object for the job name.
    """
    name = str(options.name)

    # Input ntuples (hard-coded paths). Earlier "layertests" inputs were:
    #   .../layertests/hazel_both_smearf_1M_35ns_e100_split.root  (signal)
    #   .../layertests/hazel_bkg_smearf_<name>_train.root         (background)
    inputFile_sig = TFile.Open(
        "/home/net3/afortman/projects/hotpot/oct_sim/hazel_sig_smear0.root")
    inputFile_bkg = TFile.Open(
        "/home/net3/afortman/projects/hotpot/oct_sim/hazel_bkg_smear0.root")

    outputFile = TFile.Open("TMVAOutput_" + name + ".root", "RECREATE")
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:!DrawProgressBar:AnalysisType=Classification")

    loader = TMVA.DataLoader("dataset0")
    # One integer hit flag per detector plane (Hit_plane0 .. Hit_plane7).
    # Other candidate inputs (EventNum*, iroad_*, Hit_n, dtheta, chi2) were
    # tried previously and are deliberately left out.
    for plane in range(8):
        loader.AddVariable("Hit_plane%d" % plane, 'I')

    # Both files store their events in a tree named "hazel".
    tsignal = inputFile_sig.Get("hazel")
    tbackground = inputFile_bkg.Get("hazel")
    loader.AddSignalTree(tsignal)
    loader.AddBackgroundTree(tbackground)

    # No selection cut; fixed-size test samples, random split.
    # (A "Hit_n==8" cut was used previously to restrict to 8-layer events.)
    loader.PrepareTrainingAndTestTree(
        TCut(""),
        "nTest_Signal=1000:nTest_Background=1000:SplitMode=Random:NormMode=NumEvents:!V"
    )

    # --- DNN configuration -------------------------------------------------
    # General layout: three tanh hidden layers of 128 nodes, linear output.
    layoutString = TString("Layout=TANH|128,TANH|128,TANH|128,LINEAR")

    # Two-phase training schedule: coarse (with dropout), then fine.
    training0 = TString("LearningRate=1e-1,Momentum=0.9,Repetitions=1,"
                        "ConvergenceSteps=2,BatchSize=256,TestRepetitions=10,"
                        "WeightDecay=1e-4,Regularization=L2,"
                        "DropConfig=0.0+0.5+0.5+0.5, Multithreading=True")
    training1 = TString("LearningRate=1e-2,Momentum=0.9,Repetitions=1,"
                        "ConvergenceSteps=2,BatchSize=256,TestRepetitions=10,"
                        "WeightDecay=1e-4,Regularization=L2,"
                        "DropConfig=0.0+0.0+0.0+0.0, Multithreading=True")
    trainingStrategyString = TString("TrainingStrategy=")
    trainingStrategyString += training0 + TString("|") + training1

    dnnOptions = TString("!H:!V:ErrorStrategy=CROSSENTROPY:VarTransform=N:"
                         "WeightInitialization=XAVIERUNIFORM")
    dnnOptions.Append(":")
    dnnOptions.Append(layoutString)
    dnnOptions.Append(":")
    dnnOptions.Append(trainingStrategyString)

    # Standard implementation, no dependencies.
    stdOptions = dnnOptions + ":Architecture=CPU"
    factory.BookMethod(loader, TMVA.Types.kDNN, "DNN", stdOptions)

    # Boosted Decision Trees (TMVA default options).
    factory.BookMethod(loader, TMVA.Types.kBDT, "BDT")

    # Multi-Layer Perceptron (Neural Network).
    factory.BookMethod(
        loader, TMVA.Types.kMLP, "MLP",
        "!H:!V:NeuronType=tanh:VarTransform=N:NCycles=100:HiddenLayers=N+5:TestRate=5:!UseRegulator"
    )

    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    c = factory.GetROCCurve(loader)
    c.Draw()
    c.SaveAs("dataset0/roc_TMVA_smear0.pdf")

    # BUGFIX: close the output file so the TMVA trees/histograms are flushed
    # to disk — every other job in this file does this; this one did not.
    outputFile.Close()
def runJob():
    """Book and train a suite of gradient-boosted BDT variants on the SSSF samples.

    Samples, variables and the preselection cut come from the module-level
    ``config``; data samples are skipped.  Results are written to
    TMVA_SSSF.root under the 'datasetSSSF04Feb' dataset.
    """
    TMVA.Tools.Instance()
    TMVA.PyMethodBase.PyInitialize()

    out_file = TFile.Open('TMVA_SSSF.root', 'RECREATE')
    factory = TMVA.Factory(
        'TMVAClassification', out_file,
        '!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification')
    # Alternative with explicit transformations:
    # factory = TMVA.Factory('TMVAClassification', output, '!V:!Silent:Color:DrawProgressBar:Transformations=D,G:AnalysisType=Classification')

    loader = TMVA.DataLoader('datasetSSSF04Feb')
    for variable in config.mvaVariables:
        loader.AddVariable(variable)

    # Chain every MC sample; real data is excluded from the training.
    for sampleName, sample in config.samples.items():
        if config.structure[sampleName]['isData'] == 1:
            continue
        print(sampleName)
        chain = TChain("Events")
        for fname in sample['name']:
            chain.Add(fname)
        sample['tree'] = chain
        if config.structure[sampleName]['isSignal'] == 1:
            loader.AddSignalTree(chain, 1.0)
        else:
            loader.AddBackgroundTree(chain, 1.0)
        # output_dim += 1

    loader.PrepareTrainingAndTestTree(
        TCut(config.cut), 'SplitMode=Random:NormMode=NumEvents:!V')

    # (name, option-string) pairs for each gradient-BDT variant to book.
    bdt_variants = [
        ("BDTG0",
         "!H:!V:NTrees=400:MinNodeSize=1.0%:BoostType=Grad:Shrinkage=0.03:UseBaggedBoost:GradBaggingFraction=0.4:nCuts=100:MaxDepth=1"),
        ("BDTG1",
         "!H:!V:NTrees=400:MinNodeSize=1.0%:BoostType=Grad:Shrinkage=0.03:UseBaggedBoost:GradBaggingFraction=0.4:nCuts=500:MaxDepth=2"),
        ("BDTG4D3",
         "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=500:MaxDepth=3"),
        ("BDTG4C3",
         "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=300:MaxDepth=2"),
        ("BDTG4SK01",
         "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.01:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=500:MaxDepth=2"),
        ("BDTG4F07",
         "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.7:nCuts=500:MaxDepth=2"),
        ("BDTG4F07NT",
         "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.7:nCuts=500:MaxDepth=2"),
        ("BDTG4F07NC",
         "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.7:nCuts=1000:MaxDepth=2"),
    ]
    for method_name, method_options in bdt_variants:
        factory.BookMethod(loader, TMVA.Types.kBDT, method_name, method_options)

    # Run training, test and evaluation
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    out_file.Close()
def main(): print "\n", "=" * 80 print "\tmonoHZZ4L - classification with TMVA" print "=" * 80 # summary root file summaryFilename = 'TMVA.root' # results directory resultsDir = 'results' os.system('mkdir -p %s' % resultsDir) #------------------------------------------------------------------ # get signal file and associated Root tree weightname = "f_weight" # name of event weight variable sigFilename = '../../ntuple_Zprime_MZp01700_MA00300.root' sigFile, sigTree = getTree(sigFilename) # get background file and associated Root tree bkgFilename = '../../ntuple_SM.root' bkgFile, bkgTree = getTree(bkgFilename) #------------------------------------------------------------------ # create a factory for booking machine learning methods #------------------------------------------------------------------ outputFile = TFile("TMVA.root", "recreate") options = ''' !V Color !Silent DrawProgressBar AnalysisType=Classification Transformations=I;D ''' factory = TMVA.Factory("HZZ4L", outputFile, formatOptions(options)) #------------------------------------------------------------------ # set up data set for training and testing #------------------------------------------------------------------ dataLoader = TMVA.DataLoader(resultsDir) # define all MELA variables here dataLoader.AddVariable("f_mass4l", 'D') dataLoader.AddVariable("f_D_bkg_kin", 'D') # define from which trees data are to be taken # and the global weights and event-by-event weights # to be assigned to the training data sigWeight = 1.0 dataLoader.AddSignalTree(sigTree, sigWeight) dataLoader.SetSignalWeightExpression("f_weight") bkgWeight = 1.0 dataLoader.AddBackgroundTree(bkgTree, bkgWeight) dataLoader.SetBackgroundWeightExpression("f_weight") # you can apply cuts, if needed cut = TCut('!f_outlier') options = ''' SplitMode=Block NormMode=EqualNumEvents nTrain_Signal=5000 nTest_Signal=5000 nTrain_Background=5000 nTest_Background=5000 !V ''' dataLoader.PrepareTrainingAndTestTree( cut, # signal cut cut, # background cut 
formatOptions(options)) #------------------------------------------------------------------ # ok, almost done, define machine learning methods to be run #------------------------------------------------------------------ options = ''' !H !V BoostType=AdaBoost NTrees=5000 nEventsMin=100 nCuts=50 ''' factory.BookMethod(dataLoader, TMVA.Types.kBDT, "BDT", formatOptions(options)) options = ''' !H !V NCycles=400 VarTransform=N HiddenLayers=10 TrainingMethod=BFGS ''' factory.BookMethod(dataLoader, TMVA.Types.kMLP, "MLP", formatOptions(options)) #------------------------------------------------------------------ # ok, let's go! #------------------------------------------------------------------ factory.TrainAllMethods() factory.TestAllMethods() factory.EvaluateAllMethods() outputFile.Close()
# NOTE(review): this block appears to be the head of a longer training loop —
# a factory is created and input variables are added for the "6j4t" category
# only; the remaining categories and the actual training presumably follow
# outside this view.  Confirm against the full file.
for regressedvar in regressedvars:
    for cat in cats:
        # Select branch-name prefixes depending on whether the regressed
        # (energy-corrected) jet inputs are used for this pass.
        if regressedvar:
            prefix = "BDT_Reg_common5_input_"
            midfix = "Reg_"
            outputpostfix = "reg"
        else:
            prefix = "BDT_common5_input_"
            midfix = ""
            outputpostfix = "noreg"
        print "Training the BDT for",cat,"with",outputpostfix,"variables"
        # One output ROOT file per (category, variable-set) combination;
        # sys.argv[3] supplies the job label.
        outputFile = TFile(str(sys.argv[3])+"_"+cat+"_"+outputpostfix+".root","RECREATE")
        factory = TMVA.Factory( "weights_"+str(sys.argv[3])+"_"+cat+"_"+outputpostfix, outputFile,"V:!Silent:Color:DrawProgressBar:AnalysisType=Classification" )
        # Product of all scale-factor / pileup / b-tag weights.
        # NOTE(review): assigned but not used in the visible code — presumably
        # consumed later as the per-event weight expression; confirm.
        weightexpression = "Weight_XS*Weight_ElectronSFID*Weight_MuonSFID*Weight_MuonSFIso*Weight_ElectronSFGFS*Weight_MuonSFHIP*Weight_pu69p2*Weight_CSV"
        #Add Variables
        if cat == "6j4t":
            # (variable, title, unit, type) — title is kept equal to the
            # branch expression throughout.
            factory.AddVariable("Evt_"+midfix+"blr_ETH_transformed","Evt_"+midfix+"blr_ETH_transformed","units",'F')
            #factory.AddVariable(prefix+"Evt_blr_ETH_transformed",prefix+"Evt_blr_ETH_transformed","units",'F')
            factory.AddVariable("Evt_Deta_JetsAverage","Evt_Deta_JetsAverage","units",'F')
            factory.AddVariable(prefix+"avg_dr_tagged_jets",prefix+"avg_dr_tagged_jets","units",'F')
            factory.AddVariable(prefix+"tagged_dijet_mass_closest_to_125",prefix+"tagged_dijet_mass_closest_to_125","units",'F')
            factory.AddVariable(prefix+"closest_tagged_dijet_mass",prefix+"closest_tagged_dijet_mass","units",'F')
            factory.AddVariable(prefix+"fifth_highest_CSV",prefix+"fifth_highest_CSV","units",'F')
            factory.AddVariable(prefix+"best_higgs_mass",prefix+"best_higgs_mass","units",'F')
            factory.AddVariable(prefix+"sphericity",prefix+"sphericity","units",'F')
def main():
    """TMVA classification training for a jet-substructure tagger.

    Expects five command-line arguments: alg, spectators, cuts, vars,
    methods (comma-separated lists except alg and methods).  Books whichever
    of the supported TMVA methods are named in `methods`, trains on W'
    signal vs dijet background ntuples, and writes results to testout.root.
    """
    # Defaults; all of them are overridden by the (required) CLI arguments.
    # NOTE(review): `vars` shadows the builtin of the same name — local only.
    NTupName = "varTree"
    verbose = True
    alg = "AK10LCTRIMF5R20"
    spectators = ["m"]
    cuts = ["eta>-1.2","eta<1.2","pt>200","pt<350","m>61","m<85","TruthRecoMatch==1"]
    vars = ["TauWTA2TauWTA1","ZCUT12","Dip23","TJetVol","ActiveArea","PullC10","Angularity"]
    methods = "Likelihood"

    print "Starting and getting arguments:"
    allargs = sys.argv[1:]
    if len(allargs)<5:
        print "You input these args"
        print allargs
        print "Not enough args, please try again"
        # Non-zero return signals the argument error to the caller.
        return 1
    else:
        alg = allargs[0]
        spectators = allargs[1].split(",")
        cuts = allargs[2].split(",")
        vars = allargs[3].split(",")
        methods = allargs[4]
    print "Running with args:"
    print " alg = ",alg
    print " spectators = ",spectators
    print " cuts = ",cuts
    print " vars = ",vars
    print " methods = ",methods

    # Print methods (accepts space- or comma-separated method names).
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    #===============================
    #Read training and test data
    #===============================
    InputDir = "Data/20150709/"
    print "Getting inputs from: ",InputDir
    s1 = TFile(InputDir+"wprime1000_wprime2000.root");
    b1 = TFile(InputDir+"dijet.root");

    # Output file
    OutFileName="testout.root"
    outputFile = TFile( OutFileName, 'RECREATE' )

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory( "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )

    # Set verbosity
    factory.SetVerbose( verbose )

    # Commented-out weight/histogram machinery kept for reference:
    # weight=""
    # weight+="pass_selection*EventWeight*CrossSection*("
    # weight+="akt10"+alg+"_trim_pt>"+pt1+" && "
    # weight+="akt10"+alg+"_trim_pt<"+pt2
    # if m1!="0":
    #     weight+=" && akt10"+alg+"_trim_mass>"+m1+" && "
    #     weight+="akt10"+alg+"_trim_mass<"+m2
    # weight+=")"
    #
    # #Get signal and background histograms
    # if variable=="mass":
    #     histname = "akt10"+alg+"_trim_"+variable
    # else:
    #     histname = alg+"_"+variable

    #======================================
    #Predefined cuts - for isntance on M(j1)
    #======================================
    # Cuts are ANDed multiplicatively onto a "1.0" seed; kinematic cuts
    # (pt/eta/mass) refer to the akt10<alg>_trim_* branches, anything else
    # is used verbatim.
    mycuts = "1.0"
    mycutb = "1.0"
    for cut in cuts:
        placecut=cut  # NOTE(review): dead store — both branches below reassign it
        if cut[:2]=="pt" or cut[:3]=="eta" or cut[:4]=="mass":
            placecut = "* (akt10"+alg+"_trim_"+cut+")"
        else:
            placecut="* ("+cut+") "
        mycuts += placecut
        mycutb += placecut
    print "MyCutsSig: ",mycuts
    print "MyCutsBkg: ",mycutb

    #===================================
    #Spectator variables from tree
    #=====================================
    for spec in spectators:
        factory.AddSpectator( spec , 'F' )

    #===================================
    #MVA variables from tree
    #=====================================
    for var in vars:
        factory.AddVariable( var , 'F' )

    #===============================
    #Read training and test data
    #===============================
    print "Getting trees ..."
    st1 = s1.Get(NTupName)
    bt1 = b1.Get(NTupName)

    #=========================================
    # global event weights per tree (see below for setting event-wise weights)
    #=========================================
    ws1 = 1.0
    wb1 = 1.0

    #=========================================
    # You can add an arbitrary number of signal or background trees
    #=========================================
    factory.AddSignalTree ( st1, ws1 );
    factory.SetSignalWeightExpression("EventWeight*CrossSection");
    factory.AddBackgroundTree( bt1, wb1 );
    factory.SetBackgroundWeightExpression("EventWeight*CrossSection");

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    mycutSig = TCut(mycuts)
    mycutBkg = TCut(mycutb)
    factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    # --------------------------------------------------------------------------------------------------
    # ---- Book MVA methods (each only if named in `methods`)
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethoCuts.cxx, etc, or here: http:#tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "Cuts",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" )
    if "CutsD" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsD",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" )
    if "CutsPCA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsPCA",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" )
    if "CutsGA" in mlist:
        factory.BookMethod(
            TMVA.Types.kCuts, "CutsGA",
            "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" )
    if "CutsSA" in mlist:
        factory.BookMethod(
            TMVA.Types.kCuts, "CutsSA",
            "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" )

    # Likelihood ("naive Bayes estimator")
    if "Likelihood" in mlist:
        factory.BookMethod(
            TMVA.Types.kLikelihood, "Likelihood",
            "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" )
    # Decorrelated likelihood
    if "LikelihoodD" in mlist:
        factory.BookMethod(
            TMVA.Types.kLikelihood, "LikelihoodD",
            "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" )
    # PCA-transformed likelihood
    if "LikelihoodPCA" in mlist:
        factory.BookMethod(
            TMVA.Types.kLikelihood, "LikelihoodPCA",
            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" )
    # Use a kernel density estimator to approximate the PDFs
    if "LikelihoodKDE" in mlist:
        factory.BookMethod(
            TMVA.Types.kLikelihood, "LikelihoodKDE",
            "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" )
    # Use a variable-dependent mix of splines and kernel density estimator
    if "LikelihoodMIX" in mlist:
        factory.BookMethod(
            TMVA.Types.kLikelihood, "LikelihoodMIX",
            "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" )

    # Test the multi-dimensional probability density estimator
    # here are the options strings for the MinMax and RMS methods, respectively:
    #   "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
    #   "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
    if "PDERS" in mlist:
        # DEFAULT
        # factory.BookMethod( TMVA.Types.kPDERS, "PDERS",
        #     "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" )
        # CHOOSE RIGID VOLUME SO IT DOESNT TAKE SO LONG
        factory.BookMethod(
            TMVA.Types.kPDERS, "PDERS",
            "!H:!V:NormTree=T:VolumeRangeMode=Unscaled:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" )
    if "PDERSD" in mlist:
        factory.BookMethod(
            TMVA.Types.kPDERS, "PDERSD",
            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" )
    if "PDERSPCA" in mlist:
        factory.BookMethod(
            TMVA.Types.kPDERS, "PDERSPCA",
            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" )

    # Multi-dimensional likelihood estimator using self-adapting phase-space binning
    if "PDEFoam" in mlist:
        factory.BookMethod(
            TMVA.Types.kPDEFoam, "PDEFoam",
            "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" )
    if "PDEFoamBoost" in mlist:
        factory.BookMethod(
            TMVA.Types.kPDEFoam, "PDEFoamBoost",
            "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" )

    # K-Nearest Neighbour classifier (KNN)
    if "KNN" in mlist:
        factory.BookMethod(
            TMVA.Types.kKNN, "KNN",
            "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" )

    # H-Matrix (chi2-squared) method
    if "HMatrix" in mlist:
        factory.BookMethod( TMVA.Types.kHMatrix, "HMatrix", "!H:!V" )

    # Linear discriminant (same as Fisher discriminant)
    if "LD" in mlist:
        factory.BookMethod(
            TMVA.Types.kLD, "LD",
            "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )

    # Fisher discriminant (same as LD)
    if "Fisher" in mlist:
        factory.BookMethod(
            TMVA.Types.kFisher, "Fisher",
            "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )
    # Fisher with Gauss-transformed input variables
    if "FisherG" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss" )
    # Composite classifier: ensemble (tree) of boosted Fisher classifiers
    if "BoostedFisher" in mlist:
        factory.BookMethod(
            TMVA.Types.kFisher, "BoostedFisher",
            "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" )

    # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if "FDA_MC" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_MC",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );
    if "FDA_GA" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_GA",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );
    if "FDA_SA" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_SA",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );
    if "FDA_MT" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_MT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );
    if "FDA_GAMT" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_GAMT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );
    if "FDA_MCMT" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_MCMT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

    # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if "MLP" in mlist:
        factory.BookMethod(
            TMVA.Types.kMLP, "MLP",
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" )
    if "MLPBFGS" in mlist:
        factory.BookMethod(
            TMVA.Types.kMLP, "MLPBFGS",
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" )
    if "MLPBNN" in mlist:
        # BFGS training with bayesian regulators
        factory.BookMethod(
            TMVA.Types.kMLP, "MLPBNN",
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" )

    # CF(Clermont-Ferrand)ANN
    if "CFMlpANN" in mlist:
        # n_cycles:#nodes:#nodes:...
        factory.BookMethod( TMVA.Types.kCFMlpANN, "CFMlpANN",
            "!H:!V:NCycles=2000:HiddenLayers=N+1,N" )
    # Tmlp(Root)ANN
    if "TMlpANN" in mlist:
        # n_cycles:#nodes:#nodes:...
        factory.BookMethod( TMVA.Types.kTMlpANN, "TMlpANN",
            "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" )

    # Support Vector Machine
    if "SVM" in mlist:
        factory.BookMethod( TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" )

    # Boosted Decision Trees
    if "BDTG" in mlist:
        factory.BookMethod(
            TMVA.Types.kBDT, "BDTG",
            "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" )
    if "BDT" in mlist:
        factory.BookMethod(
            TMVA.Types.kBDT, "BDT",
            "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" )
    if "BDTB" in mlist:
        factory.BookMethod(
            TMVA.Types.kBDT, "BDTB",
            "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" )
    if "BDTD" in mlist:
        factory.BookMethod(
            TMVA.Types.kBDT, "BDTD",
            "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" )

    # RuleFit -- TMVA implementation of Friedman's method
    if "RuleFit" in mlist:
        factory.BookMethod(
            TMVA.Types.kRuleFit, "RuleFit",
            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" )

    # --------------------------------------------------------------------------------------------------
    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.
    # Train MVAs
    factory.TrainAllMethods()
    # Test MVAs
    factory.TestAllMethods()
    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()
    print "=== wrote root file %s\n" % OutFileName
    print "=== TMVAClassification is done!\n"
# NOTE(review): this chunk begins mid-expression — the first line is the tail
# of a call (presumably RNG.multivariate_normal(mean, cov, n_events) building
# the `background` sample, mirroring a `signal` sample built just above);
# confirm against the preceding lines.
        np.ones(n_vars) * -1, np.diag(np.ones(n_vars)), n_events)
X = np.concatenate([signal, background])
# Labels: +1 for signal rows, -1 for background rows; random integer weights.
y = np.ones(X.shape[0])
w = RNG.randint(1, 10, n_events * 2)
y[signal.shape[0]:] *= -1
# Shuffle events so signal and background are interleaved.
# NOTE(review): `w` is NOT re-indexed by `permute`, so weights no longer line
# up with their original events — harmless only because the weights are
# random; verify intent.
permute = RNG.permutation(y.shape[0])
X = X[permute]
y = y[permute]

# Split into training and test datasets
X_train, y_train, w_train = X[:n_events], y[:n_events], w[:n_events]
X_test, y_test, w_test = X[n_events:], y[n_events:], w[n_events:]

output = TFile('tmva_output.root', 'recreate')
factory = TMVA.Factory('classifier', output,
                       'AnalysisType=Classification:'
                       '!V:Silent:!DrawProgressBar')
# One float input variable per feature column: f0, f1, ...
for n in range(n_vars):
    factory.AddVariable('f{0}'.format(n), 'F')
# Call root_numpy's utility functions to add events from the arrays
add_classification_events(factory, X_train, y_train, weights=w_train)
add_classification_events(factory, X_test, y_test, weights=w_test,
                          test=True)
# Train a classifier
factory.PrepareTrainingAndTestTree(TCut('1'), 'NormMode=EqualNumEvents')
factory.BookMethod('Fisher', 'Fisher',
                   'Fisher:VarTransform=None:CreateMVAPdfs:'
                   'PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:'
                   'NsmoothMVAPdf=10')
factory.TrainAllMethods()
def runJob():
    """Book and train AdaBoost, bagged and gradient-boosted BDT variants.

    Samples, variables and the preselection cut come from the module-level
    ``config``; data samples are skipped.  Results are written to
    TMVA_24March.root under the 'dataset_24March' dataset.
    """
    TMVA.Tools.Instance()
    TMVA.PyMethodBase.PyInitialize()

    dataloader = TMVA.DataLoader('dataset_24March')
    output = TFile.Open('TMVA_24March.root', 'RECREATE')
    factory = TMVA.Factory(
        'TMVAClassification', output,
        '!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification')

    for br in config.mvaVariables:
        dataloader.AddVariable(br)

    # Chain every MC sample; real data is excluded from the training.
    for sampleName, sample in config.samples.items():
        if config.structure[sampleName]['isData'] == 1:
            continue
        sample['tree'] = TChain("Events")
        for f in sample['name']:
            sample['tree'].Add(f)
        if config.structure[sampleName]['isSignal'] == 1:
            dataloader.AddSignalTree(sample['tree'], 1.0)
        else:
            dataloader.AddBackgroundTree(sample['tree'], 1.0)
        # output_dim += 1

    # BUGFIX: the option string was 'SplitMode=Random::SplitSeed=10...' — the
    # doubled ':' produces an empty token in TMVA's colon-separated option
    # parser; a single ':' is the correct separator.
    dataloader.PrepareTrainingAndTestTree(
        TCut(config.cut),
        'SplitMode=Random:SplitSeed=10:NormMode=EqualNumEvents')

    # AdaBoost variants.
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDT",
        "!H:!V:NTrees=500:MinNodeSize=0.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.1:SeparationType=GiniIndex:nCuts=500"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDT1",
        "!H:!V:NTrees=1000:MinNodeSize=0.5%:MaxDepth=2:BoostType=AdaBoost:AdaBoostBeta=0.1:SeparationType=GiniIndex:nCuts=1000"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDT2",
        "!H:!V:NTrees=800:MinNodeSize=0.5%:MaxDepth=1:BoostType=AdaBoost:AdaBoostBeta=0.2:SeparationType=GiniIndex:nCuts=1000"
    )
    # Gradient-boosting variants (names encode depth/shrinkage/bagging tweaks).
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4D3",
        "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=500:MaxDepth=3"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4C3",
        "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=300:MaxDepth=2"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4SK01",
        "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.01:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=500:MaxDepth=2"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4F07",
        "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.7:nCuts=500:MaxDepth=2"
    )
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTG4SK01F07",
        "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.01:UseBaggedBoost:GradBaggingFraction=0.7:nCuts=500:MaxDepth=2"
    )
    # Pure bagging variants.
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTB",
        "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20")
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTB2",
        "!H:!V:NTrees=800:BoostType=Bagging:SeparationType=GiniIndex:nCuts=50")
    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDTB3",
        "!H:!V:NTrees=1000:BoostType=Bagging:SeparationType=GiniIndex:nCuts=100"
    )

    # Run training, test and evaluation
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    output.Close()
def main():
    """Train TMVA cut-optimisation classifiers for the wino gamma+lepton search.

    Parses command-line options (methods, output file, lepton channel, signal
    grid point), opens the signal and background ntuples for the selected
    lepton channel, registers them with per-sample scale weights, books the
    requested TMVA Cuts methods, runs training/testing/evaluation and finally
    opens the TMVA GUI on the output file.  Python 2 script (print statements).
    """
    try:
        # retrieve command line options
        shortopts = "m:o:l:s:vh?"
        longopts = ["methods=", "outputfile=", "lepton=", "signal=", "verbose", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )
    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    # Defaults; overridden by the options parsed below.
    lepton = DEFAULT_LEPTON
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    verbose = False
    signal = DEFAULT_SIGNAL
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-s", "--signal"):
            signal = a
        elif o in ("-v", "--verbose"):
            verbose = True
        elif o in ("-l", "--lepton"):
            if a == "electron":
                lepton = ELECTRON
            elif a == "muon":
                lepton = MUON
            else:
                # NOTE(review): message is missing the closing quote after muon.
                print "*** Lepton must be 'electron' or 'muon ****"
                sys.exit(1)

    # Print the requested methods (comma- or space-separated list).
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18 (PyROOT enum-argument bug)
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library); load also GUI
    gROOT.SetMacroPath( "./" )
    gROOT.Macro( "./TMVAlogon.C" )
    gROOT.LoadMacro( "./TMVAGui.C" )

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile( outfname, 'RECREATE' )

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory( "TMVAClassification", outputFile,
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )

    # Set verbosity
    factory.SetVerbose( verbose )

    # Resolve the per-channel input ntuple paths.
    if lepton == ELECTRON:
        print "Lepton is ELECTRON."
        path = "/data3/jmitrevs/lepphoton/elphoton_ntuple2/mergedFiles/"
        wino_600_200FileName = path + "wino_600_200_el.root"
        wino_600_500FileName = path + "wino_600_500_el.root"
        wino_1000_200FileName = path + "wino_1000_200_el.root"
        wino_1500_300FileName = path + "wino_1500_300_el.root"
        WlepnuFileName_Np0 = path + "Wenu_Np0.root"
        WlepnuFileName_Np1 = path + "Wenu_Np1.root"
        WlepnuFileName_Np2 = path + "Wenu_Np2.root"
        WlepnuFileName_Np3 = path + "Wenu_Np3.root"
        WlepnuFileName_Np4 = path + "Wenu_Np4.root"
        WlepnuFileName_Np5 = path + "Wenu_Np5.root"
        ZleplepFileName_Np0 = path + "Zee_Np0.root"
        ZleplepFileName_Np1 = path + "Zee_Np1.root"
        ZleplepFileName_Np2 = path + "Zee_Np2.root"
        ZleplepFileName_Np3 = path + "Zee_Np3.root"
        ZleplepFileName_Np4 = path + "Zee_Np4.root"
        ZleplepFileName_Np5 = path + "Zee_Np5.root"
        st_tchan_lepnuFileName = path + "st_tchan_enu.root"
        st_schan_lepnuFileName = path + "st_schan_enu.root"
        ZleplepgammaFileName = path + "Zeegamma.root"
    elif lepton == MUON:
        print "Lepton is MUON."
        path = "/data3/jmitrevs/lepphoton/muphoton2/mergedFiles/"
        wino_600_200FileName = path + "wino_600_200_mu.root"
        wino_600_500FileName = path + "wino_600_500_mu.root"
        wino_1000_200FileName = path + "wino_1000_200_mu.root"
        wino_1500_300FileName = path + "wino_1500_300_mu.root"
        WlepnuFileName_Np0 = path + "Wmunu_Np0.root"
        WlepnuFileName_Np1 = path + "Wmunu_Np1.root"
        WlepnuFileName_Np2 = path + "Wmunu_Np2.root"
        WlepnuFileName_Np3 = path + "Wmunu_Np3.root"
        WlepnuFileName_Np4 = path + "Wmunu_Np4.root"
        WlepnuFileName_Np5 = path + "Wmunu_Np5.root"
        ZleplepFileName_Np0 = path + "Zmumu_Np0.root"
        ZleplepFileName_Np1 = path + "Zmumu_Np1.root"
        ZleplepFileName_Np2 = path + "Zmumu_Np2.root"
        ZleplepFileName_Np3 = path + "Zmumu_Np3.root"
        ZleplepFileName_Np4 = path + "Zmumu_Np4.root"
        ZleplepFileName_Np5 = path + "Zmumu_Np5.root"
        st_tchan_lepnuFileName = path + "st_tchan_munu.root"
        st_schan_lepnuFileName = path + "st_schan_munu.root"
        ZleplepgammaFileName = path + "Zmumugamma.root"
    else:
        raise ValueError("Lepton has to be ELECTRON or MUON.")

    # Channel-independent samples.
    WtaunuFileName_Np0 = path + "Wtaunu_Np0.root"
    WtaunuFileName_Np1 = path + "Wtaunu_Np1.root"
    WtaunuFileName_Np2 = path + "Wtaunu_Np2.root"
    WtaunuFileName_Np3 = path + "Wtaunu_Np3.root"
    WtaunuFileName_Np4 = path + "Wtaunu_Np4.root"
    WtaunuFileName_Np5 = path + "Wtaunu_Np5.root"
    ZtautauFileName_Np0 = path + "Ztautau_Np0.root"
    ZtautauFileName_Np1 = path + "Ztautau_Np1.root"
    ZtautauFileName_Np2 = path + "Ztautau_Np2.root"
    ZtautauFileName_Np3 = path + "Ztautau_Np3.root"
    ZtautauFileName_Np4 = path + "Ztautau_Np4.root"
    ZtautauFileName_Np5 = path + "Ztautau_Np5.root"
    st_tchan_taunuFileName = path + "st_tchan_taunu.root"
    st_schan_taunuFileName = path + "st_schan_taunu.root"
    st_WtFileName = path + "st_Wt.root"
    WgammaFileName_Np0 = path + "Wgamma_Np0.root"
    WgammaFileName_Np1 = path + "Wgamma_Np1.root"
    WgammaFileName_Np2 = path + "Wgamma_Np2.root"
    WgammaFileName_Np3 = path + "Wgamma_Np3.root"
    WgammaFileName_Np4 = path + "Wgamma_Np4.root"
    WgammaFileName_Np5 = path + "Wgamma_Np5.root"
    ttbarFileName = path + "ttbar.root"
    WWFileName = path + "WW.root"
    WZFileName = path + "WZ.root"
    ZZFileName = path + "ZZ.root"
    ZtautaugammaFileName = path + "Ztautaugamma.root"

    ###########################################
    # Open all input files.
    wino_600_200File = TFile(wino_600_200FileName)
    wino_600_500File = TFile(wino_600_500FileName)
    wino_1000_200File = TFile(wino_1000_200FileName)
    wino_1500_300File = TFile(wino_1500_300FileName)
    WlepnuFile_Np0 = TFile(WlepnuFileName_Np0)
    WlepnuFile_Np1 = TFile(WlepnuFileName_Np1)
    WlepnuFile_Np2 = TFile(WlepnuFileName_Np2)
    WlepnuFile_Np3 = TFile(WlepnuFileName_Np3)
    WlepnuFile_Np4 = TFile(WlepnuFileName_Np4)
    WlepnuFile_Np5 = TFile(WlepnuFileName_Np5)
    WtaunuFile_Np0 = TFile(WtaunuFileName_Np0)
    WtaunuFile_Np1 = TFile(WtaunuFileName_Np1)
    WtaunuFile_Np2 = TFile(WtaunuFileName_Np2)
    WtaunuFile_Np3 = TFile(WtaunuFileName_Np3)
    WtaunuFile_Np4 = TFile(WtaunuFileName_Np4)
    WtaunuFile_Np5 = TFile(WtaunuFileName_Np5)
    ZleplepFile_Np0 = TFile(ZleplepFileName_Np0)
    ZleplepFile_Np1 = TFile(ZleplepFileName_Np1)
    ZleplepFile_Np2 = TFile(ZleplepFileName_Np2)
    ZleplepFile_Np3 = TFile(ZleplepFileName_Np3)
    ZleplepFile_Np4 = TFile(ZleplepFileName_Np4)
    ZleplepFile_Np5 = TFile(ZleplepFileName_Np5)
    ZtautauFile_Np0 = TFile(ZtautauFileName_Np0)
    ZtautauFile_Np1 = TFile(ZtautauFileName_Np1)
    ZtautauFile_Np2 = TFile(ZtautauFileName_Np2)
    ZtautauFile_Np3 = TFile(ZtautauFileName_Np3)
    ZtautauFile_Np4 = TFile(ZtautauFileName_Np4)
    ZtautauFile_Np5 = TFile(ZtautauFileName_Np5)
    WgammaFile_Np0 = TFile(WgammaFileName_Np0)
    WgammaFile_Np1 = TFile(WgammaFileName_Np1)
    WgammaFile_Np2 = TFile(WgammaFileName_Np2)
    WgammaFile_Np3 = TFile(WgammaFileName_Np3)
    WgammaFile_Np4 = TFile(WgammaFileName_Np4)
    WgammaFile_Np5 = TFile(WgammaFileName_Np5)
    ttbarFile = TFile(ttbarFileName)
    st_tchan_lepnuFile = TFile(st_tchan_lepnuFileName)
    st_tchan_taunuFile = TFile(st_tchan_taunuFileName)
    st_schan_lepnuFile = TFile(st_schan_lepnuFileName)
    st_schan_taunuFile = TFile(st_schan_taunuFileName)
    st_WtFile = TFile(st_WtFileName)
    WWFile = TFile(WWFileName)
    WZFile = TFile(WZFileName)
    ZZFile = TFile(ZZFileName)
    ZleplepgammaFile = TFile(ZleplepgammaFileName)
    ZtautaugammaFile = TFile(ZtautaugammaFileName)

    ###########################################
    # Fetch the analysis tree from every file.
    wino_600_200Tree = wino_600_200File.Get("GammaLepton")
    wino_600_500Tree = wino_600_500File.Get("GammaLepton")
    wino_1000_200Tree = wino_1000_200File.Get("GammaLepton")
    wino_1500_300Tree = wino_1500_300File.Get("GammaLepton")
    WlepnuTree_Np0 = WlepnuFile_Np0.Get("GammaLepton")
    WlepnuTree_Np1 = WlepnuFile_Np1.Get("GammaLepton")
    WlepnuTree_Np2 = WlepnuFile_Np2.Get("GammaLepton")
    WlepnuTree_Np3 = WlepnuFile_Np3.Get("GammaLepton")
    WlepnuTree_Np4 = WlepnuFile_Np4.Get("GammaLepton")
    WlepnuTree_Np5 = WlepnuFile_Np5.Get("GammaLepton")
    WtaunuTree_Np0 = WtaunuFile_Np0.Get("GammaLepton")
    WtaunuTree_Np1 = WtaunuFile_Np1.Get("GammaLepton")
    WtaunuTree_Np2 = WtaunuFile_Np2.Get("GammaLepton")
    WtaunuTree_Np3 = WtaunuFile_Np3.Get("GammaLepton")
    WtaunuTree_Np4 = WtaunuFile_Np4.Get("GammaLepton")
    WtaunuTree_Np5 = WtaunuFile_Np5.Get("GammaLepton")
    ZleplepTree_Np0 = ZleplepFile_Np0.Get("GammaLepton")
    ZleplepTree_Np1 = ZleplepFile_Np1.Get("GammaLepton")
    ZleplepTree_Np2 = ZleplepFile_Np2.Get("GammaLepton")
    ZleplepTree_Np3 = ZleplepFile_Np3.Get("GammaLepton")
    ZleplepTree_Np4 = ZleplepFile_Np4.Get("GammaLepton")
    ZleplepTree_Np5 = ZleplepFile_Np5.Get("GammaLepton")
    ZtautauTree_Np0 = ZtautauFile_Np0.Get("GammaLepton")
    ZtautauTree_Np1 = ZtautauFile_Np1.Get("GammaLepton")
    ZtautauTree_Np2 = ZtautauFile_Np2.Get("GammaLepton")
    ZtautauTree_Np3 = ZtautauFile_Np3.Get("GammaLepton")
    ZtautauTree_Np4 = ZtautauFile_Np4.Get("GammaLepton")
    ZtautauTree_Np5 = ZtautauFile_Np5.Get("GammaLepton")
    WgammaTree_Np0 = WgammaFile_Np0.Get("GammaLepton")
    WgammaTree_Np1 = WgammaFile_Np1.Get("GammaLepton")
    WgammaTree_Np2 = WgammaFile_Np2.Get("GammaLepton")
    WgammaTree_Np3 = WgammaFile_Np3.Get("GammaLepton")
    WgammaTree_Np4 = WgammaFile_Np4.Get("GammaLepton")
    WgammaTree_Np5 = WgammaFile_Np5.Get("GammaLepton")
    ttbarTree = ttbarFile.Get("GammaLepton")
    st_tchan_lepnuTree = st_tchan_lepnuFile.Get("GammaLepton")
    st_tchan_taunuTree = st_tchan_taunuFile.Get("GammaLepton")
    st_schan_lepnuTree = st_schan_lepnuFile.Get("GammaLepton")
    st_schan_taunuTree = st_schan_taunuFile.Get("GammaLepton")
    st_WtTree = st_WtFile.Get("GammaLepton")
    WWTree = WWFile.Get("GammaLepton")
    WZTree = WZFile.Get("GammaLepton")
    ZZTree = ZZFile.Get("GammaLepton")
    ZleplepgammaTree = ZleplepgammaFile.Get("GammaLepton")
    ZtautaugammaTree = ZtautaugammaFile.Get("GammaLepton")

    ##############################
    # Per-sample event weights (scale factors).
    # NOTE(review): two earlier, superseded scale tables (an older ntuple set
    # and a "ntuple_pt25" set) were kept here as ~100 lines of commented-out
    # assignments; they are dead text and have been dropped from this copy.
    # Recover them from version control if the historical values are needed.
    if lepton == ELECTRON:
        wino_600_200_scale = 0.291875
        wino_600_500_scale = 2.69352
        wino_1000_200_scale = 4.1325
        wino_1500_300_scale = 0.0093269
        wino_1000_100_scale = 69.5
        wino_800_700_scale = 0.2328
        Wlepnu_Np0_scale = 12.0052623622
        Wlepnu_Np1_scale = 3.13076456857
        Wlepnu_Np2_scale = 0.60296853897
        Wlepnu_Np3_scale = 0.603183318846
        Wlepnu_Np4_scale = 0.62088
        Wlepnu_Np5_scale = 0.600008571551
        Wtaunu_Np0_scale = 12.1457006649
        Wtaunu_Np1_scale = 3.12868868923
        Wtaunu_Np2_scale = 0.602359552172
        Wtaunu_Np3_scale = 0.602586672951
        Wtaunu_Np4_scale = 0.62088496708
        Wtaunu_Np5_scale = 0.638769230769
        Zleplep_Np0_scale = 0.631361988532
        Zleplep_Np1_scale = 0.629541167757
        Zleplep_Np2_scale = 0.625618828688
        Zleplep_Np3_scale = 0.634090909091
        Zleplep_Np4_scale = 0.6
        Zleplep_Np5_scale = 0.51875
        Ztautau_Np0_scale = 0.631228327261
        Ztautau_Np1_scale = 0.631347664299
        Ztautau_Np2_scale = 0.622916409433
        Ztautau_Np3_scale = 0.640077378243
        Ztautau_Np4_scale = 0.581269375646
        Ztautau_Np5_scale = 0.48125
        Wgamma_Np0_scale = 0.0132834003639
        Wgamma_Np1_scale = 0.0651816146862
        Wgamma_Np2_scale = 0.144613309091
        Wgamma_Np3_scale = 0.144307893333
        Wgamma_Np4_scale = 0.13811616
        Wgamma_Np5_scale = 0.12653952
        ttbar_scale = 0.0384505023442
        st_tchan_lepnu_scale = 0.200916540624
        st_tchan_taunu_scale = 0.201132004918
        st_Wt_scale = 0.0916407781992
        WW_scale = 0.0342151663714
        WZ_scale = 0.110873818259
        ZZ_scale = 0.0252773011092
        Zleplepgamma_scale = 0.963
        Ztautaugamma_scale = 0.941960800016
        gamma_Np1_scale = 4.17064063358
        gamma_Np2_scale = 3.35244054801
        gamma_Np3_scale = 1.36994217452
        gamma_Np4_scale = 1.41464077802
        gamma_Np5_scale = 1.23661096137
    elif lepton == MUON:
        # NOTE(review): the muon branch defines only the 600_200 wino scale;
        # selecting -s 600_500/1000_200/1500_300 with the muon channel will
        # raise a NameError below.  Presumably the remaining muon-channel
        # scales were never filled in — confirm before relying on them.
        wino_600_200_scale = 0.291875
        Wlepnu_Np0_scale = 11.9925371604
        Wlepnu_Np1_scale = 3.13058966
        Wlepnu_Np2_scale = 0.601616497017
        Wlepnu_Np3_scale = 0.605913424797
        Wlepnu_Np4_scale = 0.606001176701
        Wlepnu_Np5_scale = 0.593142857143
        Wtaunu_Np0_scale = 12.1457006649
        Wtaunu_Np1_scale = 3.12868868923
        Wtaunu_Np2_scale = 0.602359552172
        Wtaunu_Np3_scale = 0.602586672951
        Wtaunu_Np4_scale = 0.62088496708
        Wtaunu_Np5_scale = 0.638769230769
        Zleplep_Np0_scale = 0.631664271554
        Zleplep_Np1_scale = 0.628327597475
        Zleplep_Np2_scale = 0.62551337696
        Zleplep_Np3_scale = 0.635795454545
        Zleplep_Np4_scale = 0.572916666667
        Zleplep_Np5_scale = 0.48125
        Ztautau_Np0_scale = 0.631228327261
        Ztautau_Np1_scale = 0.631347664299
        Ztautau_Np2_scale = 0.622916409433
        Ztautau_Np3_scale = 0.640077378243
        Ztautau_Np4_scale = 0.581269375646
        Ztautau_Np5_scale = 0.48125
        Wgamma_Np0_scale = 0.0132834003639
        Wgamma_Np1_scale = 0.0651816146862
        Wgamma_Np2_scale = 0.144613309091
        Wgamma_Np3_scale = 0.144307893333
        Wgamma_Np4_scale = 0.13811616
        Wgamma_Np5_scale = 0.12653952
        ttbar_scale = 0.0384505023442
        st_tchan_lepnu_scale = 0.201919368378
        st_tchan_taunu_scale = 0.201132004918
        st_Wt_scale = 0.0916407781992
        WW_scale = 0.0342151663714
        WZ_scale = 0.110873818259
        ZZ_scale = 0.0252773011092
        Zleplepgamma_scale = 0.963963963964
        Ztautaugamma_scale = 0.941960800016
        gamma_Np1_scale = 4.08704733658
        gamma_Np2_scale = 3.35244054801
        gamma_Np3_scale = 1.36994217452
        gamma_Np4_scale = 1.41464077802
        gamma_Np5_scale = 1.23661096137

    # Register the requested signal grid point.
    if signal == "600_200":
        factory.AddSignalTree(wino_600_200Tree, wino_600_200_scale)
    elif signal == "600_500":
        factory.AddSignalTree(wino_600_500Tree, wino_600_500_scale)
    elif signal == "1000_200":
        factory.AddSignalTree(wino_1000_200Tree, wino_1000_200_scale)
    elif signal == "1500_300":
        factory.AddSignalTree(wino_1500_300Tree, wino_1500_300_scale)
    else:
        print "*** signal designation not supported: %s ****" % signal
        sys.exit(1)

    # Register the background samples (commented lines are deliberately
    # excluded from the training).
    factory.AddBackgroundTree(WlepnuTree_Np0, Wlepnu_Np0_scale)
    factory.AddBackgroundTree(WlepnuTree_Np1, Wlepnu_Np1_scale)
    factory.AddBackgroundTree(WlepnuTree_Np2, Wlepnu_Np2_scale)
    factory.AddBackgroundTree(WlepnuTree_Np3, Wlepnu_Np3_scale)
    factory.AddBackgroundTree(WlepnuTree_Np4, Wlepnu_Np4_scale)
    factory.AddBackgroundTree(WlepnuTree_Np5, Wlepnu_Np5_scale)
    #factory.AddBackgroundTree(WtaunuTree_Np0, Wtaunu_Np0_scale)
    factory.AddBackgroundTree(WtaunuTree_Np1, Wtaunu_Np1_scale)
    factory.AddBackgroundTree(WtaunuTree_Np2, Wtaunu_Np2_scale)
    factory.AddBackgroundTree(WtaunuTree_Np3, Wtaunu_Np3_scale)
    factory.AddBackgroundTree(WtaunuTree_Np4, Wtaunu_Np4_scale)
    factory.AddBackgroundTree(WtaunuTree_Np5, Wtaunu_Np5_scale)
    # factory.AddBackgroundTree(ZleplepTree_Np0, Zleplep_Np0_scale)
    # factory.AddBackgroundTree(ZleplepTree_Np1, Zleplep_Np1_scale)
    # factory.AddBackgroundTree(ZleplepTree_Np2, Zleplep_Np2_scale)
    # factory.AddBackgroundTree(ZleplepTree_Np3, Zleplep_Np3_scale)
    # factory.AddBackgroundTree(ZleplepTree_Np4, Zleplep_Np4_scale)
    # factory.AddBackgroundTree(ZleplepTree_Np5, Zleplep_Np5_scale)
    # factory.AddBackgroundTree(ZtautauTree_Np0, Ztautau_Np0_scale)
    # factory.AddBackgroundTree(ZtautauTree_Np1, Ztautau_Np1_scale)
    # factory.AddBackgroundTree(ZtautauTree_Np2, Ztautau_Np2_scale)
    # factory.AddBackgroundTree(ZtautauTree_Np3, Ztautau_Np3_scale)
    # factory.AddBackgroundTree(ZtautauTree_Np4, Ztautau_Np4_scale)
    # factory.AddBackgroundTree(ZtautauTree_Np5, Ztautau_Np5_scale)
    factory.AddBackgroundTree(WgammaTree_Np0, Wgamma_Np0_scale)
    factory.AddBackgroundTree(WgammaTree_Np1, Wgamma_Np1_scale)
    factory.AddBackgroundTree(WgammaTree_Np2, Wgamma_Np2_scale)
    factory.AddBackgroundTree(WgammaTree_Np3, Wgamma_Np3_scale)
    factory.AddBackgroundTree(WgammaTree_Np4, Wgamma_Np4_scale)
    factory.AddBackgroundTree(WgammaTree_Np5, Wgamma_Np5_scale)
    factory.AddBackgroundTree(ttbarTree, ttbar_scale)
    factory.AddBackgroundTree(st_tchan_lepnuTree, st_tchan_lepnu_scale)
    factory.AddBackgroundTree(st_tchan_taunuTree, st_tchan_taunu_scale)
    # factory.AddBackgroundTree(st_schan_lepnuTree, st_schan_lepnu_scale)
    # factory.AddBackgroundTree(st_schan_taunuTree, st_schan_taunu_scale)
    factory.AddBackgroundTree(st_WtTree, st_Wt_scale)
    factory.AddBackgroundTree(WWTree, WW_scale)
    factory.AddBackgroundTree(WZTree, WZ_scale)
    factory.AddBackgroundTree(ZZTree, ZZ_scale)
    factory.AddBackgroundTree(ZleplepgammaTree, Zleplepgamma_scale)
    factory.AddBackgroundTree(ZtautaugammaTree, Ztautaugamma_scale)

    # Training variables.
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    factory.AddVariable( "MET := sqrt(Metx*Metx+Mety*Mety)", 'F' )
    # factory.AddVariable( "HT", 'F' )
    factory.AddVariable( "PhotonPt[0]", 'F' )
    #factory.AddVariable( "ElectronPt[0]", 'F' )
    if lepton == ELECTRON:
        factory.AddVariable( "mTel", 'F' )
    else:
        factory.AddVariable( "mTmu", 'F' )
    #factory.AddVariable( "abs(PhotonEta[0])", 'F' )
    #factory.AddVariable( "abs(ElectronEta[0])", 'F' )

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    #mycutSig = TCut( "abs(PhotonEta[0]) < 2.01 && abs(ElectronEta[0]) < 2.01" )
    # dR(photon, lepton) > 0.7 for the selected channel.
    if lepton == ELECTRON:
        mycutSig = TCut( "sqrt((PhotonEta[0]-ElectronEta[0])*(PhotonEta[0]-ElectronEta[0]) + (PhotonPhi[0]-ElectronPhi[0])*(PhotonPhi[0]-ElectronPhi[0])) > 0.7")
    else:
        mycutSig = TCut( "sqrt((PhotonEta[0]-MuonEta[0])*(PhotonEta[0]-MuonEta[0]) + (PhotonPhi[0]-MuonPhi[0])*(PhotonPhi[0]-MuonPhi[0])) > 0.7")
    #mycutSig = TCut( "PhotonPt[0] > 85000" )
    mycutBkg = mycutSig

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    # --------------------------------------------------------------------------------------------------
    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "Cuts",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" )
    if "CutsD" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsD",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" )
    if "CutsPCA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsPCA",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" )
    # if "CutsGA" in mlist:
    #     factory.BookMethod( TMVA.Types.kCuts, "CutsGA",
    #                         "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" )
    # if "CutsSA" in mlist:
    #     factory.BookMethod( TMVA.Types.kCuts, "CutsSA",
    #                         "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" )

    # --------------------------------------------------------------------------------------------------
    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    factory.TrainAllMethods()
    # Test MVAs
    factory.TestAllMethods()
    # Evaluate MVAs
    factory.EvaluateAllMethods()
    # Save the output.
    outputFile.Close()

    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"

    # open the GUI for the result macros
    gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname )
    # keep the ROOT thread running
    gApplication.Run()
def main():  # runs the program
    """Train a Keras DNN through TMVA's PyKeras method.

    Command-line options pick the dataset directory (-d), the site hosting
    the ntuples (-w lpc/brux), the year (-y 2017/2018) and the variable
    option (-o): "0" uses the full default variable list, "1" reads the
    variable list produced by a previous hyper-parameter-optimization run
    from <dataset>/varsListHPO.txt.  Writes the trained model, the TMVA
    output file and (for option 1) appends the ROC value to the HPO file.
    """
    checkRootVer()  # check that ROOT version is correct
    try:  # retrieve command line options
        shortopts = "d:o:v:w:y:h?"  # possible command line options
        longopts = ["dataset=", "option=", "where=", "year=", "verbose", "help", "usage"]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)
    except getopt.GetoptError:  # output error if command line argument invalid
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    # Table of (short flag, long flag, name, default) per argument; column 3
    # holds the effective value after parsing.
    myArgs = np.array([
        ['-d', '--dataset', 'dataset', 'dataset'],
        ['-w', '--where', 'where', 'lpc'],
        ['-y', '--year', 'year', 2017],
        ['-o', '--option', 'option', 0],
        ['-v', '--verbose', 'verbose', True]
    ], dtype="object")

    for opt, arg in opts:
        if opt in myArgs[:, 0]:
            index = np.where(myArgs[:, 0] == opt)[0][0]  # np.where returns a tuple of arrays
            myArgs[index, 3] = str(arg)  # override the default with the command line argument
        elif opt in myArgs[:, 1]:
            index = np.where(myArgs[:, 1] == opt)[0][0]
            myArgs[index, 3] = arg
        if opt in ("-?", "-h", "--help", "--usage"):  # provides command line help
            usage()
            sys.exit(0)

    # Containers for the opened files / trees.
    bkg_list = []
    bkg_trees_list = []
    sig_list = []
    sig_trees_list = []

    # Locate each argument's row in the table.
    option_index = np.where(myArgs[:, 2] == 'option')[0][0]
    dataset_index = np.where(myArgs[:, 2] == 'dataset')[0][0]
    verbose_index = np.where(myArgs[:, 2] == 'verbose')[0][0]
    where_index = np.where(myArgs[:, 2] == 'where')[0][0]
    year_index = np.where(myArgs[:, 2] == 'year')[0][0]

    DATASETPATH = myArgs[dataset_index][3]
    DATASET = DATASETPATH.split("/")[0]
    # BUG FIX: defaults are stored as int (0, 2017) while command-line
    # overrides arrive as strings, so the equality tests below ("0"/"1",
    # 2017/2018) silently failed for one of the two cases.  Normalize the
    # types once here.
    OPTION = str(myArgs[option_index][3])
    VERBOSE = myArgs[verbose_index][3]
    WHERE = myArgs[where_index][3]
    YEAR = int(myArgs[year_index][3])

    # Resolve the ntuple input directory for site/year.
    if WHERE == "lpc":
        if YEAR == 2017:
            inputDir = varsList.inputDirLPC2017
        elif YEAR == 2018:
            inputDir = varsList.inputDirLPC2018
    else:
        if YEAR == 2017:
            inputDir = varsList.inputDirBRUX2017
        elif YEAR == 2018:
            inputDir = varsList.inputDirBRUX2018

    # Build the training variable list.
    if OPTION == "0":
        print("Using Option 0: default varList")
        varList = varsList.varList["DNN"]
    elif OPTION == "1":
        print("Using Option 1: selected data from {}".format(DATASETPATH))
        # Variables are listed after the "Variable List:" marker line.
        varsListHPO = open(DATASETPATH + "/varsListHPO.txt", "r").readlines()
        varList = []
        START = False
        for line in varsListHPO:
            if START == True:
                varList.append(str(line.strip()))
            if "Variable List:" in line:
                START = True

    numVars = len(varList)
    outf_key = str("Keras_" + str(numVars) + "vars")
    OUTF_NAME = DATASET + "/weights/TMVA_" + outf_key + ".root"
    outputfile = TFile(OUTF_NAME, "RECREATE")

    # initialize and set-up TMVA factory
    factory = TMVA.Factory(
        "Training", outputfile,
        "!V:!ROC:Silent:Color:!DrawProgressBar:Transformations=I;:AnalysisType=Classification"
    )
    factory.SetVerbose(bool(myArgs[verbose_index, 3]))
    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/" + outf_key

    # initialize and set-up TMVA loader
    loader = TMVA.DataLoader(DATASET)

    # Option 0 entries are (name, title, unit) tuples; option 1 entries are
    # bare names.  NJets is the only integer variable.
    if OPTION == "0":
        for var in varList:
            if var[0] == "NJets_MultiLepCalc":
                loader.AddVariable(var[0], var[1], var[2], 'I')
            else:
                loader.AddVariable(var[0], var[1], var[2], "F")
    if OPTION == "1":
        for var in varList:
            if var == "NJets_MultiLepCalc":
                loader.AddVariable(var, "", "", "I")
            else:
                loader.AddVariable(var, "", "", "F")

    # add signal files
    if YEAR == 2017:
        for i in range(len(varsList.sig2017_2)):
            sig_list.append(TFile.Open(inputDir + varsList.sig2017_2[i]))
            sig_trees_list.append(sig_list[i].Get("ljmet"))
            sig_trees_list[i].GetEntry(0)
            loader.AddSignalTree(sig_trees_list[i])
    elif YEAR == 2018:
        for i in range(len(varsList.sig2018_2)):
            sig_list.append(TFile.Open(inputDir + varsList.sig2018_2[i]))
            sig_trees_list.append(sig_list[i].Get("ljmet"))
            sig_trees_list[i].GetEntry(0)
            loader.AddSignalTree(sig_trees_list[i])

    # add background files (skip empty trees)
    if YEAR == 2017:
        for i in range(len(varsList.bkg2017_2)):
            bkg_list.append(TFile.Open(inputDir + varsList.bkg2017_2[i]))
            bkg_trees_list.append(bkg_list[i].Get("ljmet"))
            bkg_trees_list[i].GetEntry(0)
            if bkg_trees_list[i].GetEntries() == 0:
                continue
            loader.AddBackgroundTree(bkg_trees_list[i])
    elif YEAR == 2018:
        for i in range(len(varsList.bkg2018_2)):
            bkg_list.append(TFile.Open(inputDir + varsList.bkg2018_2[i]))
            bkg_trees_list.append(bkg_list[i].Get("ljmet"))
            bkg_trees_list[i].GetEntry(0)
            if bkg_trees_list[i].GetEntries() == 0:
                continue
            loader.AddBackgroundTree(bkg_trees_list[i])

    loader.SetSignalWeightExpression(weightStrS)
    loader.SetBackgroundWeightExpression(weightStrB)

    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)
    loader.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )

    ######################################################
    ######                                          ######
    ######          K E R A S   D N N               ######
    ######                                          ######
    ######################################################

    # Hyper-parameters: either the hard-coded defaults (option 0) or the
    # values found by a prior optimization run (option 1).
    HIDDEN = 0
    NODES = 0
    LRATE = 0.
    PATTERN = ""
    REGULATOR = ""
    ACTIVATION = ""
    BATCH_SIZE = 0
    # modify this when implementing hyper parameter optimization:
    model_name = 'TTTT_' + str(numVars) + 'vars_model.h5'
    EPOCHS = 100
    PATIENCE = 20

    if OPTION == "0":
        HIDDEN = 3
        NODES = 100
        LRATE = 0.01
        PATTERN = 'static'
        REGULATOR = 'none'
        ACTIVATION = 'relu'
        BATCH_SIZE = 256
    if OPTION == "1":
        # Parameters follow the "Optimized Parameters:" marker line in the
        # "*params*" file of the dataset directory.
        datasetDir = os.listdir(DATASETPATH)
        for file in datasetDir:
            if "params" in file:
                optFileName = file
        optFile = open(DATASETPATH + "/" + optFileName, "r").readlines()
        START = False
        for line in optFile:
            if START == True:
                if "Hidden" in line:
                    HIDDEN = int(line.split(":")[1].strip())
                if "Initial" in line:
                    NODES = int(line.split(":")[1].strip())
                if "Batch" in line:
                    BATCH_SIZE = 2**int(line.split(":")[1].strip())
                if "Learning" in line:
                    LRATE = float(line.split(":")[1].strip())
                if "Pattern" in line:
                    PATTERN = str(line.split(":")[1].strip())
                if "Regulator" in line:
                    REGULATOR = str(line.split(":")[1].strip())
                if "Activation" in line:
                    ACTIVATION = str(line.split(":")[1].strip())
            if "Optimized Parameters:" in line:
                START = True

    kerasSetting = '!H:!V:VarTransform=G:FilenameModel=' + model_name + \
                   ':SaveBestOnly=true' + \
                   ':NumEpochs=' + str(EPOCHS) + \
                   ':BatchSize=' + str(BATCH_SIZE) + \
                   ':TriesEarlyStopping=' + str(PATIENCE)

    model = build_model(HIDDEN, NODES, LRATE, REGULATOR, PATTERN, ACTIVATION, numVars)
    model.save(model_name)
    model.summary()

    factory.BookMethod(loader, TMVA.Types.kPyKeras, 'PyKeras', kerasSetting)
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()
    outputfile.Close()

    print("Finished training in " + str((time.time() - START_TIME) / 60.0) + " minutes.")
    ROC = factory.GetROCIntegral(DATASET, 'PyKeras')
    print('ROC value is: {}'.format(ROC))
    if OPTION == "1":
        # BUG FIX: the append path was missing the "/" separator
        # (DATASETPATH + "varsListHPO.txt"), so the ROC was written next to
        # the dataset directory instead of into it; also close the handle.
        varsListHPOtxt = open(DATASETPATH + "/varsListHPO.txt", "a")
        varsListHPOtxt.write("ROC Value: {}".format(ROC))
        varsListHPOtxt.close()
def main(): try: # retrive command line options shortopts = "m:i:t:o:vh?" longopts = [ "methods=", "inputfile=", "inputtrees=", "outputfile=", "verbose", "help", "usage" ] opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infname = DEFAULT_INFNAME treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG outfname = DEFAULT_OUTFNAME methods = DEFAULT_METHODS verbose = False for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-m", "--methods"): methods = a elif o in ("-i", "--inputfile"): infname = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit(' ') trees.sort() trees.reverse() if len(trees) - trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True # Print methods mlist = methods.replace(' ', ',').split(',') print "=== TMVAClassification: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." 
sys.exit(1) # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI gROOT.SetMacroPath("./") gROOT.Macro("./TMVAlogon.C") gROOT.LoadMacro("./TMVAGui.C") # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile(outfname, 'RECREATE') # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" (not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose(verbose) # If you wish to modify default settings # (please check "src/Config.h" to see all available global options) # gConfig().GetVariablePlotting()).fTimesRMS = 8.0 # gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory" # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" # [all types of expressions that can also be parsed by TTree::Draw( "expression" )] factory.AddVariable("myvar1 := var1+var2", 'F') factory.AddVariable("myvar2 := var1-var2", "Expression 2", "", 'F') factory.AddVariable("var3", "Variable 3", "units", 'F') factory.AddVariable("var4", "Variable 4", "units", 'F') # You can add so-called "Spectator variables", which are not used in the MVA training, # but will appear in the final "TestTree" produced by TMVA. 
This TestTree will contain the # input variables, the response values of all trained MVAs, and the spectator variables factory.AddSpectator("spec1:=var1*2", "Spectator 1", "units", 'F') factory.AddSpectator("spec2:=var1*3", "Spectator 2", "units", 'F') # Read input data if gSystem.AccessPathName(infname) != 0: gSystem.Exec("wget http://root.cern.ch/files/" + infname) input = TFile.Open(infname) # Get the signal and background trees for training signal = input.Get(treeNameSig) background = input.Get(treeNameBkg) # Global event weights (see below for setting event-wise weights) signalWeight = 1.0 backgroundWeight = 1.0 # ====== register trees ==================================================== # # the following method is the prefered one: # you can add an arbitrary number of signal or background trees factory.AddSignalTree(signal, signalWeight) factory.AddBackgroundTree(background, backgroundWeight) # To give different trees for training and testing, do as follows: # factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ) # factory.AddSignalTree( signalTestTree, signalTestWeight, "Test" ) # Use the following code instead of the above two or four lines to add signal and background # training and test events "by hand" # NOTE that in this case one should not give expressions (such as "var1+var2") in the input # variable definition, but simply compute the expression before adding the event # # # --- begin ---------------------------------------------------------- # # ... 
*** please lookup code in TMVA/macros/TMVAClassification.C *** # # # --- end ------------------------------------------------------------ # # ====== end of register trees ============================================== # Set individual event weights (the variables must exist in the original TTree) # for signal : factory.SetSignalWeightExpression ("weight1*weight2"); # for background: factory.SetBackgroundWeightExpression("weight1*weight2"); factory.SetBackgroundWeightExpression("weight") # Apply additional cuts on the signal and background sample. # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) mycutSig = TCut("") mycutBkg = TCut("") # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ) # -------------------------------------------------------------------------------------------------- # ---- Book MVA methods # # please lookup the various method configuration options in the corresponding cxx files, eg: # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html # it is possible to preset ranges in the option string in which the cut optimisation should be done: # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable # Cut optimisation if "Cuts" in mlist: factory.BookMethod( TMVA.Types.kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart") if "CutsD" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ) if "CutsPCA" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ) if "CutsGA" in mlist: 
factory.BookMethod( TMVA.Types.kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ) if "CutsSA" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ) # Likelihood ("naive Bayes estimator") if "Likelihood" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ) # Decorrelated likelihood if "LikelihoodD" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ) # PCA-transformed likelihood if "LikelihoodPCA" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) # Use a kernel density estimator to approximate the PDFs if "LikelihoodKDE" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) # Use a variable-dependent mix of splines and kernel density estimator if "LikelihoodMIX" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) # Test the multi-dimensional probability density estimator # here are the options strings for the MinMax and RMS methods, respectively: # 
"!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); # "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if "PDERS" in mlist: factory.BookMethod( TMVA.Types.kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ) if "PDERSD" in mlist: factory.BookMethod( TMVA.Types.kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ) if "PDERSPCA" in mlist: factory.BookMethod( TMVA.Types.kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ) # Multi-dimensional likelihood estimator using self-adapting phase-space binning if "PDEFoam" in mlist: factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ) if "PDEFoamBoost" in mlist: factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ) # K-Nearest Neighbour classifier (KNN) if "KNN" in mlist: factory.BookMethod( TMVA.Types.kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ) # H-Matrix (chi2-squared) method if "HMatrix" in mlist: factory.BookMethod(TMVA.Types.kHMatrix, "HMatrix", "!H:!V") # Linear discriminant (same as Fisher discriminant) if "LD" in mlist: factory.BookMethod( TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher discriminant (same as LD) if "Fisher" in mlist: factory.BookMethod( TMVA.Types.kFisher, "Fisher", 
"H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher with Gauss-transformed input variables if "FisherG" in mlist: factory.BookMethod(TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss") # Composite classifier: ensemble (tree) of boosted Fisher classifiers if "BoostedFisher" in mlist: factory.BookMethod( TMVA.Types.kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" ) # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if "FDA_MC" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ) if "FDA_GA" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ) if "FDA_SA" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ) if "FDA_MT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ) if "FDA_GAMT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ) if "FDA_MCMT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MCMT", 
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ) # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if "MLP" in mlist: factory.BookMethod( TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ) if "MLPBFGS" in mlist: factory.BookMethod( TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ) if "MLPBNN" in mlist: factory.BookMethod( TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators # CF(Clermont-Ferrand)ANN if "CFMlpANN" in mlist: factory.BookMethod(TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ) # n_cycles:#nodes:#nodes:... # Tmlp(Root)ANN if "TMlpANN" in mlist: factory.BookMethod( TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ) # n_cycles:#nodes:#nodes:... 
# Support Vector Machine if "SVM" in mlist: factory.BookMethod(TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm") # Boosted Decision Trees if "BDTG" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTG", "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" ) if "BDT" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" ) if "BDTB" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" ) if "BDTD" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTD", "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" ) # RuleFit -- TMVA implementation of Friedman's method if "RuleFit" in mlist: factory.BookMethod( TMVA.Types.kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. outputFile.Close() print "=== wrote root file %s\n" % outfname print "=== TMVAClassification is done!\n" # open the GUI for the result macros gROOT.ProcessLine("TMVAGui(\"%s\")" % outfname) # keep the ROOT thread running gApplication.Run()
def TMVAClassification(methods, sigfname, bkgfname, optname, channel, trees, verbose=False): #="DecayTree,DecayTree" # Print methods mlist = methods.replace(' ', ',').split(',') print "=== TMVAClassification: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Define trees trees = trees.split(",") if len(trees) - trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] # Print output file and directory outfname = "TMVA_%s_%s.root" % (channel, optname) myWeightDirectory = "weights_%s_%s" % (channel, optname) print "=== TMVAClassification: output will be written to:" print "=== %s" % outfname print "=== %s" % myWeightDirectory # Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." sys.exit(1) # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI #gROOT.SetMacroPath( "./" ) #gROOT.Macro ( "./tmva/test/TMVAlogon.C" ) #gROOT.LoadMacro ( "./tmva/test/TMVAGui.C" ) ###Is this really necessary?? # Import TMVA classes from ROOT from ROOT import TMVA # Setup TMVA TMVA.Tools.Instance() # Output file outputFile = TFile(outfname, 'RECREATE') # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" 
(not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose(verbose) # Load data dataloader = TMVA.DataLoader("dataset") # If you wish to modify default settings # (please check "src/Config.h" to see all available global options) # gConfig().GetVariablePlotting()).fTimesRMS = 8.0 (TMVA.gConfig().GetIONames()).fWeightFileDir = myWeightDirectory # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" # [all types of expressions that can also be parsed by TTree::Draw( "expression" )] print "*** Training on channel:" print "*** %s" % channel print "***" ''' if channel == "1": #dataloader.AddVariable( "pplus_ProbNNp", "Prob(p^{+})", "", 'F' ); #dataloader.AddVariable( "Kminus_ProbNNk", "Prob(K^{-})", "", 'F' ); dataloader.AddVariable( "pplus_PT", "P_{T}(p^{+})", "MeV", 'F' ); dataloader.AddVariable( "Kminus_PT", "P_{T}(K^{-})", "MeV", 'F' ); dataloader.AddVariable( "gamma_PT", "P_{T}(#gamma)", "MeV", 'F' ); dataloader.AddVariable( "Lambda_1520_0_PT", "P_{T}(#Lambda(1520))", "MeV", 'F' ); dataloader.AddVariable( "B_PT", "P_{T}(#Lambda_{b})", "MeV", 'F' ); dataloader.AddVariable( "beta:=(-gamma_P+Kminus_P+pplus_P)/(gamma_P+Kminus_P+pplus_P)","#beta", "MeV", 'F' ); dataloader.AddVariable( "MomCons1:=-B_P+gamma_P+Lambda_1520_0_P","P_{tot,1}", "MeV", 'F' ); dataloader.AddVariable( "MomCons2:=-Lambda_1520_0_P+Kminus_P+pplus_P","P_{tot,2}", "MeV", 'F' ); dataloader.AddVariable( "Sum_Kminus_p_eta:=atanh(pplus_PZ/pplus_P)+atanh(Kminus_PZ/Kminus_P)","#eta(K^{-})+#eta(p^{+})","MeV", 'F' ); dataloader.AddVariable( "Diff_Kminus_p_eta:=atanh(Kminus_PZ/Kminus_P)-atanh(pplus_PZ/pplus_P)","#eta(K^{-})-#eta(p^{+})","MeV", 'F' ); dataloader.AddVariable( "pplus_IPCHI2_OWNPV", "#chi^{2}_{IP}(p^{+})", "" , 'F' ); 
dataloader.AddVariable( "Kminus_IPCHI2_OWNPV", "#chi^{2}_{IP}(K^{-})", "" , 'F' ); dataloader.AddVariable( "B_IPCHI2_OWNPV", "#chi^{2}_{IP}(#Lambda_{b})", "" , 'F' ); #dataloader.AddVariable( "gamma_IPCHI2_OWNPV", "IP #chi^{2}(#gamma)", "" , 'F' ); #dataloader.AddVariable( "Lambda_1520_0_IP_OWNPV", "IP(#Lambda(1520))", "mm", 'F' ); #dataloader.AddVariable( "Lambda_1520_0_IPCHI2_OWNPV", "IP#chi^{2}(#Lambda(1520))", "", 'F' ); dataloader.AddVariable( "Lambda_1520_0_OWNPV_CHI2", "#chi^{2}_{vertex}(#Lambda(1520))", "" , 'F' ); dataloader.AddVariable( "B_OWNPV_CHI2", "#chi^{2}_{vertex}(#Lambda_{b})", "" , 'F' ); dataloader.AddVariable( "B_DIRA_OWNPV", "DIRA(#Lambda_{b})", "" , 'F' ); #dataloader.AddVariable( "Lambda_1520_0_FDCHI2_OWNPV", "FD #chi^{2}(#Lambda(1520))", "", 'F' ); dataloader.AddVariable( "B_FDCHI2_OWNPV", "#chi^{2}_{FD}(#Lambda_{b})", "", 'F' ); ''' if channel == "2": dataloader.AddVariable("pplus_PT", "P_{T}(p^{+})", "MeV", 'F') dataloader.AddVariable("Kminus_PT", "P_{T}(K^{-})", "MeV", 'F') dataloader.AddVariable("gamma_PT", "P_{T}(#gamma)", "MeV", 'F') dataloader.AddVariable("Lambda_1520_0_PT", "P_{T}(#Lambda*)", "MeV", 'F') dataloader.AddVariable("B_PT", "P_{T}(#Lambda_{b})", "MeV", 'F') dataloader.AddVariable( "beta:=(-gamma_P+Kminus_P+pplus_P)/(gamma_P+Kminus_P+pplus_P)", "#beta", "", 'F') #ok #dataloader.AddVariable( "MomCons1:=-B_P+gamma_P+Lambda_1520_0_P","P_{tot,1}", "MeV", 'F' );#BDT learned Mass check1 dataloader.AddVariable("MomCons2:=-Lambda_1520_0_P+Kminus_P+pplus_P", "P_{tot,2}", "MeV", 'F') #ok #dataloader.AddVariable( "Sum_Kminus_p_eta:=atanh(pplus_PZ/pplus_P)+atanh(Kminus_PZ/Kminus_P)","#eta(K^{-})+#eta(p^{+})","", 'F' );#99correlationL_eta dataloader.AddVariable( "Diff_Kminus_p_eta:=atanh(Kminus_PZ/Kminus_P)-atanh(pplus_PZ/pplus_P)", "#eta(K^{-})-#eta(p^{+})", "", 'F') dataloader.AddVariable( "Lambda_1520_0_eta:=atanh(Lambda_1520_0_PZ/Lambda_1520_0_P)", "#eta(#Lambda*)", "", 'F') 
dataloader.AddVariable("gamma_eta:=atanh(gamma_PZ/gamma_P)", "#eta(#gamma)", "", 'F') dataloader.AddVariable("pplus_IPCHI2_OWNPV", "#chi^{2}_{IP}(p^{+})", "", 'F') #dataloader.AddVariable( "Kminus_IPCHI2_OWNPV", "#chi^{2}_{IP}(K^{-})", "" , 'F' ); dataloader.AddVariable("B_IPCHI2_OWNPV", "#chi^{2}_{IP}(#Lambda_{b})", "", 'F') dataloader.AddVariable("Lambda_1520_0_IPCHI2_OWNPV", "#chi^{2}_{IP}(#Lambda*)", "", 'F') dataloader.AddVariable("Lambda_1520_0_OWNPV_CHI2", "#chi^{2}_{vertex}(#Lambda*)", "", 'F') dataloader.AddVariable("B_OWNPV_CHI2", "#chi^{2}_{vertex}(#Lambda_{b})", "", 'F') #dataloader.AddVariable( "B_BMassFit_chi2/B_BMassFit_nDOF", "#chi^{2}_{DTF}/n_{dof}", "" , 'F' );#BDT learned Mass check1 #dataloader.AddVariable( "B_PVFit_chi2/B_PVFit_nDOF", "#chi^{2}_{DTF}/n_{dof}", "" , 'F' );#put it out because array #dataloader.AddVariable( "B_DIRA_OWNPV", "DIRA(#Lambda_{b})", "" , 'F' ); #not used by BDT #dataloader.AddVariable( "Lambda_1520_0_DIRA_OWNPV", "DIRA(#Lambda*)", "" , 'F' ); #not used #dataloader.AddVariable( "Lambda_1520_0_FDCHI2_OWNPV", "FD #chi^{2}(#Lambda*)", "", 'F' ); #not used #dataloader.AddVariable( "B_FDCHI2_OWNPV", "#chi^{2}_{FD}(#Lambda_{b})", "", 'F' ); #not used # Add Spectator Variables: not used for Training but written in final TestTree #dataloader.AddSpectator( "B_M", "M(#Lambda_{b})", "MeV"); #dataloader.AddSpectator( "Lambda_1520_0_M", "M(#Lambda*)", "MeV"); # Read input data if gSystem.AccessPathName(sigfname) != 0: print "Can not find %s" % sigfname if gSystem.AccessPathName(bkgfname) != 0: print "Can not find %s" % bkgfname inputSig = TFile.Open(sigfname) inputBkg = TFile.Open(bkgfname) # Get the signal and background trees for training signal = inputSig.Get(treeNameSig) background = inputBkg.Get(treeNameBkg) # Global event weights (see below for setting event-wise weights) signalWeight = 1.0 backgroundWeight = 1.0 # ====== register trees ==================================================== # # the following method is the 
prefered one: # you can add an arbitrary number of signal or background trees dataloader.AddSignalTree(signal, signalWeight) dataloader.AddBackgroundTree(background, backgroundWeight) # To give different trees for training and testing, do as follows: # dataloader.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ) # dataloader.AddSignalTree( signalTestTree, signalTestWeight, "Test" ) # Use the following code instead of the above two or four lines to add signal and background # training and test events "by hand" # NOTE that in this case one should not give expressions (such as "var1+var2") in the input # variable definition, but simply compute the expression before adding the event # # # --- begin ---------------------------------------------------------- # # ... *** please lookup code in TMVA/macros/TMVAClassification.C *** # # # --- end ------------------------------------------------------------ # # ====== end of register trees ============================================== # Set individual event weights (the variables must exist in the original TTree) # for signal : dataloader.SetSignalWeightExpression ("weight1*weight2"); # for background: dataloader.SetBackgroundWeightExpression("weight1*weight2"); #dataloader.SetBackgroundWeightExpression( "weight" ) # Apply additional cuts on the signal and background sample. 
# example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) mycutSig = TCut( "" ) #"pplus_ProbNNp>0.2 && Kminus_ProbNNk>0.2 && B_PT>4000 && Lambda_1520_0_PT>1500 && gamma_PT>3000 && pplus_PT>1000 && B_FDCHI2_OWNPV>100 && pplus_IPCHI2_OWNPV>50 && Kminus_IPCHI2_OWNPV>40")# B_BKGCAT==0 directly applied in root sample #print(sigfname + str( mycutSig ) + treeNameSig) mycutBkg = TCut( "B_M>6120" ) #"pplus_ProbNNp>0.2 && Kminus_ProbNNk>0.2 && B_PT>4000 && Lambda_1520_0_PT>1500 && gamma_PT>3000 && pplus_PT>1000 && B_FDCHI2_OWNPV>100 && pplus_IPCHI2_OWNPV>50 && Kminus_IPCHI2_OWNPV>40 && B_M>6120")#(B_M>6120 || B_M<5120)" ) #print(bkgfname + str( mycutBkg ) + treeNameBkg) # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples dataloader.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ) # -------------------------------------------------------------------------------------------------- # ---- Book MVA methods # # please lookup the various method configuration options in the corresponding cxx files, eg: # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html # it is possible to preset ranges in the option string in which the cut optimisation should be done: # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable # Cut optimisation if "Cuts" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart") if "CutsD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ) if "CutsPCA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsPCA", 
"!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ) if "CutsGA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsGA", "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" ) if "CutsSA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ) # Likelihood ("naive Bayes estimator") if "Likelihood" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ) # Decorrelated likelihood if "LikelihoodD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ) # PCA-transformed likelihood if "LikelihoodPCA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) # Use a kernel density estimator to approximate the PDFs if "LikelihoodKDE" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) # Use a variable-dependent mix of splines and kernel density estimator if "LikelihoodMIX" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodMIX", 
"!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) # Test the multi-dimensional probability density estimator # here are the options strings for the MinMax and RMS methods, respectively: # "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); # "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if "PDERS" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ) if "PDERSD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ) if "PDERSPCA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ) # Multi-dimensional likelihood estimator using self-adapting phase-space binning if "PDEFoam" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ) if "PDEFoamBoost" in mlist: factory.BookMethod( dataloader, TMVA.Types.kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ) # K-Nearest Neighbour classifier (KNN) if "KNN" in mlist: factory.BookMethod( dataloader, TMVA.Types.kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ) # 
H-Matrix (chi2-squared) method if "HMatrix" in mlist: factory.BookMethod(dataloader, TMVA.Types.kHMatrix, "HMatrix", "!H:!V") # Linear discriminant (same as Fisher discriminant) if "LD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher discriminant (same as LD) if "Fisher" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher with Gauss-transformed input variables if "FisherG" in mlist: factory.BookMethod(dataloader, TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss") # Composite classifier: ensemble (tree) of boosted Fisher classifiers if "BoostedFisher" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" ) # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if "FDA_MC" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ) if "FDA_GA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ) if "FDA_SA" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ) if "FDA_MT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MT", 
"H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ) if "FDA_GAMT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ) if "FDA_MCMT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ) # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if "MLP" in mlist: factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLP", #"!H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+3:TestRate=5:!UseRegulator" )#Try "!H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ) #Old if "MLPBFGS" in mlist: factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ) if "MLPBNN" in mlist: factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators # CF(Clermont-Ferrand)ANN if "CFMlpANN" in mlist: factory.BookMethod(dataloader, TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ) # n_cycles:#nodes:#nodes:... # Tmlp(Root)ANN if "TMlpANN" in mlist: factory.BookMethod( dataloader, TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ) # n_cycles:#nodes:#nodes:... 
# Support Vector Machine if "SVM" in mlist: factory.BookMethod(dataloader, TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm") # Boosted Decision Trees if "BDTG" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTG", "!H:!V:NTrees=600:BoostType=Grad:Shrinkage=0.1:UseBaggedGrad:GradBaggingFraction=0.73:SeparationType=GiniIndex:nCuts=15:MaxDepth=2" ) #Settings3 #"!H:!V:NTrees=300:BoostType=Grad:Shrinkage=0.11:UseBaggedGrad:GradBaggingFraction=0.73:SeparationType=GiniIndex:nCuts=17:MaxDepth=4" )#AnaNote #"!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" )#Old if "BDT" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDT", "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ) if "BDTB" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ) if "BDTD" in mlist: factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ) # RuleFit -- TMVA implementation of Friedman's method if "RuleFit" in mlist: factory.BookMethod( dataloader, TMVA.Types.kRuleFit, "RuleFit", "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs print("FLAG 0") factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. 
outputFile.Close() print "=== wrote root file %s\n" % outfname print "=== TMVAClassification is done!\n" # open the GUI for the result macros if not gROOT.IsBatch(): TMVA.TMVAGui(outfname)
def main():
    """Command-line driver for a seed-based TMVA variable-importance scan.

    Parses CLI options, encodes the seed integer as a 53-bit binary mask that
    selects which variables from varsList.varList participate, books a single
    BDT on that variable subset, trains/tests/evaluates it, and prints the
    ROC integral for the seed.

    Relies on names defined elsewhere in this file/module: getopt, sys,
    varsList, weightStrS/weightStrB, cutStrS/cutStrB, usage(), DEFAULT_*.
    """
    try:
        # retrive command line options
        shortopts = "m:i:n:d:k:l:t:o:s:vh?"
        # NOTE(review): "k:" and the "mass=" long option are declared but have
        # no handler in the option loop below -- confirm whether still needed.
        longopts = [ "methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=", "varListKey=", "inputtrees=", "outputfile=", "seed=", "verbose", "help", "usage" ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)
    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    # Defaults; each may be overridden by a command-line option below.
    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    nTrees = DEFAULT_NTREES
    mDepth = DEFAULT_MDEPTH
    varListKey = DEFAULT_VARLISTKEY
    verbose = True  # NOTE(review): already True, so -v/--verbose is a no-op
    SeedN = DEFAULT_SEED

    # Apply command-line overrides.
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-d", "--maxDepth"):
            mDepth = a
        elif o in ("-l", "--varListKey"):
            varListKey = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-n", "--nTrees"):
            nTrees = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            # Expect exactly two space-separated tree names: signal then
            # background (after sort+reverse).
            a.strip()
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-s", "--seed"):
            # Python 2 long; the seed is decoded into a bitmask below.
            SeedN = long(a)
        elif o in ("-v", "--verbose"):
            verbose = True

    # Variable list selected by key; the output name encodes the setup.
    varList = varsList.varList[varListKey]
    nVars = str(len(varList)) + 'vars'
    Note = methods + '_' + varListKey + '_' + nVars + '_mDepth' + mDepth
    outfname = "dataset/weights/TMVA_" + Note + ".root"

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut, TRandom3

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Import TMVA classes from ROOT
    from ROOT import TMVA
    fClassifier = TMVA.Factory( "VariableImportance", "!V:!ROC:!ModelPersistence:Silent:Color:!DrawProgressBar:AnalysisType=Classification" )

    # Zero-padded 53-bit binary representation of the seed; each bit decides
    # whether the corresponding variable is included in this training.
    str_xbitset = '{:053b}'.format(SeedN)
    seeddl = TMVA.DataLoader(str_xbitset)

    bdtSetting = '!H:!V:NTrees=%s:MaxDepth=%s' % (nTrees, mDepth)
    bdtSetting += ':MinNodeSize=2.5%:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20'
    bdtSetting += ':IgnoreNegWeightsInTraining=True'

    # Walk the bitmask from position 52 downward; add only flagged variables.
    # varList entries are (expression, title, unit) triples.
    index = 52
    for iVar in varList:
        if (str_xbitset[index] == '1'):
            seeddl.AddVariable(iVar[0], iVar[1], iVar[2], 'F')
            print iVar[0]
        index = index - 1

    # Weight files go into a per-setup subdirectory.
    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/" + Note

    # Signal sample: hard-coded TTTT ntuple; tree name "ljmet".
    inputDir = varsList.inputDir
    infname = "TTTT_TuneCP5_PSweights_13TeV-amcatnlo-pythia8_hadd.root"
    iFileSig = TFile.Open(inputDir + infname)
    sigChain = iFileSig.Get("ljmet")
    seeddl.AddSignalTree(sigChain)

    # Background samples from varsList.bkg; empty trees are skipped.
    # Files are kept in bkg_list so ROOT does not garbage-collect them.
    bkg_list = []
    bkg_trees_list = []
    bkgList = varsList.bkg
    for i in range(len(bkgList)):
        bkg_list.append(TFile.Open(inputDir + bkgList[i]))
        bkg_trees_list.append(bkg_list[i].Get("ljmet"))
        # NOTE(review): GetEntry(0) presumably forces branch loading before
        # the entry-count check -- confirm it is still required.
        bkg_trees_list[i].GetEntry(0)
        if bkg_trees_list[i].GetEntries() == 0:
            continue
        seeddl.AddBackgroundTree(bkg_trees_list[i], 1)

    signalWeight = 1  # NOTE(review): assigned but not used below

    # Event weights and selection cuts come from module-level strings.
    seeddl.SetSignalWeightExpression(weightStrS)
    seeddl.SetBackgroundWeightExpression(weightStrB)
    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)
    seeddl.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    # Book, train, test, and evaluate a single BDT for this seed.
    fClassifier.BookMethod(seeddl, TMVA.Types.kBDT, "BDT", bdtSetting)
    fClassifier.TrainAllMethods()
    fClassifier.TestAllMethods()
    fClassifier.EvaluateAllMethods()

    # Figure of merit for the variable subset encoded by this seed.
    SROC = fClassifier.GetROCIntegral(str_xbitset, "BDT")
    print "ROC-integral : ", str_xbitset, " ", SROC
    print "SEED " + str_xbitset + " DONE"

    # Clean up so the factory can be reused in a subsequent scan iteration.
    fClassifier.DeleteAllMethods()
    fClassifier.fMethodsMap.clear()
    print "=================================================================="
    print "=================================================================="
# --- Script fragment: load MVA variable-name lists and book a TMVA Reader ---
# NOTE(review): 'file1' is opened before this chunk; only file2/file3 are
# visible here.
file2 = open('../mvavars2.txt')
file3 = open('../mvavars3.txt')

# One variable-name list per input file (one name per line, stripped).
mvavars1 = []
mvavars2 = []
mvavars3 = []
for line in file1:
    mvavars1.append(line.strip())
for line in file2:
    mvavars2.append(line.strip())
for line in file3:
    print line
    mvavars3.append(line.strip())

# Reader for the MLP classifier; one single-element float buffer per input
# variable, kept alive in var3_ so TMVA can write into it at evaluation time.
# NOTE(review): the reader is filled from mvavars1, not mvavars3, despite the
# "3" naming -- confirm this is intentional.
reader3 = TMVA.Reader()
var3_ = []
for i, var3 in enumerate(mvavars1):
    var3_.append(array.array('f',[0]))
    reader3.AddVariable(var3,var3_[i])
reader3.BookMVA("MLP","weights/TMVAClassification_HZZ_2e2mu_MLP.weights.xml")

#weight weight ele1_pt ele1_eta ele1_phi ele1_charge ele1_trackIso ele1_EcalIso ele1_HcalIso ele1_X ele1_SIP ele2_pt ele2_eta ele2_phi ele2_charge ele2_trackIso ele2_EcalIso ele2_HcalIso ele2_X ele2_SIP ele3_pt ele3_eta ele3_phi ele3_charge ele3_trackIso ele3_EcalIso ele3_HcalIso ele3_X ele3_SIP ele4_pt ele4_eta ele4_phi ele4_charge ele4_trackIso ele4_EcalIso ele4_HcalIso ele4_X ele4_SIP
#worst_iso_X second_worst_iso_X worst_vertex second_worst_vertex mZ mZstar mbestH index channel sample end

def loop(hweights, sig, hs, c1):
    # NOTE(review): the body of loop() appears truncated in this chunk.
    #------- sig events ---------
def main(o, args):
    """Train TMVA classifiers with optional (sub-)category booking.

    `o` is an options object carrying the configuration (outfile, label,
    variables, spectators, categories, subcategories, classes, methods,
    selection, commonCuts, weightsdir, verbose, optimize, ...); `args` is
    unused here. Books per-category Fisher/Likelihood/BDT/Cuts methods,
    then trains, tests, and evaluates all of them.

    Relies on names defined elsewhere in this file/module: itertools,
    mkChain, getListOfFiles, TString.
    """
    # Import TMVA classes from ROOT
    from ROOT import TMVA, TFile, TCut

    # Output file
    outputFile = TFile(o.outfile % {"label": o.label}, 'RECREATE')

    # Analysis type defaults to classification unless overridden in options.
    atype = "Classification"
    if hasattr(o, "type"):
        atype = str(o.type)
    factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=%s" % atype)

    # Set verbosity
    factory.SetVerbose(o.verbose)
    TMVA.Config.Instance().GetIONames().fWeightFileDir = o.weightsdir

    # variables -- accept either a list or a ":"-separated string.
    if type(o.variables) == str:
        o.variables = [ v.lstrip().rstrip() for v in o.variables.split(":") if v != "" ]
    # allvars accumulates the bare variable names (text before ":=").
    allvars = ""
    for v in o.variables:
        factory.AddVariable(str(v))
        if allvars != "":
            allvars += ":"
        allvars += v.split(":=")[0].lstrip(" ").rstrip(" ")
    #print "variables %s" % allvars
    #print o.spectators

    # Spectators are recorded in the output tree but not used in training.
    for s in o.spectators:
        if not s in o.variables:
            factory.AddSpectator(str(s))

    # categories and sub categories
    # subcategories is reduced to the cartesian product of all sub-category
    # lists; each element is (combined TCut, combined name).
    categories = []
    subcategories = []
    if hasattr(o, "subcategories") and len(o.subcategories) > 0:
        subcategories = o.subcategories[0]
        for sc in o.subcategories[1:]:
            subcategories = map(
                lambda x: (TCut(x[0][0]) * TCut(x[1][0]), "%s_%s" % (x[0][1], x[1][1])),
                itertools.product(subcategories, sc))

    # Build the final category list: (cut, name, variable string) triples.
    # Per-category `vars` lists variables to EXCLUDE from allvars.
    for cut, name, vars in o.categories:
        myvars = allvars
        if vars != "":
            for v in vars.split(":"):
                myvars = myvars.replace(v, "").replace("::", ":")
            myvars = myvars.rstrip(":")
            vars = str(myvars)
        #print vars
        if len(subcategories) > 0:
            for subcut, subname in subcategories:
                # NOTE(review): this sanitization only runs when subname is
                # already empty, so it can never change anything -- the
                # condition looks inverted (or should act on the cut string).
                # Confirm intended behavior before relying on sub-names.
                if subname == "":
                    subname = subname.replace(" ", "").replace(
                        ">", "_gt_").replace("<", "_lt_").replace(
                        "=", "_eq_").replace("&", "_and_")
                fullname = "%s_%s" % (name, subname)
                categories.append(
                    (TCut(cut) * TCut(subcut), str(fullname), vars))
        else:
            categories.append((TCut(cut), str(name), vars))

    # load tree
    # NOTE(review): `selection` is only referenced by commented-out code below.
    selection = TCut(o.selection)
    from ROOT import TTree
    from array import array

    # Register one chain per sample, tagged with its event class.
    for evclass, info in o.classes.iteritems():
        samples = info["samples"]
        for filePattern, name, weight, cut, ttype, ID in samples:
            #tcut=TCut(cut)*selection
            chain = mkChain(getListOfFiles(o.indir, filePattern), name)
            # print ID
            # br = array('i',[ID])
            # #use an auxiliary tree to store the procID...
            # tempf = TFile('procID_'+str(name.split('/')[-1])+'_'+str(ttype)+'.root', "RECREATE")
            # TF = TTree("TF_"+str(name.split('/')[-1])+'_'+str(ttype), "friend tree with procID")
            # TF.Branch("procID", br, "procID/I" )
            # for i in range(chain.GetEntries()):
            #     TF.Fill()
            # TF.Write()
            # tempf.Close()
            # #... and add it as a friend to the main chain
            # chain.AddFriend('TF_'+str(name.split('/')[-1])+'_'+str(ttype),'procID_'+str(name.split('/')[-1])+'_'+str(ttype)+'.root')
            #then add the chain
            # Common cuts are multiplied into the per-sample cut expression.
            for s in o.commonCuts:
                cut = cut + " * (" + str(s) + ")"
            tcut = TCut(cut)
            factory.AddTree(chain, str(evclass), float(weight), tcut, int(ttype))

        # weights -- per-class weight expression (also stored as spectator).
        if "weight" in info:
            weight = info["weight"]
            factory.AddSpectator(str("%s_wei := %s" % (evclass, weight)))
            factory.SetWeightExpression(str(weight), str(evclass))
        else:
            factory.SetWeightExpression("1.", str(evclass))

    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    factory.PrepareTrainingAndTestTree(
        TCut(""), "SplitMode=Random:NormMode=NumEvents:!V")

    # --------------------------------------------------------------------------------------------------
    # Default method option strings; may be overridden via o.settings.
    defaultSettings = {
        "BDT":
        "!H:!V:!CreateMVAPdfs:BoostType=Grad:UseBaggedBoost:NegWeightTreatment=NoNegWeightsInTraining"
        ":GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=2000:MinNodeSize=0.125"
        ":Shrinkage=0.1:NTrees=200:!UseYesNoLeaf:MaxDepth=3",
        "Cuts":
        "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart"
    }

    # Fisher discriminant with decorrelation transform, one per category.
    if "FisherD" in o.methods:
        mname = "FisherD%s" % o.label
        fcats = factory.BookMethod(TMVA.Types.kCategory, mname)
        for cut, name, vars in categories:
            print "booking sub-category classifier : %s %s %s" % (cut, name, vars)
            fcats.AddMethod(cut, vars, TMVA.Types.kFisher, "%s_%s" % (mname, name),
                            "!H:!V:Fisher:!CreateMVAPdfs:VarTransform=D")

    # Plain Fisher discriminant (same as LD), one per category.
    # NOTE(review): TString is not imported in this function -- confirm it is
    # imported at module level.
    if "Fisher" in o.methods:
        mname = "Fisher%s" % o.label
        fcats = factory.BookMethod(TMVA.Types.kCategory, TString(mname))
        for cut, name, vars in categories:
            print "booking sub-category classifier : %s %s %s" % (cut, name, vars)
            fcats.AddMethod(cut, vars, TMVA.Types.kFisher, "%s_%s" % (mname, name),
                            "!H:!V:Fisher:!CreateMVAPdfs")

    # 1D likelihood with KDE smoothing, one per category.
    if "Likelihood" in o.methods:
        mname = "Likelihood%s" % o.label
        fcats = factory.BookMethod(TMVA.Types.kCategory, mname)
        for cut, name, vars in categories:
            print "booking sub-category classifier : %s %s %s" % (cut, name, vars)
            fcats.AddMethod(
                cut, vars, TMVA.Types.kLikelihood, "%s_%s" % (mname, name),
                "!H:!V:!CreateMVAPdfs:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=150"
            )

    # Likelihood with decorrelated inputs, one per category.
    if "LikelihoodD" in o.methods:
        mname = "LikelihoodD%s" % o.label
        fcats = factory.BookMethod(TMVA.Types.kCategory, mname)
        for cut, name, vars in categories:
            print "booking sub-category classifier : %s %s %s" % (cut, name, vars)
            fcats.AddMethod(
                cut, vars, TMVA.Types.kLikelihood, "%s_%s" % (mname, name),
                "!H:!V:!CreateMVAPdfs:!TransformOutput:VarTransform=D:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=150"
            )

    # Gradient BDT: booked directly when no categories, otherwise per category.
    if "BDT" in o.methods:
        mname = "BDT%s" % o.label
        settings = defaultSettings["BDT"]
        if hasattr(o, "settings") and "BDT" in o.settings:
            settings = str(o.settings["BDT"])
        if len(categories) == 0:
            print "booking method %s with settings %s" % (TMVA.Types.kBDT, settings)
            cats = factory.BookMethod(TMVA.Types.kBDT, TString(mname), settings)
        else:
            cats = factory.BookMethod(TMVA.Types.kCategory, mname)
            for cut, name, vars in categories:
                print "booking sub-category classifier : %s %s %s" % (
                    cut, name, vars)
                cats.AddMethod(cut, vars, TMVA.Types.kBDT,
                               "%s_%s" % (mname, name), settings)

    # Rectangular-cut optimisation, same direct/per-category pattern.
    if "Cuts" in o.methods:
        mname = "Cuts%s" % o.label
        settings = defaultSettings["Cuts"]
        if hasattr(o, "settings") and "Cuts" in o.settings:
            settings = str(o.settings["Cuts"])
        if len(categories) == 0:
            cats = factory.BookMethod(TMVA.Types.kCuts, mname, settings)
        else:
            cats = factory.BookMethod(TMVA.Types.kCategory, mname)
            for cut, name, vars in categories:
                print "booking sub-category classifier : %s %s %s" % (
                    cut, name, vars)
                cats.AddMethod(cut, vars, TMVA.Types.kCuts,
                               "%s_%s" % (mname, name), settings)

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.
    if o.optimize:
        print "Optimizing?"
        factory.OptimizeAllMethods()
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()
def TMVARegression():
    """Train a TMVA regression BDT for jet-energy correction.

    Parses CLI options (-a analysis, -o outputfile, -v), registers the
    analysis-dependent input variables and regression target ("Dijet" uses
    hJet_* branches, "Subjet" uses fathFilterJets_*), chains the signal
    ntuples, applies a quality cut, and trains/tests/evaluates an
    AdaBoostR2 BDT.

    Relies on names defined elsewhere in this file/module: getopt, sys,
    usage(), OUTFNAME, ANALYSIS, TFile, TChain, TCut.
    """
    try:
        # retrieve command line options
        shortopts = "a:o:vh?"
        longopts = ["analysis=","outputfile=", "verbose", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )
    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    # Defaults, overridden by command-line options below.
    _outfname = OUTFNAME
    _analysis = ANALYSIS
    verbose = False
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-o", "--outputfile"):
            _outfname = a
        elif o in ("-a", "--analysis"):
            _analysis = a
        elif o in ("-v", "--verbose"):
            verbose = True

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile( _outfname, 'RECREATE' )

    # // Create the factory object. Later you can choose the methods
    # // whose performance you'd like to investigate. The factory will
    # // then run the performance analysis for you.
    # //
    # // The first argument is the base of the name of all the
    # // weightfiles in the directory weights_Reg/
    # //
    # // The second argument is the output file for the training results
    # // All TMVA output can be suppressed by removing the "!" (not) in
    # // front of the "Silent" argument in the option string
    factory = TMVA.Factory ("TMVARegression", outputFile, "!V:!Silent:Color:DrawProgressBar" )

    # Set verbosity
    factory.SetVerbose( verbose )
    TMVA.gConfig().GetIONames().fWeightFileDir = "weights_Reg_8TeV" + "_" + _analysis

    # Analysis-dependent inputs: kinematics, vertex/track info, event-level
    # quantities, and the true (generated) jet pT as regression target.
    if _analysis == "Dijet":
        factory.AddVariable("hJet_pt", "hJet_pt", "units", 'F')
        factory.AddVariable("hJet_eta", "hJet_eta", "units", 'F')
        factory.AddVariable("hJet_phi", "hJet_phi", "units", 'F')
        factory.AddVariable("hJet_e", "hJet_e", "units", 'F')
        # Resolution-bias-corrected raw pT (expression evaluated per event).
        factory.AddVariable("hJet_ptRaw*((hJet_ptRaw+resolutionBias(fabs(hJet_eta))*(hJet_ptRaw-hJet_genPt))/hJet_ptRaw)", "hJet_ptRaw*((hJet_ptRaw+resolutionBias(fabs(hJet_eta))*(hJet_ptRaw-hJet_genPt))/hJet_ptRaw)", "units", 'F')
        factory.AddVariable("hJet_Mt:=evalMt(hJet_pt, hJet_eta, hJet_phi, hJet_e)","hJet_Mt", "units", 'F')
        factory.AddVariable("hJet_Et:=evalEt(hJet_pt, hJet_eta, hJet_phi, hJet_e)","hJet_Et", "units", 'F')
        factory.AddVariable("hJet_ptLeadTrack", "hJet_ptLeadTrack", "units", 'F')
        factory.AddVariable("hJet_vtxPt", "hJet_vtxPt", "units", 'F')
        factory.AddVariable("hJet_vtx3dL", "hJet_vtx3dL", "units", 'F')
        factory.AddVariable("hJet_vtx3deL", "hJet_vtx3deL", "units", 'F')
        factory.AddVariable("hJet_vtxMass", "hJet_vtxMass", "units", 'F')
        factory.AddVariable("hJet_chf", "hJet_chf", "units", 'F')
        factory.AddVariable("hJet_nch", "hJet_nch", "units", 'F')
        factory.AddVariable("hJet_nconstituents", "hJet_nconstituents", "units", 'F')
        factory.AddVariable("hJet_JECUnc", "hJet_JECUnc", "units", 'F')
        factory.AddVariable("rho25", "rho25", "units", 'F')
        factory.AddVariable("MET.et", "MET.et", "units", 'F')
        factory.AddVariable("METdPhi:=METdeltaPhi(MET.phi, hJet_phi[0], hJet_phi[1])","METdPhi", "units",'F')
        #Add the variable carrying the regression target
        factory.AddTarget( "hJet_genPt" )
    elif _analysis == "Subjet":
        factory.AddVariable("fathFilterJets_pt", "fathFilterJets_pt", "units", 'F')
        factory.AddVariable("fathFilterJets_eta", "fathFilterJets_eta", "units", 'F')
        factory.AddVariable("fathFilterJets_phi", "fathFilterJets_phi", "units", 'F')
        factory.AddVariable("fathFilterJets_e", "fathFilterJets_e", "units", 'F')
        factory.AddVariable("fathFilterJets_ptRaw*((fathFilterJets_ptRaw+resolutionBias(fabs(fathFilterJets_eta))*(fathFilterJets_ptRaw-fathFilterJets_genPt))/fathFilterJets_ptRaw)", "fathFilterJets_ptRaw*((fathFilterJets_ptRaw+resolutionBias(fabs(fathFilterJets_eta))*(fathFilterJets_ptRaw-fathFilterJets_genPt))/fathFilterJets_ptRaw)", "units", 'F')
        factory.AddVariable("fathFilterJets_Mt:=evalMt(fathFilterJets_pt, fathFilterJets_eta, fathFilterJets_phi, fathFilterJets_e)","fathFilterJets_Mt", "units", 'F')
        factory.AddVariable("fathFilterJets_Et:=evalEt(fathFilterJets_pt, fathFilterJets_eta, fathFilterJets_phi, fathFilterJets_e)","fathFilterJets_Et", "units", 'F')
        factory.AddVariable("fathFilterJets_ptLeadTrack", "fathFilterJets_ptLeadTrack", "units", 'F')
        factory.AddVariable("fathFilterJets_vtxPt", "fathFilterJets_vtxPt", "units", 'F')
        factory.AddVariable("fathFilterJets_vtx3dL", "fathFilterJets_vtx3dL", "units", 'F')
        factory.AddVariable("fathFilterJets_vtx3deL", "fathFilterJets_vtx3deL", "units", 'F')
        factory.AddVariable("fathFilterJets_vtxMass", "fathFilterJets_vtxMass", "units", 'F')
        factory.AddVariable("fathFilterJets_chf", "fathFilterJets_chf", "units", 'F')
        factory.AddVariable("rho25", "rho25", "units", 'F')
        factory.AddVariable("MET.et", "MET.et", "units", 'F')
        factory.AddVariable("METdPhi:=METdeltaPhi(MET.phi, fathFilterJets_phi[0], fathFilterJets_phi[1])","METdPhi", "units",'F')
        factory.AddTarget("fathFilterJets_genPt")
    else:
        print "Problem specifying analysis. Please choose Dijet or Subjet."
        sys.exit(1)

    ## Get the Signal trees
    # en7TeV/en8TeV select which ntuple set feeds the training chain.
    en7TeV = False
    en8TeV = True
    regWeight = 1.
    chain = TChain("tree")
    if en7TeV:
        #change the ntuple names later!!
        chain.Add("Step2_output_May11/WH_125_ForRegression.root")
        chain.Add("Step2_output_May11/WH_115_ForRegression.root")
        chain.Add("Step2_output_May11/WH_120_ForRegression.root")
        chain.Add("Step2_output_May11/WH_130_ForRegression.root")
        chain.Add("Step2_output_May11/WH_135_ForRegression.root")
    #if en8TeV and _analysis == "Dijet":
    #    chain.Add("dcache:/pnfs/cms/WAX/11/store/user/lpchbb/apana/Step1V33_Step2_V2/DiJetPt_ZH_ZToLL_HToBB_M-110_8TeV-powheg-herwigpp.root")
    if en8TeV:
        chain.Add("/uscmst1b_scratch/lpc1/lpctrig/apana/Higgs/Step2/NtupleV34/CMSSW_5_2_5/src/VHbbAnalysis/VHbbDataFormats/bin/Step2/ZH/ZH_110_summer12_33b.root")
        chain.Add("/uscmst1b_scratch/lpc1/lpctrig/apana/Higgs/Step2/NtupleV34/CMSSW_5_2_5/src/VHbbAnalysis/VHbbDataFormats/bin/Step2/ZH/ZH_115_summer12_33b.root")
        chain.Add("/uscmst1b_scratch/lpc1/lpctrig/apana/Higgs/Step2/NtupleV34/CMSSW_5_2_5/src/VHbbAnalysis/VHbbDataFormats/bin/Step2/ZH/ZH_120_summer12_33b.root")
        chain.Add("/uscmst1b_scratch/lpc1/lpctrig/apana/Higgs/Step2/NtupleV34/CMSSW_5_2_5/src/VHbbAnalysis/VHbbDataFormats/bin/Step2/ZH/ZH_125_summer12_33b.root")
        chain.Add("/uscmst1b_scratch/lpc1/lpctrig/apana/Higgs/Step2/NtupleV34/CMSSW_5_2_5/src/VHbbAnalysis/VHbbDataFormats/bin/Step2/ZH/ZH_130_summer12_33b.root")
        chain.Add("/uscmst1b_scratch/lpc1/lpctrig/apana/Higgs/Step2/NtupleV34/CMSSW_5_2_5/src/VHbbAnalysis/VHbbDataFormats/bin/Step2/ZH/ZH_135_summer12_33b.root")
    NEntries = chain.GetEntries()
    print "Number of entries on Chain:",NEntries
    regTree = chain
    factory.AddRegressionTree( regTree, regWeight )

    #This would set individual event weights (the variables defined in the
    #expression need to exist in the original TTree)
    #factory->SetWeightExpression( "var1", "Regression" )

    # Quality selection: both jets reconstructed & matched to generator level,
    # within tracker acceptance, with a minimum Higgs-candidate pT.
    if _analysis == "Dijet":
        cutString=\
        "(Vtype == 0 || Vtype == 1)" + " && " +\
        "hJet_pt[0] > 20.0" + " && " +\
        "hJet_pt[1] > 20.0" + " && " +\
        "hJet_genPt[0] > 0.0" + " && " +\
        "hJet_genPt[1] > 0.0" + " && " +\
        "hJet_eta[0] < 2.4" + " && " +\
        "hJet_eta[1] < 2.4" + " && " +\
        "hJet_id[0] > 0.0" + " && " +\
        "hJet_id[1] > 0.0" + " && " +\
        "max(hJet_csv[0],hJet_csv[1]) > 0.0" + " && " +\
        "min(hJet_csv[0],hJet_csv[1]) > 0.0" + " && " +\
        "H.pt > 100"
    elif _analysis == "Subjet":
        cutString=\
        "(Vtype == 0 || Vtype == 1)" + " && " +\
        "fathFilterJets_pt[0] > 20.0" + " && " +\
        "fathFilterJets_pt[1] > 20.0" + " && " +\
        "fathFilterJets_genPt[0] > 0.0" + " && " +\
        "fathFilterJets_genPt[1] > 0.0" + " && " +\
        "fathFilterJets_eta[0] < 2.4" + " && " +\
        "fathFilterJets_eta[1] < 2.4" + " && " +\
        "max(fathFilterJets_csv[0],fathFilterJets_csv[1]) > 0.0" + " && " +\
        "min(fathFilterJets_csv[0],fathFilterJets_csv[1]) > 0.0" + " && " +\
        "FatH.filteredpt > 100"
    else:
        print "Problem specifying analysis. Please choose Dijet or Subjet."
        sys.exit(1)
    print cutString
    mycut = TCut( cutString )

    # tell the factory to use all remaining events in the trees after training for testing. The number is 25% of the events after cuts:
    if en7TeV:
        factory.PrepareTrainingAndTestTree( mycut, "nTrain_Regression=125000:nTest_Regression=125000:SplitMode=Random:NormMode=NumEvents:!V" )
    if en8TeV:
        factory.PrepareTrainingAndTestTree( mycut, "nTrain_Regression=111000:nTest_Regression=111000:SplitMode=Random:NormMode=NumEvents:!V" )
    #If no numbers of events are given, half of the events in the tree are used
    #for training, and the other half for testing:
    #factory.PrepareTrainingAndTestTree( mycut, "SplitMode=random:!V" );

    #---- Book MVA methods
    #please lookup the various method configuration options in the corresponding cxx files, eg:
    #src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    #it is possible to preset ranges in the option string in which the cut optimisation should be done:
    #"...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    #Boosted Decision Trees
    factory.BookMethod( TMVA.Types.kBDT, "BDT", "!H:!V:NTrees=60:nEventsMin=5:BoostType=AdaBoostR2:SeparationType=RegressionVariance:nCuts=20:PruneMethod=CostComplexity:PruneStrength=30" )

    # -------------------------------------------------------------------------------------------
    #---- Now you can tell the factory to train, test, and evaluate the MVAs

    # Train MVAs using the set of training events
    factory.TrainAllMethods()
    # ---- Evaluate all MVAs using the set of test events
    factory.TestAllMethods()
    # ----- Evaluate and compare performance of all configured MVAs
    factory.EvaluateAllMethods()
    # --------------------------------------------------------------

    NEntries = regTree.GetEntries()
    print "Number of entries on Tree: ",NEntries

    # Save the output
    outputFile.Close()
    print "==> Wrote root file %s\n" % _outfname
    print "==> TMVARegression is done!\n"
# Minimal TMVA classification example driven through rootpy: two input
# variables, signal/background selected by a label cut, one AdaBoost BDT.
from rootpy.tree import Cut
from rootpy.io import root_open
from ROOT import TMVA

# Open the input sample and the file that will collect the TMVA results.
infile = root_open('sample.root')
outfile = root_open('tmva_output.root', 'recreate')

factory = TMVA.Factory("TMVAClassification", outfile,
                       "AnalysisType=Classification")

# Register both training variables as floats.
for training_var in ("a", "b"):
    factory.AddVariable(training_var, 'F')

# Signal and background come from the same tree, split by the label branch.
signal_cut = Cut('label==1')
background_cut = Cut('label==0')
factory.SetInputTrees(infile.sample, signal_cut, background_cut)

# Randomly shuffle events before the train/test split; no extra selection.
factory.PrepareTrainingAndTestTree(Cut(), Cut(),
                                   "SplitMode=Random:NormMode=NumEvents")

factory.BookMethod(
    TMVA.Types.kBDT, "BDT",
    "NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=-1"
)

factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()

outfile.close()
infile.close()
def main(): try: # retrive command line options shortopts = "w:m:i:j:f:g:t:o:a:vgh?" longopts = ["weight_fold=", "methods=", "inputfilesig=", "inputfilebkg=", "friendinputfilesig=", "friendinputfilebkg=", "inputtrees=", "outputfile=", "verbose", "gui", "help", "usage"] opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts ) except getopt.GetoptError: # print help information and exit: print "ERROR: unknown options in argument %s" % sys.argv[1:] usage() sys.exit(1) infnameSig = DEFAULT_INFNAMESIG infnameBkg = DEFAULT_INFNAMEBKG friendfnameSig = DEFAULT_FRIENDNAMESIG friendfnameBkg = DEFAULT_FRIENDNAMEBKG treeNameSig = DEFAULT_TREESIG treeNameBkg = DEFAULT_TREEBKG outfname = DEFAULT_OUTFNAME methods = DEFAULT_METHODS weight_fold = "weights" verbose = False gui = False addedcuts = "" for o, a in opts: if o in ("-?", "-h", "--help", "--usage"): usage() sys.exit(0) elif o in ("-w", "--weight_fold"): weight_fold = a elif o in ("-m", "--methods"): methods = a elif o in ("-i", "--inputfilesig"): infnameSig = a elif o in ("-j", "--inputfilebkg"): infnameBkg = a elif o in ("-f", "--friendinputfilesig"): friendfnameSig = a elif o in ("-g", "--friendinputfilebkg"): friendfnameBkg = a elif o in ("-o", "--outputfile"): outfname = a elif o in ("-a", "--addedcuts"): addedcuts = a elif o in ("-t", "--inputtrees"): a.strip() trees = a.rsplit( ' ' ) trees.sort() trees.reverse() if len(trees)-trees.count('') != 2: print "ERROR: need to give two trees (each one for signal and background)" print trees sys.exit(1) treeNameSig = trees[0] treeNameBkg = trees[1] elif o in ("-v", "--verbose"): verbose = True elif o in ("-g", "--gui"): gui = True # Print methods mlist = methods.replace(' ',',').split(',') print "=== TMVAClassification: use method(s)..." for m in mlist: if m.strip() != '': print "=== - <%s>" % m.strip() # Print the file print "Using file " + infnameSig + " for signal..." print "Using file " + infnameBkg + " for background..." 
# Import ROOT classes from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut # check ROOT version, give alarm if 5.18 print "ROOT version is " + str(gROOT.GetVersionCode()) if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544: print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA" print "*** does not run properly (function calls with enums in the argument are ignored)." print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples)," print "*** or use another ROOT version (e.g., ROOT 5.19)." sys.exit(1) # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI gROOT.SetMacroPath( "./" ) ## SO I TAKE DEFAULT FORM ROOT# gROOT.Macro ( "./TMVAlogon.C" ) gROOT.LoadMacro ( "./TMVAGui.C" ) # Import TMVA classes from ROOT from ROOT import TMVA # Output file outputFile = TFile( outfname, 'RECREATE' ) # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options) # All TMVA output can be suppressed by removing the "!" 
(not) in # front of the "Silent" argument in the option string factory = TMVA.Factory( "TMVAClassification", outputFile, "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" ) # Set verbosity factory.SetVerbose( verbose ) # If you wish to modify default settings # (please check "src/Config.h" to see all available global options) # gConfig().GetVariablePlotting()).fTimesRMS = 8.0 (TMVA.gConfig().GetIONames()).fWeightFileDir = weight_fold; # Define the input variables that shall be used for the classifier training # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)" # [all types of expressions that can also be parsed by TTree::Draw( "expression" )] factory.AddVariable( "dR_l1l2", "dR_l1l2", "", 'F' ) factory.AddVariable( "dR_b1b2", "dR_b1b2", "", 'F' ) factory.AddVariable( "dR_bl", "dR_bl", "", 'F' ) factory.AddVariable( "dR_l1l2b1b2", "dR_l1l2b1b2", "", 'F' ) factory.AddVariable( "MINdR_bl", "MINdR_bl", "", 'F' ) factory.AddVariable( "dphi_l1l2b1b2", "dphi_l1l2b1b2", "", 'F' ) factory.AddVariable( "mass_l1l2", "mass_l1l2", "", 'F' ) factory.AddVariable( "mass_b1b2", "mass_b1b2", "", 'F' ) factory.AddVariable( "mass_trans", "mass_trans", "", 'F' ) factory.AddVariable( "MT2", "MT2", "", 'F' ) factory.AddVariable( "pt_b1b2", "pt_b1b2", "", 'F' ) factory.AddVariable( "MMC_h2massweight1_prob", "MMC_h2massweight1_prob", "", 'F' ) ##ADDED # You can add so-called "Spectator variables", which are not used in the MVA training, # but will appear in the final "TestTree" produced by TMVA. 
This TestTree will contain the # input variables, the response values of all trained MVAs, and the spectator variables # factory.AddSpectator( "spec1:=var1*2", "Spectator 1", "units", 'F' ) # factory.AddSpectator( "spec2:=var1*3", "Spectator 2", "units", 'F' ) # Read input data if gSystem.AccessPathName( infnameSig ) != 0 or gSystem.AccessPathName( infnameBkg ): gSystem.Exec( "wget http://root.cern.ch/files/" + infname ) inputSig = TFile.Open( infnameSig ) inputBkg = TFile.Open( infnameBkg ) # Get the signal and background trees for training signal = inputSig.Get( treeNameSig ) background = inputBkg.Get( treeNameBkg ) ##signal.AddFriend( "eleIDdir/isoT1 = eleIDdir/T1", friendfnameSig ) ##background.AddFriend( "eleIDdir/isoT1 = eleIDdir/T1", friendfnameBkg ) # Global event weights (see below for setting event-wise weights) signalWeight = 1. backgroundWeight = 1. #I don't think there's a general answer to this. The safest 'default' #is to use the envent weight such that you have equal amounts of signal #and background #for the training, otherwise for example: if you look for a rare #signal and you use the weight to scale the number of events according #to the expected ratio of signal and background #according to the luminosity... the classifier sees hardly any signal #events and "thinks" .. Oh I just classify everything background and do #a good job! # #One can try to 'optimize' the training a bit more in either 'high #purity' or 'high efficiency' by choosing different weights, but as I #said, there's no fixed rule. You'd have #to 'try' and see if you get better restults by playing with the weights. 
# ====== register trees ==================================================== # # the following method is the prefered one: # you can add an arbitrary number of signal or background trees factory.AddSignalTree ( signal, signalWeight ) factory.AddBackgroundTree( background, backgroundWeight ) # To give different trees for training and testing, do as follows: # factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" ) # factory.AddSignalTree( signalTestTree, signalTestWeight, "Test" ) # Use the following code instead of the above two or four lines to add signal and background # training and test events "by hand" # NOTE that in this case one should not give expressions (such as "var1+var2") in the input # variable definition, but simply compute the expression before adding the event # # # --- begin ---------------------------------------------------------- # # ... *** please lookup code in TMVA/macros/TMVAClassification.C *** # # # --- end ------------------------------------------------------------ # # ====== end of register trees ============================================== # Set individual event weights (the variables must exist in the original TTree) # for signal : factory.SetSignalWeightExpression ("weight1*weight2"); # for background: factory.SetBackgroundWeightExpression("weight1*weight2"); # Apply additional cuts on the signal and background sample. 
# example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" ) #mycutSig = TCut( "nu1and2_diBaxis_t>-900 && met_diBaxis_t>-900&& hasb1jet && hasb2jet && hasMET && hasGenMET && hasdRljet && hastwomuons" ) mycutSig = TCut( addedcuts ) #mycutBkg = TCut( "event_n%2!=0 && " + addedcuts ) mycutBkg = TCut( addedcuts ) #mycutBkg = TCut( "nu1and2_diBaxis_t>-900 && met_diBaxis_t>-900&& hasb1jet && hasb2jet && hasMET && hasGenMET && hasdRljet && hastwomuons" ) print mycutSig # Here, the relevant variables are copied over in new, slim trees that are # used for TMVA training and testing # "SplitMode=Random" means that the input events are randomly shuffled before # splitting them into training and test samples factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" ) # -------------------------------------------------------------------------------------------------- # ---- Book MVA methods # # please lookup the various method configuration options in the corresponding cxx files, eg: # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html # it is possible to preset ranges in the option string in which the cut optimisation should be done: # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable # Cut optimisation if "Cuts" in mlist: factory.BookMethod( TMVA.Types.kCuts, "Cuts", "!H:!V:FitMethod=MC:EffSel:VarProp[0]=FMax:VarProp[1]=FMin" ) if "CutsD" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsD", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" ) if "CutsPCA" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsPCA", "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" ) if "CutsGA" in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsGA", "H:!V:FitMethod=GA:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95:VarProp[0]=FMin:VarProp[1]=FMax" ) if "CutsSA" 
in mlist: factory.BookMethod( TMVA.Types.kCuts, "CutsSA", "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ) # Likelihood ("naive Bayes estimator") if "Likelihood" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "Likelihood", "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" ) # Decorrelated likelihood if "LikelihoodD" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodD", "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" ) # PCA-transformed likelihood if "LikelihoodPCA" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodPCA", "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) # Use a kernel density estimator to approximate the PDFs if "LikelihoodKDE" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodKDE", "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) # Use a variable-dependent mix of splines and kernel density estimator if "LikelihoodMIX" in mlist: factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodMIX", "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) # Test the multi-dimensional probability density estimator # here are the options strings for the MinMax and RMS methods, respectively: # "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" ); # "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" ); if "PDERS" in mlist: factory.BookMethod( TMVA.Types.kPDERS, 
"PDERS", "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" ) if "PDERSD" in mlist: factory.BookMethod( TMVA.Types.kPDERS, "PDERSD", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" ) if "PDERSPCA" in mlist: factory.BookMethod( TMVA.Types.kPDERS, "PDERSPCA", "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" ) # Multi-dimensional likelihood estimator using self-adapting phase-space binning if "PDEFoam" in mlist: factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoam", "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" ) if "PDEFoamBoost" in mlist: factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoamBoost", "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" ) # K-Nearest Neighbour classifier (KNN) if "KNN" in mlist: factory.BookMethod( TMVA.Types.kKNN, "KNN", "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" ) # H-Matrix (chi2-squared) method if "HMatrix" in mlist: factory.BookMethod( TMVA.Types.kHMatrix, "HMatrix", "!H:!V" ) # Linear discriminant (same as Fisher discriminant) if "LD" in mlist: factory.BookMethod( TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher discriminant (same as LD) if "Fisher" in mlist: factory.BookMethod( TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" ) # Fisher with Gauss-transformed input variables if "FisherG" in mlist: factory.BookMethod( TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss" ) # Composite classifier: ensemble (tree) of 
boosted Fisher classifiers if "BoostedFisher" in mlist: factory.BookMethod( TMVA.Types.kFisher, "BoostedFisher", "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" ) # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA) if "FDA_MC" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MC", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" ); if "FDA_GA" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_GA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" ); if "FDA_SA" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_SA", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" ); if "FDA_MT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" ); if "FDA_GAMT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_GAMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" ); if "FDA_MCMT" in mlist: factory.BookMethod( TMVA.Types.kFDA, "FDA_MCMT", "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" ); # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons if "MLP" in mlist: 
factory.BookMethod( TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" ) if "MLPBFGS" in mlist: factory.BookMethod( TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" ) if "MLPBNN" in mlist: factory.BookMethod( TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators # CF(Clermont-Ferrand)ANN if "CFMlpANN" in mlist: factory.BookMethod( TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N" ) # n_cycles:#nodes:#nodes:... # Tmlp(Root)ANN if "TMlpANN" in mlist: factory.BookMethod( TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ) #n_cycles:#nodes:#nodes:... # Support Vector Machine if "SVM" in mlist: factory.BookMethod( TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" ) # Boosted Decision Trees if "BDTG" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTG", "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" ) if "BDT" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDT", "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ) if "BDTB" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTB", "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" ) if "BDTD" in mlist: factory.BookMethod( TMVA.Types.kBDT, "BDTD", "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" ) # RuleFit -- TMVA implementation of Friedman's method if "RuleFit" in mlist: factory.BookMethod( TMVA.Types.kRuleFit, "RuleFit", 
"H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" ) # -------------------------------------------------------------------------------------------------- # ---- Now you can tell the factory to train, test, and evaluate the MVAs. # Train MVAs factory.TrainAllMethods() # Test MVAs factory.TestAllMethods() # Evaluate MVAs factory.EvaluateAllMethods() # Save the output. outputFile.Close() print "=== wrote root file %s\n" % outfname print "=== TMVAClassification is done!\n" # open the GUI for the result macros if( gui ): gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname ) # keep the ROOT thread running gApplication.Run()
'', 'F', 0.00, 2.00, array.array('f', [0]), TBranch(), 0.00, 1.10, 22 ] variables['pttot_lep1_lep2'] = [ '', 'F', 0.00, 2.00, array.array('f', [0]), TBranch(), 0.00, 1.10, 22 ] variables['ptdiff_lep1_lep2'] = [ '', 'F', 0.00, 2.00, array.array('f', [0]), TBranch(), 0.00, 1.10, 22 ] reader = TMVA.Reader() hists = {} for varName, var in sorted(variables.iteritems()): if withRecoTraining and withRelativeVariables and varName == Energy_scale: continue reader.AddVariable(varName, var[4]) if withRecoTraining: a = array.array('f', [0]) reader.AddSpectator("truth_mass", a) reader.AddSpectator("visible_mass", a) reader.AddSpectator("mmc0_resonance_m", a) reader.AddSpectator("mmc1_resonance_m", a) reader.AddSpectator("mass_collinear_tau1_tau2", a)