Exemplo n.º 1
0
def runJob():
    """Set up a TMVA classification job writing to TMVA.root.

    NOTE(review): this excerpt is truncated — the DataLoader is created but
    variable/tree registration and the training steps are not visible here.
    """
    # Load the TMVA singleton tools and initialize PyROOT-method support.
    TMVA.Tools.Instance()
    TMVA.PyMethodBase.PyInitialize()

    # Output ROOT file that will receive all trained-method results.
    output = TFile.Open('TMVA.root', 'RECREATE')
    # Factory options: quiet-ish output, colored progress bar, decorrelation +
    # gaussianization input transforms, binary classification analysis.
    factory = TMVA.Factory('TMVAClassification', output,
            '!V:!Silent:Color:DrawProgressBar:Transformations=D,G:AnalysisType=Classification')

    # DataLoader container; populated further on (not part of this excerpt).
    dataloader = TMVA.DataLoader('dataset')
Exemplo n.º 2
0
def get_correlation_matrix(year, variables):
    """Return the signal-sample correlation matrix for *variables*.

    Chooses the 2017 or 2018 step-2 samples based on *year*; variables not
    present in ``varsList.varList["DNN"]`` are skipped with a warning.
    Returns an (n, n) numpy array of TMVA's signal correlation matrix.
    """
    # Resolve the sample directory and file names for the requested year.
    sample_dir = varsList.step2Sample2017 if year == 2017 else varsList.step2Sample2018
    sig_name = varsList.sig2017_0[0] if year == 2017 else varsList.sig2018_0[0]
    bkg_name = varsList.bkg2017_0[0] if year == 2017 else varsList.bkg2018_0[0]
    signal_path = os.path.join(os.getcwd(), sample_dir, sig_name)
    bkgrnd_path = os.path.join(os.getcwd(), sample_dir, bkg_name)

    loader = TMVA.DataLoader("tmva_data")

    # Register every requested variable that is known to the DNN list.
    dnn_vars = varsList.varList["DNN"]
    dnn_names = [entry[0] for entry in dnn_vars]
    for var in variables:
        try:
            entry = dnn_vars[dnn_names.index(var)]
        except ValueError:
            print("[WARN] The variable {} was not found. Omitting.".format(var))
        else:
            loader.AddVariable(entry[0], entry[1], entry[2], "F")

    # Open the ROOT files and fetch the "ljmet" trees.
    signal_f = TFile.Open(signal_path)
    signal = signal_f.Get("ljmet")
    bkgrnd_f = TFile.Open(bkgrnd_path)
    bkgrnd = bkgrnd_f.Get("ljmet")

    # Attach trees to the loader (the fTreeS/fTreeB pointers are set directly
    # as well, mirroring the rest of this codebase).
    loader.AddSignalTree(signal)
    loader.fTreeS = signal
    loader.AddBackgroundTree(bkgrnd)
    loader.fTreeB = bkgrnd

    # Apply the module-level weight expression to both samples.
    loader.SetSignalWeightExpression(weight_string)
    loader.SetBackgroundWeightExpression(weight_string)

    # Same selection for signal and background.
    loader.PrepareTrainingAndTestTree(
        cut_string, cut_string,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V:VerboseLevel=Info"
    )

    # Presumably forces the dataset/event collection to be materialized so the
    # correlation matrix below is filled — TODO confirm against TMVA docs.
    loader.GetDefaultDataSetInfo().GetDataSet().GetEventCollection()

    sig_th2 = loader.GetCorrelationMatrix("Signal")

    # Convert the TH2 into a plain numpy array (ROOT bins are 1-indexed).
    dim = sig_th2.GetNbinsX()
    sig_corr = np.array([[sig_th2.GetBinContent(row + 1, col + 1)
                          for col in range(dim)]
                         for row in range(dim)])

    return sig_corr
Exemplo n.º 3
0
def runJob():
    """Train four gradient-boosted BDTs (varying nCuts) on the SSSF dataset.

    Samples and input variables come from the module-level ``config`` object;
    all results are written to TMVA_SSSF.root.
    """
    TMVA.Tools.Instance()
    TMVA.PyMethodBase.PyInitialize()

    output = TFile.Open('TMVA_SSSF.root', 'RECREATE')
    factory = TMVA.Factory(
        'TMVAClassification', output,
        '!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification')

    dataloader = TMVA.DataLoader('datasetSSSF04Feb')
    for br in config.mvaVariables:
        dataloader.AddVariable(br)

    for sampleName, sample in config.samples.items():
        # Only simulated samples take part in the training.
        if config.structure[sampleName]['isData'] == 1:
            continue

        # BUGFIX: 'print sampleName' was a Python 2 print statement
        # (SyntaxError under Python 3); the parenthesized form works in both.
        print(sampleName)
        sample['tree'] = TChain("Events")
        for f in sample['name']:
            sample['tree'].Add(f)

        if config.structure[sampleName]['isSignal'] == 1:
            dataloader.AddSignalTree(sample['tree'], 1.0)
        else:
            dataloader.AddBackgroundTree(sample['tree'], 1.0)
    dataloader.PrepareTrainingAndTestTree(
        TCut(config.cut), 'SplitMode=Random:NormMode=NumEvents:!V')

    # One BDTG configuration per nCuts setting; every other option is shared.
    for name, ncuts in (("BDTG4C1", 100), ("BDTG4C05", 50),
                        ("BDTG4500", 500), ("BDTG4750", 750)):
        factory.BookMethod(
            dataloader, TMVA.Types.kBDT, name,
            "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:"
            "UseBaggedBoost:GradBaggingFraction=0.5:nCuts={}:MaxDepth=2".format(ncuts))

    # Run training, test and evaluation
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    output.Close()
Exemplo n.º 4
0
def TMVANN(filenameSTRING, outputDOTrootSTRING, sigtreeSTRINGS, bkgtreeSTRING,
           variablesSTRING):
    """Train one low-level-variable MLP per signal tree in *sigtreeSTRINGS*.

    The output file is created once up front and then opened in "update" mode
    each iteration, so results for all signal trees accumulate in
    *outputDOTrootSTRING*.  Relies on the module-level ``weights`` suffix and
    the ``R`` (ROOT) alias being defined elsewhere in this module.
    """
    # Hoisted out of the loop — importing once per call is enough.
    from ROOT import TMVA

    NNfile = R.TFile(outputDOTrootSTRING, "recreate")
    NNfile.Close()
    for sigtreeSTRING in sigtreeSTRINGS:
        # Renamed from 'file' to avoid shadowing the builtin.
        input_file = R.TFile(filenameSTRING)  # input dataset
        signaltree = input_file.Get(sigtreeSTRING)  # signal tree
        backgroundtree = input_file.Get(bkgtreeSTRING)  # background tree
        sigweights = input_file.Get(sigtreeSTRING + weights)
        # NOTE(review): bkgweights is fetched but never used below.
        bkgweights = input_file.Get(bkgtreeSTRING + weights)
        TMVA.Tools.Instance()

        # Reopen the output file in update mode for this iteration's factory.
        NNfile = R.TFile(outputDOTrootSTRING, "update")

        TMVAfactory = TMVA.Factory(
            "TMVANN", NNfile,
            "V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"
        )
        TMVAfactory.SetVerbose(False)  # suppress extra (verbose) output

        datasetsignalslowlevel = TMVA.DataLoader("datasetsignalslowlevel")
        datasetsignalslowlevel.AddSignalTree(signaltree, 1.)  # adding signal
        datasetsignalslowlevel.AddBackgroundTree(backgroundtree,
                                                 1.)  # adding background
        print(sigweights)
        datasetsignalslowlevel.SetSignalWeightExpression(weights)
        datasetsignalslowlevel.SetBackgroundWeightExpression(weights)

        for i in variablesSTRING:  # register the training variables
            datasetsignalslowlevel.AddVariable(i)

        signalcut = R.TCut("")  # Variables are already cut
        backgroundcut = R.TCut("")
        # NOTE(review): 'Splitmode' (lower-case m) appears to rely on TMVA's
        # option parsing being case-insensitive — TODO confirm.
        datasetsignalslowlevel.PrepareTrainingAndTestTree(
            signalcut, backgroundcut,
            "nTrain_Signal= 0:nTrain_Background=0:Splitmode=Random:NormMode=NumEvents:!V"
        )
        TMVAfactory.BookMethod(
            datasetsignalslowlevel, TMVA.Types.kMLP,
            "LowLevelNN_3layer25,20,10_100Epoch_tanhNeuron" + sigtreeSTRING,
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=100:HiddenLayers=25,20,10:TestRate=5"
        )

        TMVAfactory.TrainAllMethods()
        TMVAfactory.TestAllMethods()
        TMVAfactory.EvaluateAllMethods()
        NNfile.Close()
        # BUGFIX: close the input file too; it was left open every iteration.
        input_file.Close()
    NNfile = R.TFile(outputDOTrootSTRING, "update")
Exemplo n.º 5
0
def runJob():
    """Train an AdaBoost BDT classifier with TMVA.

    Reads samples and input variables from the module-level ``config`` object,
    loads them into the 'dataset_8Feb' DataLoader, and writes all results to
    TMVA16.root.
    """
    TMVA.Tools.Instance()
    TMVA.PyMethodBase.PyInitialize()

    dataloader = TMVA.DataLoader('dataset_8Feb')
    output = TFile.Open('TMVA16.root', 'RECREATE')
    factory = TMVA.Factory(
        'TMVAClassification', output,
        '!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification')

    for br in config.mvaVariables:
        dataloader.AddVariable(br)

    # Attach every MC sample; real data is excluded from the training.
    for sampleName, sample in config.samples.items():
        if config.structure[sampleName]['isData'] == 1:
            continue

        sample['tree'] = TChain("Events")
        for f in sample['name']:
            sample['tree'].Add(f)

        if config.structure[sampleName]['isSignal'] == 1:
            dataloader.AddSignalTree(sample['tree'], 1.0)
        else:
            dataloader.AddBackgroundTree(sample['tree'], 1.0)

    # BUGFIX: the option string contained a stray double colon
    # ('SplitMode=Random::SplitSeed=10'), which yields an empty option token;
    # options are separated by a single ':'.
    dataloader.PrepareTrainingAndTestTree(
        TCut(config.cut),
        'SplitMode=Random:SplitSeed=10:NormMode=EqualNumEvents')

    factory.BookMethod(
        dataloader, TMVA.Types.kBDT, "BDT",
        "!H:!V:NTrees=500:MinNodeSize=0.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.1:SeparationType=GiniIndex:nCuts=500"
    )

    # Run training, test and evaluation
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    output.Close()
def getCorrelationMatrix(sigFile, bkgFile, weightStr, cutStr,
                         varList):  # gets the correlation matrix as np array
    """Compute the signal-sample correlation matrix for the given variables.

    Args:
        sigFile: path to the signal ROOT file (tree "ljmet").
        bkgFile: path to the background ROOT file (tree "ljmet").
        weightStr: per-event weight expression applied to both samples.
        cutStr: selection applied to both samples.
        varList: iterable of variable descriptors; entries are indexed as
            (name, title, units) — assumed from usage, TODO confirm.

    Returns:
        (sig_corr, varNames): the signal correlation matrix as an (n, n)
        numpy array, and the list of variable names loaded.
    """
    varNames = []
    loader = TMVA.DataLoader("dataset")
    for var in varList:
        # BUGFIX: was 'loader.Addvariable' (lower-case v) -> AttributeError.
        # BUGFIX: was 'var[0] in "NJets_MultiLepCalc"', a substring test that
        # matched any variable whose name is contained in that string
        # (e.g. "N"); an exact comparison is what was intended.
        if var[0] == "NJets_MultiLepCalc":
            loader.AddVariable(var[0], var[1], var[2], "I")
        else:
            loader.AddVariable(var[0], var[1], var[2], "F")
        varNames.append(var[0])

    # open the root files
    input_sig = TFile.Open(sigFile)
    signal = input_sig.Get("ljmet")
    input_bkg = TFile.Open(bkgFile)
    background = input_bkg.Get("ljmet")

    # load in the trees (fTreeS/fTreeB set directly, as elsewhere in this file)
    loader.AddSignalTree(signal)
    loader.fTreeS = signal
    loader.AddBackgroundTree(background)
    loader.fTreeB = background

    # set weights
    loader.SetSignalWeightExpression(weightStr)
    loader.SetBackgroundWeightExpression(weightStr)

    # set cuts (same selection for signal and background)
    loader.PrepareTrainingAndTestTree(
        cutStr, cutStr,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )

    # Presumably forces the dataset/event collection to be materialized so the
    # correlation matrix below is filled — TODO confirm against TMVA docs.
    loader.GetDefaultDataSetInfo().GetDataSet().GetEventCollection()

    # retrieve the signal correlation matrix
    sig_th2 = loader.GetCorrelationMatrix("Signal")

    # convert to numpy array (ROOT bins are 1-indexed)
    n_bins = sig_th2.GetNbinsX()
    sig_corr = np.zeros((n_bins, n_bins))

    for x in range(n_bins):
        for y in range(n_bins):
            sig_corr[x, y] = sig_th2.GetBinContent(x + 1, y + 1)

    return sig_corr, varNames
Exemplo n.º 7
0
def TMVANN(filenameSTRING, outputDOTrootSTRING, sigtreeSTRINGS, bkgtreeSTRING, variablesSTRING):
    """Train a set of TMVA classifiers for every signal tree in *sigtreeSTRINGS*.

    Each iteration reopens *outputDOTrootSTRING* with "recreate", so only the
    results of the last signal tree remain in the output file afterwards.
    Relies on the module-level ``weights`` suffix and the ``R`` (ROOT) alias.
    """
    for sig_name in sigtreeSTRINGS:
        from ROOT import TMVA
        source = R.TFile(filenameSTRING)            # input dataset
        sig_tree = source.Get(sig_name)             # signal tree
        bkg_tree = source.Get(bkgtreeSTRING)        # background tree
        sig_weights = source.Get(sig_name + weights)
        bkg_weights = source.Get(bkgtreeSTRING + weights)  # fetched, unused below
        TMVA.Tools.Instance()

        # Output file required by the TMVA factory (recreated per iteration).
        out_file = R.TFile(outputDOTrootSTRING, "recreate")

        factory = TMVA.Factory(
            "TMVANN", out_file,
            "V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification")
        factory.SetVerbose(False)  # no extra (verbose) output

        loader = TMVA.DataLoader("dataset")
        loader.AddSignalTree(sig_tree, 1.)
        loader.AddBackgroundTree(bkg_tree, 1.)
        print(sig_weights)
        #loader.SetSignalWeightExpression(weights)
        #loader.SetBackgroundWeightExpression(weights)

        # Register the training variables.
        for var_name in variablesSTRING:
            loader.AddVariable(var_name)

        # Variables are already cut upstream, so both selections are empty.
        loader.PrepareTrainingAndTestTree(
            R.TCut(""), R.TCut(""),
            "nTrain_Signal= 0:nTrain_Background=0:Splitmode=Random:NormMode=NumEvents:!V")

        # Book one Fisher, one likelihood and four MLP configurations.
        for mva_type, mva_name, mva_opts in (
            (TMVA.Types.kFisher, "FisherMethod",
             "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=60:NsmoothMVAPdf=10"),
            (TMVA.Types.kLikelihood, "BayesLikelihood",
             "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=60"),
            (TMVA.Types.kMLP, "ArtificialNeuralNetwork_4Layer8Node_500Epoch_tanhNeuron",
             "H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=8,8,8,8:TestRate=5"),
            (TMVA.Types.kMLP, "ArtificialNeuralNetwork_1Layer6Node_500Epoch_reluNeuron",
             "H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=6:TestRate=5"),
            (TMVA.Types.kMLP, "ArtificialNeuralNetwork_1Layer3Node_100Epoch_reluNeuron",
             "H:!V:NeuronType=tanh:VarTransform=N:NCycles=100:HiddenLayers=3:TestRate=5"),
            (TMVA.Types.kMLP, "ArtificialNeuralNetwork_2Layer5Node_500Epoch_tanhNeuron",
             "H:!V:NeuronType=tanh:VarTransform=N:NCycles=500:HiddenLayers=5,5:TestRate=5"),
        ):
            factory.BookMethod(loader, mva_type, mva_name, mva_opts)

        factory.TrainAllMethods()
        factory.TestAllMethods()
        factory.EvaluateAllMethods()
        out_file.Close()
        print('TMVANN Ran & made ROOT file ' + outputDOTrootSTRING + sig_name)
Exemplo n.º 8
0
 def _dataLoader(self, sigTreeNames, bkgTreeNames):
     """Build the TMVA DataLoader from pre-split train/test trees.

     Registers every configured variable, attaches the "_Train"/"_Test"
     tree pair for each signal and background name, applies the shared
     weight expression, and prepares the training/test split.

     Args:
         sigTreeNames: names of signal trees; "<name>_Train" and
             "<name>_Test" entries must exist in self._trees.
         bkgTreeNames: names of background trees, same convention.
     """
     self._data_loader = TMVA.DataLoader(self._options['factory']['name'])
     # Register each input variable (name, type) with the loader.
     for value in self._variables.values():
         self._data_loader.AddVariable(value['name'], value['type'])
     #---- attach the pre-split trees (weight 1.0, explicit train/test role)
     for sigTreeName in sigTreeNames:
         self._data_loader.AddSignalTree(
             self._trees[sigTreeName + "_Train"], 1.0, "train")
         self._data_loader.AddSignalTree(self._trees[sigTreeName + "_Test"],
                                         1.0, "test")
     for bkgTreeName in bkgTreeNames:
         self._data_loader.AddBackgroundTree(
             self._trees[bkgTreeName + "_Train"], 1.0, "train")
         self._data_loader.AddBackgroundTree(
             self._trees[bkgTreeName + "_Test"], 1.0, "test")
     # Same weight expression for signal and background.
     self._data_loader.SetSignalWeightExpression(
         self._options['factory']['weight'])
     self._data_loader.SetBackgroundWeightExpression(
         self._options['factory']['weight'])
     #---- apply per-sample cuts and the configured split options
     self._data_loader.PrepareTrainingAndTestTree(
         TCut(self._cuts['sig']), TCut(self._cuts['bkg']),
         self._options['prepareTrees'])
Exemplo n.º 9
0
)
# Locate the input ntuples: 2016+2017 VBF HH signal, GGF HH background.
file_VBF_HH_2016 = "../inputsamples/2016/SKIM_VBFHHTo4B_CV_1_C2V_1_C3_1_13TeV-madgraph.root"
file_VBF_HH_2017 = "../inputsamples/2017/SKIM_VBFHHTo4B_CV_1_C2V_1_C3_1_13TeV-madgraph.root"
file_GGF_HH_2016 = "../inputsamples/2016/SKIM_GluGluToHHTo4B_node_SM_13TeV-madgraph.root"
file_GGF_HH_2017 = "../inputsamples/2017/SKIM_GluGluToHHTo4B_node_SM_13TeV-madgraph_correctedcfg.root"

# Chain both years together for each class.
ch_sig = TChain("bbbbTree")
ch_bkg = TChain("bbbbTree")
for _sig_file in (file_VBF_HH_2016, file_VBF_HH_2017):
    ch_sig.AddFile(_sig_file)
# BSM signal samples are currently disabled:
#ch_sig.AddFile(file_VBF_HH_BSM_2016)
#ch_sig.AddFile(file_VBF_HH_BSM_2017)
for _bkg_file in (file_GGF_HH_2016, file_GGF_HH_2017):
    ch_bkg.AddFile(_bkg_file)

# Register the discriminating variables with TMVA; the ":=" entries define
# derived quantities computed on the fly from the tree branches.
dataloader = TMVA.DataLoader('GGFKiller')
for _var_expr in (
        "abs_H1_eta:=abs(H1_eta)",
        "abs_H2_eta:=abs(H2_eta)",
        "H1_pt",
        "H2_pt",
        "JJ_j1_pt",
        "JJ_j2_pt",
        "abs_JJ_eta:=abs(JJ_eta)",
        "h1h2_deltaEta",
        "h1j1_deltaR",
        "h1j2_deltaR",
        "h2j1_deltaR",
        "h2j2_deltaR",
        "abs_j1etaj2eta:=abs(j1etaj2eta)",
        "abs_costh_HH_b1_cm:=abs(costh_HH_b1_cm)",
        "abs_costh_HH_b2_cm:=abs(costh_HH_b2_cm)",
):
    dataloader.AddVariable(_var_expr)
Exemplo n.º 10
0
def main():

    NTupName   = "JetTree"
    alg        = "TruthRawTrim"
    cuts       =  ["eta>-1.2","eta<1.2","pt>300","pt<5000","m>61","m<85"]
    vars       = ["Tau21","T2jet"]

    #NTupName   = "varTree"    
    verbose    = True
    
    #alg        = "AK10LCTRIMF5R20"
    spectators = ["m"]
    #cuts       = ["eta>-1.2","eta<1.2","pt>200","pt<350","m>61","m<85","TruthRecoMatch==1"]
    #vars       = ["TauWTA2TauWTA1","ZCUT12","Dip23","TJetVol","ActiveArea","PullC10","Angularity"]
    methods    = "Likelihood"

    print "Starting and getting arguments:"
    allargs = sys.argv[1:]    
    if len(allargs)<5:
        print "You input these args"
        print allargs
        print "Not enough args, please try again"
        return 1
    else:
        alg        = allargs[0]
        spectators = allargs[1].split(",")
        cuts       = allargs[2].split(",")
        vars       = allargs[3].split(",")
        methods    = allargs[4]
    
    print "Running with args:"
    print "  alg        = ",alg        
    print "  spectators = ",spectators 
    print "  cuts       = ",cuts       
    print "  vars       = ",vars       
    print "  methods    = ",methods    
    

    # Print methods
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()



    #===============================
    #Read training and test data
    #===============================
    #InputDir = "../gen_20170529/"
    InputDir="../Ana_EventGeneration/"
    #InputDir="~/Downloads/"
    print "Getting inputs from: ",InputDir
    #s1 = TFile(InputDir+"ntuple_ttbar_2000.root");
    #b1 = TFile(InputDir+"ntuple_dijet_800_1400.root");
    s1 = TFile(InputDir+"ntuple_tt_test10000.root");
    b1 = TFile(InputDir+"ntuple_dijet_test10000.root");

    # Output file
    OutFileName="testout.root"
    outputFile = TFile( OutFileName, 'RECREATE' )
    
    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in 
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory( "TMVAClassification", outputFile, 
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )

    # Set verbosity
    factory.SetVerbose( verbose )

    # data loader
    dataloader=TMVA.DataLoader("dataset")

#     weight=""
#     weight+="pass_selection*EventWeight*CrossSection*("
#     weight+=alg+"_pt>"+pt1+" && "
#     weight+=alg+"_pt<"+pt2
#     if m1!="0":
#         weight+=" && "+alg+"_m>"+m1+" && "
#         weight+=alg+"_m<"+m2
#     weight+=")"
#     
#     #Get signal and background histograms
#     if variable=="mass":
#         histname = "akt10"+alg+"_trim_"+variable
#     else:
#         histname = alg+"_"+variable
    
    #======================================
    #Predefined cuts - for isntance on M(j1)
    #======================================
    mycuts = "1.0"
    mycutb = "1.0"

    for cut in cuts:
        placecut=cut
        if cut[:2]=="pt" or cut[:3]=="eta" or cut[:4]=="m":
            placecut = "* ("+alg+"_"+cut+")"
        else:
            placecut="* ("+cut+") "
        mycuts += placecut
        mycutb += placecut

    
    print "MyCutsSig: ",mycuts
    print "MyCutsBkg: ",mycutb

    #===================================
    #Spectator variables from tree
    #=====================================
    for spec in spectators:
        dataloader.AddSpectator( spec, 'F' )
        
    #===================================
    #MVA variables from tree
    #=====================================
    for var in vars:
        dataloader.AddVariable( var , 'F' )

    #===============================
    #Read training and test data
    #===============================
    print "Getting trees ... ",NTupName
    st1 = s1.Get(NTupName)
    bt1 = b1.Get(NTupName)

    #=========================================
    # global event weights per tree (see below for setting event-wise weights)
    #=========================================
    ws1 = 1.0
    wb1 = 1.0

    #=========================================
    # You can add an arbitrary number of signal or background trees
    #=========================================
    dataloader.AddSignalTree    ( st1, ws1 );
    #SCHSU
    #dataloader.SetSignalWeightExpression("EventWeight*CrossSection");

    dataloader.AddBackgroundTree( bt1, wb1 );
    #dataloader.SetBackgroundWeightExpression("EventWeight*CrossSection");
    
    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    mycutSig = TCut(mycuts)
    mycutBkg = TCut(mycutb)
    
    dataloader.PrepareTrainingAndTestTree( mycutSig, mycutBkg,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethoCuts.cxx, etc, or here: http:#tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "Cuts",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" )

    if "CutsD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsD",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" )

    if "CutsPCA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsPCA",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" )

    if "CutsGA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsGA",
                            "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" )

    if "CutsSA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsSA",
                            "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" )

    # Likelihood ("naive Bayes estimator")
    if "Likelihood" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "Likelihood",
                            "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" )

    # Decorrelated likelihood
    if "LikelihoodD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodD",
                            "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" )

    # PCA-transformed likelihood
    if "LikelihoodPCA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodPCA",
                            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) 

    # Use a kernel density estimator to approximate the PDFs
    if "LikelihoodKDE" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodKDE",
                            "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) 

    # Use a variable-dependent mix of splines and kernel density estimator
    if "LikelihoodMIX" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodMIX",
                            "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) 

    # Test the multi-dimensional probability density estimator
    # here are the options strings for the MinMax and RMS methods, respectively:
    #      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
    #      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
    if "PDERS" in mlist:
# DEFAULT
#         factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERS",
#                             "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" )
# CHOOSE RIGID VOLUME SO IT DOESNT TAKE SO LONG
        factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERS",
                            "!H:!V:NormTree=T:VolumeRangeMode=Unscaled:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" )


    if "PDERSD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERSD",
                            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" )

    if "PDERSPCA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERSPCA",
                             "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" )

   # Multi-dimensional likelihood estimator using self-adapting phase-space binning
    if "PDEFoam" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kPDEFoam, "PDEFoam",
                            "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" )

    if "PDEFoamBoost" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kPDEFoam, "PDEFoamBoost",
                            "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" )

    # K-Nearest Neighbour classifier (KNN)
    if "KNN" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kKNN, "KNN",
                            "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" )

    # H-Matrix (chi2-squared) method
    if "HMatrix" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kHMatrix, "HMatrix", "!H:!V" )

    # Linear discriminant (same as Fisher discriminant)
    if "LD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )

    # Fisher discriminant (same as LD)
    if "Fisher" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )

    # Fisher with Gauss-transformed input variables
    if "FisherG" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss" )

    # Composite classifier: ensemble (tree) of boosted Fisher classifiers
    if "BoostedFisher" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFisher, "BoostedFisher", 
                            "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" )

    # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if "FDA_MC" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MC",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

    if "FDA_GA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_GA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

    if "FDA_SA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_SA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

    if "FDA_MT" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

    if "FDA_GAMT" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_GAMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

    if "FDA_MCMT" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MCMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

    # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if "MLP" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" )

    if "MLPBFGS" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" )

    if "MLPBNN" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators

    # CF(Clermont-Ferrand)ANN
    if "CFMlpANN" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ) # n_cycles:#nodes:#nodes:...  

    # Tmlp(Root)ANN
    if "TMlpANN" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ) # n_cycles:#nodes:#nodes:...

    # Support Vector Machine
    if "SVM" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" )

    # Boosted Decision Trees
    if "BDTG" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTG",
                            "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" )                        

    if "BDT" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDT",
                           "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" )

    if "BDTB" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" )

    if "BDTD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTD",
                           "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" )

    # RuleFit -- TMVA implementation of Friedman's method
    if "RuleFit" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kRuleFit, "RuleFit",
                            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" )

    # --------------------------------------------------------------------------------------------------
            
    # ---- Now you can tell the factory to train, test, and evaluate the MVAs. 

    # Train MVAs
    factory.TrainAllMethods()
    
    # Test MVAs
    factory.TestAllMethods()
    
    # Evaluate MVAs
    factory.EvaluateAllMethods()    
    
    # Save the output.
    outputFile.Close()
    
    print "=== wrote root file %s\n" % OutFileName
    print "=== TMVAClassification is done!\n"
Exemplo n.º 11
0
# TMVA binary-classification job that trains on the standard TMVA example
# dataset (script fragment: the Keras model definition continues beyond
# this excerpt).
output = TFile.Open('BinaryClassificationKeras.root', 'RECREATE')
factory = TMVA.Factory(
    'TMVAClassification', output,
    '!V:!Silent:Color:DrawProgressBar:Transformations=G:AnalysisType=Classification'
)

# Load data
# Fetch the example ROOT file once and cache it in the working directory.
if not isfile('tmva_class_example.root'):
    call(['curl', '-O', 'http://root.cern.ch/files/tmva_class_example.root'])

data = TFile.Open('tmva_class_example.root')
signal = data.Get('TreeS')      # signal events tree
background = data.Get('TreeB')  # background events tree

# Use every branch of the signal tree as a training input variable.
dataloader = TMVA.DataLoader('BinaryClassificationKeras')
for branch in signal.GetListOfBranches():
    dataloader.AddVariable(branch.GetName())

dataloader.AddSignalTree(signal, 1.0)
dataloader.AddBackgroundTree(background, 1.0)
# No selection cut; 4000 training events per class, random split.
dataloader.PrepareTrainingAndTestTree(
    TCut(''),
    'nTrain_Signal=4000:nTrain_Background=4000:SplitMode=Random:NormMode=NumEvents:!V'
)

# Generate model

# Define model
model = Sequential()
model.add(Dense(64, init='glorot_normal', activation='relu', input_dim=4))
Exemplo n.º 12
0
Arquivo: train.py Projeto: bb511/B2DD
def add_variables(data_loader, good_variables):
    """Register every variable except the mass column on the loader.

    Each entry of *good_variables* other than "Bc_M" (the fit mass, kept
    out of the training inputs) is added to *data_loader* as a float
    ("F") variable.  Returns the loader so calls can be chained.
    """
    for name in (v for v in good_variables if v != "Bc_M"):
        data_loader.AddVariable(name, "F")
    return data_loader


if __name__ == '__main__':
    # Script entry point: set up a TMVA training for one decay mode / run.
    decay_name = decay_names[decay_nb]
    output_file = TFile("~/TMVA/TMVAoutput" + decay_name + str(run) + ".root",
                        "RECREATE")
    factory = TMVA.Factory("TMVA_" + decay_name, output_file,
                           "DrawProgressBar=True")
    data_loader = TMVA.DataLoader("dataloader")

    # NOTE(review): the True/False flag to combine_trees presumably selects
    # MC vs. real data -- confirm against dhand.combine_trees.
    moca_tree = dhand.combine_trees(run, decay_name, True)
    data_tree = dhand.combine_trees(run, decay_name, False)
    branches_to_keep = dhand.branch_selection(data_tree, branches, [])
    moca_tree = dhand.activate_branches(moca_tree, branches_to_keep)
    data_tree = dhand.activate_branches(data_tree, branches_to_keep)

    add_variables(data_loader, branches_to_keep)
    # k-fold style split on runNumber%5: the selected fold is used for
    # testing, all other folds for training.  Signal cuts select the
    # 5200-5400 MeV Bc mass window; background cuts select the upper
    # sideband above 5400.
    sgcut_test = TCut("runNumber%5==" + kfold +
                      "&& (Bc_M > 5200 && Bc_M < 5400)")
    sgcut_train = TCut("runNumber%5!=" + kfold +
                       "&& (Bc_M > 5200 && Bc_M < 5400)")
    bgcut_test = TCut("runNumber%5==" + kfold + "&& Bc_M > 5400")
    bgcut_train = TCut("runNumber%5!=" + kfold + "&& Bc_M > 5400")
Exemplo n.º 13
0
    trainfilename="proctrain.csv.root" 


    trainfile = TFile.Open(trainfilename,"read")
    traintree = trainfile.Get(traintree_name)
    
    TMVA.Tools.Instance()
    
    
    # create the tmva output file, which will be full of details about the training
    fout = TFile("tmvatest.root","RECREATE")


    # use the default factory
    factory = TMVA.Factory("TMVAClassification", fout)                                
    dataloader = TMVA.DataLoader("dataset")


    # build the list of variables
    al=traintree.GetListOfBranches()
    varlist=[]
    for i in range(al.GetEntries()):
        varlist+=[al[i].GetName()]

        
    if debug:
        print "all variables of ",trainfile, " ", varlist
        print "now stripping EventId Weight and Label "

    # these three variables should not be used for training
    mva_input_list=[e for e in varlist if not e in ['EventId','Weight','Label']] 
def main():  # runs the program
    """Train and evaluate a Keras DNN through TMVA's PyKeras interface.

    Command-line options select the dataset directory (-d), the site the
    inputs live at (-w lpc|brux), the sample year (-y 2017|2018), the
    variable-list option (-o: 0 = default DNN list, 1 = HPO-selected list
    read from <dataset>/varsListHPO.txt) and verbosity (-v).  Books the
    signal/background "ljmet" trees, trains the model produced by
    build_model(), writes the TMVA output ROOT file, prints the ROC
    integral and, for option 1, appends it to varsListHPO.txt.
    """
    checkRootVer()  # check that ROOT version is correct

    try:  # retrieve command line options
        shortopts = "d:o:v:w:y:h?"  # possible command line options
        longopts = [
            "dataset=", "option=", "where=", "year=", "verbose", "help",
            "usage"
        ]
        opts, args = getopt.getopt(
            sys.argv[1:], shortopts,
            longopts)  # associates command line inputs to variables

    except getopt.GetoptError:  # output error if command line argument invalid
        print("ERROR: unknown options in argument %s" % sys.argv[1:])
        usage()
        sys.exit(1)

    myArgs = np.array(
        [  # Stores the command line arguments: short opt, long opt, key, value
            ['-d', '--dataset', 'dataset', 'dataset'],
            ['-w', '--where', 'where', 'lpc'], ['-y', '--year', 'year', 2017],
            ['-o', '--option', 'option', 0],
            ['-v', '--verbose', 'verbose', True]
        ],
        dtype="object")

    for opt, arg in opts:
        if opt in myArgs[:, 0]:
            index = np.where(
                myArgs[:,
                       0] == opt)[0][0]  # np.where returns a tuple of arrays
            myArgs[index, 3] = str(
                arg)  # override the variables with the command line argument
        elif opt in myArgs[:, 1]:
            index = np.where(myArgs[:, 1] == opt)[0][0]
            myArgs[index, 3] = arg
        if opt in ("-?", "-h", "--help",
                   "--usage"):  # provides command line help
            usage()
            sys.exit(0)

    # Initialize some containers
    bkg_list = []
    bkg_trees_list = []
    sig_list = []
    sig_trees_list = []

    # Initialize some variables after reading in arguments
    option_index = np.where(myArgs[:, 2] == 'option')[0][0]
    dataset_index = np.where(myArgs[:, 2] == 'dataset')[0][0]
    verbose_index = np.where(myArgs[:, 2] == 'verbose')[0][0]
    where_index = np.where(myArgs[:, 2] == 'where')[0][0]
    year_index = np.where(myArgs[:, 2] == 'year')[0][0]

    DATASETPATH = myArgs[dataset_index][3]
    DATASET = DATASETPATH.split("/")[0]
    # Bug fix: the defaults above are stored as int (0, 2017) while
    # command-line overrides arrive as strings.  Coerce once here so the
    # string/int comparisons below work for both the default case
    # (previously OPTION == "0" never matched the default 0, leaving
    # varList undefined) and the overridden case (YEAR == 2017 never
    # matched the string "2017", leaving inputDir undefined).
    OPTION = str(myArgs[option_index][3])
    VERBOSE = myArgs[verbose_index][3]
    WHERE = myArgs[where_index][3]
    YEAR = int(myArgs[year_index][3])

    # Resolve the input directory from site + year.
    if WHERE == "lpc":
        if YEAR == 2017:
            inputDir = varsList.inputDirLPC2017
        elif YEAR == 2018:
            inputDir = varsList.inputDirLPC2018
    else:
        if YEAR == 2017:
            inputDir = varsList.inputDirBRUX2017
        elif YEAR == 2018:
            inputDir = varsList.inputDirBRUX2018

    if OPTION == "0":
        print("Using Option 0: default varList")
        varList = varsList.varList["DNN"]

    elif OPTION == "1":
        print("Using Option 1: selected data from {}".format(DATASETPATH))
        # Variable names are listed one per line after the
        # "Variable List:" marker in varsListHPO.txt.
        varsListHPO = open(DATASETPATH + "/varsListHPO.txt", "r").readlines()
        varList = []
        START = False
        for line in varsListHPO:
            if START == True:
                varList.append(str(line.strip()))
            if "Variable List:" in line:
                START = True

    numVars = len(varList)
    outf_key = str("Keras_" + str(numVars) + "vars")
    OUTF_NAME = DATASET + "/weights/TMVA_" + outf_key + ".root"
    outputfile = TFile(OUTF_NAME, "RECREATE")

    # initialize and set-up TMVA factory

    factory = TMVA.Factory(
        "Training", outputfile,
        "!V:!ROC:Silent:Color:!DrawProgressBar:Transformations=I;:AnalysisType=Classification"
    )

    # NOTE(review): bool() of any non-empty string is True, so -v cannot
    # actually switch verbosity off here -- confirm intended behavior.
    factory.SetVerbose(bool(myArgs[verbose_index, 3]))
    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/" + outf_key

    # initialize and set-up TMVA loader

    loader = TMVA.DataLoader(DATASET)

    # Option 0 entries are (expression, label, unit) tuples; option 1
    # entries are bare names.  Jet multiplicity is an integer variable.
    if OPTION == "0":
        for var in varList:
            if var[0] == "NJets_MultiLepCalc":
                loader.AddVariable(var[0], var[1], var[2], 'I')
            else:
                loader.AddVariable(var[0], var[1], var[2], "F")
    if OPTION == "1":
        for var in varList:
            if var == "NJets_MultiLepCalc":
                loader.AddVariable(var, "", "", "I")
            else:
                loader.AddVariable(var, "", "", "F")

    # add signal files
    if YEAR == 2017:
        for i in range(len(varsList.sig2017_2)):
            sig_list.append(TFile.Open(inputDir + varsList.sig2017_2[i]))
            sig_trees_list.append(sig_list[i].Get("ljmet"))
            sig_trees_list[i].GetEntry(0)
            loader.AddSignalTree(sig_trees_list[i])

    elif YEAR == 2018:
        for i in range(len(varsList.sig2018_2)):
            sig_list.append(TFile.Open(inputDir + varsList.sig2018_2[i]))
            sig_trees_list.append(sig_list[i].Get("ljmet"))
            sig_trees_list[i].GetEntry(0)
            loader.AddSignalTree(sig_trees_list[i])

    # add background files (empty trees are skipped -- they cannot be
    # registered with the loader)
    if YEAR == 2017:
        for i in range(len(varsList.bkg2017_2)):
            bkg_list.append(TFile.Open(inputDir + varsList.bkg2017_2[i]))
            bkg_trees_list.append(bkg_list[i].Get("ljmet"))
            bkg_trees_list[i].GetEntry(0)

            if bkg_trees_list[i].GetEntries() == 0:
                continue
            loader.AddBackgroundTree(bkg_trees_list[i])

    elif YEAR == 2018:
        for i in range(len(varsList.bkg2018_2)):
            bkg_list.append(TFile.Open(inputDir + varsList.bkg2018_2[i]))
            bkg_trees_list.append(bkg_list[i].Get("ljmet"))
            bkg_trees_list[i].GetEntry(0)

            if bkg_trees_list[i].GetEntries() == 0:
                continue
            loader.AddBackgroundTree(bkg_trees_list[i])

    # Per-event weights and selection cuts (expressions defined at module
    # level).
    loader.SetSignalWeightExpression(weightStrS)
    loader.SetBackgroundWeightExpression(weightStrB)

    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)

    loader.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )

    ######################################################
    ######################################################
    ######                                          ######
    ######            K E R A S   D N N             ######
    ######                                          ######
    ######################################################
    ######################################################
    HIDDEN = 0
    NODES = 0
    LRATE = 0.
    PATTERN = ""
    REGULATOR = ""
    ACTIVATION = ""
    BATCH_SIZE = 0
    # modify this when implementing hyper parameter optimization:
    model_name = 'TTTT_' + str(numVars) + 'vars_model.h5'

    EPOCHS = 100
    PATIENCE = 20

    # edit these based on hyper parameter optimization results
    if OPTION == "0":
        HIDDEN = 3
        NODES = 100
        LRATE = 0.01
        PATTERN = 'static'
        REGULATOR = 'none'
        ACTIVATION = 'relu'
        BATCH_SIZE = 256
    if OPTION == "1":
        # Parse the optimized hyper parameters from the dataset's
        # "params" file: values appear on "key: value" lines after the
        # "Optimized Parameters:" marker.  (Loop variable renamed from
        # "file" to avoid shadowing the builtin.)
        datasetDir = os.listdir(DATASETPATH)
        for fileName in datasetDir:
            if "params" in fileName: optFileName = fileName
        optFile = open(DATASETPATH + "/" + optFileName, "r").readlines()
        START = False
        for line in optFile:
            if START == True:
                if "Hidden" in line: HIDDEN = int(line.split(":")[1].strip())
                if "Initial" in line: NODES = int(line.split(":")[1].strip())
                if "Batch" in line:
                    BATCH_SIZE = 2**int(line.split(":")[1].strip())
                if "Learning" in line:
                    LRATE = float(line.split(":")[1].strip())
                if "Pattern" in line: PATTERN = str(line.split(":")[1].strip())
                if "Regulator" in line:
                    REGULATOR = str(line.split(":")[1].strip())
                if "Activation" in line:
                    ACTIVATION = str(line.split(":")[1].strip())
            if "Optimized Parameters:" in line: START = True
    kerasSetting = '!H:!V:VarTransform=G:FilenameModel=' + model_name + \
                   ':SaveBestOnly=true' + \
                   ':NumEpochs=' + str(EPOCHS) + \
                   ':BatchSize=' + str(BATCH_SIZE) + \
                   ':TriesEarlyStopping=' + str(PATIENCE)

    # Build, save and register the Keras model, then run TMVA.
    model = build_model(HIDDEN, NODES, LRATE, REGULATOR, PATTERN, ACTIVATION,
                        numVars)
    model.save(model_name)
    model.summary()

    factory.BookMethod(loader, TMVA.Types.kPyKeras, 'PyKeras', kerasSetting)

    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    outputfile.Close()

    print("Finished training in " + str(
        (time.time() - START_TIME) / 60.0) + " minutes.")

    ROC = factory.GetROCIntegral(DATASET, 'PyKeras')
    print('ROC value is: {}'.format(ROC))
    if OPTION == "1":
        # Bug fix: append to the same file the variable list was read
        # from -- the path previously lacked the "/" separator.
        varsListHPOtxt = open(DATASETPATH + "/varsListHPO.txt", "a")
        varsListHPOtxt.write("ROC Value: {}".format(ROC))
Exemplo n.º 15
0
def main():
    """Train a TMVA BDT variant on the TTTT signal vs. backgrounds.

    Command-line options choose the method (-m), input file (-i), tree
    names (-t), number of trees (-n), maximum depth (-d), variable-list
    key (-l) and output file (-o).  Builds the TMVA factory and
    DataLoader, registers the signal and background "ljmet" trees, books
    the requested BDT flavour and runs training, testing and evaluation.
    """
    try:
        # retrieve command line options
        shortopts  = "m:i:n:d:k:l:t:o:vh?"
        longopts   = ["methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=", "varListKey=", "inputtrees=", "outputfile=", "verbose", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    # Start from the module-level defaults and override from the options.
    infname     = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname    = DEFAULT_OUTFNAME
    methods     = DEFAULT_METHODS
    nTrees      = DEFAULT_NTREES
    mDepth      = DEFAULT_MDEPTH
    varListKey  = DEFAULT_VARLISTKEY
    verbose     = True
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-d", "--maxDepth"):
            mDepth = a
        elif o in ("-l", "--varListKey"):
            varListKey = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-n", "--nTrees"):
            nTrees = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            # expects two space-separated tree names; after the reverse
            # sort the signal tree ("TreeS"-like, alphabetically later)
            # comes first
            a.strip()
            trees = a.rsplit( ' ' )
            trees.sort()
            trees.reverse()
            if len(trees)-trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    varList = varsList.varList[varListKey]
    nVars = str(len(varList))+'vars'
    Note=methods+'_'+varListKey+'_'+nVars+'_mDepth'+mDepth
    # NOTE(review): this overrides any -o/--outputfile value given above.
    outfname = "dataset/weights/TMVA_"+Note+".root"
    # Print methods
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile( outfname, 'RECREATE' )

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory( "TMVAClassification", outputFile,
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;:AnalysisType=Classification" )

    loader = TMVA.DataLoader("dataset")

    # Set verbosity
#     factory.SetVerbose( verbose )

    # If you wish to modify default settings
    # (please check "src/Config.h" to see all available global options)
    #    gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/"+Note

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    # Jet multiplicity is integer-typed; everything else is a float.
    for iVar in varList:
        if iVar[0]=='NJets_JetSubCalc': loader.AddVariable(iVar[0],iVar[1],iVar[2],'I')
        else: loader.AddVariable(iVar[0],iVar[1],iVar[2],'F')

    # You can add so-called "Spectator variables", which are not used in the MVA training,
    # but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
    # input variables, the response values of all trained MVAs, and the spectator variables

    inputDir = varsList.inputDir
    infname = "TTTT_TuneCP5_13TeV-amcatnlo-pythia8_hadd.root"
    iFileSig = TFile.Open(inputDir+infname)
    sigChain = iFileSig.Get("ljmet")

    loader.AddSignalTree(sigChain)
    bkg_list = []
    bkg_trees_list = []
    bkgList = varsList.bkg

    # Register every non-empty background tree with unit weight.
    for i in range(len(bkgList)):
        bkg_list.append(TFile.Open(inputDir+bkgList[i]))
        print inputDir+bkgList[i]
        bkg_trees_list.append(bkg_list[i].Get("ljmet"))
        bkg_trees_list[i].GetEntry(0)

        if bkg_trees_list[i].GetEntries() == 0:
            continue
        loader.AddBackgroundTree( bkg_trees_list[i], 1)

    signalWeight = 1 #0.0159/sigChain.GetEntries() #xs (pb)


    # ====== register trees ====================================================
    # To give different trees for training and testing, do as follows:
    #    loader.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    loader.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )

    # Use the following code instead of the above two or four lines to add signal and background
    # training and test events "by hand"
    # NOTE that in this case one should not give expressions (such as "var1+var2") in the input
    #      variable definition, but simply compute the expression before adding the event
    #
    #    # --- begin ----------------------------------------------------------
    #
    # ... *** please lookup code in TMVA/macros/TMVAClassification.C ***
    #
    #    # --- end ------------------------------------------------------------
    #
    # ====== end of register trees ==============================================

    # Set individual event weights (the variables must exist in the original TTree)
    #    for signal    : loader.SetSignalWeightExpression    ("weight1*weight2");
    #    for background: loader.SetBackgroundWeightExpression("weight1*weight2");
    loader.SetSignalWeightExpression( weightStrS )
    loader.SetBackgroundWeightExpression( weightStrB )

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    mycutSig = TCut( cutStrS )
    mycutBkg = TCut( cutStrB )

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    loader.PrepareTrainingAndTestTree( mycutSig, mycutBkg,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # bdtSetting for "BDT"
    bdtSetting = '!H:!V:NTrees=%s:MaxDepth=%s' %(nTrees,mDepth)
    bdtSetting += ':MinNodeSize=2.5%:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20'
    bdtSetting += ':IgnoreNegWeightsInTraining=True'
    # bdtSetting for "BDTMitFisher"
    bdtFSetting = '!H:!V:NTrees=%s' %nTrees
    bdtFSetting += ':MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20'
    bdtFSetting += ':IgnoreNegWeightsInTraining=True'
    # bdtSetting for "BDTG"
    bdtGSetting = '!H:!V:NTrees=%s:MaxDepth=%s' %(nTrees,mDepth)
    bdtGSetting += ':MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20'
    bdtGSetting += ':Pray' #Pray takes into account the effect of negative bins in BDTG
    #bdtGSetting += ':IgnoreNegWeightsInTraining=True'
    # bdtSetting for "BDTB"
    bdtBSetting = '!H:!V:NTrees=%s' %nTrees
    bdtBSetting += ':MinNodeSize=2.5%:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20'
    bdtBSetting += ':IgnoreNegWeightsInTraining=True'
    # bdtSetting for "BDTD"
    bdtDSetting = '!H:!V:NTrees=%s' %nTrees
    bdtDSetting += ':MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate'
    bdtDSetting += ':IgnoreNegWeightsInTraining=True'
    # Note also that explicitly setting *nEventsMin* so far OVERWRITES the option recommended

    # BOOKING AN ALGORITHM
    # Bug fix: every booking must go through the DataLoader-based
    # BookMethod overload (the non-"BDT" variants previously omitted the
    # loader argument, which fails on DataLoader-era TMVA).
    if methods=="BDT": factory.BookMethod( loader, TMVA.Types.kBDT, "BDT",bdtSetting)
    if methods=="BDTG": factory.BookMethod( loader, TMVA.Types.kBDT, "BDTG",bdtGSetting)
    if methods=="BDTMitFisher": factory.BookMethod( loader, TMVA.Types.kBDT, "BDTMitFisher",bdtFSetting)
    if methods=="BDTB": factory.BookMethod( loader, TMVA.Types.kBDT, "BDTB",bdtBSetting)
    if methods=="BDTD": factory.BookMethod( loader, TMVA.Types.kBDT, "BDTD",bdtDSetting)
    # --------------------------------------------------------------------------------------------------

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    print "train all method"
    factory.TrainAllMethods()

    print "test all method"
    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()
    # save plots:
    os.chdir('dataset/weights/'+Note)

    if not gROOT.IsBatch(): TMVA.TMVAGui( outfname )
    print "DONE"
Exemplo n.º 16
0
# Toy TMVA regression example: learn y = sin(x) + sin(6x) + noise.
plt.style.use('ggplot')
RNG = np.random.RandomState(1)  # fixed seed for reproducibility

# Create an example regression dataset
X = np.linspace(0, 6, 100)[:, np.newaxis]
y = np.sin(X).ravel() + \
    np.sin(6 * X).ravel() + \
    RNG.normal(0, 0.1, X.shape[0])

# Fit a regression model
output = TFile('tmva_output.root', 'recreate')
factory = TMVA.Factory('regressor', output, 'AnalysisType=Regression:'
                       '!V:Silent:!DrawProgressBar')

# ROOT >= 6.07/04 moved the data-definition API from Factory to
# DataLoader; pick whichever object carries AddVariable/AddTarget on
# this ROOT version.
if ROOT_VERSION >= '6.07/04':
    data = TMVA.DataLoader('.')
else:
    data = factory
data.AddVariable('x', 'F')
data.AddTarget('y', 'F')

add_regression_events(data, X, y)
add_regression_events(data, X, y, test=True)
# The following line is necessary if events have been added individually:
data.PrepareTrainingAndTestTree(TCut('1'), '')

# Same API split for BookMethod: bound method on new ROOT versions,
# unbound/static on old ones.
if ROOT_VERSION >= '6.07/04':
    BookMethod = factory.BookMethod
else:
    BookMethod = TMVA.Factory.BookMethod
BookMethod(
Exemplo n.º 17
0
def main():
    print "\n", "=" * 80
    print "\tDESY 2017 - classification with TMVA"
    print "=" * 80

    # summary root file
    summaryFilename = 'TMVA.root'

    # results directory
    resultsDir = 'results'
    os.system('mkdir -p %s' % resultsDir)

    #------------------------------------------------------------------
    # get signal file and associated Root tree
    sigFilename = "../public/ntuple_HZZ4L.root"
    sigFile, sigTree = getTree(sigFilename)

    # get background file and associated Root tree
    bkgFilename = "../public/ntuple_ZZ4L.root"
    bkgFile, bkgTree = getTree(bkgFilename)

    #------------------------------------------------------------------
    # create a factory for booking machine learning methods
    #------------------------------------------------------------------
    outputFile = TFile("TMVA.root", "recreate")
    options = '''
    !V
    Color
    !Silent
    DrawProgressBar
    AnalysisType=Classification
    Transformations=I;D
    '''
    factory = TMVA.Factory("Z1massZ2mass", outputFile, formatOptions(options))

    #------------------------------------------------------------------
    # set up data set for training and testing
    #------------------------------------------------------------------
    dataLoader = TMVA.DataLoader(resultsDir)

    # define variables to be used
    dataLoader.AddVariable("Z1mass", 'D')
    dataLoader.AddVariable("Z2mass", 'D')

    # define from which trees data are to be taken
    # from and the global weights to be assigned to
    # the training data
    sigWeight = 1.0
    dataLoader.AddSignalTree(sigTree, sigWeight)
    dataLoader.SetSignalWeightExpression("weight")

    bkgWeight = 1.0
    dataLoader.AddBackgroundTree(bkgTree, bkgWeight)
    dataLoader.SetBackgroundWeightExpression("weight")

    # you can apply cuts, if needed
    cut = TCut("")
    options = '''
    SplitMode=Random
    NormMode=EqualNumEvents
    nTrain_Signal=2500
    nTest_Signal=2500
    nTrain_Background=2500
    nTest_Background=2500
    !V 
    '''
    dataLoader.PrepareTrainingAndTestTree(
        cut,  # signal cut
        cut,  # background cut
        formatOptions(options))

    #------------------------------------------------------------------
    # ok, almost done, define machine learning methods to be run
    #------------------------------------------------------------------

    options = '''
    !H
    !V
    VarTransform=None
    '''
    factory.BookMethod(dataLoader, TMVA.Types.kFisher, "Fisher",
                       formatOptions(options))

    options = '''
    !H
    !V
    BoostType=AdaBoost
    NTrees=200
    nEventsMin=100
    nCuts=50
    '''
    factory.BookMethod(dataLoader, TMVA.Types.kBDT, "BDT",
                       formatOptions(options))

    options = '''
    !H
    !V
    NCycles=500
    VarTransform=N
    HiddenLayers=5
    TrainingMethod=BFGS
    '''
    factory.BookMethod(dataLoader, TMVA.Types.kMLP, "MLP",
                       formatOptions(options))

    #------------------------------------------------------------------
    # ok, let's go!
    #------------------------------------------------------------------
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    outputFile.Close()
Exemplo n.º 18
0
        'BDTF': 0,
        'MLP': 0,
        'MLPBFGS': 0,
        'MLPBNN': 0,
        'CFMlpANN': 0,
        'TMlpANN': 0
    }

    factory = t.Factory(
        'vbf_bdt_combined_james_current', outputFile,
        '!V:!Silent:Color:DrawProgressBar:Transformations=I;D;G,D:AnalysisType=Classification'
    )
    factory.Print()

    t.gConfig().GetIONames().fWeightFileDir = outputWeightsDir
    dataloader = t.DataLoader(".")

    dataloader.AddVariable('dijetDEta', 'dijetDEta', 'dijetDEta', 'F')
    dataloader.AddVariable('dijetDPhi', 'dijetDPhi', 'dijetDPhi', 'F')
    dataloader.AddVariable('llgJJDPhi', 'llgJJDPhi', 'llgJJDPhi', 'F')
    dataloader.AddVariable('jPhotonDRMin', 'jPhotonDRMin', 'jPhotonDRMin', 'F')
    dataloader.AddVariable('ptt', 'ptt', 'ptt', 'F')
    dataloader.AddVariable('jetOnePt', 'jetOnePt', 'jetOnePt', 'F')
    dataloader.AddVariable('jetTwoPt', 'jetTwoPt', 'jetTwoPt', 'F')
    dataloader.AddVariable('kin_bdt_james', 'kin_bdt_james', 'kin_bdt_james',
                           'F')
    dataloader.AddVariable('vbfPtBalance', 'vbfPtBalance', 'vbfPtBalance', 'F')
    dataloader.AddVariable('photonZepp', 'photonZepp', 'photonZepp', 'F')

    # trees for training
    inputFile = r.TFile('{0}/output_combined.root'.format(inputFilesDir))
Exemplo n.º 19
0
#print("Input file: {}".format(INPUTFILE))
# Read the optimized variable list back from the HPO summary file: every
# line after the "Variable List:" marker names one input variable.
READ = False
with open(dataset + "/optimize_" + outf_key + "/varsListHPO.txt") as file:
    for line in file.readlines():
        if READ == True:
            varList.append(str(line).strip())
        if "Variable List:" in line: READ = True

numVars = len(varList)

# ROOT file that collects the TMVA optimization output.
outputfile = TFile(
    dataset + "/weights/TMVAOptimization_" + str(numVars) + "vars.root",
    "RECREATE")

loader = TMVA.DataLoader(dataset + "/optimize_" + outf_key)

# All optimized variables are registered as floats.
for var in varList:
    loader.AddVariable(var, "", "", "F")

# add signal to loader
# (script fragment: the 2018 branch continues past this excerpt)
if year == 2017:
    for i in range(len(varsList.sig2017_1)):
        sig_list.append(TFile.Open(inputDir + varsList.sig2017_1[i]))
        sig_trees_list.append(sig_list[i].Get("ljmet"))
        sig_trees_list[i].GetEntry(0)
        loader.AddSignalTree(sig_trees_list[i], 1)
elif year == 2018:
    for i in range(len(varsList.sig2018_1)):
        sig_list.append(TFile.Open(inputDir + varsList.sig2018_1[i]))
        sig_trees_list.append(sig_list[i].Get("ljmet"))
Exemplo n.º 20
0
# Notebook-style TMVA classification setup: create the output file and the
# factory, declare the input variables, then open the signal/background
# samples from EOS.
import ROOT
from ROOT import TMVA, TFile, TTree, TCut, TString

# In[2]:

outputFile = TFile("TMVA.root", 'RECREATE')
ROOT.TMVA.Tools.Instance()

factory = TMVA.Factory(
    'TMVAClassification', outputFile,
    '!V:!Silent:Color:DrawProgressBar:Transformations=G:AnalysisType=Classification'
)

# In[3]:

loader = TMVA.DataLoader("dataset_cv")
# AddVariable(expression, title, unit, type): the unit string is the third
# positional argument and the type character the fourth.
loader.AddVariable("Pt", "P_{T}", "GeV", 'D')
loader.AddVariable("metE", "E^{miss}_{T}", "GeV", 'D')
# BUG FIX: the unit was omitted here, so 'D' was consumed as the *unit* and
# the variable silently fell back to the default type 'F'. Pass an empty
# unit so the variable is a double like its siblings.
loader.AddVariable("dPhi", "d#phi", "", 'D')
loader.AddVariable("Mt", "M_{T}", "GeV", 'D')

# In[4]:

# Keep the TFile handles (f1, f2) alive while the trees are in use.
f1 = ROOT.TFile.Open('/eos/user/g/gtolkach/signal_event_with_cuts_plus.root')
f2 = ROOT.TFile.Open(
    '/eos/user/g/gtolkach/beackground_event_with_cuts_plus.root')

signal = f1.Get('NOMINAL')
background = f2.Get('NOMINAL')

# In[5]:
Exemplo n.º 21
0
)

# Copy the data (background) and MC (signal) DecayTrees in full;
# CopyTree("") duplicates every entry so the originals stay untouched.
print ' '
print 'Copying data DecayTree to TreeB ...'
TreeB = tdata.CopyTree("")
print 'Data DecayTree copied to TreeB'
print ' '

print ' '
print 'Copying MC DecayTree to TreeS ...'
TreeS = tmc.CopyTree("")
print 'MC DecayTree copied to TreeS'
print ' '

dataloader = TMVA.DataLoader(
    "dataset1"
)  # xml weights in dataset1/weights/TMVAClassification_BDT.weights.xml

# Training variables, all doubles. The ":=" entries are derived variables
# computed on the fly from pairs of branches.
dataloader.AddVariable("Bs_PT", "D")
dataloader.AddVariable("Kst_PT", "D")
dataloader.AddVariable("Kstb_PT", "D")
dataloader.AddVariable("max_Kp_Km_PT   := max(Kp_PT,Km_PT)", "D")
dataloader.AddVariable("min_Kp_Km_PT   := min(Kp_PT,Km_PT)", "D")
dataloader.AddVariable("max_pip_pim_PT := max(pip_PT,pim_PT)", "D")
dataloader.AddVariable("min_pip_pim_PT := min(pip_PT,pim_PT)", "D")
dataloader.AddVariable("Bs_DIRA_OWNPV", "D")
dataloader.AddVariable("Bs_ENDVERTEX_CHI2", "D")
dataloader.AddVariable("Bs_LOKI_ETA", "D")
dataloader.AddVariable("Kst_LOKI_ETA", "D")
dataloader.AddVariable("Kstb_LOKI_ETA", "D")
dataloader.AddVariable("max_Kp_Km_ETA    := max(Kp_LOKI_ETA,Km_LOKI_ETA)", "D")
Exemplo n.º 22
0
def main():
    """Train a multiclass Keras DNN through TMVA/PyKeras to separate
    ttH from the ttW, ttZ and ttJets backgrounds.

    All configuration -- sample paths, activation, number of hidden
    layers, variable transformation, variable-list JSON, learning rate
    and epoch count -- comes from the command line.
    """

    usage = 'usage: %prog [options]'
    parser = optparse.OptionParser(usage)
    parser.add_option(
        '-s',
        '--signal_sample',
        dest='input_file_name_signal',
        help='signal sample path',
        default=
        'samples/samples-NJet_geq_3/MVATraining/ttHnobb_TrainMVANoCutJetN.root',
        type='string')
    parser.add_option(
        '-x',
        '--bckg1_sample',
        dest='input_file_name_ttJets',
        help='background sample 1 path',
        default=
        'samples/samples-NJet_geq_3/MVATraining/ttJets_TrainMVANoCutJetN.root',
        type='string')
    parser.add_option(
        '-y',
        '--bckg2_sample',
        dest='input_file_name_ttW',
        help='background sample 2 path',
        default=
        'samples/samples-NJet_geq_3/MVATraining/ttWJets_TrainMVANoCutJetN.root',
        type='string')
    parser.add_option(
        '-z',
        '--bckg3_sample',
        dest='input_file_name_ttZ',
        help='background sample 3 path',
        default=
        'samples/samples-NJet_geq_3/MVATraining/ttZJets_TrainMVANoCutJetN.root',
        type='string')
    parser.add_option('-a',
                      '--activation',
                      dest='activation_function',
                      help='activation function',
                      default='relu',
                      type='string')
    parser.add_option('-l',
                      '--hidden_layers',
                      dest='number_of_hidden_layers',
                      help='number of hidden layers',
                      default='2',
                      type='int')
    parser.add_option('-t',
                      '--var_transform',
                      dest='var_transform_name',
                      help='transformation used on input variables',
                      default='None',
                      type='string')
    parser.add_option('-j',
                      '--json',
                      dest='json',
                      help='json file with list of variables',
                      default=None,
                      type='string')
    parser.add_option('-r',
                      '--learning_rate',
                      dest='learning_rate',
                      help='learning rate',
                      default=0.008,
                      type='float')
    # NOTE(review): num_epochs is declared type='string' (not int); it is
    # only interpolated into TMVA option strings below, so this works, but
    # it reads like an oversight.
    parser.add_option('-n',
                      '--num_epochs',
                      dest='num_epochs',
                      help='number of epochs',
                      default=10,
                      type='string')

    (opt, args) = parser.parse_args()

    number_of_hidden_layers = opt.number_of_hidden_layers
    activation_function = opt.activation_function
    var_transform_name = opt.var_transform_name
    num_epochs = opt.num_epochs
    # NOTE(review): opt.json defaults to None, so running without -j makes
    # open() raise TypeError here -- the option is effectively mandatory.
    jsonFile = open(opt.json, 'r')
    new_variable_list = json.load(jsonFile, encoding='utf-8').items()
    learning_rate = opt.learning_rate
    layer_nodes = 40

    # Setup TMVA interface to use Keras
    TMVA.Tools.Instance()
    TMVA.PyMethodBase.PyInitialize()

    # Comma-separated transform lists are joined with '+' for use in
    # directory names (':' and ',' are TMVA option separators).
    if ',' in var_transform_name:
        var_transform_name_list = var_transform_name.split(',')
        new_var_transform_name = '+'.join(var_transform_name_list)
        print 'new_var_transform_name: ', new_var_transform_name
    else:
        print 'var_transform_name = ', var_transform_name
        new_var_transform_name = var_transform_name
        print 'new_var_transform_name: ', new_var_transform_name

    # Count the input variables; the count is baked into the output paths.
    num_inputs = 0
    for key, value in new_variable_list:
        num_inputs = num_inputs + 1
    print 'num inputs = ', str(num_inputs)
    classifier_parent_dir = 'DNN_noCutJetN_%sVars_%sHLs_%s_%s-VarTrans_%s-learnRate_%s-epochs-%s-nodes' % (
        str(num_inputs), str(number_of_hidden_layers), activation_function,
        new_var_transform_name, str(learning_rate), num_epochs,
        str(layer_nodes))
    classifier_samples_dir = classifier_parent_dir + "/outputs"
    if not os.path.exists(classifier_samples_dir):
        os.makedirs(classifier_samples_dir)

    output_file_name = '%s/%s.root' % (classifier_samples_dir,
                                       classifier_parent_dir)
    output_file = TFile.Open(output_file_name, 'RECREATE')

    # 'AnalysisType' is where one defines what kind of analysis you're doing e.g. multiclass, Classification ....
    # VarTransform: Decorrelation, PCA-transformation, Gaussianisation, Normalisation (for all classes if none is specified).
    # When transformation is specified in factory object, the transformation is only used for informative purposes (not used for classifier inputs).
    # Distributions can be found in output to see how variables would look if transformed.
    factory_name = 'Factory_%s' % (classifier_parent_dir)
    factory_string = '!V:!Silent:Color:DrawProgressBar:Transformations=%s:AnalysisType=multiclass' % var_transform_name
    factory = TMVA.Factory(factory_name, output_file, factory_string)

    #Load data
    # One 'syncTree' per process; the TFile handles stay in scope so the
    # trees remain readable by TMVA.
    input_file_name_signal = opt.input_file_name_signal
    data_signal = TFile.Open(input_file_name_signal)
    signal = data_signal.Get('syncTree')

    input_file_name_ttJets = opt.input_file_name_ttJets
    data_bckg_ttJets = TFile.Open(input_file_name_ttJets)
    background_ttJets = data_bckg_ttJets.Get('syncTree')

    input_file_name_ttW = opt.input_file_name_ttW
    data_bckg_ttW = TFile.Open(input_file_name_ttW)
    background_ttW = data_bckg_ttW.Get('syncTree')

    input_file_name_ttZ = opt.input_file_name_ttZ
    data_bckg_ttZ = TFile.Open(input_file_name_ttZ)
    background_ttZ = data_bckg_ttZ.Get('syncTree')

    # Declare a dataloader interface
    dataloader_name = classifier_parent_dir
    dataloader = TMVA.DataLoader(dataloader_name)

    # Can add selection cuts via:
    # dataloader.AddTree(background_ttJets, 'Background_1', 'myvar > cutBarrelOnly && myEventTypeVar=1', backgroundWeight)

    ### Global event weights ###
    # One named class per process (multiclass training).
    signalWeight = 1.
    backgroundWeight0 = 1.
    backgroundWeight1 = 1.
    backgroundWeight2 = 1.
    dataloader.AddTree(signal, 'ttH', signalWeight)
    dataloader.AddTree(background_ttW, 'ttW', backgroundWeight0)
    dataloader.AddTree(background_ttZ, 'ttZ', backgroundWeight1)
    dataloader.AddTree(background_ttJets, 'ttJets', backgroundWeight2)

    # Register each JSON-listed variable; the branches dict is filled but
    # only branchName's last value survives the loop (unused afterwards).
    branches = {}
    for key, value in new_variable_list:
        dataloader.AddVariable(str(key))
        branches[key] = array('f', [-999])
        print 'variable: ', key
        branchName = ''
        branchName = key
    dataloader.AddSpectator('nEvent', 'F')

    # Nominal event weight:
    # event weight = puWgtNom * trigWgtNom * lepSelEffNom * genWgt * xsecWgt (* 0 or 1 depending on if it passes event selection)

    dataloader.SetWeightExpression("EventWeight", "ttH")
    dataloader.SetWeightExpression("EventWeight", "ttW")
    dataloader.SetWeightExpression("EventWeight", "ttZ")
    dataloader.SetWeightExpression("EventWeight", "ttJets")

    # NormMode: Overall renormalisation of event-by-event weights used in training.
    # "NumEvents" = average weight of 1 per event, independantly renormalised for signal and background.
    # "EqualNumEvents" = average weight of 1 per signal event, sum of weights in background equal to sum of weights for signal.
    #dataloader.PrepareTrainingAndTestTree(TCut(''), 'V:NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:NormMode=EqualNumEvents')
    dataloader.PrepareTrainingAndTestTree(
        TCut(''), 'V:SplitMode=Random:NormMode=EqualNumEvents')

    # Generate model:
    model = Sequential()

    # Add layers to DNN
    '''
    Dense:
    # Number of nodes
    init= # Initialisation
    activation= # Activation
    input_dim= # Shape of inputs (Number of inputs). Argument only needed for first layer.
    '''

    # first hidden layer
    model.add(
        Dense(layer_nodes,
              init='glorot_normal',
              activation=activation_function,
              input_dim=len(new_variable_list)))

    # Randomly set a fraction rate of input units (defined by argument) to 0 at each update during training (helps prevent overfitting).
    #model.add(Dropout(0.2))

    # Hidden layers
    for x in xrange(number_of_hidden_layers):
        model.add(Dense(layer_nodes, activation=activation_function))

    # Output layer
    # softmax ensures output values are in range 0-1. Can be used as predicted probabilities.
    # 'softmax' activation function used in final layer so that the outputs represent probabilities (output is normalised to 1).
    model.add(Dense(4, activation='softmax'))

    # Set loss and optimizer
    # categorical_crossentropy = optimisation algorithm with logarithmic loss function
    # binary_crossentropy
    model.compile(loss='categorical_crossentropy',
                  optimizer=SGD(lr=learning_rate),
                  metrics=[
                      'accuracy',
                  ])

    # Store model in file
    model.save('model.h5')
    model.summary()

    # Book methods
    # Choose classifier and define hyperparameters e.g number of epochs, model filename (as chosen above) etc.
    # VarTransform: Decorrelate, PCA, Gauss, Norm, None.
    # Transformations used in booking are used for actual training.
    logs_dir = classifier_parent_dir + '/logs'
    #factory_string_bookMethod = 'H:!V:VarTransform=%s:FilenameModel=model.h5:NumEpochs=%s:BatchSize=100:Tensorboard=%s' % (var_transform_name, num_epochs, logs_dir)
    factory_string_bookMethod = 'H:!V:VarTransform=%s:FilenameModel=model.h5:NumEpochs=%s:BatchSize=100' % (
        var_transform_name, num_epochs)
    factory.BookMethod(dataloader, TMVA.Types.kPyKeras, "DNN",
                       factory_string_bookMethod)

    # Run training, testing and evaluation
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()
Exemplo n.º 23
0
    # Per-signal output file and classification factory (Gaussianised inputs
    # for the informative transformation plots).
    output = TFile.Open('./outputMVA/' + signalID + out + '.root', 'RECREATE')
    factory = TMVA.Factory(
        'TMVAClassification', output,
        '!V:!Silent:Color:DrawProgressBar:Transformations=G:AnalysisType=Classification'
    )

    # Load data
    # Both samples provide a 'coll' tree; the TFile handles (dataS, dataB)
    # must stay in scope while TMVA reads the trees.
    path = '/home/myamatan/data3/angularAna/MVA_workspace/sampleMake/output/'
    dataS = TFile.Open(path + signalName + '.root')
    signal = dataS.Get('coll')
    dataB = TFile.Open(path + bkgName + '.root')
    background = dataB.Get('coll')

    #dataloader = TMVA.DataLoader('weights')
    dataloader = TMVA.DataLoader('test')
    # Training sizes and variable set depend on the analysis group/regime.
    nSF = 1
    nTrainSig = 40000 * nSF
    nTrainBkg = 40000 * nSF
    nInput = 10
    if gp in {'VBFH', 'VBFHVT'}:

        # Resolved regime: VBF dijet-system kinematics.
        if regime == "Resolved":
            dataloader.AddVariable('Mvbfjj')
            dataloader.AddVariable('Ptvbfjj')
            dataloader.AddVariable('dEtavbfjj')
            dataloader.AddVariable('PtBalance')
            dataloader.AddVariable('MaxEta')
            dataloader.AddVariable('EtaStar')
            #dataloader.AddVariable('dEta_vbfjet1_Vqq')
            #dataloader.AddVariable('dEta_vbfjet2_Vqq')
Exemplo n.º 24
0
    def __init__(self,options):
        """Set up the whole TMVA classification chain from `options`:
        output file, factory, dataloader with `options.lVars` as inputs,
        signal/background skims split on the LV flag, booked methods
        (Likelihood + gradient BDT + PyKeras DNN), and a frozen
        TensorFlow export of the Keras model.
        """
        TMVA.Tools.Instance()
        TMVA.PyMethodBase.PyInitialize()
        gROOT.LoadMacro( "./TMVAGui.C" )
        self._lOutput     = TFile.Open('TMVA.root', 'RECREATE')
        self._lFactory    = TMVA.Factory('TMVAClassification', self._lOutput,'!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification')
        self._lDataLoader = TMVA.DataLoader("dataset")

        # Register every requested training variable as a float.
        for i0 in options.lVars:
            self._lDataLoader.AddVariable(i0,'F')

        self._lDataLoader.Print("all")
        #define signal and background tree based on LV flag
        # With --new, regenerate sig.root/bkg.root from the input Events
        # tree: signal = LV tracks with 1.7 < |eta| < 3.0 and pt > 5;
        # background = !LV (its kinematic cuts are commented out).
        if options.new:
           self._lInputFile  = TFile.Open(options.infile+'.root')
           self._lInputTree  = self._lInputFile.Get("Events")
           self._lSigFile       = TFile.Open("sig.root","RECREATE")
           self._lBkgFile       = TFile.Open("bkg.root","RECREATE")
           self._lSigFile.cd()
           self._lSigTree       = self._lInputTree.CopyTree("LV && std::abs(eta) < 3.0 && std::abs(eta) > 1.7 && pt > 5")
           self._lSigTree.Show(53)
           self._lSigTree.Write()
           self._lSigFile.Close() 
           self._lBkgFile.cd()
           self._lBkgTree       = self._lInputTree.CopyTree("!LV")# && std::abs(eta) < 3.0 && std::abs(eta) > 1.7 && pt > 5")
           self._lBkgTree.Write()
           self._lBkgFile.Close()
        
        # Reopen the (possibly pre-existing) skims read-only for training.
        self._lSigFile    = TFile.Open("sig.root","READ")
        self._lBkgFile    = TFile.Open("bkg.root","READ")
        self._lSigTree    = self._lSigFile.Get("Events")
        self._lBkgTree    = self._lBkgFile.Get("Events")

        self._lDataLoader.AddSignalTree    (self._lSigTree,1.0)
        self._lDataLoader.AddBackgroundTree(self._lBkgTree,1.0)

        nSig = self._lSigTree.GetEntries()
        nBkg = self._lBkgTree.GetEntries()

        # 80% of the signal for training; background train size is 1.2x
        # that (note: sized from nSig, not nBkg).
        nSigTrain = nSig*0.8
        nBkgTrain = nSig*0.8*1.2

        self._lDataLoader.PrepareTrainingAndTestTree(TCut(""),TCut(""),"nTrain_Signal=%i:nTrain_Background=%i:nTest_Signal=%i:nTest_Background=%i:SplitMode=Random:NormMode=NumEvents:!V"%(nSigTrain,nBkgTrain,nSigTrain,nBkgTrain)) 

        # Methods booked with default options; most entries are left
        # commented out so only Likelihood runs from this dict.
        Methods =  {
        #'Variable': TMVA.Types.kVariable,
        
        #'Cuts': TMVA.Types.kCuts,
        'Likelihood': TMVA.Types.kLikelihood,
        #'BDT': TMVA.Types.kBDT
        #'PyRandomForest': TMVA.Types.kPyRandomForest,
        #'MaxMethod': TMVA.Types.kMaxMethod
        }
        '''
        'PDERS': TMVA.Types.kPDERS,
        'HMatrix': TMVA.Types.kHMatrix,
        'Fisher': TMVA.Types.kFisher,
        'KNN': TMVA.Types.kKNN,
        'CFMlpANN': TMVA.Types.kCFMlpANN,
        'TMlpANN': TMVA.Types.kTMlpANN,
        'BDT': TMVA.Types.kBDT,
        'DT': TMVA.Types.kDT,
        'RuleFit': TMVA.Types.kRuleFit,
        'SVM': TMVA.Types.kSVM,
        'MLP': TMVA.Types.kMLP,
        'BayesClassifier': TMVA.Types.kBayesClassifier,
        'FDA': TMVA.Types.kFDA,
        'Boost': TMVA.Types.kBoost,
        'PDEFoam': TMVA.Types.kPDEFoam,
        'LD': TMVA.Types.kLD,
        'Plugins': TMVA.Types.kPlugins,
        'Category': TMVA.Types.kCategory,
        'DNN': TMVA.Types.kDNN,
        'PyRandomForest': TMVA.Types.kPyRandomForest,
        'PyAdaBoost': TMVA.Types.kPyAdaBoost,
        'PyGTB': TMVA.Types.kPyGTB,
        'PyKeras': TMVA.Types.kPyKeras,
        'C50': TMVA.Types.kC50,
        'RSNNS': TMVA.Types.kRSNNS,
        'RSVM': TMVA.Types.kRSVM,
        'RXGB': TMVA.Types.kRXGB,
        'MaxMethod': TMVA.Types.kMaxMethod
        '''

        for m,t in Methods.iteritems():
           self._lFactory.BookMethod( self._lDataLoader, t, m, "" )

        # Gradient-boosted BDT with explicit hyperparameters.
        self._lFactory.BookMethod(self._lDataLoader, TMVA.Types.kBDT, 'BDT', '!H:!V:NTrees=300:MinNodeSize=2.5%:MaxDepth=3:BoostType=Grad:SeparationType=GiniIndex:nCuts=100:UseBaggedBoost=True:PruneMethod=NoPruning')
        #self._lfactory.bookmethod(self._ldataloader, tmva.types.kbdt, 'BDT2', '!h:!v:ntrees=300:minnodesize=2.5%:maxdepth=4:boosttype=AdaBoost:separationtype=crossentropy:ncuts=100:prunemethod=nopruning')

        #self._lfactory.bookmethod(self._ldataloader, tmva.types.kbdt, 'BDT3', '!h:!v:ntrees=300:minnodesize=2.5%:maxdepth=4:boosttype=AdaBoost:separationtype=GiniIndex:ncuts=100:prunemethod=nopruning')
        #self._lFactory.BookMethod( self._lDataLoader, TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" )

        # Small Keras DNN; saved to model.h5 so TMVA's PyKeras can load it.
        model = Sequential()
        model.add(Dense(len(options.lVars), input_dim=len(options.lVars),activation='tanh'))
        model.add(Dense(30,  activation='tanh'))
        model.add(Dense(20,  activation='relu'))
        model.add(Dense(10,  activation='tanh'))
        model.add(Dense(5,   activation='relu'))
        model.add(Dense(2,   activation='sigmoid'))
 
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy',])
        model.save('model.h5')
        model.summary()

        self._lFactory.BookMethod(self._lDataLoader, TMVA.Types.kPyKeras, 'PyKeras', 'H:!V:FilenameModel=model.h5:NumEpochs=10:BatchSize=500')

        # Freeze the Keras graph and export it for standalone TF inference.
        frozen_graph = freeze_session(K.get_session(),
                              output_names=[out.op.name for out in model.outputs])
        tf.train.write_graph(frozen_graph, "h5_files", "tf_model.pb", as_text=False)
 

        self._lFactory.Print("v")
    '!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification')

# Load data
# Muon-ID training samples: one bank file per decay channel, each holding
# a 'PDsecondTree'. The TFile handles must stay in scope while TMVA reads.
dataBs = TFile.Open('bankBsJpsiPhi16.root')
dataBsD0 = TFile.Open('bankBsJpsiPhiDGamma016.root')
dataBu = TFile.Open('bankBuJpsiK16.root')
dataBd = TFile.Open('bankBdJpsiKx16.root')
dataBdNR = TFile.Open('bankBdKxMuMu16.root')

treeBs = dataBs.Get('PDsecondTree')
treeBsD0 = dataBsD0.Get('PDsecondTree')
treeBu = dataBu.Get('PDsecondTree')
treeBd = dataBd.Get('PDsecondTree')
treeBdNR = dataBdNR.Get('PDsecondTree')

dataloader = TMVA.DataLoader('dataset')

# add variables
# Muon-quality discriminating variables; all floats except the integer
# layers-with-hits count (muoLWH).
dataloader.AddVariable('muoPt', 'F')
dataloader.AddVariable('abs(muoEta)', 'F')
dataloader.AddVariable('muoSegmComp', 'F')
dataloader.AddVariable('muoChi2LM', 'F')
dataloader.AddVariable('muoChi2LP', 'F')
dataloader.AddVariable('muoGlbTrackTailProb', 'F')
dataloader.AddVariable('muoIValFrac', 'F')
dataloader.AddVariable('muoLWH', 'I')
dataloader.AddVariable('muoTrkKink', 'F')
dataloader.AddVariable('muoGlbKinkFinderLOG', 'F')
dataloader.AddVariable('muoTimeAtIpInOutErr', 'F')
dataloader.AddVariable('muoOuterChi2', 'F')
dataloader.AddVariable('muoInnerChi2', 'F')
Exemplo n.º 26
0
    def _make_dataloader(self):
        '''Make the DataLoader for training.

        Builds self.dataloader from the instance configuration: registers
        variables and spectators, then either (a) when self.trainingcut is
        set, copies cut-filtered training/testing trees into a temporary
        ROOT file and registers each copy explicitly, or (b) registers the
        original trees and lets TMVA do the train/test split.
        Returns True on completion.
        '''

        # Load the data.
        self.dataloader = TMVA.DataLoader(self.name)

        # Add training variables.
        # Each entry may be a bare expression or a full AddVariable
        # argument tuple (expression, title, unit, type).
        for var in self.variables:
            if not isinstance(var, (tuple, list)):
                var = (var, )
            try:
                self.dataloader.AddVariable(*var)
            except:
                print 'Failed to call dataloader.AddVariable with args', var
                raise

        # Add spectator variables.
        for var in self.spectators:
            if not isinstance(var, (tuple, list)):
                var = (var, )
            try:
                self.dataloader.AddSpectator(*var)
            except:
                print 'Failed to call dataloader.AddSpectator with args', var
                raise

        # Register trees.
        # If we have explicit cuts for training and testing, we need to copy the TTrees first,
        # applying these cuts.
        if self.trainingcut:
            # Remember the current ROOT directory so it can be restored
            # after the temporary file changes it.
            pwd = ROOT.gROOT.CurrentDirectory()
            self.tmpfile = ROOT.TFile.Open(
                os.path.abspath('DataLoader_' + random_string() + '.root'),
                'recreate')
            self.tmpfile.cd()
            signal_usedleaves, background_usedleaves = self.used_leaves()
            usedleaves = {
                'Signal': signal_usedleaves,
                'Background': background_usedleaves
            }
            aliases = {
                'Signal': get_aliases(self.signaltree),
                'Background': get_aliases(self.backgroundtree)
            }
            # For each class and each split (training/testing), copy the
            # source tree with the combined class+split cut applied, and
            # record the AddTree arguments for after the file is reopened.
            addtreeargs = []
            for name in 'Signal', 'Background':
                lname = name.lower()
                namecut = getattr(self, lname + 'cut')
                for tname, ttype, cut in ('Training', TMVA.Types.kTraining,
                                          self.trainingcut), (
                                              'Testing', TMVA.Types.kTesting,
                                              self.testingcut):
                    classname = self.name + '_' + name + '_' + tname + '_'
                    cut = AND(*filter(None, [namecut, cut]))
                    tree = getattr(self, lname + 'tree')
                    seltree, copyfriends = copy_tree(
                        tree,
                        selection=cut,
                        keepbranches=usedleaves[name],
                        rename=(
                            lambda name: classname + name.replace('/', '_')),
                        write=True,
                        returnfriends=True)
                    addtreeargs.append((seltree.GetName(), name,
                                        getattr(self, lname + 'globalweight'),
                                        ROOT.TCut(''), ttype))
                weight = getattr(self, lname + 'weight')
                if weight:
                    self.dataloader.SetWeightExpression(weight, name)

            # Reopen the temp file read-only, restore the aliases on each
            # copied tree, and hand the trees to the dataloader.
            fname = self.tmpfile.GetName()
            self.tmpfile.Close()
            self.tmpfile = ROOT.TFile.Open(fname)
            for args in addtreeargs:
                tree = self.tmpfile.Get(args[0])
                _aliases = aliases['Signal'] if 'Signal' in args else aliases[
                    'Background']
                for name, alias in _aliases.items():
                    tree.SetAlias(name, alias)
                self.dataloader.AddTree(tree, *args[1:])
            self.dataloader.GetDataSetInfo().SetSplitOptions(
                str(self.splitoptions))
            if pwd:
                pwd.cd()

        else:
            # No explicit split cuts: register the original trees and let
            # PrepareTrainingAndTestTree handle the split.
            self.dataloader.AddSignalTree(self.signaltree,
                                          self.signalglobalweight)
            self.dataloader.AddBackgroundTree(self.backgroundtree,
                                              self.backgroundglobalweight)

            # Set weight expressions.
            if self.signalweight:
                self.dataloader.SetSignalWeightExpression(self.signalweight)
            if self.backgroundweight:
                self.dataloader.SetBackgroundWeightExpression(
                    self.backgroundweight)

            # Prepare the training.
            self.dataloader.PrepareTrainingAndTestTree(
                ROOT.TCut(self.signalcut), ROOT.TCut(self.backgroundcut),
                str(self.splitoptions))
        return True
Exemplo n.º 27
0
def main():
    """Train a Keras DNN through TMVA on the TTTT signal vs. SM backgrounds.

    Command-line options select the method, input file/trees, variable-list
    key, tree count/depth, mass point and output file; see usage().
    """
    try:
        # retrieve command line options
        shortopts  = "m:i:n:d:k:l:t:o:vh?"
        longopts   = ["methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=", "varListKey=", "inputtrees=", "outputfile=", "verbose", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )

    except getopt.GetoptError:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    # Defaults, overridden by the command line below.
    infname     = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname    = DEFAULT_OUTFNAME
    methods     = DEFAULT_METHODS
    nTrees      = DEFAULT_NTREES
    mDepth      = DEFAULT_MDEPTH
    mass        = DEFAULT_MASS
    varListKey  = DEFAULT_VARLISTKEY
    verbose     = True
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-d", "--maxDepth"):
            mDepth = a
        elif o in ("-k", "--mass"):
            mass = a
        elif o in ("-l", "--varListKey"):
            varListKey = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-n", "--nTrees"):
            nTrees = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            # Expect exactly two tree names; after the reverse sort the
            # first is taken as signal and the second as background.
            a.strip()
            trees = a.rsplit( ' ' )
            trees.sort()
            trees.reverse()
            if len(trees)-trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    varList = varsList.varList[varListKey]
    nVars = str(len(varList))+'vars'
    Note=''+methods+'_'+varListKey+'_'+nVars+'_mDepth'+mDepth
    # NOTE(review): this unconditionally overrides any -o/--outputfile
    # value parsed above -- confirm that is intended.
    outfname = "dataset/weights/TMVA_"+Note+".root"
    # Print methods
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Output file
    outputFile = TFile( outfname, 'RECREATE' )
    factory = TMVA.Factory( "TMVAClassification", outputFile,
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;:AnalysisType=Classification" )

    factory.SetVerbose( verbose )
    # Keep the weight XML files in a per-configuration subdirectory.
    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/"+Note

    dataloader = TMVA.DataLoader('dataset')

    # Jet multiplicity is an integer; everything else is a float.
    for iVar in varList:
        if iVar[0]=='NJets_singleLepCalc': dataloader.AddVariable(iVar[0],iVar[1],iVar[2],'I')
        else: dataloader.AddVariable(iVar[0],iVar[1],iVar[2],'F')

    # Signal: the TTTT sample's "ljmet" tree.
    inputDir = varsList.inputDir
    infname = "TTTT_TuneCP5_PSweights_13TeV-amcatnlo-pythia8_hadd.root"
    iFileSig = TFile.Open(inputDir+infname)
    sigChain = iFileSig.Get("ljmet")
    dataloader.AddSignalTree(sigChain)

    # Backgrounds: keep the TFile handles alive while TMVA reads the
    # trees, and skip empty trees.
    bkg_list = []
    bkg_trees_list = []
    for i in range(len(varsList.bkg)):
        bkg_list.append(TFile.Open(inputDir+varsList.bkg[i]))
        print inputDir+varsList.bkg[i]
        bkg_trees_list.append(bkg_list[i].Get("ljmet"))
        bkg_trees_list[i].GetEntry(0)

        if bkg_trees_list[i].GetEntries() == 0:
            continue
        dataloader.AddBackgroundTree( bkg_trees_list[i], 1)

    dataloader.SetSignalWeightExpression( weightStrS )
    dataloader.SetBackgroundWeightExpression( weightStrB )

    mycutSig = TCut( cutStrS )
    mycutBkg = TCut( cutStrB )

    dataloader.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    # Gaussianise the inputs inside the method; model file written below.
    kerasSetting = 'H:!V:VarTransform=G:FilenameModel=model.h5:NumEpochs=10:BatchSize=1028'

    # Four hidden layers of 100 ReLU units, 2-node output.
    model = Sequential()
    # BUG FIX: the input dimension was hard-coded to 53, which only matches
    # one particular variable list; size it from the actual list instead so
    # any varListKey works.
    model.add(Dense(100, activation='relu', input_dim=len(varList)))
    model.add((Dense(100, activation="relu")))
    model.add((Dense(100, activation="relu")))
    model.add((Dense(100, activation="relu")))
    model.add((Dense(2, activation="sigmoid")))

    # Set loss and optimizer
    model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy',])

    # Store model to file
    model.save('model.h5')
    model.summary()

    if methods=="Keras": factory.BookMethod(dataloader, TMVA.Types.kPyKeras, "PyKeras",kerasSetting)

    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    outputFile.Close()
    # save plots:
    print "DONE"
Exemplo n.º 28
0
def main():
    """Run a TMVA classification job configured from the command line.

    Parses ``-m/-i/-t/-o/-v`` (and their long forms), builds a TMVA
    factory + data loader, registers signal/background trees, books the
    requested MVA methods, then trains, tests and evaluates them.  The
    results are written to a ROOT file and the TMVA GUI is opened.

    Relies on module-level names defined elsewhere in this file:
    ``usage()`` and the ``DEFAULT_*`` constants.  Exits the process via
    ``sys.exit`` on bad options or an unsupported ROOT version.
    """

    try:
        # retrieve command line options
        shortopts  = "m:i:t:o:vh?"
        longopts   = ["methods=", "inputfile=", "inputtrees=", "outputfile=", "verbose", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infname     = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname    = DEFAULT_OUTFNAME
    methods     = DEFAULT_METHODS
    verbose     = False
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            # BUG FIX: str.strip() returns a new string; the original code
            # discarded the result, so surrounding whitespace was never removed.
            a = a.strip()
            trees = a.rsplit( ' ' )
            # sort + reverse so the two tree names come out in a fixed order
            # (signal first, background second) regardless of input order
            # NOTE(review): this relies on the lexicographic order of the
            # tree names -- confirm it holds for the trees actually used
            trees.sort()
            trees.reverse()
            if len(trees)-trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # Print methods
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Import TMVA classes from ROOT
    from ROOT import TMVA
    TMVA.Tools.Instance()

    # Output file
    outputFile = TFile( outfname, 'RECREATE' )

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory( "TMVAClassification", outputFile,
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )

    # Set verbosity
    factory.SetVerbose( verbose )

    dataloader = TMVA.DataLoader("dataset")

    # If you wish to modify default settings
    # (please check "src/Config.h" to see all available global options)
    #    gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    #    gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    dataloader.AddVariable( "myvar1 := var1+var2", 'F' )
    dataloader.AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' )
    dataloader.AddVariable( "var3",                "Variable 3", "units", 'F' )
    dataloader.AddVariable( "var4",                "Variable 4", "units", 'F' )

    # You can add so-called "Spectator variables", which are not used in the MVA training,
    # but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
    # input variables, the response values of all trained MVAs, and the spectator variables
    #dataloader.AddSpectator( "spec1 := var1*2",  "Spectator 1", "units", 'F' );
    #dataloader.AddSpectator( "spec2 := var1*3",  "Spectator 2", "units", 'F' );

    # Read input data; fetch the standard TMVA example file if it is missing locally
    if gSystem.AccessPathName( infname ) != 0: gSystem.Exec( "wget http://root.cern.ch/files/" + infname )

    # renamed from `input` to avoid shadowing the Python builtin
    input_file = TFile.Open( infname )

    # Get the signal and background trees for training
    signal      = input_file.Get( treeNameSig )
    background  = input_file.Get( treeNameBkg )

    # Global event weights (see below for setting event-wise weights)
    signalWeight     = 1.0
    backgroundWeight = 1.0

    # ====== register trees ====================================================
    #
    # the following method is the preferred one:
    # you can add an arbitrary number of signal or background trees
    dataloader.AddSignalTree    ( signal,     signalWeight     )
    dataloader.AddBackgroundTree( background, backgroundWeight )

    # To give different trees for training and testing, do as follows:
    #    factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    factory.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )

    # Use the following code instead of the above two or four lines to add signal and background
    # training and test events "by hand"
    # NOTE that in this case one should not give expressions (such as "var1+var2") in the input
    #      variable definition, but simply compute the expression before adding the event
    #
    #    # --- begin ----------------------------------------------------------
    #
    # ... *** please lookup code in TMVA/macros/TMVAClassification.C ***
    #
    #    # --- end ------------------------------------------------------------
    #
    # ====== end of register trees ==============================================

    # Set individual event weights (the variables must exist in the original TTree)
    #    for signal    : factory.SetSignalWeightExpression    ("weight1*weight2");
    #    for background: factory.SetBackgroundWeightExpression("weight1*weight2");
    dataloader.SetBackgroundWeightExpression( "weight" )

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    mycutSig = TCut( "" )
    mycutBkg = TCut( "" )

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    dataloader.PrepareTrainingAndTestTree( mycutSig, mycutBkg,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethoCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "Cuts",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" )

    if "CutsD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsD",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" )

    if "CutsPCA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsPCA",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" )

    if "CutsGA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsGA",
                            "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" )

    if "CutsSA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCuts, "CutsSA",
                            "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" )

    # Likelihood ("naive Bayes estimator")
    if "Likelihood" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "Likelihood",
                            "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" )

    # Decorrelated likelihood
    if "LikelihoodD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodD",
                            "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" )

    # PCA-transformed likelihood
    if "LikelihoodPCA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodPCA",
                            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" )

    # Use a kernel density estimator to approximate the PDFs
    if "LikelihoodKDE" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodKDE",
                            "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" )

    # Use a variable-dependent mix of splines and kernel density estimator
    if "LikelihoodMIX" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLikelihood, "LikelihoodMIX",
                            "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" )

    # Test the multi-dimensional probability density estimator
    # here are the options strings for the MinMax and RMS methods, respectively:
    #      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
    #      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
    if "PDERS" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERS",
                            "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" )

    if "PDERSD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERSD",
                            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" )

    if "PDERSPCA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kPDERS, "PDERSPCA",
                             "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" )

    # Multi-dimensional likelihood estimator using self-adapting phase-space binning
    if "PDEFoam" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kPDEFoam, "PDEFoam",
                            "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" )

    if "PDEFoamBoost" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kPDEFoam, "PDEFoamBoost",
                            "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" )

    # K-Nearest Neighbour classifier (KNN)
    if "KNN" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kKNN, "KNN",
                            "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" )

    # H-Matrix (chi2-squared) method
    if "HMatrix" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kHMatrix, "HMatrix", "!H:!V" )

    # Linear discriminant (same as Fisher discriminant)
    if "LD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )

    # Fisher discriminant (same as LD)
    if "Fisher" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )

    # Fisher with Gauss-transformed input variables
    if "FisherG" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss" )

    # Composite classifier: ensemble (tree) of boosted Fisher classifiers
    if "BoostedFisher" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFisher, "BoostedFisher",
                            "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" )

    # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if "FDA_MC" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MC",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

    if "FDA_GA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_GA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );

    if "FDA_SA" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_SA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

    if "FDA_MT" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

    if "FDA_GAMT" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_GAMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

    if "FDA_MCMT" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kFDA, "FDA_MCMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

    # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if "MLP" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" )

    if "MLPBFGS" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" )

    if "MLPBNN" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators

    # CF(Clermont-Ferrand)ANN
    if "CFMlpANN" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ) # n_cycles:#nodes:#nodes:...

    # Tmlp(Root)ANN
    if "TMlpANN" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ) # n_cycles:#nodes:#nodes:...

    # Support Vector Machine
    if "SVM" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" )

    # Boosted Decision Trees
    if "BDTG" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTG",
                            "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" )

    if "BDT" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDT",
                           "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20" )

    if "BDTB" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTB",
                           "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" )

    if "BDTD" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kBDT, "BDTD",
                           "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" )

    # RuleFit -- TMVA implementation of Friedman's method
    if "RuleFit" in mlist:
        factory.BookMethod( dataloader, TMVA.Types.kRuleFit, "RuleFit",
                            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" )

    # --------------------------------------------------------------------------------------------------

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    factory.TrainAllMethods()

    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"

    # open the GUI for the result macros
    TMVA.TMVAGui(outfname)

    # keep the ROOT thread running
    gApplication.Run()
# Initialise the TMVA tools and the Python-method bridge (required before
# any PyKeras/PyMVA method can be booked).
TMVA.Tools.Instance()
TMVA.PyMethodBase.PyInitialize()

# Output file for the TMVA results and the classification factory.
output = TFile.Open('TMVA_CNN_PyKeras.root', 'RECREATE')
factory = TMVA.Factory(
    'TMVAClassification', output,
    '!V:!Silent:Color:DrawProgressBar:Transformations=None:AnalysisType=Classification'
)

# ---- Load the input ROOT file containing the image samples ----
# NOTE(review): hard-coded absolute path -- only works on the author's machine.
data = TFile.Open("/home/jui/Desktop/tmva/sample_images_32x32.root")
# print(data.ls())
# ';2' selects an explicit TTree cycle number inside the file.
signal = data.Get('sig_tree;2')
background = data.Get('bkg_tree;2')

dataloader = TMVA.DataLoader('dataset_evaltest')

# One flattened 32x32 image per event (32*32 = 1024 input values).
imgSize = 1024

# Register the whole image array as input variables in one call.
dataloader.AddVariablesArray("vars", imgSize)

# Register trees with unit global event weights, then split into
# training/testing samples (8000 training events per class, random split).
dataloader.AddSignalTree(signal, 1.0)
dataloader.AddBackgroundTree(background, 1.0)
dataloader.PrepareTrainingAndTestTree(
    TCut(''),
    'nTrain_Signal=8000:nTrain_Background=8000:SplitMode=Random:NormMode=NumEvents:!CalcCorrelations:!V'
)

# Generate model

# Define model
Exemplo n.º 30
0
argv = []

# Signal and background simulation files, selected by the efficiency value
# given on the command line (`options` is parsed earlier in this script).
# NOTE(review): hard-coded absolute paths -- only valid on the original host.
inputFile_sig = TFile.Open(
    "/home/net3/afortman/projects/hotpot/oct_sim/efftesting/hazel_both_smearf_1M_35ns_e"
    + str(options.eff) + ".root")
inputFile_bkg = TFile.Open(
    "/home/net3/afortman/projects/hotpot/oct_sim/efftesting/hazel_bkg_smearf_1M_35ns_e"
    + str(options.eff) + ".root")
# Output file for the TMVA training results, tagged with the same efficiency.
outputFile = TFile.Open("TMVAOutput_sigbkg_e" + str(options.eff) + ".root",
                        "RECREATE")

factory = TMVA.Factory(
    "TMVAClassification", outputFile,
    "!V:!Silent:Color:!DrawProgressBar:AnalysisType=Classification")

# One DataLoader (and hence one output dataset directory) per efficiency point.
loader = TMVA.DataLoader("dataset_e" + str(options.eff))

# Event-identification branches kept for reference but excluded from training:
#loader.AddVariable("EventNumHazel",'I')
#loader.AddVariable("EventNumGingko",'I')
#loader.AddVariable("trigger_gingko",'I')
#loader.AddVariable("iroad_x",'I')
#loader.AddVariable("iroad_u",'I')
#loader.AddVariable("iroad_v",'I')
# Training inputs: integer hit flags, one per detector plane.
loader.AddVariable("Hit_plane0", 'I')
loader.AddVariable("Hit_plane1", 'I')
loader.AddVariable("Hit_plane2", 'I')
loader.AddVariable("Hit_plane3", 'I')
loader.AddVariable("Hit_plane4", 'I')
loader.AddVariable("Hit_plane5", 'I')
loader.AddVariable("Hit_plane6", 'I')
loader.AddVariable("Hit_plane7", 'I')