def checkRootVer():
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)
Example #2
#!/usr/bin/env python


#example: python MyTMVAClassification.py   truth  akt10truth_trim_pt,akt10truth_trim_mass "pt>0,pt<1000,mass>0,mass<200,pass_selection==1"  "truth_tau2_WTA,truth_tau1_WTA"  KNN

# Standard python import
import sys    # exit
import time   # time accounting
import getopt # command line parser

# Import ROOT classes
from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

# check ROOT version, give alarm if 5.18 
if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
    print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
    print "*** does not run properly (function calls with enums in the argument are ignored)."
    print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
    print "*** or use another ROOT version (e.g., ROOT 5.19)."
    sys.exit(1)

# Import TMVA classes from ROOT
from ROOT import TMVA



# Print usage help
def usage():
    print " "
    print "Usage: python %s [options]"
def main():

    try:
        # retrieve command line options
        shortopts = "m:p:M:C:B:i:t:T:o:vh?"
        opts, args = getopt.getopt(sys.argv[1:], shortopts)

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infname = DEFAULT_INFNAME
    methods = DEFAULT_METHODS
    mass = DEFAULT_MASS
    cat = DEFAULT_CAT
    phil = DEFAULT_PHIL
    outfname = DEFAULT_OUTFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    bkg_method = DEFAULT_BACKGROUND
    width = DEFAULT_WIDTH
    verbose = False
    test = False
    testType = DEFAULT_TEST_TYPE
    methTest = False
    testMethod = DEFAULT_TEST_METHOD
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-M", "--mass"):
            mass = int(a)
        elif o in ("-C", "--cat"):
            cat = int(a)
        elif o in ("-p", "--philosophy"):
            phil = a
        elif o in ("-B", "--background"):
            bkg_method = int(a)
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-T", "--test"):
            test = True
            temp = a.split('_')
            if len(temp) == 1:
                testType = temp[0]
                if testType != "ada" or testType != "grad":
                    print "ERROR: testType must be ada or grad not", testType
            elif len(temp) - temp.count('') == 2:
                methTest = True
                testType = temp[0]
                testMethod = temp[1]
                checkTestType(testType, testMethod)
            else:
                print "ERROR: need to give one or two test options"
                print temp
                sys.exit(1)
        elif o in ("-t", "--inputtrees"):
            a = a.strip()
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    if (width == 0.02): width_str = "_2pt"
    elif (width == 0.07): width_str = "_7pt"
    else: width_str = ""  # fall back to no width tag for other widths
    mass_str = "_%3.1f" % mass
    cat_str = "_" + str(cat)
    if cat < 0:
        cat_str = "_all"
    if test:
        if methTest:
            outfname = "TMVAStuff/" + outfname + "_" + phil + cat_str + "_test_" + testType + "_" + testMethod + ".root"
        else:
            outfname = "TMVAStuff/" + outfname + "_" + phil + cat_str + "_test_" + testType + ".root"
    else:
        outfname = "TMVAStuff/" + outfname + "_" + phil + cat_str + ".root"

    #treeNameSig = treeNameSig + mass_str
    #treeNameBkg = treeNameBkg + mass_str

    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library)
    # load also GUI
    gROOT.SetMacroPath("/vols/cms03/mk1009/h2g/MVA/tmvaMacros/")
    gROOT.Macro("/vols/cms03/mk1009/h2g/MVA/tmvaMacros/TMVAlogon.C")
    gROOT.LoadMacro("/vols/cms03/mk1009/h2g/MVA/tmvaMacros/TMVAGui.C")

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for
    # more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"
    )

    # Set verbosity
    factory.SetVerbose(verbose)

    factory.AddVariable("bdtoutput", "BDT Output", 'F')
    factory.AddVariable("deltaMOverM", "#DeltaM / M_{Hypth}.", 'F')

    inputFile = TFile.Open(infname)

    # Get the signal and background trees for training
    signal_train = inputFile.Get(treeNameSig + "_train" + mass_str)
    signal_test = inputFile.Get(treeNameSig + "_test" + mass_str)

    background_train = inputFile.Get(treeNameBkg + "_train" + width_str + mass_str)
    background_test = inputFile.Get(treeNameBkg + "_test" + width_str + mass_str)

    # Global event weights (see below for setting event-wise weights)
    signalWeight = 1.0
    backgroundWeight = 1.0

    # ====== register trees ====================================================
    factory.AddSignalTree(signal_train, signalWeight, "train")
    factory.AddBackgroundTree(background_train, backgroundWeight, "train")
    factory.AddSignalTree(signal_test, signalWeight, "test")
    factory.AddBackgroundTree(background_test, backgroundWeight, "test")

    # Set individual event weights (the variables must exist in the original
    # TTree)
    factory.SetBackgroundWeightExpression("wt")
    factory.SetSignalWeightExpression("wt")

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    mycut = TCut("fabs(deltaMOverM)<=" + str(width) +
                 " && bdtoutput > -0.5")  #
    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    factory.PrepareTrainingAndTestTree(
        mycut, mycut,
        "nTrain_Signal=0:nTrain_Background=0:NormMode=NumEvents:!V")
    # Boosted Decision Trees
    # NEW PARAMETERS

    if (not test):
        # Likelihood
        factory.BookMethod(
            TMVA.Types.kLikelihood, "Likelihood" + phil,
            "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50"
        )
        factory.BookMethod(
            TMVA.Types.kLikelihood, "LikelihoodD" + phil,
            "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate"
        )
        #factory.BookMethod( TMVA.Types.kPDERS, "MultiLikelihood"+phil,"!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

        # BDT
        factory.BookMethod(
            TMVA.Types.kBDT, "BDTada" + phil,
            "!H:!V:NTrees=200:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=1.0:SeparationType=GiniIndex:nCuts=50:PruneMethod=NoPruning"
        )
        factory.BookMethod(
            TMVA.Types.kBDT, "BDTgrad" + phil,
            "!H:!V:NTrees=200:MaxDepth=3:BoostType=Grad:Shrinkage=0.5:UseBaggedGrad:GradBaggingFraction=1.0:SeparationType=GiniIndex:nCuts=50:NNodesMax=10"
        )

    else:  #test
        # BDT ada
        if testType == "ada":
            #if testMethod=="nTrees":
            for nTrees in [10, 50, 100, 200, 500]:
                for depth in [2, 3]:
                    factory.BookMethod(
                        TMVA.Types.kBDT, "BDT_ada" + str(phil) + "_" +
                        str(nTrees) + "t_" + str(depth) + "d",
                        "!H:!V:NTrees=" + str(nTrees) +
                        ":nEventsMin=150:MaxDepth=" + str(depth) +
                        ":BoostType=AdaBoost:AdaBoostBeta=1:SeparationType=GiniIndex:nCuts=50:PruneMethod=NoPruning"
                    )

        # if testMethod=="depth":
        #   for depth in [2,3]:
        #    factory.BookMethod( TMVA.Types.kBDT, "BDT_ada"+str(phil)+"_200t_"+str(depth)+"d_0.05b_50c","!H:!V:NTrees=200:nEventsMin=150:MaxDepth="+str(depth)+":BoostType=AdaBoost:AdaBoostBeta=0.05:SeparationType=GiniIndex:nCuts=50:PruneMethod=NoPruning")

        #if testMethod=="nCuts":
        #  for nCuts in [5,10,20,50,100,200]:
        #    factory.BookMethod( TMVA.Types.kBDT, "BDT_ada"+str(phil)+"_200t_50d_0.05b_"+str(nCuts)+"c","!H:!V:NTrees=200:nEventsMin=150:MaxDepth=50:BoostType=AdaBoost:AdaBoostBeta=0.05:SeparationType=GiniIndex:nCuts="+str(nCuts)+":PruneMethod=NoPruning")

        #if testMethod=="beta":
        #  for beta in [0.05,0.5,1.]:
        #    factory.BookMethod( TMVA.Types.kBDT, "BDT_ada"+str(phil)+"_200t_50d_"+str(beta)+"b_50c","!H:!V:NTrees=200:nEventsMin=150:MaxDepth=50:BoostType=AdaBoost:AdaBoostBeta="+str(beta)+":SeparationType=GiniIndex:nCuts=50:PruneMethod=NoPruning")

        # BDT grad
        if testType == "grad":
            if testMethod == "nTrees":
                for nTrees in [10, 50, 100, 200, 500]:
                    for depth in [2, 3]:
                        for shrinkage in [0.05, 0.5, 1.]:
                            factory.BookMethod(
                                TMVA.Types.kBDT, "BDT_grad" + str(phil) + "_" +
                                str(nTrees) + "t_" + str(depth) + "d_" +
                                str(shrinkage) + "s", "!H:!V:NTrees=" +
                                str(nTrees) + ":MaxDepth=" + str(depth) +
                                ":BoostType=Grad:Shrinkage=" + str(shrinkage) +
                                ":UseBaggedGrad:GradBaggingFraction=1:SeparationType=GiniIndex:nCuts=50:NNodesMax=10"
                            )

            #if testMethod=="depth":
            # for depth in [2,3]:
            #  factory.BookMethod( TMVA.Types.kBDT, "BDT_ada"+str(phil)+"_200t_"+str(depth)+"d_0.05b_50c","!H:!V:NTrees=200:nEventsMin=150:MaxDepth="+str(depth)+":BoostType=AdaBoost:AdaBoostBeta=0.05:SeparationType=GiniIndex:nCuts=50:PruneMethod=NoPruning")

            #if testMethod=="shrinkage":
            #  for shrinkage in [0.05,0.1,0.5,1.]:
            #    factory.BookMethod( TMVA.Types.kBDT, "BDT_grad"+str(phil)+"_200t_"+str(shrinkage)+"s_1gb_50c_10nm","!H:!V:NTrees=200:BoostType=Grad:Shrinkage="+str(shrinkage)+":UseBaggedGrad:GradBaggingFraction=1:SeparationType=GiniIndex:nCuts=50:NNodesMax=10")

            #if testMethod=="bagFrac":
            #  for bagFrac in [0.05,0.1,0.5,1.]:
            #   factory.BookMethod( TMVA.Types.kBDT, "BDT_grad"+str(phil)+"_200t_1s_"+str(bagFrac)+"gb_50c_10nm","!H:!V:NTrees=200:BoostType=Grad:Shrinkage=1:UseBaggedGrad:GradBaggingFraction="+str(bagFrac)+":SeparationType=GiniIndex:nCuts=50:NNodesMax=10")

            #if testMethod=="nCuts":
            # for nCuts in [5,10,20,50,100,200]:
            #  factory.BookMethod( TMVA.Types.kBDT, "BDT_grad"+str(phil)+"_200t_1s_1gb_"+str(nCuts)+"c_10nm","!H:!V:NTrees=200:BoostType=Grad:Shrinkage=1:UseBaggedGrad:GradBaggingFraction=1:SeparationType=GiniIndex:nCuts="+str(nCuts)+":NNodesMax=10")

            #if testMethod=="nNM":
            # for nNM in [10,100,500,1000,10000]:
            #  factory.BookMethod( TMVA.Types.kBDT, "BDT_grad"+str(phil)+"_200t_1s_1gb_50c_"+str(nNM)+"nm","!H:!V:NTrees=200:BoostType=Grad:Shrinkage=1:UseBaggedGrad:GradBaggingFraction=1:SeparationType=GiniIndex:nCuts=50:NNodesMax"+str(nNM))

    # --------------------------------------------------------------------------------------------------
    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    #factory.OptimizeAllMethods()
    factory.TrainAllMethods()
    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"
def main():

    try:
        # retrieve command line options
        shortopts = "m:M:C:B:i:t:o:vh?"
        opts, args = getopt.getopt(sys.argv[1:], shortopts)

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infname = DEFAULT_INFNAME
    methods = DEFAULT_METHODS
    mass = DEFAULT_MASS
    cat = DEFAULT_CAT
    outfname = DEFAULT_OUTFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    bkg_method = DEFAULT_BACKGROUND
    width = DEFAULT_WIDTH
    verbose = False
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-M", "--mass"):
            mass = int(a)
        elif o in ("-C", "--cat"):
            cat = int(a)
        elif o in ("-B", "--background"):
            bkg_method = int(a)
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            a = a.strip()
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    if (width == 0.02): width_str = "_2pt"
    elif (width == 0.07): width_str = "_7pt"
    else: width_str = ""  # fall back to no width tag for other widths
    mass_str = "_" + str(mass)
    cat_str = "_" + str(cat)
    if cat < 0:
        cat_str = "_all"
    outfname = outfname + mass_str + cat_str + ".root"
    #treeNameSig = treeNameSig + mass_str
    #treeNameBkg = treeNameBkg + mass_str

    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library)
    # load also GUI
    gROOT.SetMacroPath("./")
    #gROOT.Macro       ( "./TMVAlogon.C" )
    #gROOT.LoadMacro   ( "./TMVAGui.C" )

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for
    # more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"
    )

    # Set verbosity
    factory.SetVerbose(verbose)

    # Define the input variables that shall be used for the classifier training
    factory.AddVariable("pho1_ptOverM", "P_{T}^{lead} / M_{H}", "", 'F')
    factory.AddVariable("pho2_ptOverM", "P_{T}^{sublead} / M_{H}", "", 'F')
    factory.AddVariable("pho1_eta", "#eta^{lead}", "", 'F')
    factory.AddVariable("pho2_eta", "#eta^{sublead}", "", 'F')
    factory.AddVariable("d_phi", "#Delta #phi", "rad", 'F')
    #should this be cos delta Phi
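    # If so, an expression variable could be used instead (TMVA accepts any
    # expression that TTree::Draw can parse), e.g.:
    #factory.AddVariable( "cos(d_phi)", "cos #Delta #phi", "", 'F' )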
    factory.AddVariable("H_ptOverM", "P_{T}^{Higgs}/M_{H}", "", 'F')
    factory.AddVariable("H_eta", "#eta^{Higgs}", "", 'F')

    factory.AddVariable("sigmaMOverM", "#sigmaM_{cor} / M", 'F')
    factory.AddVariable("sigmaMOverM_wrongVtx", "#sigmaM_{wrong} / M", 'F')

    factory.AddVariable("vtx_prob", "P_{vertex}", "", 'F')

    #factory.AddVariable( "cos_theta_star","cos(#theta)*", "", 'F' );
    #factory.AddVariable( "max_eta","max(#eta^{lead},#eta^{sub.})", "", 'F' );
    #factory.AddVariable( "min_r9","min(r9^{lead},r9^{sub.})", "", 'F' );

    factory.AddVariable("deltaMOverM", "#DeltaM / M_{Hypth}.", 'F')

    inputFile = TFile.Open(infname)

    # Get the signal and background trees for training
    signal_train = inputFile.Get(treeNameSig + "_train" + mass_str + ".0")
    signal_test = inputFile.Get(treeNameSig + "_test" + mass_str + ".0")

    background_train = inputFile.Get(treeNameBkg + "_train" + width_str +
                                     mass_str + ".0")
    background_test = inputFile.Get(treeNameBkg + "_test" + width_str + mass_str +
                                    ".0")

    # Global event weights (see below for setting event-wise weights)
    signalWeight = 1.0
    backgroundWeight = 1.0

    # ====== register trees ====================================================
    factory.AddSignalTree(signal_train, signalWeight, "train")
    factory.AddBackgroundTree(background_train, backgroundWeight, "train")
    factory.AddSignalTree(signal_test, signalWeight, "test")
    factory.AddBackgroundTree(background_test, backgroundWeight, "test")

    # Set individual event weights (the variables must exist in the original
    # TTree)
    factory.SetBackgroundWeightExpression("wt")
    factory.SetSignalWeightExpression("wt")

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    mycut = TCut("fabs(deltaMOverM)<=" + str(width))  #
    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    factory.PrepareTrainingAndTestTree(
        mycut, mycut,
        "nTrain_Signal=0:nTrain_Background=0:NormMode=NumEvents:!V")
    # Boosted Decision Trees
    # NEW PARAMETERS
    factory.BookMethod(
        TMVA.Types.kBDT, "BDT_ada" + mass_str + cat_str,
        "!H:!V:NTrees=400:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.05:SeparationType=GiniIndex:nCuts=50:PruneMethod=NoPruning"
    )
    factory.BookMethod(
        TMVA.Types.kBDT, "BDT_grad" + mass_str + cat_str,
        "!H:!V:NTrees=500:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=50:NNodesMax=5"
    )
    #test

    # --------------------------------------------------------------------------------------------------
    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    #factory.OptimizeAllMethods()
    factory.TrainAllMethods()
    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"
Example #5
def main():

    try:
        # retrieve command line options
        shortopts = "m:i:t:o:vh?"
        longopts = [
            "methods=", "inputfile=", "inputtrees=", "outputfile=", "verbose",
            "help", "usage"
        ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    verbose = False
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            a = a.strip()
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    gROOT.SetMacroPath("./")
    gROOT.Macro("./TMVAlogon.C")
    gROOT.LoadMacro("./TMVAGui.C")

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"
    )

    # Set verbosity
    factory.SetVerbose(verbose)

    # If you wish to modify default settings
    # (please check "src/Config.h" to see all available global options)
    #    (TMVA.gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    #    (TMVA.gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    #varList = ['svMass', 'mJJ', 'met/fMass','pZ - pZV']
    #     varList = ['svMass', 'fMass', 'dRTauTau', 'dRJJ', 'svPt', 'dRhh', 'met', 'mJJReg',
    #                'metTau1DPhi', 'metTau2DPhi', 'metJ1DPhi', 'metJ2DPhi', 'metTauPairDPhi', 'metSvTauPairDPhi', 'metJetPairDPhi','CSVJ1', 'CSVJ2']
    #     varList = ['svMass', 'dRTauTau', 'svPt', 'dRhh', 'met', 'mJJReg',
    #                 'metTau1DPhi', 'metTau2DPhi', 'metJ2DPhi', 'metJetPairDPhi','CSVJ1', 'CSVJ2']
    varList = [
        'svMass', 'dRTauTau', 'dRJJ', 'svPt', 'dRhh', 'met', 'mJJReg',
        'metTau1DPhi', 'metTau2DPhi', 'metJ1DPhi', 'metJ2DPhi',
        'metTauPairDPhi', 'metSvTauPairDPhi', 'metJetPairDPhi', 'CSVJ1',
        'CSVJ2'
    ]
    for iVar in varList:
        factory.AddVariable(iVar, 'F')

    #factory.AddVariable( "NBTags",'I' )

    # You can add so-called "Spectator variables", which are not used in the MVA training,
    # but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
    # input variables, the response values of all trained MVAs, and the spectator variables
#     factory.AddSpectator( "fMass")
#     factory.AddSpectator( "spec2:=var1*3",  "Spectator 2", "units", 'F' )

# Read input data
#     sigChain = r.TChain("ttTreeFinal/eventTree")
#     bkg1Chain = r.TChain("ttTreeFinal/eventTree")
#     bkg2Chain = r.TChain("ttTreeFinal/eventTree")

# Get the signal and background trees for training

    iFileSig = TFile.Open("/scratch/zmao/relaxed_regression/%s" % (infname))
    iFileBkg = TFile.Open(
        "/scratch/zmao/relaxed_regression/trainSample_relaxedsamebTag.root")

    sigChain = iFileSig.Get("eventTree")
    bkgChain = iFileBkg.Get("eventTree")

    # Global event weights (see below for setting event-wise weights)
    signalWeight = 1  #0.0159/sigChain.GetEntries() #xs (pb)

    # ====== register trees ====================================================
    #
    # the following method is the preferred one:
    # you can add an arbitrary number of signal or background trees
    factory.AddSignalTree(sigChain, signalWeight)
    factory.AddBackgroundTree(bkgChain, 1)
    factory.SetSignalWeightExpression('triggerEff')

    # To give different trees for training and testing, do as follows:
    #    factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    factory.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )

    # Use the following code instead of the above two or four lines to add signal and background
    # training and test events "by hand"
    # NOTE that in this case one should not give expressions (such as "var1+var2") in the input
    #      variable definition, but simply compute the expression before adding the event
    #
    #    # --- begin ----------------------------------------------------------
    #
    # ... *** please lookup code in TMVA/macros/TMVAClassification.C ***
    #
    #    # --- end ------------------------------------------------------------
    #
    # ====== end of register trees ==============================================
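    # A minimal commented sketch of the "by hand" route mentioned above, assuming
    # this TMVA version provides Factory::AddSignalTrainingEvent (the event loop
    # and attribute access are illustrative only):
    #    from ROOT import std
    #    values = std.vector('double')(len(varList))
    #    for event in sigChain:
    #        for i, name in enumerate(varList):
    #            values[i] = getattr(event, name)
    #        factory.AddSignalTrainingEvent(values, signalWeight)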

    # Set individual event weights (the variables must exist in the original TTree)
    #    for signal    : factory.SetSignalWeightExpression    ("weight1*weight2");
    #    for background: factory.SetBackgroundWeightExpression("weight1*weight2");
    #factory.SetBackgroundWeightExpression( "weight" )

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    mycutSig = TCut(
        "iso1<1.5 && iso2<1.5 && CSVJ1 > 0.679 && CSVJ2 > 0.244 && abs(eta1)<2.1 && abs(eta2)<2.1 && charge1 + charge2 == 0"
    )
    mycutBkg = TCut("")

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    factory.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation

    # Fisher discriminant (same as LD)
    #    if "Fisher" in mlist:
    #factory.BookMethod( TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )
    #       factory.BookMethod( TMVA.Types.kFisher, "Fisher")

    if "BDT" in mlist:
        factory.BookMethod(
            TMVA.Types.kBDT, "BDT",
            "!H:!V:NTrees=150:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=-1"
        )

    # --------------------------------------------------------------------------------------------------

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    factory.TrainAllMethods()

    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"

    # open the GUI for the result macros
    #     gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname )
    ChangeWeightName = 'mv /afs/hep.wisc.edu/home/zmao/CMSSW_5_3_15/src/TMVA-v4.2.0/test/weights/TMVAClassification_BDT.weights.xml /afs/hep.wisc.edu/home/zmao/CMSSW_5_3_15/src/TMVA-v4.2.0/test/weights/TMVAClassification_BDT.weights_QCD_%i.xml' % len(
        varList)
    os.system(ChangeWeightName)
Example #6
def main():

    try:
        # retrieve command line options
        shortopts = "m:i:n:d:k:l:t:o:s:vh?"
        longopts = [
            "methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=",
            "varListKey=", "inputtrees=", "outputfile=", "seed=", "verbose",
            "help", "usage"
        ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    nTrees = DEFAULT_NTREES
    mDepth = DEFAULT_MDEPTH
    varListKey = DEFAULT_VARLISTKEY
    verbose = True
    SeedN = DEFAULT_SEED
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-d", "--maxDepth"):
            mDepth = a
        elif o in ("-l", "--varListKey"):
            varListKey = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-n", "--nTrees"):
            nTrees = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            a = a.strip()
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-s", "--seed"):
            SeedN = long(a)
        elif o in ("-v", "--verbose"):
            verbose = True

    varList = varsList.varList[varListKey]
    nVars = str(len(varList)) + 'vars'
    Note = methods + '_' + varListKey + '_' + nVars + '_mDepth' + mDepth
    outfname = "dataset/weights/TMVA_" + Note + ".root"

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut, TRandom3

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    fClassifier = TMVA.Factory(
        "VariableImportance",
        "!V:!ROC:!ModelPersistence:Silent:Color:!DrawProgressBar:AnalysisType=Classification"
    )
    str_xbitset = '{:053b}'.format(SeedN)

    seeddl = TMVA.DataLoader(str_xbitset)

    bdtSetting = '!H:!V:NTrees=%s:MaxDepth=%s' % (nTrees, mDepth)
    bdtSetting += ':MinNodeSize=2.5%:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20'
    bdtSetting += ':IgnoreNegWeightsInTraining=True'

    index = 52

    for iVar in varList:
        if (str_xbitset[index] == '1'):
            seeddl.AddVariable(iVar[0], iVar[1], iVar[2], 'F')
            print iVar[0]
        index = index - 1
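    # Worked example: with SeedN = 5 (binary ...101) the string ends in "101", so
    # reading backwards from index 52 adds varList[0] and varList[2] and skips
    # varList[1]; bit i of the seed controls varList[i]. The ROC integral obtained
    # below then scores this particular variable subset.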

    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/" + Note

    inputDir = varsList.inputDir
    infname = "TTTT_TuneCP5_PSweights_13TeV-amcatnlo-pythia8_hadd.root"
    iFileSig = TFile.Open(inputDir + infname)
    sigChain = iFileSig.Get("ljmet")

    seeddl.AddSignalTree(sigChain)
    bkg_list = []
    bkg_trees_list = []
    bkgList = varsList.bkg

    for i in range(len(bkgList)):
        bkg_list.append(TFile.Open(inputDir + bkgList[i]))
        bkg_trees_list.append(bkg_list[i].Get("ljmet"))
        bkg_trees_list[i].GetEntry(0)

        if bkg_trees_list[i].GetEntries() == 0:
            continue
        seeddl.AddBackgroundTree(bkg_trees_list[i], 1)

    signalWeight = 1

    seeddl.SetSignalWeightExpression(weightStrS)
    seeddl.SetBackgroundWeightExpression(weightStrB)

    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)

    seeddl.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )

    fClassifier.BookMethod(seeddl, TMVA.Types.kBDT, "BDT", bdtSetting)
    fClassifier.TrainAllMethods()
    fClassifier.TestAllMethods()
    fClassifier.EvaluateAllMethods()

    SROC = fClassifier.GetROCIntegral(str_xbitset, "BDT")
    print "ROC-integral : ", str_xbitset, " ", SROC
    print "SEED " + str_xbitset + " DONE"
    fClassifier.DeleteAllMethods()
    fClassifier.fMethodsMap.clear()
    print "=================================================================="
    print "=================================================================="
Example #7
def main():

    try:
        # retrieve command line options
        shortopts = "m:i:t:o:vh?"
        longopts = [
            "methods=", "inputfile=", "inputtrees=", "outputfile=", "verbose",
            "help", "usage"
        ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    verbose = False
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            a = a.strip()
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    gROOT.SetMacroPath("./")
    #gROOT.Macro       ( "./TMVAlogon.C" )
    #gROOT.LoadMacro   ( "./TMVAGui.C" )

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile("TMVA.root", "RECREATE")

    #outputFile = TFile( outfname, 'RECREATE' )

    # Create dataloader
    dataloader = TMVA.DataLoader("dataset")
    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"
    )

    # Set verbosity
    factory.SetVerbose(verbose)

    # If you wish to modify default settings
    # (please check "src/Config.h" to see all available global options)
    #    (TMVA.gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    #    (TMVA.gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]

    # You can add so-called "Spectator variables", which are not used in the MVA training,
    # but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
    # input variables, the response values of all trained MVAs, and the spectator variables

    # Read input data
    # if gSystem.AccessPathName( infname ) != 0: gSystem.Exec( "wget http://root.cern.ch/files/" + infname )

    file1 = TFile.Open("~/Test/Testy/out_sig.root")
    signal = file1.Get("ntuple")
    signalWeight = 1.0
    dataloader.AddSignalTree(signal, signalWeight)

    file2 = TFile.Open("~/Test/Testy/out_bkg.root")
    background = file2.Get("ntuple")
    backgroundWeight = 1.0
    dataloader.AddBackgroundTree(background, backgroundWeight)
    # Get the signal and background trees for training

    dataloader.AddVariable("dist", "Vardist", "units", 'F')
    dataloader.AddVariable("mu_likep", "Varmup", "units", 'F')
    dataloader.AddVariable("mu_likem", "Varmum", "units", 'F')
    dataloader.AddVariable("DeltPhi", "VarDel", "units", 'F')
    dataloader.AddSpectator("minv", "Varminv", 'F')

    # Global event weights (see below for setting event-wise weights)

    # ====== register trees ====================================================
    #
    # the following method is the preferred one:
    # you can add an arbitrary number of signal or background trees

    # To give different trees for training and testing, do as follows:
    #    factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    factory.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )

    # Use the following code instead of the above two or four lines to add signal and background
    # training and test events "by hand"
    # NOTE that in this case one should not give expressions (such as "var1+var2") in the input
    #      variable definition, but simply compute the expression before adding the event
    #
    #    # --- begin ----------------------------------------------------------
    #
    # ... *** please lookup code in TMVA/macros/TMVAClassification.C ***
    #
    #    # --- end ------------------------------------------------------------
    #
    # ====== end of register trees ==============================================

    # Set individual event weights (the variables must exist in the original TTree)
    #    for signal    : factory.SetSignalWeightExpression    ("weight1*weight2");
    #    for background: factory.SetBackgroundWeightExpression("weight1*weight2");
    #dataloader.SetBackgroundWeightExpression( "weight" )

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    mycutSig = TCut("")
    mycutBkg = TCut("")

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    dataloader.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kCuts, "Cuts",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart")

    if "CutsD" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kCuts, "CutsD",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate"
        )

    if "CutsPCA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kCuts, "CutsPCA",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA"
        )

    if "CutsGA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kCuts, "CutsGA",
            "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95"
        )

    if "CutsSA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kCuts, "CutsSA",
            "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale"
        )

    # Likelihood ("naive Bayes estimator")
    if "Likelihood" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLikelihood, "Likelihood",
            "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50"
        )

    # Decorrelated likelihood
    if "LikelihoodD" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLikelihood, "LikelihoodD",
            "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate"
        )

    # PCA-transformed likelihood
    if "LikelihoodPCA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLikelihood, "LikelihoodPCA",
            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA"
        )

    # Use a kernel density estimator to approximate the PDFs
    if "LikelihoodKDE" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLikelihood, "LikelihoodKDE",
            "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50"
        )

    # Use a variable-dependent mix of splines and kernel density estimator
    if "LikelihoodMIX" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLikelihood, "LikelihoodMIX",
            "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50"
        )

    # Test the multi-dimensional probability density estimator
    # here are the options strings for the MinMax and RMS methods, respectively:
    #      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
    #      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
    if "PDERS" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kPDERS, "PDERS",
            "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600"
        )

    if "PDERSD" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kPDERS, "PDERSD",
            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate"
        )

    if "PDERSPCA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kPDERS, "PDERSPCA",
            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA"
        )

    # Multi-dimensional likelihood estimator using self-adapting phase-space binning
    if "PDEFoam" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kPDEFoam, "PDEFoam",
            "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T"
        )

    if "PDEFoamBoost" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kPDEFoam, "PDEFoamBoost",
            "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T"
        )

    # K-Nearest Neighbour classifier (KNN)
    if "KNN" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kKNN, "KNN",
            "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim"
        )

    # H-Matrix (chi-squared) method
    if "HMatrix" in mlist:
        factory.BookMethod(dataloader, TMVA.Types.kHMatrix, "HMatrix", "!H:!V")

    # Linear discriminant (same as Fisher discriminant)
    if "LD" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLD, "LD",
            "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10"
        )

    # Fisher discriminant (same as LD)
    if "Fisher" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFisher, "Fisher",
            "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10"
        )

    # Fisher with Gauss-transformed input variables
    if "FisherG" in mlist:
        factory.BookMethod(dataloader, TMVA.Types.kFisher, "FisherG",
                           "H:!V:VarTransform=Gauss")

    # Composite classifier: ensemble (tree) of boosted Fisher classifiers
    if "BoostedFisher" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFisher, "BoostedFisher",
            "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2"
        )

    # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if "FDA_MC" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_MC",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1"
        )

    if "FDA_GA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_GA",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1"
        )

    if "FDA_SA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_SA",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale"
        )

    if "FDA_MT" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_MT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch"
        )

    if "FDA_GAMT" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_GAMT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim"
        )

    if "FDA_MCMT" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_MCMT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20"
        )

    # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if "MLP" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kMLP, "MLP",
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator"
        )

    if "MLPBFGS" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kMLP, "MLPBFGS",
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator"
        )

    if "MLPBNN" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kMLP, "MLPBNN",
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator"
        )  # BFGS training with bayesian regulators

    # CF(Clermont-Ferrand)ANN
    if "CFMlpANN" in mlist:
        factory.BookMethod(dataloader, TMVA.Types.kCFMlpANN, "CFMlpANN",
                           "!H:!V:NCycles=2000:HiddenLayers=N+1,N"
                           )  # n_cycles:#nodes:#nodes:...

    # Tmlp(Root)ANN
    if "TMlpANN" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kTMlpANN, "TMlpANN",
            "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"
        )  # n_cycles:#nodes:#nodes:...

    # Support Vector Machine
    if "SVM" in mlist:
        factory.BookMethod(dataloader, TMVA.Types.kSVM, "SVM",
                           "Gamma=0.25:Tol=0.001:VarTransform=Norm")

    # Boosted Decision Trees
    if "BDTG" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kBDT, "BDTG",
            "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2"
        )

    if "BDT" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kBDT, "BDT",
            "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20"
        )

    if "BDTB" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kBDT, "BDTB",
            "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20"
        )

    if "BDTD" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kBDT, "BDTD",
            "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate"
        )

    # RuleFit -- TMVA implementation of Friedman's method
    if "RuleFit" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kRuleFit, "RuleFit",
            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02"
        )

    from subprocess import call
    from os.path import isfile
    from keras.models import Sequential
    from keras.layers.core import Dense, Activation
    from keras.regularizers import l2
    from keras.optimizers import SGD
    TMVA.Tools.Instance()
    TMVA.PyMethodBase.PyInitialize()
    model = Sequential()
    model.add(Dense(64, activation='relu', W_regularizer=l2(1e-5),
                    input_dim=4))
    model.add(Dense(2, activation='softmax'))

    # Set loss and optimizer
    model.compile(loss='categorical_crossentropy',
                  optimizer=SGD(lr=0.01),
                  metrics=[
                      'accuracy',
                  ])
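    # Note: input_dim=4 matches the four dataloader.AddVariable calls above, and the
    # two-unit softmax corresponds to binary signal/background classification.
    # W_regularizer and lr are the older Keras 1.x spellings (newer Keras releases
    # renamed them to kernel_regularizer and learning_rate); the PyKeras method also
    # requires a ROOT build with PyMVA enabled.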

    # Store model to file
    model.save('model.h5')
    model.summary()

    # Book methods

    if "Keras" in mlist:
        factory.BookMethod(dataloader, TMVA.Types.kFisher, "Fisher",
                           "!H:!V:Fisher:VarTransform=D,G")
        factory.BookMethod(
            dataloader, TMVA.Types.kPyKeras, "PyKeras",
            "H:!V:VarTransform=D,G:FilenameModel=model.h5:NumEpochs=20:BatchSize=32"
        )
    # --------------------------------------------------------------------------------------------------

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    factory.TrainAllMethods()

    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"
Example #8
def main():

    try:
        # retrieve command line options
        shortopts  = "w:m:i:j:f:g:t:o:a:vgh?"
        longopts   = ["weight_fold=", "methods=", "inputfilesig=", "inputfilebkg=", "friendinputfilesig=", "friendinputfilebkg=", "inputtrees=", "outputfile=", "verbose", "gui", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infnameSig     = DEFAULT_INFNAMESIG
    infnameBkg     = DEFAULT_INFNAMEBKG
    friendfnameSig = DEFAULT_FRIENDNAMESIG
    friendfnameBkg = DEFAULT_FRIENDNAMEBKG
    treeNameSig    = DEFAULT_TREESIG
    treeNameBkg    = DEFAULT_TREEBKG
    outfname       = DEFAULT_OUTFNAME
    methods        = DEFAULT_METHODS
    weight_fold    = "weights"
    verbose        = False
    gui            = False
    addedcuts      = ""
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-w", "--weight_fold"):
            weight_fold = a
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfilesig"):
            infnameSig = a
        elif o in ("-j", "--inputfilebkg"):
            infnameBkg = a
        elif o in ("-f", "--friendinputfilesig"):
            friendfnameSig = a
        elif o in ("-g", "--friendinputfilebkg"):
            friendfnameBkg = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-a", "--addedcuts"):
            addedcuts = a
        elif o in ("-t", "--inputtrees"):
            a = a.strip()
            trees = a.rsplit( ' ' )
            trees.sort()
            trees.reverse()
            if len(trees)-trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True
        elif o == "--gui":
            # note that "-g" is already taken by --friendinputfilebkg above,
            # so the GUI can only be enabled with the long option
            gui = True

    # Print methods
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Print the file
    print "Using file " + infnameSig + " for signal..."
    print "Using file " + infnameBkg + " for background..."

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut
    
    # check ROOT version, give alarm if 5.18
    print "ROOT version is " + str(gROOT.GetVersionCode())
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)
    
    # The TMVA logon macro is not automatically loaded through PyROOT (it would
    # load the TMVA library and the GUI); here the ROOT defaults are used instead.
    gROOT.SetMacroPath( "./" )
    #    gROOT.Macro     ( "./TMVAlogon.C" )
    #    gROOT.LoadMacro ( "./TMVAGui.C" )
    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile( outfname, 'RECREATE' )
    
    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in 
    # front of the "Silent" argument in the option string
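    # For example, a fully silent batch configuration might look like this (sketch):
    #    factory = TMVA.Factory( "TMVAClassification", outputFile,
    #                            "!V:Silent:!Color:!DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )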
    factory = TMVA.Factory( "TMVAClassification", outputFile, 
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )

    # Set verbosity
    factory.SetVerbose( verbose )
    
    # If you wish to modify default settings 
    # (please check "src/Config.h" to see all available global options)
    #    gConfig().GetVariablePlotting()).fTimesRMS = 8.0
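    # Redirect the XML weight files produced by the training into the directory
    # given by -w/--weight_fold instead of TMVA's default "weights" directory: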
    (TMVA.gConfig().GetIONames()).fWeightFileDir = weight_fold

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    factory.AddVariable( "dR_l1l2", "dR_l1l2", "", 'F' )
    factory.AddVariable( "dR_b1b2", "dR_b1b2", "", 'F' )
    factory.AddVariable( "dR_bl", "dR_bl", "", 'F' )
    factory.AddVariable( "dR_l1l2b1b2", "dR_l1l2b1b2", "", 'F' )
    factory.AddVariable( "MINdR_bl", "MINdR_bl", "", 'F' )
    factory.AddVariable( "dphi_l1l2b1b2", "dphi_l1l2b1b2", "", 'F' )
    factory.AddVariable( "mass_l1l2", "mass_l1l2", "", 'F' )
    factory.AddVariable( "mass_b1b2", "mass_b1b2", "", 'F' )
    factory.AddVariable( "mass_trans", "mass_trans", "", 'F' )
    factory.AddVariable( "MT2", "MT2", "", 'F' )
    factory.AddVariable( "pt_b1b2", "pt_b1b2", "", 'F' )
    #factory.AddVariable( "MMC_h2mass_MaxBin", "MMC_h2mass_MaxBin", "", 'F' )
    #factory.AddVariable( "MMC_h2mass_RMS", "MMC_h2mass_RMS", "", 'F' )
    #factory.AddVariable( "MMC_h2mass_prob", "MMC_h2mass_prob", "", 'F' )

    # You can add so-called "Spectator variables", which are not used in the MVA training, 
    # but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the 
    # input variables, the response values of all trained MVAs, and the spectator variables
    # factory.AddSpectator( "spec1:=var1*2",  "Spectator 1", "units", 'F' )
    # factory.AddSpectator( "spec2:=var1*3",  "Spectator 2", "units", 'F' )

    # Read input data; abort if either input file cannot be accessed
    if gSystem.AccessPathName( infnameSig ) != 0 or gSystem.AccessPathName( infnameBkg ) != 0:
        print "ERROR: could not access input file(s) %s / %s" % ( infnameSig, infnameBkg )
        sys.exit(1)
        
    inputSig = TFile.Open( infnameSig )
    inputBkg = TFile.Open( infnameBkg )

    # Get the signal and background trees for training
    signal      = inputSig.Get( treeNameSig )
    background  = inputBkg.Get( treeNameBkg )

    ##signal.AddFriend( "eleIDdir/isoT1 = eleIDdir/T1", friendfnameSig )
    ##background.AddFriend( "eleIDdir/isoT1 = eleIDdir/T1", friendfnameBkg )

    # Global event weights (see below for setting event-wise weights)
    signalWeight     = 1.
    backgroundWeight = 1.

    # There is no general answer for choosing these global weights.  The safest
    # default is to weight the events such that the training sees comparable
    # amounts of signal and background.  If instead the weights are used to
    # scale the samples to the ratio expected for a given luminosity, a rare
    # signal is almost invisible to the classifier, which then "learns" to call
    # everything background and still does a good job by that measure.
    # One can bias the training towards higher purity or higher efficiency by
    # choosing different weights, but there is no fixed rule: one has to try
    # and see whether the results improve.
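    # A sketch of the alternative (hypothetical numbers): weighting each sample
    # by its expected yield from cross section (pb) and integrated luminosity
    # (pb^-1) would look like
    #    lumi             = 30000.
    #    sigXsec, bkgXsec = 0.1, 250.
    #    signalWeight     = sigXsec * lumi / signal.GetEntries()
    #    backgroundWeight = bkgXsec * lumi / background.GetEntries()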

    # ====== register trees ====================================================
    #
    # the following method is the preferred one:
    # you can add an arbitrary number of signal or background trees
    factory.AddSignalTree    ( signal,     signalWeight     )
    factory.AddBackgroundTree( background, backgroundWeight )

    # To give different trees for training and testing, do as follows:
    #    factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    factory.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )
    
    # Use the following code instead of the above two or four lines to add signal and background 
    # training and test events "by hand"
    # NOTE that in this case one should not give expressions (such as "var1+var2") in the input 
    #      variable definition, but simply compute the expression before adding the event
    #
    #    # --- begin ----------------------------------------------------------
    #    
    # ... *** please lookup code in TMVA/macros/TMVAClassification.C ***
    #    
    #    # --- end ------------------------------------------------------------
    #
    # ====== end of register trees ==============================================    
            
    # Set individual event weights (the variables must exist in the original TTree)
    #    for signal    : factory.SetSignalWeightExpression    ("weight1*weight2");
    #    for background: factory.SetBackgroundWeightExpression("weight1*weight2");

    # Apply additional cuts on the signal and background sample. 
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    #mycutSig = TCut( "nu1and2_diBaxis_t>-900 && met_diBaxis_t>-900&& hasb1jet && hasb2jet && hasMET && hasGenMET && hasdRljet && hastwomuons" ) 
    mycutSig = TCut( addedcuts ) 
    #mycutBkg = TCut( "event_n%2!=0 && " + addedcuts ) 
    mycutBkg = TCut( addedcuts ) 
    #mycutBkg = TCut( "nu1and2_diBaxis_t>-900 && met_diBaxis_t>-900&& hasb1jet && hasb2jet && hasMET && hasGenMET && hasdRljet && hastwomuons" ) 

    print mycutSig

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
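    # As an alternative to the automatic split used below, explicit training
    # sample sizes could be requested like this (a sketch with hypothetical numbers):
    #    factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg,
    #                                        "nTrain_Signal=4000:nTrain_Background=4000:SplitMode=Random:NormMode=NumEvents:!V" )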
    factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "Cuts",
                            "!H:!V:FitMethod=MC:EffSel:VarProp[0]=FMax:VarProp[1]=FMin" )

    if "CutsD" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsD",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" )

    if "CutsPCA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsPCA",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" )

    if "CutsGA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsGA",
                            "H:!V:FitMethod=GA:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95:VarProp[0]=FMin:VarProp[1]=FMax" )

    if "CutsSA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsSA",
                            "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" )

    # Likelihood ("naive Bayes estimator")
    if "Likelihood" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "Likelihood",
                            "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" )

    # Decorrelated likelihood
    if "LikelihoodD" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodD",
                            "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" )

    # PCA-transformed likelihood
    if "LikelihoodPCA" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodPCA",
                            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" ) 

    # Use a kernel density estimator to approximate the PDFs
    if "LikelihoodKDE" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodKDE",
                            "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" ) 

    # Use a variable-dependent mix of splines and kernel density estimator
    if "LikelihoodMIX" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodMIX",
                            "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" ) 

    # Test the multi-dimensional probability density estimator
    # here are the options strings for the MinMax and RMS methods, respectively:
    #      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
    #      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
    if "PDERS" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERS",
                            "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" )

    if "PDERSD" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERSD",
                            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" )

    if "PDERSPCA" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERSPCA",
                             "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" )

    # Multi-dimensional likelihood estimator using self-adapting phase-space binning
    if "PDEFoam" in mlist:
        factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoam",
                            "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" )

    if "PDEFoamBoost" in mlist:
        factory.BookMethod( TMVA.Types.kPDEFoam, "PDEFoamBoost",
                            "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" )

    # K-Nearest Neighbour classifier (KNN)
    if "KNN" in mlist:
        factory.BookMethod( TMVA.Types.kKNN, "KNN",
                            "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" )

    # H-Matrix (chi-squared) method
    if "HMatrix" in mlist:
        factory.BookMethod( TMVA.Types.kHMatrix, "HMatrix", "!H:!V" )

    # Linear discriminant (same as Fisher discriminant)
    if "LD" in mlist:
        factory.BookMethod( TMVA.Types.kLD, "LD", "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )

    # Fisher discriminant (same as LD)
    if "Fisher" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )

    # Fisher with Gauss-transformed input variables
    if "FisherG" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "FisherG", "H:!V:VarTransform=Gauss" )

    # Composite classifier: ensemble (tree) of boosted Fisher classifiers
    if "BoostedFisher" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "BoostedFisher", 
                            "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2" )

    # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if "FDA_MC" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MC",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" )

    if "FDA_GA" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_GA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" )

    if "FDA_SA" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_SA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" )

    if "FDA_MT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" )

    if "FDA_GAMT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_GAMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" )

    if "FDA_MCMT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MCMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" )

    # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if "MLP" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLP", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator" )

    if "MLPBFGS" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLPBFGS", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" )

    if "MLPBNN" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLPBNN", "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" ) # BFGS training with bayesian regulators

    # CF(Clermont-Ferrand)ANN
    if "CFMlpANN" in mlist:
        factory.BookMethod( TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=2000:HiddenLayers=N+1,N"  ) # n_cycles:#nodes:#nodes:...  

    # Tmlp(Root)ANN
    if "TMlpANN" in mlist:
        factory.BookMethod( TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ) #n_cycles:#nodes:#nodes:...

    # Support Vector Machine
    if "SVM" in mlist:
        factory.BookMethod( TMVA.Types.kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" )

    # Boosted Decision Trees
    if "BDTG" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTG",
                            "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" )

    if "BDT" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDT",
                            "!H:!V:NTrees=850:nEventsMin=150:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" )

    if "BDTB" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTB",
                            "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning" )

    if "BDTD" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTD",
                            "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate" )

    # RuleFit -- TMVA implementation of Friedman's method
    if "RuleFit" in mlist:
        factory.BookMethod( TMVA.Types.kRuleFit, "RuleFit",
                            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" )

    # --------------------------------------------------------------------------------------------------
            
    # ---- Now you can tell the factory to train, test, and evaluate the MVAs. 

    # Train MVAs
    factory.TrainAllMethods()
    
    # Test MVAs
    factory.TestAllMethods()
    
    # Evaluate MVAs
    factory.EvaluateAllMethods()    
    
    # Save the output.
    outputFile.Close()
    
    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"
    
    # open the GUI for the result macros    
    if( gui ):
        gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname )
        # keep the ROOT thread running
        gApplication.Run() 
Exemplo n.º 9
def main():

    try:
        # retrieve command line options
        shortopts  = "m:i:t:o:vh?"
        longopts   = ["methods=", "inputfile=", "inputtrees=", "outputfile=", "verbose", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infname     = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname    = DEFAULT_OUTFNAME
    methods     = DEFAULT_METHODS
    verbose     = False
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            a = a.strip()
            trees = a.rsplit( ' ' )
            trees.sort()
            trees.reverse()
            if len(trees)-trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # Print methods
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAnalysis: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut
    
    # check ROOT version, give alarm if 5.18 
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)
    
    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    gROOT.SetMacroPath( "../macros/" )
    gROOT.Macro       ( "../macros/TMVAlogon.C" )    
    gROOT.LoadMacro   ( "../macros/TMVAGui.C" )
    
    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile( outfname, 'RECREATE' )
    
    # Create instance of TMVA factory (see TMVA/macros/TMVAnalysis.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in 
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory( "TMVAnalysis", outputFile, "!V:!Silent:Color" )

    # Set verbosity
    factory.SetVerbose( verbose )
    
    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    factory.AddVariable( "var1+var2", 'F' )
    factory.AddVariable( "var1-var2", 'F' )
    factory.AddVariable( "var3",      'F' )
    factory.AddVariable( "var4",      'F' )

    # Read input data
    if not gSystem.AccessPathName( infname ):
        input = TFile( infname )
    else:
        print "ERROR: could not access data file %s\n" % infname
        sys.exit(1)

    # Get the signal and background trees for training
    signal      = input.Get( treeNameSig )
    background  = input.Get( treeNameBkg )
    
    # Global event weights (see below for setting event-wise weights)
    signalWeight     = 1.0
    backgroundWeight = 1.0

    # ====== register trees ====================================================
    #
    # the following method is the preferred one:
    # you can add an arbitrary number of signal or background trees
    factory.AddSignalTree    ( signal,     signalWeight     )
    factory.AddBackgroundTree( background, backgroundWeight )

    # To give different trees for training and testing, do as follows:
    #    factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    factory.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )
    
    # Use the following code instead of the above two or four lines to add signal and background 
    # training and test events "by hand"
    # NOTE that in this case one should not give expressions (such as "var1+var2") in the input 
    #      variable definition, but simply compute the expression before adding the event
    #
    #    # --- begin ----------------------------------------------------------
    #    
    # ... *** please lookup code in TMVA/macros/TMVAnalysis.C ***
    #    
    #    # --- end ------------------------------------------------------------
    #
    # ====== end of register trees ==============================================    
            
    # This would set individual event weights (the variables defined in the 
    # expression need to exist in the original TTree)
    #    for signal    : factory.SetSignalWeightExpression("weight1*weight2")
    #    for background: factory.SetBackgroundWeightExpression("weight1*weight2")

    # Apply additional cuts on the signal and background sample. 
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    mycutSig = TCut( "" ) 
    mycutBkg = TCut( "" ) 
    
    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg,
                                        "NSigTrain=3000:NBkgTrain=3000:SplitMode=Random:NormMode=NumEvents:!V" )

    # ... and alternative call to use a different number of signal and background training/test event is:
    # factory.PrepareTrainingAndTestTree( mycut, "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" )

    # --------------------------------------------------------------------------------------------------

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "Cuts", 
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" )

    if "CutsD" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsD", 
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" )

    if "CutsPCA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsPCA", 
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" )

    if "CutsGA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsGA",
                            "H:!V:FitMethod=GA:EffSel:Steps=30:Cycles=3:PopSize=100:SC_steps=10:SC_rate=5:SC_factor=0.95:VarProp=FSmart" )
   
    if "CutsSA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsSA",
                            "H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemperature=IncreasingAdaptive:InitialTemperature=1e+6:MinTemperature=1e-6:Eps=1e-10:UseDefaultScale" )
   
    # Likelihood
    if "Likelihood" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "Likelihood", 
                            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=10:NSmoothBkg[0]=10:NSmoothBkg[1]=10:NSmooth=10:NAvEvtPerBin=50" )

    # test the decorrelated likelihood
    if "LikelihoodD" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodD", 
                            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=10:NSmoothBkg[0]=10:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" )

    if "LikelihoodPCA" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodPCA", 
                            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=10:NSmoothBkg[0]=10:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" )
 
    # test the new kernel density estimator
    if "LikelihoodKDE" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodKDE", 
                            "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" )

    # test the mixed splines and kernel density estimator (depending on which variable)
    if "LikelihoodMIX" in mlist:
        factory.BookMethod( TMVA.Types.kLikelihood, "LikelihoodMIX", 
                            "!H:!V:!TransformOutput:PDFInterpol[0]=KDE:PDFInterpol[1]=KDE:PDFInterpol[2]=Spline2:PDFInterpol[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" )

    # PDE - RS method
    if "PDERS" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERS", 
                            "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" )
        # And the options strings for the MinMax and RMS methods, respectively:
        #      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" 
        #      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" 
        
    if "PDERSkNN" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERSkNN", 
                            "!H:!V:VolumeRangeMode=kNN:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" )

    if "PDERSD" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERSD", 
                            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" )

    if "PDERSPCA" in mlist:
        factory.BookMethod( TMVA.Types.kPDERS, "PDERSPCA", 
                            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" )
  
    # K-Nearest Neighbour classifier (KNN)
    if "KNN" in mlist:
        factory.BookMethod( TMVA.Types.kKNN, "KNN", 
                            "nkNN=400:TreeOptDepth=6:ScaleFrac=0.8:!UseKernel:!Trim" ) 

    # H-Matrix (chi-squared) method
    if "HMatrix" in mlist:
        factory.BookMethod( TMVA.Types.kHMatrix, "HMatrix", "!H:!V" ) 
        
    # Fisher discriminant
    if "Fisher" in mlist:
        factory.BookMethod( TMVA.Types.kFisher, "Fisher", 
                            "H:!V:!Normalise:CreateMVAPdfs:Fisher:NbinsMVAPdf=50:NsmoothMVAPdf=1" )

    # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if "FDA_MC" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MC",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" )
   
    if "FDA_GA" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_GA",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=100:Cycles=3:Steps=20:Trim=True:SaveBestGen=0" )

    if "FDA_MT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" )

    if "FDA_GAMT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_GAMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" )

    if "FDA_MCMT" in mlist:
        factory.BookMethod( TMVA.Types.kFDA, "FDA_MCMT",
                            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" )

    # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if "MLP" in mlist:
        factory.BookMethod( TMVA.Types.kMLP, "MLP", "H:!V:!Normalise:NeuronType=tanh:NCycles=200:HiddenLayers=N+1,N:TestRate=5" )

    # CF(Clermont-Ferrand)ANN
    if "CFMlpANN" in mlist:
        factory.BookMethod( TMVA.Types.kCFMlpANN, "CFMlpANN", "!H:!V:NCycles=500:HiddenLayers=N+1,N"  ) # n_cycles:#nodes:#nodes:...  
        
    # Tmlp(Root)ANN
    if "TMlpANN" in mlist:
        factory.BookMethod( TMVA.Types.kTMlpANN, "TMlpANN", "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"  ) # n_cycles:#nodes:#nodes:..
  
    # Support Vector Machines using three different kernel types (Gauss, polynomial and linear)
    if "SVM_Gauss" in mlist:
        factory.BookMethod( TMVA.Types.kSVM, "SVM_Gauss", "Sigma=2:C=1:Tol=0.001:Kernel=Gauss" )
      
    if "SVM_Poly" in mlist:
        factory.BookMethod( TMVA.Types.kSVM, "SVM_Poly", "Order=4:Theta=1:C=0.1:Tol=0.001:Kernel=Polynomial" )
      
    if "SVM_Lin" in mlist:
        factory.BookMethod( TMVA.Types.kSVM, "SVM_Lin", "!H:!V:Kernel=Linear:C=1:Tol=0.001" ) 
        
    # Boosted Decision Trees (second one with decorrelation)
    if "BDT" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDT", 
                            "!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=1.5" )
    if "BDTD" in mlist:
        factory.BookMethod( TMVA.Types.kBDT, "BDTD", 
                            "!H:!V:NTrees=400:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=CostComplexity:PruneStrength=2.5:VarTransform=Decorrelate" )

    # RuleFit -- TMVA implementation of Friedman's method
    if "RuleFit" in mlist:
        factory.BookMethod( TMVA.Types.kRuleFit, "RuleFit",
                            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" )

    # Friedman's RuleFit method, implementation by J. Friedman
    if "RuleFitJF" in mlist:
        factory.BookMethod( TMVA.Types.kRuleFit, "RuleFitJF",
                            "!V:RuleFitModule=RFFriedman:Model=ModRuleLinear:GDStep=0.01:GDNSteps=10000:GDErrScale=1.1:RFNendnodes=4" )
 
    # --------------------------------------------------------------------------------------------------
            
    # ---- Now you can tell the factory to train, test, and evaluate the MVAs. 

    # Train MVAs
    factory.TrainAllMethods()
    
    # Test MVAs
    factory.TestAllMethods()
    
    # Evaluate MVAs
    factory.EvaluateAllMethods()    
    
    # Save the output.
    outputFile.Close()
    
    print "=== wrote root file %s\n" % outfname
    print "=== TMVAnalysis is done!\n"
    
    # open the GUI for the result macros    
    gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname )
    
    # keep the ROOT thread running
    gApplication.Run() 
Exemplo n.º 10
def main():

    try:
        # retrieve command line options
        shortopts = "m:i:n:t:o:vh?"
        longopts = [
            "methods=", "inputfile=", "nTrees=", "inputtrees=", "outputfile=",
            "verbose", "help", "usage"
        ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    nTrees = DEFAULT_NTREES
    verbose = False
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-n", "--nTrees"):
            nTrees = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            a = a.strip()
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    gROOT.SetMacroPath("./")
    gROOT.Macro("./TMVAlogon.C")
    gROOT.LoadMacro("./TMVAGui.C")

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"
    )
    # Set verbosity
    factory.SetVerbose(verbose)

    # If you wish to modify default settings
    # (please check "src/Config.h" to see all available global options)
    #    gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    #    gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
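    # Here the variable names come from the external varsList module, which is
    # assumed to also provide preFix, bkg, fs and tail (used below).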

    varList = varsList.varList

    for iVar in varList:
        factory.AddVariable(iVar, 'F')

    #factory.AddVariable( "NBTags",'I' )

    # You can add so-called "Spectator variables", which are not used in the MVA training,
    # but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
    # input variables, the response values of all trained MVAs, and the spectator variables
    print "here"

    massPoint = infname
    preFix = varsList.preFix
    infname = "ZPrime_%s_all_SYNC_%s_noIso%s" % (massPoint, varsList.fs,
                                                 varsList.tail)
    iFileSig = TFile.Open(preFix + infname)
    sigChain = iFileSig.Get("eventTree_train")
    factory.AddSignalTree(sigChain)

    bkg_list = []
    bkg_trees_list = []
    hist_list = []
    weightsList = []
    for i in range(len(varsList.bkg)):
        bkg_list.append(TFile.Open(preFix + varsList.bkg[i][1]))
        print preFix + varsList.bkg[i][1]
        bkg_trees_list.append(bkg_list[i].Get("eventTree"))
        bkg_trees_list[i].GetEntry(0)

        if bkg_trees_list[i].GetEntries() == 0:
            continue
        factory.AddBackgroundTree(bkg_trees_list[i], 1)
        print "%s:\t\t%.2f" % (varsList.bkg[i][0],
                               bkg_trees_list[i].GetEntries())

    signalWeight = 1  #0.0159/sigChain.GetEntries() #xs (pb)

    # ====== register trees ====================================================
    factory.SetSignalWeightExpression('weightWithPU')
    factory.SetBackgroundWeightExpression('weightWithPU')

    # To give different trees for training and testing, do as follows:
    #    factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    factory.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )

    # Use the following code instead of the above two or four lines to add signal and background
    # training and test events "by hand"
    # NOTE that in this case one should not give expressions (such as "var1+var2") in the input
    #      variable definition, but simply compute the expression before adding the event
    #
    #    # --- begin ----------------------------------------------------------
    #
    # ... *** please lookup code in TMVA/macros/TMVAClassification.C ***
    #
    #    # --- end ------------------------------------------------------------
    #
    # ====== end of register trees ==============================================

    # Set individual event weights (the variables must exist in the original TTree)
    #    for signal    : factory.SetSignalWeightExpression    ("weight1*weight2");
    #    for background: factory.SetBackgroundWeightExpression("weight1*weight2");
    #factory.SetBackgroundWeightExpression( "weight" )

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )

    mycutSig = TCut("")
    mycutBkg = TCut("")

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    factory.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=1:nTest_Background=1:SplitMode=Block:NormMode=NumEvents:!V"
    )
    #                                         "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethodCuts.cxx, etc, or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation

    # Fisher discriminant (same as LD)
    #    if "Fisher" in mlist:
    #factory.BookMethod( TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )
    #       factory.BookMethod( TMVA.Types.kFisher, "Fisher")
    bdtSetting = '!H:!V:NTrees=%s' % nTrees
    bdtSetting += ':MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=100'
    if "BDT" in mlist:
        factory.BookMethod(TMVA.Types.kBDT, "BDT", bdtSetting)

    # --------------------------------------------------------------------------------------------------

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    factory.TrainAllMethods()

    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()
    #
    #     print "=== wrote root file %s\n" % outfname
    #     print "=== TMVAClassification is done!\n"

    # open the GUI for the result macros
    #     gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname )
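    # Rename the BDT weight file so that it is tagged with the mass point and is
    # not overwritten by the next training: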

    ChangeWeightName = 'mv %s/weights/TMVAClassification_BDT.weights.xml %s/weights/TMVAClassification_BDT.weights_both_%s.xml' % (
        os.getcwd(), os.getcwd(), massPoint)
    os.system(ChangeWeightName)
Exemplo n.º 11
def main():

    try:
        # retrieve command line options
        shortopts  = "m:i:t:o:vh?"
        longopts   = ["methods=", "inputfile=", "inputtrees=", "outputfile=", "verbose", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infname     = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname    = DEFAULT_OUTFNAME
    methods     = DEFAULT_METHODS
    verbose     = False
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            a = a.strip()
            trees = a.rsplit( ' ' )
            trees.sort()
            trees.reverse()
            if len(trees)-trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # Print methods
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut
    
    # check ROOT version, give alarm if 5.18 
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)
    
    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    gROOT.SetMacroPath( "./" )
    gROOT.Macro       ( "./TMVAlogon.C" )    
    gROOT.LoadMacro   ( "./TMVAGui.C" )
    
    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile( outfname, 'RECREATE' )
    
    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in 
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory( "TMVAClassification", outputFile, 
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )

    # Set verbosity
    factory.SetVerbose( verbose )
    
    # If you wish to modify default settings 
    # (please check "src/Config.h" to see all available global options)
    #    gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    #    gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    #varList = ['svMass', 'mJJ', 'met/fMass','pZ - pZV']
    varList = ['svMass', 'fMass', 'dRTauTau', 'dRJJ', 'svPt', 'dRhh', 'met', 'mJJReg',
                'metTau1DPhi', 'metTau2DPhi', 'metJ1DPhi', 'metJ2DPhi', 'metTauPairDPhi', 'metSvTauPairDPhi', 'metJetPairDPhi',]
Exemplo n.º 12
def main():
    # Default settings for command line arguments
    DEFAULT_OUTFNAME = "TMVAXi2.root"
    DEFAULT_INFNAME = "MC_Xic0_2015_filtered.root"
    DEFAULT_TREESIG = "DecayTree"
    DEFAULT_TREEBKG = "DecayTree"
    DEFAULT_METHODS = "Cuts,CutsD,CutsPCA,CutsGA,CutsSA,Likelihood,LikelihoodD,LikelihoodPCA,LikelihoodKDE,LikelihoodMIX,PDERS,PDERSD,PDERSPCA,PDEFoam,PDEFoamBoost,KNN,LD,Fisher,FisherG,BoostedFisher,HMatrix,FDA_GA,FDA_SA,FDA_MC,FDA_MT,FDA_GAMT,FDA_MCMT,MLP,MLPBFGS,MLPBNN,CFMlpANN,TMlpANN,SVM,BDT,BDTD,BDTG,BDTB,RuleFit"

    import argparse
    argparser = argparse.ArgumentParser()
    argparser.add_argument("-m",
                           "--methods",
                           default=repr(DEFAULT_METHODS.split(',')))
    argparser.add_argument("-o", "--outputfile", default=DEFAULT_OUTFNAME)
    argparser.add_argument('--variables')
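    # --variables (and --spectators below) are parsed with eval(), so they are
    # expected to be Python list literals, e.g. (hypothetical)
    #    --variables "['var1', ('var2', 'F')]"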
    argparser.add_argument('-s', '--spectators', default='()')
    argparser.add_argument('--signalfile', default=DEFAULT_INFNAME)
    argparser.add_argument('--signaltree', default=DEFAULT_TREESIG)
    argparser.add_argument('--signalsel', default='')
    argparser.add_argument('--signalweight', default='')
    argparser.add_argument('--bkgfile', default=DEFAULT_INFNAME)
    argparser.add_argument('--bkgtree', default=DEFAULT_TREEBKG)
    argparser.add_argument('--bkgsel', default='')
    argparser.add_argument('--bkgweight', default='')
    argparser.add_argument('--factoryname', default="TMVAClassification")
    argparser.add_argument('-v',
                           '--verbose',
                           action='store_true',
                           default=False)
    argparser.add_argument('--weightsdir', default='weights')
    argparser.add_argument('--datasetname', default='dataset')

    args = argparser.parse_args()

    weightsdir = args.weightsdir
    ROOT.TMVA.Config.Instance().GetIONames().fWeightFileDir = weightsdir

    # Print methods
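    # args.methods is eval()'d, so it must be a Python list literal such as
    # "['BDT', 'MLP']" (the default is repr(DEFAULT_METHODS.split(','))); a
    # stricter ast.literal_eval would also work here.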
    mlist = eval(args.methods)
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    if os.path.exists('./TMVAlogon.C'):
        gROOT.Macro("./TMVAlogon.C")

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(args.outputfile, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory(
        args.factoryname, outputFile,
        "!V:!Silent:Color:!DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"
    )

    # Set verbosity
    factory.SetVerbose(args.verbose)

    # If you wish to modify default settings
    # (please check "src/Config.h" to see all available global options)
    #    gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    #    gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]

    # For ROOT v6 compatibility: newer TMVA versions register variables and
    # trees on a TMVA.DataLoader instead of the Factory, so the Factory no
    # longer has an AddVariable method.
    root6 = not hasattr(factory, 'AddVariable')
    if root6:
        dataloader = ROOT.TMVA.DataLoader(args.datasetname)
    else:
        dataloader = factory

    for var in eval(args.variables):
        if not isinstance(var, (tuple, list)):
            var = (var, )
        try:
            dataloader.AddVariable(*var)
        except:
            print 'Failed to call dataloader.AddVariable with args', var
            raise
    # dataloader.AddVariable( "myvar1 := var1+var2", 'F' )
    # dataloader.AddVariable( "myvar2 := var1-var2", "Expression 2", "", 'F' )
    # dataloader.AddVariable( "var3",                "Variable 3", "units", 'F' )
    # dataloader.AddVariable( "var4",                "Variable 4", "units", 'F' )

    # You can add so-called "Spectator variables", which are not used in the MVA training,
    # but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
    # input variables, the response values of all trained MVAs, and the spectator variables
    for var in eval(args.spectators):
        if not isinstance(var, (tuple, list)):
            var = (var, )
        try:
            dataloader.AddSpectator(*var)
        except:
            print 'Failed to call dataloader.AddSpectator with args', var
            raise
    # dataloader.AddSpectator( "spec1:=var1*2",  "Spectator 1", "units", 'F' )
    # dataloader.AddSpectator( "spec2:=var1*3",  "Spectator 2", "units", 'F' )

    # Read input data
    # if gSystem.AccessPathName( infname ) != 0: gSystem.Exec( "wget http://root.cern.ch/files/" + infname )

    # input = TFile.Open( infname )

    # # Get the signal and background trees for training
    # signal      = input.Get( treeNameSig )
    # background  = input.Get( treeNameBkg )

    signalfile = TFile.Open(args.signalfile)
    if not signalfile or signalfile.IsZombie():
        raise OSError("Couldn't find signal file " + repr(args.signalfile))
    signal = signalfile.Get(args.signaltree)
    if not signal:
        raise ValueError("Couldn't find signal TTree " +
                         repr(args.signaltree) + " in file " +
                         repr(args.signalfile))

    bkgfile = TFile.Open(args.bkgfile)
    if not bkgfile or bkgfile.IsZombie():
        raise OSError("Couldn't find bkg file " + repr(args.bkgfile))
    background = bkgfile.Get(args.bkgtree)
    if not background:
        raise ValueError("Couldn't find bkg TTree " + repr(args.bkgtree) +
                         " in file " + repr(args.bkgfile))

    # Global event weights (see below for setting event-wise weights)
    signalWeight = 1.0
    backgroundWeight = 1.0

    # ====== register trees ====================================================
    #
    # the following method is the preferred one:
    # you can add an arbitrary number of signal or background trees
    dataloader.AddSignalTree(signal, signalWeight)
    dataloader.AddBackgroundTree(background, backgroundWeight)

    # To give different trees for training and testing, do as follows:
    #    dataloader.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    dataloader.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )

    # Use the following code instead of the above two or four lines to add signal and background
    # training and test events "by hand"
    # NOTE that in this case one should not give expressions (such as "var1+var2") in the input
    #      variable definition, but simply compute the expression before adding the event
    #
    #    # --- begin ----------------------------------------------------------
    #
    # ... *** please lookup code in TMVA/macros/TMVAClassification.C ***
    #
    #    # --- end ------------------------------------------------------------
    #
    # ====== end of register trees ==============================================

    # Set individual event weights (the variables must exist in the original TTree)
    #    for signal    : dataloader.SetSignalWeightExpression    ("weight1*weight2");
    #    for background: dataloader.SetBackgroundWeightExpression("weight1*weight2");
    if args.signalweight:
        dataloader.SetSignalWeightExpression(args.signalweight)
    if args.bkgweight:
        dataloader.SetBackgroundWeightExpression(args.bkgweight)

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    mycutSig = TCut(args.signalsel)
    mycutBkg = TCut(args.bkgsel)

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    dataloader.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )
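    # A sketch of an alternative (numbers are illustrative): to fix the train/test
    # split sizes explicitly instead of using all available events, one could pass e.g.
    # "nTrain_Signal=5000:nTrain_Background=5000:nTest_Signal=5000:nTest_Background=5000:"
    # "SplitMode=Random:NormMode=NumEvents:!V"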

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please look up the various method configuration options in the corresponding cxx files, e.g.
    # src/MethodCuts.cxx, etc., or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1:...", where [2] is the third input variable

    # Cut optimisation
    if root6:
        # Bit of an ugly hack, but it does the job: DataLoader-based TMVA versions
        # expect the DataLoader as the first argument to Factory.BookMethod, so wrap
        # the bound method and prepend it automatically.
        factory._BookMethod = factory.BookMethod

        # factory._BookMethod is already bound to the factory, so 'self' is not
        # passed explicitly here.
        def BookMethod(*args):
            factory._BookMethod(dataloader, *args)

        factory.BookMethod = BookMethod
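        # With this patch the ROOT 5-style calls below, e.g.
        #   factory.BookMethod(TMVA.Types.kCuts, "Cuts", options)
        # are forwarded as factory._BookMethod(dataloader, TMVA.Types.kCuts, "Cuts", options),
        # matching the DataLoader-based signature used in the next example further down.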

    if "Cuts" in mlist:
        factory.BookMethod(
            TMVA.Types.kCuts, "Cuts",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart")

    if "CutsD" in mlist:
        factory.BookMethod(
            TMVA.Types.kCuts, "CutsD",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate"
        )

    if "CutsPCA" in mlist:
        factory.BookMethod(
            TMVA.Types.kCuts, "CutsPCA",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA"
        )

    if "CutsGA" in mlist:
        factory.BookMethod(
            TMVA.Types.kCuts, "CutsGA",
            "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95"
        )

    if "CutsSA" in mlist:
        factory.BookMethod(
            TMVA.Types.kCuts, "CutsSA",
            "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale"
        )

    # Likelihood ("naive Bayes estimator")
    if "Likelihood" in mlist:
        factory.BookMethod(
            TMVA.Types.kLikelihood, "Likelihood",
            "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50"
        )

    # Decorrelated likelihood
    if "LikelihoodD" in mlist:
        factory.BookMethod(
            TMVA.Types.kLikelihood, "LikelihoodD",
            "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate"
        )

    # PCA-transformed likelihood
    if "LikelihoodPCA" in mlist:
        factory.BookMethod(
            TMVA.Types.kLikelihood, "LikelihoodPCA",
            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA"
        )

    # Use a kernel density estimator to approximate the PDFs
    if "LikelihoodKDE" in mlist:
        factory.BookMethod(
            TMVA.Types.kLikelihood, "LikelihoodKDE",
            "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50"
        )

    # Use a variable-dependent mix of splines and kernel density estimator
    if "LikelihoodMIX" in mlist:
        factory.BookMethod(
            TMVA.Types.kLikelihood, "LikelihoodMIX",
            "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50"
        )

    # Test the multi-dimensional probability density estimator
    # here are the options strings for the MinMax and RMS methods, respectively:
    #      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
    #      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
    if "PDERS" in mlist:
        factory.BookMethod(
            TMVA.Types.kPDERS, "PDERS",
            "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600"
        )

    if "PDERSD" in mlist:
        factory.BookMethod(
            TMVA.Types.kPDERS, "PDERSD",
            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate"
        )

    if "PDERSPCA" in mlist:
        factory.BookMethod(
            TMVA.Types.kPDERS, "PDERSPCA",
            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA"
        )

    # Multi-dimensional likelihood estimator using self-adapting phase-space binning
    if "PDEFoam" in mlist:
        factory.BookMethod(
            TMVA.Types.kPDEFoam, "PDEFoam",
            "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T"
        )

    if "PDEFoamBoost" in mlist:
        factory.BookMethod(
            TMVA.Types.kPDEFoam, "PDEFoamBoost",
            "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T"
        )

    # K-Nearest Neighbour classifier (KNN)
    if "KNN" in mlist:
        factory.BookMethod(
            TMVA.Types.kKNN, "KNN",
            "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim"
        )

    # H-Matrix (chi-squared) method
    if "HMatrix" in mlist:
        factory.BookMethod(TMVA.Types.kHMatrix, "HMatrix", "!H:!V")

    # Linear discriminant (same as Fisher discriminant)
    if "LD" in mlist:
        factory.BookMethod(
            TMVA.Types.kLD, "LD",
            "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10"
        )

    # Fisher discriminant (same as LD)
    if "Fisher" in mlist:
        factory.BookMethod(
            TMVA.Types.kFisher, "Fisher",
            "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10"
        )

    # Fisher with Gauss-transformed input variables
    if "FisherG" in mlist:
        factory.BookMethod(TMVA.Types.kFisher, "FisherG",
                           "H:!V:VarTransform=Gauss")

    # Composite classifier: ensemble (tree) of boosted Fisher classifiers
    if "BoostedFisher" in mlist:
        factory.BookMethod(
            TMVA.Types.kFisher, "BoostedFisher",
            "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2"
        )

    # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if "FDA_MC" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_MC",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1"
        )

    if "FDA_GA" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_GA",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1"
        )

    if "FDA_SA" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_SA",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale"
        )

    if "FDA_MT" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_MT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch"
        )

    if "FDA_GAMT" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_GAMT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim"
        )

    if "FDA_MCMT" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_MCMT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20"
        )

    # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if "MLP" in mlist:
        factory.BookMethod(
            TMVA.Types.kMLP, "MLP",
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator"
        )

    if "MLPBFGS" in mlist:
        factory.BookMethod(
            TMVA.Types.kMLP, "MLPBFGS",
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator"
        )

    if "MLPBNN" in mlist:
        factory.BookMethod(
            TMVA.Types.kMLP, "MLPBNN",
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator"
        )  # BFGS training with bayesian regulators

    # CF(Clermont-Ferrand)ANN
    if "CFMlpANN" in mlist:
        factory.BookMethod(TMVA.Types.kCFMlpANN, "CFMlpANN",
                           "!H:!V:NCycles=2000:HiddenLayers=N+1,N"
                           )  # n_cycles:#nodes:#nodes:...

    # Tmlp(Root)ANN
    if "TMlpANN" in mlist:
        factory.BookMethod(
            TMVA.Types.kTMlpANN, "TMlpANN",
            "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"
        )  # n_cycles:#nodes:#nodes:...

    # Support Vector Machine
    if "SVM" in mlist:
        factory.BookMethod(TMVA.Types.kSVM, "SVM",
                           "Gamma=0.25:Tol=0.001:VarTransform=Norm")

    # Boosted Decision Trees
    if "BDTG" in mlist:
        factory.BookMethod(
            TMVA.Types.kBDT, "BDTG",
            "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedGrad:GradBaggingFraction=0.5:nCuts=20:MaxDepth=2"
        )

    if "BDT" in mlist:
        factory.BookMethod(
            TMVA.Types.kBDT, "BDT",
            "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20"
        )

    if "BDTB" in mlist:
        factory.BookMethod(
            TMVA.Types.kBDT, "BDTB",
            "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20"
        )

    if "BDTD" in mlist:
        factory.BookMethod(
            TMVA.Types.kBDT, "BDTD",
            "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate"
        )

    # RuleFit -- TMVA implementation of Friedman's method
    if "RuleFit" in mlist:
        factory.BookMethod(
            TMVA.Types.kRuleFit, "RuleFit",
            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02"
        )

    # --------------------------------------------------------------------------------------------------

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    factory.TrainAllMethods()

    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    print "=== wrote root file %s\n" % outputFile.GetName()
    print "=== TMVAClassification is done!\n"

    # open the GUI for the result macros
    if not ROOT.gROOT.IsBatch():
        if hasattr(TMVA, 'TMVAGui'):
            TMVA.TMVAGui(outputFile.GetName())
            raw_input('Hit enter to quit.')
        elif 'ROOTSYS' in os.environ:
            tmvaguipath = os.path.join(os.environ['ROOTSYS'], 'tutorials',
                                       'tmva')
            if os.path.exists(os.path.join(tmvaguipath, 'TMVAGui.C')):
                gROOT.SetMacroPath(tmvaguipath)
                gROOT.LoadMacro("TMVAGui.C")
                try:
                    gROOT.ProcessLine("TMVAGui(\"%s\")" % outputFile.GetName())
                    raw_input('Hit enter to quit.')
                except RuntimeError:
                    print "Couldn't run TMVAGui!"

    outputfilename = outputFile.GetName()
    weightsfiles = dict(
        (m,
         os.path.join(weightsdir, args.factoryname + '_' + m + '.weights.xml'))
        for m in mlist)
    classfiles = dict(
        (m, os.path.join(weightsdir, args.factoryname + '_' + m + '.class.C'))
        for m in mlist)
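    # For a method m == "BDT" (for example) these should point at
    #   <weightsdir>/<factoryname>_BDT.weights.xml  and  <weightsdir>/<factoryname>_BDT.class.C,
    # i.e. the weight file later read back by TMVA.Reader.BookMVA() and the standalone
    # response class generated by the factory.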

    # keep the ROOT thread running (this makes the function hang).
    #gApplication.Run()

    # TMVA disables unused branches when copying the trees then doesn't change them back.
    background.SetBranchStatus('*', 1)
    signal.SetBranchStatus('*', 1)
    if 'signalfile' in locals():
        signalfile.Close()
    if 'bkgfile' in locals():
        bkgfile.Close()
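    # Returning locals() hands the caller everything defined in this function
    # (factory, dataloader, the input trees, the weightsfiles/classfiles maps, ...),
    # which is convenient for interactive inspection after training.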
    return locals()
Example No. 13
def TMVAClassification(methods,
                       sigfname,
                       bkgfname,
                       optname,
                       channel,
                       trees,
                       verbose=False):  #="DecayTree,DecayTree"
    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Define trees
    trees = trees.split(",")
    if len(trees) - trees.count('') != 2:
        print "ERROR: need to give two trees (each one for signal and background)"
        print trees
        sys.exit(1)
    treeNameSig = trees[0]
    treeNameBkg = trees[1]

    # Print output file and directory
    outfname = "TMVA_%s_%s.root" % (channel, optname)
    myWeightDirectory = "weights_%s_%s" % (channel, optname)
    print "=== TMVAClassification: output will be written to:"
    print "=== %s" % outfname
    print "=== %s" % myWeightDirectory

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    #gROOT.SetMacroPath( "./" )
    #gROOT.Macro       ( "./tmva/test/TMVAlogon.C" )
    #gROOT.LoadMacro   ( "./tmva/test/TMVAGui.C" ) ###Is this really necessary??

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Setup TMVA
    TMVA.Tools.Instance()

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification"
    )

    # Set verbosity
    factory.SetVerbose(verbose)

    # Load data
    dataloader = TMVA.DataLoader("dataset")

    # If you wish to modify default settings
    # (please check "src/Config.h" to see all available global options)
    #    gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    (TMVA.gConfig().GetIONames()).fWeightFileDir = myWeightDirectory
    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]

    print "*** Training on channel:"
    print "*** %s" % channel
    print "***"
    '''
    if channel == "1":
        #dataloader.AddVariable( "pplus_ProbNNp",                      "Prob(p^{+})",                             "",     'F' );
        #dataloader.AddVariable( "Kminus_ProbNNk",                     "Prob(K^{-})",                             "",     'F' );

        dataloader.AddVariable( "pplus_PT",                           "P_{T}(p^{+})",                             "MeV", 'F' );
        dataloader.AddVariable( "Kminus_PT",                          "P_{T}(K^{-})",                             "MeV", 'F' );
        dataloader.AddVariable( "gamma_PT",                           "P_{T}(#gamma)",                            "MeV", 'F' );
        dataloader.AddVariable( "Lambda_1520_0_PT",                   "P_{T}(#Lambda(1520))",                     "MeV", 'F' );
        dataloader.AddVariable( "B_PT",                               "P_{T}(#Lambda_{b})",                       "MeV", 'F' );

        dataloader.AddVariable( "beta:=(-gamma_P+Kminus_P+pplus_P)/(gamma_P+Kminus_P+pplus_P)","#beta",             "MeV", 'F' );
        dataloader.AddVariable( "MomCons1:=-B_P+gamma_P+Lambda_1520_0_P","P_{tot,1}",                               "MeV", 'F' );
        dataloader.AddVariable( "MomCons2:=-Lambda_1520_0_P+Kminus_P+pplus_P","P_{tot,2}",                          "MeV", 'F' );

        dataloader.AddVariable( "Sum_Kminus_p_eta:=atanh(pplus_PZ/pplus_P)+atanh(Kminus_PZ/Kminus_P)","#eta(K^{-})+#eta(p^{+})","MeV", 'F' );
        dataloader.AddVariable( "Diff_Kminus_p_eta:=atanh(Kminus_PZ/Kminus_P)-atanh(pplus_PZ/pplus_P)","#eta(K^{-})-#eta(p^{+})","MeV", 'F' );

        dataloader.AddVariable( "pplus_IPCHI2_OWNPV",                 "#chi^{2}_{IP}(p^{+})",                       ""  ,  'F' );
        dataloader.AddVariable( "Kminus_IPCHI2_OWNPV",                "#chi^{2}_{IP}(K^{-})",                       ""  ,  'F' );
        dataloader.AddVariable( "B_IPCHI2_OWNPV",                     "#chi^{2}_{IP}(#Lambda_{b})",                 ""  ,  'F' );
        #dataloader.AddVariable( "gamma_IPCHI2_OWNPV",                 "IP #chi^{2}(#gamma)",                       ""  ,  'F' );
        #dataloader.AddVariable( "Lambda_1520_0_IP_OWNPV",             "IP(#Lambda(1520))",                        "mm",  'F' );
        #dataloader.AddVariable( "Lambda_1520_0_IPCHI2_OWNPV",         "IP#chi^{2}(#Lambda(1520))",               "",    'F' );
        
        dataloader.AddVariable( "Lambda_1520_0_OWNPV_CHI2",           "#chi^{2}_{vertex}(#Lambda(1520))",           ""  ,  'F' );
        dataloader.AddVariable( "B_OWNPV_CHI2",                       "#chi^{2}_{vertex}(#Lambda_{b})",             ""  ,  'F' );
        dataloader.AddVariable( "B_DIRA_OWNPV",                       "DIRA(#Lambda_{b})",                          ""  ,  'F' );
        
        #dataloader.AddVariable( "Lambda_1520_0_FDCHI2_OWNPV",         "FD #chi^{2}(#Lambda(1520))",               "",    'F' );
        dataloader.AddVariable( "B_FDCHI2_OWNPV",                     "#chi^{2}_{FD}(#Lambda_{b})",                 "",    'F' );
    '''

    if channel == "2":
        dataloader.AddVariable("pplus_PT", "P_{T}(p^{+})", "MeV", 'F')
        dataloader.AddVariable("Kminus_PT", "P_{T}(K^{-})", "MeV", 'F')
        dataloader.AddVariable("gamma_PT", "P_{T}(#gamma)", "MeV", 'F')
        dataloader.AddVariable("Lambda_1520_0_PT", "P_{T}(#Lambda*)", "MeV",
                               'F')
        dataloader.AddVariable("B_PT", "P_{T}(#Lambda_{b})", "MeV", 'F')

        dataloader.AddVariable(
            "beta:=(-gamma_P+Kminus_P+pplus_P)/(gamma_P+Kminus_P+pplus_P)",
            "#beta", "", 'F')
        #ok
        #dataloader.AddVariable( "MomCons1:=-B_P+gamma_P+Lambda_1520_0_P","P_{tot,1}",                               "MeV", 'F' );#BDT learned Mass check1
        dataloader.AddVariable("MomCons2:=-Lambda_1520_0_P+Kminus_P+pplus_P",
                               "P_{tot,2}", "MeV", 'F')
        #ok

        #dataloader.AddVariable( "Sum_Kminus_p_eta:=atanh(pplus_PZ/pplus_P)+atanh(Kminus_PZ/Kminus_P)","#eta(K^{-})+#eta(p^{+})","", 'F' );#99correlationL_eta
        dataloader.AddVariable(
            "Diff_Kminus_p_eta:=atanh(Kminus_PZ/Kminus_P)-atanh(pplus_PZ/pplus_P)",
            "#eta(K^{-})-#eta(p^{+})", "", 'F')
        dataloader.AddVariable(
            "Lambda_1520_0_eta:=atanh(Lambda_1520_0_PZ/Lambda_1520_0_P)",
            "#eta(#Lambda*)", "", 'F')
        dataloader.AddVariable("gamma_eta:=atanh(gamma_PZ/gamma_P)",
                               "#eta(#gamma)", "", 'F')

        dataloader.AddVariable("pplus_IPCHI2_OWNPV", "#chi^{2}_{IP}(p^{+})",
                               "", 'F')
        #dataloader.AddVariable( "Kminus_IPCHI2_OWNPV",                "#chi^{2}_{IP}(K^{-})",                       ""  ,  'F' );
        dataloader.AddVariable("B_IPCHI2_OWNPV", "#chi^{2}_{IP}(#Lambda_{b})",
                               "", 'F')
        dataloader.AddVariable("Lambda_1520_0_IPCHI2_OWNPV",
                               "#chi^{2}_{IP}(#Lambda*)", "", 'F')

        dataloader.AddVariable("Lambda_1520_0_OWNPV_CHI2",
                               "#chi^{2}_{vertex}(#Lambda*)", "", 'F')
        dataloader.AddVariable("B_OWNPV_CHI2",
                               "#chi^{2}_{vertex}(#Lambda_{b})", "", 'F')
        #dataloader.AddVariable( "B_BMassFit_chi2/B_BMassFit_nDOF",    "#chi^{2}_{DTF}/n_{dof}",                     ""  ,  'F' );#BDT learned Mass check1
        #dataloader.AddVariable( "B_PVFit_chi2/B_PVFit_nDOF",          "#chi^{2}_{DTF}/n_{dof}",                     ""  ,  'F' );#put it out because array

        #dataloader.AddVariable( "B_DIRA_OWNPV",                       "DIRA(#Lambda_{b})",                          ""  ,  'F' ); #not used by BDT
        #dataloader.AddVariable( "Lambda_1520_0_DIRA_OWNPV",           "DIRA(#Lambda*)",                        ""  ,  'F' ); #not used
        #dataloader.AddVariable( "Lambda_1520_0_FDCHI2_OWNPV",         "FD #chi^{2}(#Lambda*)",               "",    'F' ); #not used
        #dataloader.AddVariable( "B_FDCHI2_OWNPV",                     "#chi^{2}_{FD}(#Lambda_{b})",                 "",    'F' ); #not used

    # Add Spectator Variables: not used for Training but written in final TestTree
    #dataloader.AddSpectator( "B_M",                                   "M(#Lambda_{b})",                           "MeV");
    #dataloader.AddSpectator( "Lambda_1520_0_M",                       "M(#Lambda*)",                         "MeV");

    # Read input data
    if gSystem.AccessPathName(sigfname) != 0:
        print "Can not find %s" % sigfname
    if gSystem.AccessPathName(bkgfname) != 0:
        print "Can not find %s" % bkgfname

    inputSig = TFile.Open(sigfname)
    inputBkg = TFile.Open(bkgfname)

    # Get the signal and background trees for training
    signal = inputSig.Get(treeNameSig)
    background = inputBkg.Get(treeNameBkg)

    # Global event weights (see below for setting event-wise weights)
    signalWeight = 1.0
    backgroundWeight = 1.0

    # ====== register trees ====================================================
    #
    # the following method is the preferred one:
    # you can add an arbitrary number of signal or background trees
    dataloader.AddSignalTree(signal, signalWeight)
    dataloader.AddBackgroundTree(background, backgroundWeight)

    # To give different trees for training and testing, do as follows:
    #    dataloader.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    dataloader.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )

    # Use the following code instead of the above two or four lines to add signal and background
    # training and test events "by hand"
    # NOTE that in this case one should not give expressions (such as "var1+var2") in the input
    #      variable definition, but simply compute the expression before adding the event
    #
    #    # --- begin ----------------------------------------------------------
    #
    # ... *** please lookup code in TMVA/macros/TMVAClassification.C ***
    #
    #    # --- end ------------------------------------------------------------
    #
    # ====== end of register trees ==============================================

    # Set individual event weights (the variables must exist in the original TTree)
    #    for signal    : dataloader.SetSignalWeightExpression    ("weight1*weight2");
    #    for background: dataloader.SetBackgroundWeightExpression("weight1*weight2");
    #dataloader.SetBackgroundWeightExpression( "weight" )

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )

    mycutSig = TCut(
        ""
    )  #"pplus_ProbNNp>0.2 && Kminus_ProbNNk>0.2 && B_PT>4000 && Lambda_1520_0_PT>1500 && gamma_PT>3000 && pplus_PT>1000 && B_FDCHI2_OWNPV>100 && pplus_IPCHI2_OWNPV>50 && Kminus_IPCHI2_OWNPV>40")# B_BKGCAT==0 directly applied in root sample
    #print(sigfname + str( mycutSig ) + treeNameSig)

    mycutBkg = TCut(
        "B_M>6120"
    )  #"pplus_ProbNNp>0.2 && Kminus_ProbNNk>0.2 && B_PT>4000 && Lambda_1520_0_PT>1500 && gamma_PT>3000 && pplus_PT>1000 && B_FDCHI2_OWNPV>100 && pplus_IPCHI2_OWNPV>50 && Kminus_IPCHI2_OWNPV>40 && B_M>6120")#(B_M>6120 || B_M<5120)" )
    #print(bkgfname + str( mycutBkg ) + treeNameBkg)

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    dataloader.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please look up the various method configuration options in the corresponding cxx files, e.g.
    # src/MethodCuts.cxx, etc., or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1:...", where [2] is the third input variable

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kCuts, "Cuts",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart")

    if "CutsD" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kCuts, "CutsD",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate"
        )

    if "CutsPCA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kCuts, "CutsPCA",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA"
        )

    if "CutsGA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kCuts, "CutsGA",
            "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95"
        )

    if "CutsSA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kCuts, "CutsSA",
            "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale"
        )

    # Likelihood ("naive Bayes estimator")
    if "Likelihood" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLikelihood, "Likelihood",
            "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50"
        )

    # Decorrelated likelihood
    if "LikelihoodD" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLikelihood, "LikelihoodD",
            "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate"
        )

    # PCA-transformed likelihood
    if "LikelihoodPCA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLikelihood, "LikelihoodPCA",
            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA"
        )

    # Use a kernel density estimator to approximate the PDFs
    if "LikelihoodKDE" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLikelihood, "LikelihoodKDE",
            "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50"
        )

    # Use a variable-dependent mix of splines and kernel density estimator
    if "LikelihoodMIX" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLikelihood, "LikelihoodMIX",
            "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50"
        )

    # Test the multi-dimensional probability density estimator
    # here are the options strings for the MinMax and RMS methods, respectively:
    #      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
    #      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
    if "PDERS" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kPDERS, "PDERS",
            "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600"
        )

    if "PDERSD" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kPDERS, "PDERSD",
            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate"
        )

    if "PDERSPCA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kPDERS, "PDERSPCA",
            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA"
        )

    # Multi-dimensional likelihood estimator using self-adapting phase-space binning
    if "PDEFoam" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kPDEFoam, "PDEFoam",
            "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T"
        )

    if "PDEFoamBoost" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kPDEFoam, "PDEFoamBoost",
            "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T"
        )

    # K-Nearest Neighbour classifier (KNN)
    if "KNN" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kKNN, "KNN",
            "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim"
        )

    # H-Matrix (chi-squared) method
    if "HMatrix" in mlist:
        factory.BookMethod(dataloader, TMVA.Types.kHMatrix, "HMatrix", "!H:!V")

    # Linear discriminant (same as Fisher discriminant)
    if "LD" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kLD, "LD",
            "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10"
        )

    # Fisher discriminant (same as LD)
    if "Fisher" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFisher, "Fisher",
            "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10"
        )

    # Fisher with Gauss-transformed input variables
    if "FisherG" in mlist:
        factory.BookMethod(dataloader, TMVA.Types.kFisher, "FisherG",
                           "H:!V:VarTransform=Gauss")

    # Composite classifier: ensemble (tree) of boosted Fisher classifiers
    if "BoostedFisher" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFisher, "BoostedFisher",
            "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2"
        )

    # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if "FDA_MC" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_MC",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1"
        )

    if "FDA_GA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_GA",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1"
        )

    if "FDA_SA" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_SA",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale"
        )

    if "FDA_MT" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_MT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch"
        )

    if "FDA_GAMT" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_GAMT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim"
        )

    if "FDA_MCMT" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kFDA, "FDA_MCMT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20"
        )

    # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if "MLP" in mlist:
        factory.BookMethod(
            dataloader,
            TMVA.Types.kMLP,
            "MLP",
            #"!H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+3:TestRate=5:!UseRegulator" )#Try
            "!H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator"
        )  #Old

    if "MLPBFGS" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kMLP, "MLPBFGS",
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator"
        )

    if "MLPBNN" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kMLP, "MLPBNN",
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator"
        )  # BFGS training with bayesian regulators

    # CF(Clermont-Ferrand)ANN
    if "CFMlpANN" in mlist:
        factory.BookMethod(dataloader, TMVA.Types.kCFMlpANN, "CFMlpANN",
                           "!H:!V:NCycles=2000:HiddenLayers=N+1,N"
                           )  # n_cycles:#nodes:#nodes:...

    # Tmlp(Root)ANN
    if "TMlpANN" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kTMlpANN, "TMlpANN",
            "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"
        )  # n_cycles:#nodes:#nodes:...

    # Support Vector Machine
    if "SVM" in mlist:
        factory.BookMethod(dataloader, TMVA.Types.kSVM, "SVM",
                           "Gamma=0.25:Tol=0.001:VarTransform=Norm")

    # Boosted Decision Trees
    if "BDTG" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kBDT, "BDTG",
            "!H:!V:NTrees=600:BoostType=Grad:Shrinkage=0.1:UseBaggedGrad:GradBaggingFraction=0.73:SeparationType=GiniIndex:nCuts=15:MaxDepth=2"
        )  #Settings3
        #"!H:!V:NTrees=300:BoostType=Grad:Shrinkage=0.11:UseBaggedGrad:GradBaggingFraction=0.73:SeparationType=GiniIndex:nCuts=17:MaxDepth=4" )#AnaNote
        #"!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedGrad:GradBaggingFraction=0.6:SeparationType=GiniIndex:nCuts=20:NNodesMax=5" )#Old

    if "BDT" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kBDT, "BDT",
            "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning"
        )

    if "BDTB" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kBDT, "BDTB",
            "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning"
        )

    if "BDTD" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kBDT, "BDTD",
            "!H:!V:NTrees=400:nEventsMin=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VarTransform=Decorrelate"
        )

    # RuleFit -- TMVA implementation of Friedman's method
    if "RuleFit" in mlist:
        factory.BookMethod(
            dataloader, TMVA.Types.kRuleFit, "RuleFit",
            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02"
        )

    # --------------------------------------------------------------------------------------------------

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    print("FLAG 0")
    factory.TrainAllMethods()

    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"

    # open the GUI for the result macros
    if not gROOT.IsBatch(): TMVA.TMVAGui(outfname)
Example No. 14
def main():

    try:
        # Retrieve command line options
        shortopts = "m:i:t:o:vh?"
        longopts = [
            "methods=", "inputfile=", "inputtrees=", "outputfile=", "verbose",
            "help", "usage"
        ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)

    except getopt.GetoptError:
        # Print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    verbose = False
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            a = a.strip()
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)

            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVApplication: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut, TH1F, TStopwatch

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    gROOT.SetMacroPath("../macros/")
    gROOT.Macro("../macros/TMVAlogon.C")

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Create the Reader object
    reader = TMVA.Reader("!Color")

    # Create a set of variables and declare them to the reader
    # - the variable names must correspond in name and type to
    #   those given in the weight file(s) that you use

    # (declare the same variables, in the same order, that were used for training)
    var1 = array('f', [0])
    var2 = array('f', [0])
    var3 = array('f', [0])
    var4 = array('f', [0])
    reader.AddVariable("var1+var2", var1)
    reader.AddVariable("var1-var2", var2)
    reader.AddVariable("var3", var3)
    reader.AddVariable("var4", var4)

    # book the MVA methods
    dir = "weights/"
    prefix = "TMVAnalysis_"

    for m in mlist:
        reader.BookMVA(m + " method", dir + prefix + m + ".weights.txt")

    #######################################################################
    # For an example how to apply your own plugin method, please see
    # TMVA/macros/TMVApplication.C
    #######################################################################

    # Book output histograms
    nbin = 80

    histList = []
    for m in mlist:
        histList.append(TH1F(m, m, nbin, -3, 3))

    # Book example histogram for probability (the other methods would be done similarly)
    if "Fisher" in mlist:
        probHistFi = TH1F("PROBA_MVA_Fisher", "PROBA_MVA_Fisher", nbin, 0, 1)
        rarityHistFi = TH1F("RARITY_MVA_Fisher", "RARITY_MVA_Fisher", nbin, 0,
                            1)

    # Prepare input tree (this must be replaced by your data source)
    # in this example, there is a toy tree with signal and one with background events
    # we'll later on use only the "signal" events for the test in this example.
    #
    fname = "./tmva_example.root"
    print "--- Accessing data file: %s" % fname
    input = TFile.Open(fname)
    if not input:
        print "ERROR: could not open data file: %s" % fname
        sys.exit(1)

    #
    # Prepare the analysis tree
    # - here the variable names have to correspond to your tree
    # - you can use the same variables as above which is slightly faster,
    #   but of course you can use different ones and copy the values inside the event loop
    #
    print "--- Select signal sample"
    theTree = input.Get("TreeS")
    userVar1 = array('f', [0])
    userVar2 = array('f', [0])
    theTree.SetBranchAddress("var1", userVar1)
    theTree.SetBranchAddress("var2", userVar2)
    theTree.SetBranchAddress("var3", var3)
    theTree.SetBranchAddress("var4", var4)

    # Efficiency calculator for cut method
    nSelCuts = 0
    effS = 0.7
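    # effS is the target signal efficiency: for cut classifiers the second argument of
    # reader.EvaluateMVA("CutsGA method", effS) selects the working point, and the call
    # returns a pass/fail decision for the event rather than a continuous MVA value.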

    # Process the events
    print "--- Processing: %i events" % theTree.GetEntries()
    sw = TStopwatch()
    sw.Start()
    for ievt in range(theTree.GetEntries()):

        if ievt % 1000 == 0:
            print "--- ... Processing event: %i" % ievt

        # Fill event in memory
        theTree.GetEntry(ievt)

        # Compute MVA input variables
        var1[0] = userVar1[0] + userVar2[0]
        var2[0] = userVar1[0] - userVar2[0]

        # Return the MVAs and fill to histograms
        if "CutsGA" in mlist:
            passed = reader.EvaluateMVA("CutsGA method", effS)
            if passed:
                nSelCuts = nSelCuts + 1

        # Fill histograms with MVA outputs
        for h in histList:
            h.Fill(reader.EvaluateMVA(h.GetName() + " method"))

        # Retrieve probability instead of MVA output
        if "Fisher" in mlist:
            probHistFi.Fill(reader.GetProba("Fisher method"))
            rarityHistFi.Fill(reader.GetRarity("Fisher method"))

    # Get elapsed time
    sw.Stop()
    print "--- End of event loop: %s" % sw.Print()

    # Print computed efficiencies
    if "CutsGA" in mlist:
        eff = float(nSelCuts) / theTree.GetEntries()
        deff = math.sqrt(eff * (1.0 - eff) / theTree.GetEntries())
        print "--- Signal efficiency for Cuts method : %.5g +- %.5g (required was: %.5g)" % (
            eff, deff, effS)

        # Test: retrieve cuts for particular signal efficiency
        mcuts = reader.FindMVA("CutsGA method")
        cutsMin = array('d', [0, 0, 0, 0])
        cutsMax = array('d', [0, 0, 0, 0])
        mcuts.GetCuts(0.7, cutsMin, cutsMax)
        print "--- -------------------------------------------------------------"
        print "--- Retrieve cut values for signal efficiency of 0.7 from Reader"
        for ivar in range(4):
            print "... Cut: %.5g < %s <= %.5g" % (
                cutsMin[ivar], reader.GetVarName(ivar), cutsMax[ivar])

        print "--- -------------------------------------------------------------"

    #
    # write histograms
    #
    target = TFile("TMVApp.root", "RECREATE")
    for h in histList:
        h.Write()

    # Write also probability hists
    if "Fisher" in mlist:
        probHistFi.Write()
        rarityHistFi.Write()

    target.Close()

    print "--- Created root file: \"TMVApp.root\" containing the MVA output histograms"
    print "==> TMVApplication is done!"
def main():

    try:
        # retrieve command line options
        shortopts = "m:i:t:o:vh?"
        longopts = [
            "methods=", "inputfile=", "inputtrees=", "outputfile=", "verbose",
            "help", "usage"
        ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    verbose = False
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            a = a.strip()
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    gROOT.SetMacroPath("./")
    gROOT.Macro("./TMVAlogon.C")
    gROOT.LoadMacro("./TMVAGui.C")

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    #    factory = TMVA.Factory( "TMVAClassification", outputFile,
    #                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )
    jobname = DEFAULT_OUTFNAME
    factory = TMVA.Factory(
        jobname.replace(".root", ""), outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I:AnalysisType=Classification"
    )  # pascal

    # Set verbosity
    factory.SetVerbose(verbose)

    # Adjust variables if old sample is used
    if IsOLD:
        SPECTATORS.remove("JisPU")
        SPECTATORS.remove("JisHS")

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    theCat1Vars = ""
    theCat2Vars = ""
    theCat3Vars = ""
    for var in VARIABLES:
        factory.AddVariable(var, 'F')
        theCat1Vars += var + ":"
        theCat2Vars += var + ":"
        theCat3Vars += var + ":"

    theCat1Vars = theCat1Vars.rstrip(":")
    theCat2Vars = theCat2Vars.rstrip(":")
    theCat3Vars = theCat3Vars.rstrip(":")

    # You can add so-called "Spectator variables", which are not used in the MVA training,
    for spect in SPECTATORS:
        factory.AddSpectator(spect, spect)

    # Apply additional cuts on the signal and background sample.
    mycutSig = ""
    mycutBkg = TCut(SELECTION + "&&JisPU")
    if doJTruthMatchPt10Cut:
        mycutSig = TCut(SELECTION + "&&JisHS && Jtruthpt>10")
    else:
        mycutSig = TCut(SELECTION + "&&JisHS")

    cat1cuts = TCut("Jpt >20 && Jpt <30")
    cat2cuts = TCut("Jpt >30 && Jpt <40")
    cat3cuts = TCut("Jpt >40 && Jpt <50")

    # open file
    input = TFile.Open(infname)

    # Get the signal and background trees for training
    signal = input.Get(treeNameSig)
    background = input.Get(treeNameBkg)

    # Global event weights (see below for setting event-wise weights)
    signalWeight = 1.0
    backgroundWeight = 1.0

    # ====== register trees ====================================================
    factory.AddSignalTree(signal, signalWeight)
    factory.AddBackgroundTree(background, backgroundWeight)

    # To give different trees for training and testing, do as follows:
    #    factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    factory.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )

    # Set individual event weights (the variables must exist in the original TTree)
    #    for signal    : factory.SetSignalWeightExpression    ("weight1*weight2");
    #    for background: factory.SetBackgroundWeightExpression("weight1*weight2");

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    TrainingAndTestTreeStr= "nTrain_Signal="+str(DEFAULT_NEVENTS_TRAIN_S)+\
                            ":nTrain_Background="+str(DEFAULT_NEVENTS_TRAIN_B)+\
                            ":nTest_Signal="+str(DEFAULT_NEVENTS_TEST_S)+\
                            ":nTest_Background="+str(DEFAULT_NEVENTS_TEST_B)+\
                            ":SplitMode=Random:NormMode=EqualNumEvents:!V"
    factory.PrepareTrainingAndTestTree(mycutSig, mycutBkg,
                                       TrainingAndTestTreeStr)
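    # For illustration, with hypothetical defaults of 10000 events each the option string would read
    # "nTrain_Signal=10000:nTrain_Background=10000:nTest_Signal=10000:nTest_Background=10000:SplitMode=Random:NormMode=EqualNumEvents:!V";
    # the actual numbers come from the DEFAULT_NEVENTS_* constants defined elsewhere in the script.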

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #

    # multidim likelihood --- kNN
    if "kNN100" in mlist:
        factory.BookMethod(
            TMVA.Types.kKNN, "KNN100",
            "!V:H:nkNN=100:ScaleFrac=0.8:UseKernel=F:UseWeight=F:Trim=False:BalanceDepth=6"
        )

    if "kNN100trim" in mlist:
        factory.BookMethod(
            TMVA.Types.kKNN, "KNN100trim",
            "!V:H:nkNN=100:ScaleFrac=0.8:UseKernel=F:UseWeight=F:Trim=True:BalanceDepth=6"
        )

    if "likelihood" in mlist:
        factory.BookMethod(TMVA.Types.kLikelihood, "Likelihood", "H:!V:")

    if "BDT" in mlist:
        BDToptions = "!H:NTrees=850:nEventsMin=150:MaxDepth=5:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20:PruneMethod=NoPruning:VerbosityLevel=Error"
        factory.BookMethod(TMVA.Types.kBDT, "BDT", BDToptions)

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.
    # Train MVAs
    factory.TrainAllMethods()

    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"

    # open the GUI for the result macros
    gROOT.ProcessLine("TMVAGui(\"%s\")" % outfname)

    # keep the ROOT thread running
    gApplication.Run()
Example No. 16
def main():

    try:
        # retrieve command line options
        shortopts = "m:i:n:d:k:l:t:o:vh?"
        longopts = [
            "methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=",
            "varListKey=", "inputtrees=", "outputfile=", "verbose", "help",
            "usage"
        ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    nTrees = DEFAULT_NTREES
    mDepth = DEFAULT_MDEPTH
    mass = DEFAULT_MASS
    varListKey = DEFAULT_VARLISTKEY
    verbose = True
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-d", "--maxDepth"):
            mDepth = a
        elif o in ("-k", "--mass"):
            mass = a
        elif o in ("-l", "--varListKey"):
            varListKey = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-n", "--nTrees"):
            nTrees = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            a = a.strip()
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    varList = varsList.varList[varListKey]
    nVars = str(len(varList)) + 'vars'
    Note = methods + '_' + varListKey + '_' + nVars + '_mDepth' + mDepth + '_M' + mass
    outfname = "weights/TMVA_" + Note + ".root"
    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    #     gROOT.SetMacroPath( "./" )
    #     gROOT.Macro       ( "./TMVAlogon.C" )
    #     gROOT.LoadMacro   ( "./TMVAGui.C" )

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    #     factory = TMVA.Factory( "TMVAClassification", outputFile,
    #                             "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;:AnalysisType=Classification"
    )

    # Set verbosity
    factory.SetVerbose(verbose)

    # If you wish to modify default settings
    # (please check "src/Config.h" to see all available global options)
    #    gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/" + Note

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]

    for iVar in varList:
        if iVar[0] == 'NJets_singleLepCalc':
            factory.AddVariable(iVar[0], iVar[1], iVar[2], 'I')
        else:
            factory.AddVariable(iVar[0], iVar[1], iVar[2], 'F')
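    # As noted above, TTree::Draw-style expressions may also be used as inputs.
    # A hypothetical, commented-out illustration (the expression and names are
    # placeholders, not variables used in this training):
    #    factory.AddVariable( "htPerJet := AK4HT/NJets_singleLepCalc", "HT per jet", "GeV", 'F' )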

    # You can add so-called "Spectator variables", which are not used in the MVA training,
    # but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
    # input variables, the response values of all trained MVAs, and the spectator variables
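    # No spectators are added in this particular script; a hypothetical call with a
    # placeholder branch name would simply be:
    #    factory.AddSpectator( "MCWeight_MultiLepCalc" )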

    inputDir = varsList.inputDir
    print 'mass point ' + mass
    infname = "ChargedHiggs_HplusTB_HplusToTB_M-%s_13TeV_amcatnlo_pythia8_hadd.root" % (
        mass)
    iFileSig = TFile.Open(inputDir + infname)
    sigChain = iFileSig.Get("ljmet")
    #    os.exits(1)
    # BDT machinery
    factory.AddSignalTree(sigChain)
    bkg_list = []
    bkg_trees_list = []
    hist_list = []
    weightsList = []
    for i in range(len(varsList.bkg)):
        bkg_list.append(TFile.Open(inputDir + varsList.bkg[i]))
        print inputDir + varsList.bkg[i]
        bkg_trees_list.append(bkg_list[i].Get("ljmet"))
        bkg_trees_list[i].GetEntry(0)

        if bkg_trees_list[i].GetEntries() == 0:
            continue
        factory.AddBackgroundTree(bkg_trees_list[i], 1)

    signalWeight = 1  #0.0159/sigChain.GetEntries() #xs (pb)

    # ====== register trees ====================================================
    # To give different trees for training and testing, do as follows:
    #    factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    factory.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )

    # Use the following code instead of the above two or four lines to add signal and background
    # training and test events "by hand"
    # NOTE that in this case one should not give expressions (such as "var1+var2") in the input
    #      variable definition, but simply compute the expression before adding the event
    #
    #    # --- begin ----------------------------------------------------------
    #
    # ... *** please lookup code in TMVA/macros/TMVAClassification.C ***
    #
    #    # --- end ------------------------------------------------------------
    #
    # ====== end of register trees ==============================================

    # Set individual event weights (the variables must exist in the original TTree)
    #    for signal    : factory.SetSignalWeightExpression    ("weight1*weight2");
    #    for background: factory.SetBackgroundWeightExpression("weight1*weight2");
    #factory.SetBackgroundWeightExpression( "weight" )
    factory.SetSignalWeightExpression(weightStrS)
    factory.SetBackgroundWeightExpression(weightStrB)

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    mycutSig = TCut(cutStrS)
    mycutBkg = TCut(cutStrB)

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    factory.PrepareTrainingAndTestTree(
        mycutSig,
        mycutBkg,
        #                                         "nTrain_Signal=0:nTrain_Background=0:nTest_Signal=10:nTest_Background=100:SplitMode=Random:NormMode=NumEvents:!V" )
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethodCuts.cxx, etc., or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation
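    # No cut-based classifier is booked in this script; a hypothetical booking that
    # presets the optimisation range of the third input variable, as described above,
    # could look like:
    #    factory.BookMethod( TMVA.Types.kCuts, "Cuts",
    #                        "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:CutRangeMin[2]=-1:CutRangeMax[2]=1" )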

    # bdtSetting for "BDT"
    bdtSetting = '!H:!V:NTrees=%s:MaxDepth=%s' % (nTrees, mDepth)
    bdtSetting += ':MinNodeSize=2.5%:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20'
    bdtSetting += ':IgnoreNegWeightsInTraining=True'
    # bdtSetting for "BDTMitFisher"
    bdtFSetting = '!H:!V:NTrees=%s' % nTrees
    bdtFSetting += ':MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20'
    bdtFSetting += ':IgnoreNegWeightsInTraining=True'
    # bdtSetting for "BDTG"
    bdtGSetting = '!H:!V:NTrees=%s:MaxDepth=%s' % (nTrees, mDepth)
    bdtGSetting += ':MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20'
    bdtGSetting += ':Pray'  #Pray takes into account the effect of negative bins in BDTG
    #bdtGSetting += ':IgnoreNegWeightsInTraining=True'
    # bdtSetting for "BDTB"
    bdtBSetting = '!H:!V:NTrees=%s' % nTrees
    bdtBSetting += ':MinNodeSize=2.5%:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20'
    bdtBSetting += ':IgnoreNegWeightsInTraining=True'
    # bdtSetting for "BDTD"
    bdtDSetting = '!H:!V:NTrees=%s' % nTrees
    bdtDSetting += ':MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate'
    bdtDSetting += ':IgnoreNegWeightsInTraining=True'
    # Note also that explicitly setting *nEventsMin* overrides the recommended option *MinNodeSize*.

    #BOOKING AN ALGORITHM
    if methods == "BDT": factory.BookMethod(TMVA.Types.kBDT, "BDT", bdtSetting)
    if methods == "BDTG":
        factory.BookMethod(TMVA.Types.kBDT, "BDTG", bdtGSetting)
    if methods == "BDTMitFisher":
        factory.BookMethod(TMVA.Types.kBDT, "BDTMitFisher", bdtFSetting)
    if methods == "BDTB":
        factory.BookMethod(TMVA.Types.kBDT, "BDTB", bdtBSetting)
    if methods == "BDTD":
        factory.BookMethod(TMVA.Types.kBDT, "BDTD", bdtDSetting)
    # --------------------------------------------------------------------------------------------------

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    factory.TrainAllMethods()

    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()
    #
    #     print "=== wrote root file %s\n" % outfname
    #     print "=== TMVAClassification is done!\n"

    # save plots:
    os.chdir('weights/' + Note)
    #TMVA.mvaeffs( "../../"+outfname ) #Classifier Cut Efficiencies
    gROOT.SetBatch(1)
    TMVA.efficiencies(
        "../../" + outfname
    )  #Classifier Background Rejection vs Signal Efficiency (ROC curve)
    #TMVA.efficiencies( "weights/TMVA_BDTG_APR9_33vars_mDepth3_MLow.root", 3 ) #Classifier 1/(Backgr. Efficiency) vs Signal Efficiency (ROC curve)
    TMVA.mvas("../../" + outfname,
              0)  #Classifier Output Distributions (test sample)
    TMVA.correlations(
        "../../" + outfname)  #Input Variable Linear Correlation Coefficients
    TMVA.variables("../../" + outfname)  #Input variables (training sample)
    #TMVA.mvas( "../../"+outfname ) #Classifier Output Distributions (test and training samples superimposed)
    #gROOT.ProcessLine( ".x ../../mvas.C(\"%s\",3)" % ("../../"+outfname) ) #Classifier Output Distributions (test and training samples superimposed)
    if not gROOT.IsBatch(): TMVA.TMVAGui("../../" + outfname)
    #     os.chdir('plots')
    #     try: os.system('convert CorrelationMatrixS.eps CorrelationMatrixS_'+Note+'.png')
    #     except: pass
    #     try: os.system('convert CorrelationMatrixB.eps CorrelationMatrixB_'+Note+'.png')
    #     except: pass
    #     #try: os.system('convert invBeffvsSeff.eps invBeffvsSeff_'+Note+'.png')
    #     #except: pass
    #     try: os.system('convert mva_'+Note.split('_')[0]+'.eps mva_'+Note+'.png')
    #     except: pass
    #     try: os.system('convert mvaeffs_'+Note.split('_')[0]+'.eps mvaeffs_'+Note+'.png')
    #     except: pass
    #     try: os.system('convert overtrain_'+Note.split('_')[0]+'.eps overtrain_'+Note+'.png')
    #     except: pass
    #     try: os.system('convert rejBvsS.eps rejBvsS_'+Note+'.png')
    #     except: pass
    #     try: os.system('convert variables_id_c1.eps variables_id_c1_'+Note+'.png')
    #     except: pass
    #     try: os.system('convert variables_id_c2.eps variables_id_c2_'+Note+'.png')
    #     except: pass
    #     try: os.system('convert variables_id_c3.eps variables_id_c3_'+Note+'.png')
    #     except: pass
    #     try: os.system('convert variables_id_c4.eps variables_id_c4_'+Note+'.png')
    #     except: pass
    #     try: os.system('convert variables_id_c5.eps variables_id_c5_'+Note+'.png')
    #     except: pass
    #     try: os.system('convert variables_id_c6.eps variables_id_c6_'+Note+'.png')
    #     except: pass
    #     os.system('rm *.eps')
    print "DONE"
Example no. 17
def main():

    try:
        # retrieve command line options
        shortopts = "m:i:t:o:vh?"
        longopts = [
            "methods=", "inputfile=", "inputtrees=", "outputfile=", "verbose",
            "help", "usage"
        ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infname = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    verbose = False
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            a = a.strip()
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    gROOT.SetMacroPath("./")
    gROOT.Macro("./TMVAlogon.C")
    gROOT.LoadMacro("./TMVAGui.C")

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory(
        "TMVAClassification", outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification"
    )

    # Set verbosity
    factory.SetVerbose(verbose)

    # If you wish to modify default settings
    # (please check "src/Config.h" to see all available global options)
    #    gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    #    gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]

    varList = varsList.varList

    for iVar in varList:
        factory.AddVariable(iVar, 'F')

    #factory.AddVariable( "NBTags",'I' )

    # You can add so-called "Spectator variables", which are not used in the MVA training,
    # but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
    # input variables, the response values of all trained MVAs, and the spectator variables
    factory.AddSpectator("sampleName2")
    factory.AddSpectator("EVENT")
    factory.AddSpectator("svMass")
    factory.AddSpectator("met")
    factory.AddSpectator("fMass")
    factory.AddSpectator("CSVJ2")
    factory.AddSpectator("chi2KinFit")
    factory.AddSpectator("mJJ")
    factory.AddSpectator("category")
    factory.AddSpectator("triggerEff")
    factory.AddSpectator("initEvents")
    factory.AddSpectator("xs")

    #     factory.AddSpectator( "spec2:=var1*3",  "Spectator 2", "units", 'F' )

    # Read input data
    #     sigChain = r.TChain("ttTreeFinal/eventTree")
    #     bkg1Chain = r.TChain("ttTreeFinal/eventTree")
    #     bkg2Chain = r.TChain("ttTreeFinal/eventTree")

    # Get the signal and background trees for training
    #     tool.addFiles(ch=sigChain, dirName="/hdfs/store/user/zmao/H2hh260_3-SUB-TT", knownEventNumber=0, maxFileNumber=-1)
    #     tool.addFiles(ch=bkg1Chain, dirName="/hdfs/store/user/zmao/tt_3-SUB-TT", knownEventNumber=0, maxFileNumber=-1)
    #     tool.addFiles(ch=bkg2Chain, dirName="/hdfs/store/user/zmao/ZZ_3-SUB-TT", knownEventNumber=0, maxFileNumber=-1)

    massPoint = infname
    Lumi = varsList.Lumi
    preFix = varsList.preFix
    infname = "H2hh%s_all_tightopposite%s3rdLepVeto.root" % (massPoint,
                                                             varsList.region)
    iFileSig = TFile.Open(preFix + infname)
    sigChain = iFileSig.Get("eventTree")
    signalWeight = 1
    factory.AddSignalTree(sigChain, signalWeight)

    bkg_list = []
    bkg_trees_list = []
    hist_list = []
    weightsList = []
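    # For each background below, the per-sample weight is taken as xs / N(preselection)
    # read from the file itself; MC samples are then scaled by Lumi*1000 (presumably a
    # pb-to-fb conversion against the target luminosity), while the sample labelled
    # 'QCD' is instead multiplied back by its own preselection yield, i.e. weighted by
    # the raw xs value.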
    for i in range(len(varsList.bkg)):
        bkg_list.append(TFile.Open(preFix + varsList.bkg[i][1]))
        bkg_trees_list.append(bkg_list[i].Get("eventTree"))
        hist_list.append(bkg_list[i].Get('preselection'))
        bkg_trees_list[i].GetEntry(0)
        weightsList.append(
            (bkg_trees_list[i].xs) / hist_list[i].GetBinContent(1))

        if bkg_trees_list[i].GetEntries() == 0:
            continue
        if varsList.bkg[i][0] != 'QCD':
            factory.AddBackgroundTree(bkg_trees_list[i],
                                      weightsList[i] * Lumi * 1000)
            print "%s:\t\t%.2f" % (varsList.bkg[i][0],
                                   bkg_trees_list[i].GetEntries() *
                                   weightsList[i] * Lumi * 1000)
            print "%s:\t\t%.2f" % (varsList.bkg[i][0],
                                   bkg_trees_list[i].GetEntries())
        else:
            factory.AddBackgroundTree(
                bkg_trees_list[i],
                weightsList[i] * hist_list[i].GetBinContent(1))
            print "%s:\t\t%.2f" % (
                varsList.bkg[i][0], bkg_trees_list[i].GetEntries() *
                weightsList[i] * hist_list[i].GetBinContent(1))

#     iFileBkg1 = TFile.Open(location+"TMVARegApp_tt_eff_all%s_tightoppositebTag.root" %postName)
#     iFileBkg2 = TFile.Open(location+"TMVARegApp_ZZ_eff_all%s_tightoppositebTag.root" %postName)
#     iFileBkg3 = TFile.Open(location+"TMVARegApp_tt_semi_eff_all%s_tightoppositebTag.root" %postName)
#     iFileBkg4 = TFile.Open(location+"TMVARegApp_DY2JetsToLL_all_tightoppositebTag.root")
#     iFileBkg5 = TFile.Open(location+"TMVARegApp_DY3JetsToLL_all_tightoppositebTag.root")
# #     iFileBkg6 = TFile.Open(location+"TMVARegApp_W1JetsToLNu_eff2_all_tightoppositebTag.root")
# #     iFileBkg7 = TFile.Open(location+"TMVARegApp_W2JetsToLNu_eff2_all_tightoppositebTag.root")
#     iFileBkg8 = TFile.Open(location+"TMVARegApp_W3JetsToLNu_all_tightoppositebTag.root")
# #     iFileBkg9 = TFile.Open(location+"TMVARegApp_WZJetsTo2L2Q_eff_all_tightoppositebTag.root")

#     iFileBkg = TFile.Open(location+"TMVARegApp_dataTotal_all%s_relaxedsamebTag.root" %postName)

#     bkg1Chain = iFileBkg1.Get("eventTree")
#     bkg2Chain = iFileBkg2.Get("eventTree")
#     bkg3Chain = iFileBkg3.Get("eventTree")
#     bkg4Chain = iFileBkg4.Get("eventTree")
#     bkg5Chain = iFileBkg5.Get("eventTree")
#     bkg6Chain = iFileBkg6.Get("eventTree")
#     bkg7Chain = iFileBkg7.Get("eventTree")
#     bkg8Chain = iFileBkg8.Get("eventTree")
#     bkg9Chain = iFileBkg9.Get("eventTree")

#     bkgChain = iFileBkg.Get("eventTree")

# Global event weights (see below for setting event-wise weights)
    signalWeight = 1  #0.0159/sigChain.GetEntries() #xs (pb)
    #     tmpHist1 = iFileBkg1.Get('preselection')
    #     ttWeight = 26.2/tmpHist1.GetBinContent(1)
    #     tmpHist2 = iFileBkg2.Get('preselection')
    #     ZZWeight = 2.5/tmpHist2.GetBinContent(1)
    #     tmpHist3 = iFileBkg3.Get('preselection')
    #     tt_semiWeight = 109.3/tmpHist3.GetBinContent(1)
    #     tmpHist4 = iFileBkg4.Get('preselection')
    #     DY2JetsWeight = 181/tmpHist4.GetBinContent(1)
    #     tmpHist5 = iFileBkg5.Get('preselection')
    #     DY3JetsWeight = 51.1/tmpHist5.GetBinContent(1)
    #     tmpHist6 = iFileBkg6.Get('preselection')
    #     W1JetsToLNu = 5400/tmpHist6.GetBinContent(1)
    #     tmpHist7 = iFileBkg7.Get('preselection')
    #     W2JetsToLNu = 1750/tmpHist7.GetBinContent(1)
    #     tmpHist8 = iFileBkg8.Get('preselection')
    #     W3JetsToLNu = 519/tmpHist8.GetBinContent(1)
    #     tmpHist9 = iFileBkg9.Get('preselection')
    #     WZJetsTo2L2Q = 2.207/tmpHist9.GetBinContent(1)

    #     print "tt:\t\t%.2f" %(bkg1Chain.GetEntries()*ttWeight*Lumi*1000)
    #     print "ZZ:\t\t%.2f" %(bkg2Chain.GetEntries()*ZZWeight*Lumi*1000)
    #     print "tt semi:\t%.2f" %(bkg3Chain.GetEntries()*tt_semiWeight*Lumi*1000)
    #     print "DY2:\t\t%.2f" %(bkg4Chain.GetEntries()*DY2JetsWeight*Lumi*1000)
    #     print "DY3:\t\t%.2f" %(bkg5Chain.GetEntries()*DY3JetsWeight*Lumi*1000)
    #     print "WJ1:\t\t%.2f" %(bkg6Chain.GetEntries()*W1JetsToLNu*Lumi*1000)
    #     print "WJ2:\t\t%.2f" %(bkg7Chain.GetEntries()*W2JetsToLNu*Lumi*1000)
    #     print "WJ3:\t\t%.2f" %(bkg8Chain.GetEntries()*W3JetsToLNu*Lumi*1000)
    #     print "WZJ:\t\t%.2f" %(bkg9Chain.GetEntries()*WZJetsTo2L2Q*Lumi*1000)

    #     print "QCD:\t\t%.2f" %(bkgChain.GetEntries()*0.05)

    # ====== register trees ====================================================
    #
    # the following method is the preferred one:
    # you can add an arbitrary number of signal or background trees
    #     factory.AddBackgroundTree( bkgChain, 0.05)
    #     factory.AddBackgroundTree( bkg1Chain, ttWeight*Lumi*1000)
    #     factory.AddBackgroundTree( bkg2Chain, ZZWeight*Lumi*1000)
    #     factory.AddBackgroundTree( bkg3Chain, tt_semiWeight*Lumi*1000)
    #     factory.AddBackgroundTree( bkg4Chain, DY2JetsWeight*Lumi*1000)
    #     factory.AddBackgroundTree( bkg5Chain, DY3JetsWeight*Lumi*1000)
    #     factory.AddBackgroundTree( bkg6Chain, W1JetsToLNu*Lumi*1000)
    #     factory.AddBackgroundTree( bkg7Chain, W2JetsToLNu*Lumi*1000)
    #     factory.AddBackgroundTree( bkg8Chain, W3JetsToLNu*Lumi*1000)
    #     factory.AddBackgroundTree( bkg9Chain, WZJetsTo2L2Q*Lumi*1000)
    factory.SetSignalWeightExpression('triggerEff')
    factory.SetBackgroundWeightExpression('triggerEff')

    # To give different trees for training and testing, do as follows:
    #    factory.AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" )
    #    factory.AddSignalTree( signalTestTree,     signalTestWeight,  "Test" )

    # Use the following code instead of the above two or four lines to add signal and background
    # training and test events "by hand"
    # NOTE that in this case one should not give expressions (such as "var1+var2") in the input
    #      variable definition, but simply compute the expression before adding the event
    #
    #    # --- begin ----------------------------------------------------------
    #
    # ... *** please lookup code in TMVA/macros/TMVAClassification.C ***
    #
    #    # --- end ------------------------------------------------------------
    #
    # ====== end of register trees ==============================================

    # Set individual event weights (the variables must exist in the original TTree)
    #    for signal    : factory.SetSignalWeightExpression    ("weight1*weight2");
    #    for background: factory.SetBackgroundWeightExpression("weight1*weight2");
    #factory.SetBackgroundWeightExpression( "weight" )

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )

    mycutSig = TCut("chi2KinFit > -10")
    mycutBkg = TCut("chi2KinFit > -10")

    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    factory.PrepareTrainingAndTestTree(
        mycutSig, mycutBkg,
        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V"
    )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethodCuts.cxx, etc., or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation

    # Fisher discriminant (same as LD)
    #    if "Fisher" in mlist:
    #factory.BookMethod( TMVA.Types.kFisher, "Fisher", "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" )
    #       factory.BookMethod( TMVA.Types.kFisher, "Fisher")

    if "BDT" in mlist:
        factory.BookMethod(
            TMVA.Types.kBDT, "BDT",
            "!H:!V:NTrees=150:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=100"
        )

    # --------------------------------------------------------------------------------------------------

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    factory.TrainAllMethods()

    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"

    # open the GUI for the result macros
    #     gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname )
    ChangeWeightName = 'mv /nfs_scratch/zmao/test/CMSSW_5_3_15/src/TMVA-v4.2.0/test/weights/TMVAClassification_BDT.weights.xml /nfs_scratch/zmao/test/CMSSW_5_3_15/src/TMVA-v4.2.0/test/weights/TMVAClassification_BDT.weights_both_%s.xml' % massPoint
    os.system(ChangeWeightName)
Example no. 18
def main():

    try:
        # retrieve command line options
        shortopts  = "m:i:n:d:k:l:t:o:vh?"
        longopts   = ["methods=", "inputfile=", "nTrees=", "maxDepth=", "mass=", "varListKey=", "inputtrees=", "outputfile=", "verbose", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )

    except getopt.GetoptError:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infname     = DEFAULT_INFNAME
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname    = DEFAULT_OUTFNAME
    methods     = DEFAULT_METHODS
    nTrees      = DEFAULT_NTREES
    mDepth      = DEFAULT_MDEPTH
    mass        = DEFAULT_MASS
    varListKey  = DEFAULT_VARLISTKEY
    verbose     = True
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-d", "--maxDepth"):
        	mDepth = a
        elif o in ("-k", "--mass"):
        	mass = a
        elif o in ("-l", "--varListKey"):
        	varListKey = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-n", "--nTrees"):
            nTrees = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            a = a.strip()
            trees = a.rsplit( ' ' )
            trees.sort()
            trees.reverse()
            if len(trees)-trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    varList = varsList.varList[varListKey]
    nVars = str(len(varList))+'vars'
    Note=''+methods+'_'+varListKey+'_'+nVars+'_mDepth'+mDepth
    outfname = "dataset/weights/TMVA_"+Note+".root"
    # Print methods
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()
			
    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Keras imports for the model built below (assuming standalone Keras, as used
    # by the TMVA PyKeras method at the time)
    from keras.models import Sequential
    from keras.layers import Dense
    from keras.optimizers import Adam

    # check ROOT version, give alarm if 5.18 
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)
    

    # Output file
    outputFile = TFile( outfname, 'RECREATE' )
    factory = TMVA.Factory( "TMVAClassification", outputFile, 
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;:AnalysisType=Classification" )

    factory.SetVerbose( verbose )
    (TMVA.gConfig().GetIONames()).fWeightFileDir = "weights/"+Note

    dataloader = TMVA.DataLoader('dataset')

    for iVar in varList:
        if iVar[0]=='NJets_singleLepCalc': dataloader.AddVariable(iVar[0],iVar[1],iVar[2],'I')
        else: dataloader.AddVariable(iVar[0],iVar[1],iVar[2],'F')

    inputDir = varsList.inputDir
    infname = "TTTT_TuneCP5_PSweights_13TeV-amcatnlo-pythia8_hadd.root"
    iFileSig = TFile.Open(inputDir+infname)
    sigChain = iFileSig.Get("ljmet")
    dataloader.AddSignalTree(sigChain)
    bkg_list = []
    bkg_trees_list = []
    hist_list = []
    weightsList = []
    for i in range(len(varsList.bkg)):
        bkg_list.append(TFile.Open(inputDir+varsList.bkg[i]))
        print inputDir+varsList.bkg[i]
        bkg_trees_list.append(bkg_list[i].Get("ljmet"))
        bkg_trees_list[i].GetEntry(0)

        if bkg_trees_list[i].GetEntries() == 0:
            continue
        dataloader.AddBackgroundTree( bkg_trees_list[i], 1)

    signalWeight = 1 

    dataloader.SetSignalWeightExpression( weightStrS )
    dataloader.SetBackgroundWeightExpression( weightStrB )

    mycutSig = TCut( cutStrS )
    mycutBkg = TCut( cutStrB ) 

    dataloader.PrepareTrainingAndTestTree( mycutSig, mycutBkg, "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    kerasSetting = 'H:!V:VarTransform=G:FilenameModel=model.h5:NumEpochs=10:BatchSize=1028'
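    # In the PyKeras option string: VarTransform=G Gaussianises the input variables,
    # FilenameModel must point to the Keras model file saved below ('model.h5'), and
    # NumEpochs/BatchSize steer the Keras training loop.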

    model = Sequential()
    model.add(Dense(100, activation='relu', input_dim=53))
    model.add((Dense(100, activation="relu")))
    model.add((Dense(100, activation="relu")))
    model.add((Dense(100, activation="relu")))
    model.add((Dense(2, activation="sigmoid")))

    # Set loss and optimizer
    model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy',])

    # Store model to file
    model.save('model.h5')
    model.summary()
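    # Depending on the ROOT build, PyMVA may need to be initialised once before the
    # Keras method is booked, e.g.:
    #    TMVA.PyMethodBase.PyInitialize()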

    if methods=="Keras": factory.BookMethod(dataloader, TMVA.Types.kPyKeras, "PyKeras",kerasSetting)
    
    factory.TrainAllMethods()
    factory.TestAllMethods()
    factory.EvaluateAllMethods()    
    
    outputFile.Close()
    # save plots:
    print "DONE"
Example no. 19
def main():

    try:
        # retrieve command line options
        shortopts = "m:i:t:b:s:a:o:vh?"
        longopts = [
            "methods=", "inputfile=", "inputtrees=", "bkgList=", "sigList=",
            "anaType=", "outputfile=", "verbose", "help", "usage"
        ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    infname = DEFAULT_INFNAME
    bkgList = DEFAULT_BKGLIST
    sigList = DEFAULT_SIGLIST
    anaType = DEFAULT_ANATYPE
    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    outfname = DEFAULT_OUTFNAME
    methods = DEFAULT_METHODS
    verbose = False
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-i", "--inputfile"):
            infname = a
        elif o in ("-b", "--bkgList"):
            bkgList = a
        elif o in ("-s", "--sigList"):
            sigList = a
        elif o in ("-a", "--anaType"):
            anaType = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-t", "--inputtrees"):
            a = a.strip()
            trees = a.rsplit(' ')
            trees.sort()
            trees.reverse()
            if len(trees) - trees.count('') != 2:
                print "ERROR: need to give two trees (each one for signal and background)"
                print trees
                sys.exit(1)
            treeNameSig = trees[0]
            treeNameBkg = trees[1]
        elif o in ("-v", "--verbose"):
            verbose = True

    # Print methods
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut

    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    #Edited
    setupTMVA(pathToTMVA)

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile(outfname, 'RECREATE')

    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in
    # front of the "Silent" argument in the option string
    suffix = outfname.split("/")[-1].split(".")[0]
    factory = TMVA.Factory(
        "TMVAClassification_%s" % suffix, outputFile,
        "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P:AnalysisType=Classification"
    )

    # Set verbosity
    factory.SetVerbose(verbose)

    # If you wish to modify default settings
    # (please check "src/Config.h" to see all available global options)
    #    gConfig().GetVariablePlotting()).fTimesRMS = 8.0
    #    gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory"

    # Define the input variables that shall be used for the classifier training
    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]

    #Edited
    # follow 2LSS note Ch5.2: Discriminating variables
    factory.AddVariable("mT2    := sig.mT2", 'F')
    factory.AddVariable("pt     := l12.pt", 'F')
    factory.AddVariable("MET    := sig.MetRel", 'F')
    factory.AddVariable("Ht     := Sum$(jets.pt) + Sum$(leps.pt)", 'F')
    factory.AddVariable("mTl1   := leps.mT[0]", 'F')
    factory.AddVariable("mTl2   := leps.mT[1]", 'F')
    factory.AddVariable("ll_dPhi:= l12.dPhi", 'F')
    factory.AddVariable(
        "l12m   := (int(abs(leps.ID[0]))!=int(abs(leps.ID[1])))*100 + l12.m",
        'F')

    #ISR region
    if (anaType == "doISR"):
        factory.AddVariable("JetMET_dPhi  := jets.MET_dPhi[0]", 'F')
        factory.AddVariable("MET_JetPt_R  := sig.MetRel/jets.pt[0]", 'F')
        factory.AddVariable("l1Pt_JetPt_R := leps.pt[0]/jets.pt[0]", 'F')

    #factory.AddSpectator( "pt1 := leps.pt[0]" , 'F' )
    #factory.AddSpectator( "pt2 := leps.pt[1]" , 'F' )
    #factory.AddSpectator( "ID1 := int(leps.ID[0])" , 'I' )
    #factory.AddSpectator( "ID2 := int(leps.ID[1])" , 'I' )
    #factory.AddSpectator( "nCentralJets := Sum$(jets.pt>20 && abs(jets.eta)<2.4)" , 'I' )

    #FIXME
    setupXsecDB(pathToSUSYTools)
    from ROOT.SUSY import CrossSectionDB
    xsecDB = CrossSectionDB(pathToSUSYTools + "data/mc15_13TeV/")

    #read in training data
    openedInFileList = []

    # Read input sig
    sigList = open(sigList, "r")
    for infname in sigList:
        inFile = TFile.Open(infname[:-1])
        openedInFileList.append(inFile)

        hCutFlow = inFile.FindObjectAny("hCutFlow")
        mcEntry = hCutFlow.GetBinContent(1)

        #FIXME: hard coded extract runNum from filePath
        m = re.match(".*\.([0-9]{6})\..*", infname)
        runNum = int(m.groups()[0])
        xSECxEff = xsecDB.xsectTimesEff(runNum, 125) + xsecDB.xsectTimesEff(
            runNum, 127)  #125,127 is channel no.

        # Get the trees for training
        signal = inFile.Get("Data_")

        # Global event weights (see below for setting event-wise weights)
        #signalWeight = getXSECxEff(xsecDB, infname) * tarLumi / mcEntry
        #signalWeight = xSECxEff * tarLumi / mcEntry
        signalWeight = 1.0 * tarLumi / mcEntry  #treat diff SUSY scenario with equal weight
        if signalWeight <= 0:
            print "Encounter <=0 weight sample %s , skipped" % infname
            continue

        print "mc sig ", runNum, mcEntry, xSECxEff
        factory.AddSignalTree(signal, signalWeight)
    sigList.close()

    # Read input bkg
    bkgList = open(bkgList, "r")
    for infname in bkgList:
        inFile = TFile.Open(infname[:-1])
        openedInFileList.append(inFile)

        if "physics" in infname:
            #its real data
            print "data bkg", infname[:-1]
            background = inFile.Get("CFlip_")
            if background: factory.AddBackgroundTree(background, 1.0)
            background = inFile.Get("FakeLep_")
            if background: factory.AddBackgroundTree(background, 1.0)
        else:
            #its MC data
            hCutFlow = inFile.FindObjectAny("hCutFlow")
            mcEntry = hCutFlow.GetBinContent(1)

            #FIXME: hard coded extract runNum from filePath
            m = re.match(".*\.([0-9]{6})\..*", infname)
            runNum = int(m.groups()[0])
            xSECxEff = xsecDB.xsectTimesEff(runNum)

            # Get  trees for training
            background = inFile.Get("Data_")

            # Global event weights (see below for setting event-wise weights)
            backgroundWeight = xSECxEff * tarLumi / mcEntry
            if backgroundWeight <= 0:
                print "Encounter <=0 weight sample %s , skipped" % infname
                continue

            print "mc bkg ", runNum, mcEntry, xSECxEff
            factory.AddBackgroundTree(background, backgroundWeight)
    bkgList.close()

    # event-wise weights
    #factory.SetSignalWeightExpression( "weight" )
    #factory.SetBackgroundWeightExpression( "weight" )
    factory.SetSignalWeightExpression("ElSF*MuSF")
    factory.SetBackgroundWeightExpression(
        "(CFlipWeight0*FakeLepWeight0)!=1.0 ? CFlipWeight0*FakeLepWeight0  : !TMath::IsNaN(weight)? ElSF*MuSF*weight: 0.0"
    )
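    # The expression above uses the charge-flip/fake-lepton weight whenever that product
    # is non-trivial, otherwise falls back to the MC weight ElSF*MuSF*weight, and sets
    # the weight to zero if 'weight' is NaN.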

    # Apply additional cuts on the signal and background sample.
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    # trigCut   = "sig.trigCode!=0"

    #"HLT_mu24_iloose_L1MU15" for mumu emu, "HLT_e24_lhmedium_iloose_L1EM20VH" for ee
    trigCut = "((nMu>0) && (sig.trigCode & (1<<2))) || ((nMu==0) && (sig.trigCode & (1<<26)))"

    grlCut = "evtInfo.passGRL==1"
    wCut = "weight>0 && weight<1e9"
    tauCut = "1"  # "nTau==0"  FIXME nTau not properly filled in NTUP yet..
    bjetCut = "Sum$(jets.isBJet)==0"
    cosmicCut = "Sum$(leps.isCosmic)==0"

    htCut = "(Sum$(jets.pt) + Sum$(leps.pt))>40"
    posWCut = "FakeLepWeight0>0"

    isrCut = "Sum$(jets.pt>20 && abs(jets.eta)<2.4) %s" % (
        ">0" if anaType == "doISR" else "==0")  #nCentralJets>0 or ==0
    zMassCut = "!(int(abs(leps.ID[0])) == int(abs(leps.ID[1])) && fabs(l12.m - 91.1876)<=5)"

    #commonCut = "&&".join(["(%s)"%cut for cut in [trigCut , grlCut , bjetCut, cosmicCut]])
    commonCut = "&&".join([
        "(%s)" % cut for cut in
        [trigCut, grlCut, wCut, zMassCut, isrCut, tauCut, bjetCut, cosmicCut]
    ])
    commonCut = TCut(commonCut)

    sigCut = "&&".join([
        "(%s)" % cut for cut in
        [trigCut, grlCut, zMassCut, isrCut, tauCut, bjetCut, cosmicCut]
    ])
    sigCut = TCut(sigCut)

    bkgCut = "&&".join([
        "(%s)" % cut for cut in [
            trigCut, grlCut, wCut, zMassCut, isrCut, tauCut, bjetCut,
            cosmicCut, posWCut
        ]
    ])
    bkgCut = TCut(bkgCut)
    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples

    factory.PrepareTrainingAndTestTree(
        sigCut, bkgCut,
        "nTrain_Signal=0:nTrain_Background=0:nTest_Background=0:SplitMode=Random:NormMode=EqualNumEvents:!V"
    )
    #"nTrain_Signal=0:nTrain_Background=2000:SplitMode=Random:NormMode=EqualNumEvents:!V" )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please lookup the various method configuration options in the corresponding cxx files, eg:
    # src/MethodCuts.cxx, etc., or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1"...", where [2] is the third input variable

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod(
            TMVA.Types.kCuts, "Cuts",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart")

    if "CutsD" in mlist:
        factory.BookMethod(
            TMVA.Types.kCuts, "CutsD",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate"
        )

    if "CutsPCA" in mlist:
        factory.BookMethod(
            TMVA.Types.kCuts, "CutsPCA",
            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA"
        )

    if "CutsGA" in mlist:
        factory.BookMethod(
            TMVA.Types.kCuts, "CutsGA",
            "H:!V:FitMethod=GA:VarProp=FSmart:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95"
        )

    if "CutsSA" in mlist:
        factory.BookMethod(
            TMVA.Types.kCuts, "CutsSA",
            "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale"
        )

    # Likelihood ("naive Bayes estimator")
    if "Likelihood" in mlist:
        factory.BookMethod(
            TMVA.Types.kLikelihood, "Likelihood",
            "H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50"
        )

    # Decorrelated likelihood
    if "LikelihoodD" in mlist:
        factory.BookMethod(
            TMVA.Types.kLikelihood, "LikelihoodD",
            "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate"
        )

    # PCA-transformed likelihood
    if "LikelihoodPCA" in mlist:
        factory.BookMethod(
            TMVA.Types.kLikelihood, "LikelihoodPCA",
            "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA"
        )

    # Use a kernel density estimator to approximate the PDFs
    if "LikelihoodKDE" in mlist:
        factory.BookMethod(
            TMVA.Types.kLikelihood, "LikelihoodKDE",
            "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50"
        )

    # Use a variable-dependent mix of splines and kernel density estimator
    if "LikelihoodMIX" in mlist:
        factory.BookMethod(
            TMVA.Types.kLikelihood, "LikelihoodMIX",
            "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50"
        )

    # Test the multi-dimensional probability density estimator
    # here are the options strings for the MinMax and RMS methods, respectively:
    #      "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3" );
    #      "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3" );
    if "PDERS" in mlist:
        factory.BookMethod(
            TMVA.Types.kPDERS, "PDERS",
            "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600"
        )

    if "PDERSD" in mlist:
        factory.BookMethod(
            TMVA.Types.kPDERS, "PDERSD",
            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate"
        )

    if "PDERSPCA" in mlist:
        factory.BookMethod(
            TMVA.Types.kPDERS, "PDERSPCA",
            "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA"
        )

    # Multi-dimensional likelihood estimator using self-adapting phase-space binning
    if "PDEFoam" in mlist:
        factory.BookMethod(
            TMVA.Types.kPDEFoam, "PDEFoam",
            "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T"
        )

    if "PDEFoamBoost" in mlist:
        factory.BookMethod(
            TMVA.Types.kPDEFoam, "PDEFoamBoost",
            "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T"
        )

    # K-Nearest Neighbour classifier (KNN)
    if "KNN" in mlist:
        factory.BookMethod(
            TMVA.Types.kKNN, "KNN",
            "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim"
        )

    # H-Matrix (chi2-squared) method
    if "HMatrix" in mlist:
        factory.BookMethod(TMVA.Types.kHMatrix, "HMatrix", "!H:!V")

    # Linear discriminant (same as Fisher discriminant)
    if "LD" in mlist:
        factory.BookMethod(
            TMVA.Types.kLD, "LD",
            "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10"
        )

    # Fisher discriminant (same as LD)
    if "Fisher" in mlist:
        factory.BookMethod(
            TMVA.Types.kFisher, "Fisher",
            "H:!V:Fisher:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10"
        )

    # Fisher with Gauss-transformed input variables
    if "FisherG" in mlist:
        factory.BookMethod(TMVA.Types.kFisher, "FisherG",
                           "H:!V:VarTransform=Gauss")

    # Composite classifier: ensemble (tree) of boosted Fisher classifiers
    if "BoostedFisher" in mlist:
        factory.BookMethod(
            TMVA.Types.kFisher, "BoostedFisher",
            "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2"
        )

    # Function discrimination analysis (FDA) -- test of various fitters - the recommended one is Minuit (or GA or SA)
    if "FDA_MC" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_MC",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1"
        )

    if "FDA_GA" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_GA",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1"
        )

    if "FDA_SA" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_SA",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale"
        )

    if "FDA_MT" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_MT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch"
        )

    if "FDA_GAMT" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_GAMT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim"
        )

    if "FDA_MCMT" in mlist:
        factory.BookMethod(
            TMVA.Types.kFDA, "FDA_MCMT",
            "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1)(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20"
        )

    # TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
    if "MLP" in mlist:
        factory.BookMethod(
            TMVA.Types.kMLP, "MLP",
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:!UseRegulator"
        )

    if "MLPBFGS" in mlist:
        factory.BookMethod(
            TMVA.Types.kMLP, "MLPBFGS",
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator"
        )

    if "MLPBNN" in mlist:
        factory.BookMethod(
            TMVA.Types.kMLP, "MLPBNN",
            "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator"
        )  # BFGS training with bayesian regulators

    # CF(Clermont-Ferrand)ANN
    if "CFMlpANN" in mlist:
        factory.BookMethod(TMVA.Types.kCFMlpANN, "CFMlpANN",
                           "!H:!V:NCycles=2000:HiddenLayers=N+1,N"
                           )  # n_cycles:#nodes:#nodes:...

    # Tmlp(Root)ANN
    if "TMlpANN" in mlist:
        factory.BookMethod(
            TMVA.Types.kTMlpANN, "TMlpANN",
            "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3"
        )  # n_cycles:#nodes:#nodes:...

    # Support Vector Machine
    if "SVM" in mlist:
        factory.BookMethod(TMVA.Types.kSVM, "SVM",
                           "Gamma=0.25:Tol=0.001:VarTransform=Norm")

    # Boosted Decision Trees
    if "BDTG" in mlist:
        factory.BookMethod(
            TMVA.Types.kBDT, "BDTG",
            "!H:!V:NTrees=1000:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2"
        )

    if "BDT" in mlist:
        factory.BookMethod(
            TMVA.Types.kBDT, "BDT",
            "!H:!V:NTrees=850:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20"
        )

    if "BDTB" in mlist:
        factory.BookMethod(
            TMVA.Types.kBDT, "BDTB",
            "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20"
        )

    if "BDTD" in mlist:
        factory.BookMethod(
            TMVA.Types.kBDT, "BDTD",
            "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=2:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate"
        )

    # RuleFit -- TMVA implementation of Friedman's method
    if "RuleFit" in mlist:
        factory.BookMethod(
            TMVA.Types.kRuleFit, "RuleFit",
            "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02"
        )

    # --------------------------------------------------------------------------------------------------

    # ---- Now you can tell the factory to train, test, and evaluate the MVAs.

    # Train MVAs
    factory.TrainAllMethods()

    # Test MVAs
    factory.TestAllMethods()

    # Evaluate MVAs
    factory.EvaluateAllMethods()

    # Save the output.
    outputFile.Close()

    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"
Example no. 20
def main():

    try:
        # Retrieve command line options
        shortopts = "m:i:o:d:vh?"
        longopts = [
            "methods=", "inputfile=", "outputfile=", "datatype=", "verbose",
            "help", "usage"
        ]
        opts, args = getopt.getopt(sys.argv[1:], shortopts, longopts)

    except getopt.GetoptError:
        # Print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    treeNameSig = DEFAULT_TREESIG
    treeNameBkg = DEFAULT_TREEBKG
    methods = DEFAULT_METHODS
    directory = DEFAULT_DATA
    verbose = False

    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-d", "--datatype"):
            directory = a

        elif o in ("-v", "--verbose"):
            verbose = True

    # Print methods
    #take leading and trailing white space out
    methods = methods.strip(" ")
    mlist = methods.replace(' ', ',').split(',')
    print "=== TMVApplication: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut, TH1F, TStopwatch
    print("ROOT classes successfully imported!\n")  # DCS 17/06/2016
    # check ROOT version, give alarm if 5.18
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)

    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    tmvadir = "/home/dean/software/tmva/TMVA-v4.2.0/test"
    macro = os.path.join(tmvadir, "TMVAlogon.C")
    loadmacro = os.path.join(tmvadir, "TMVAGui.C")
    gROOT.SetMacroPath(tmvadir)
    gROOT.Macro(macro)
    gROOT.LoadMacro(loadmacro)
    print("ROOT macro path loaded correctly!\n")

    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Create the Reader object
    reader = TMVA.Reader("!Color")
    var1 = array('f', [0])
    var2 = array('f', [0])
    var3 = array('f', [0])
    var4 = array('f', [0])
    var5 = array('f', [0])
    var6 = array('f', [0])
    var7 = array('f', [0])
    var8 = array('f', [0])
    var9 = array('f', [0])
    var10 = array('f', [0])
    variables = [var1, var2, var3, var4, var5, var6, var7, var8, var9, var10]
    var_names = [
        'peaks', 'mean_peaks', 'integral', 'integral_over_peaks', 'max',
        'mean', 'max_over_mean', 'std_dev_peaks', 'entropy', 'ps_integral'
    ]
    #variables = [var1, var2, var3, var4]
    #var_names = ['var1', 'var2', 'var3', 'var4']
    for name, var in zip(var_names, variables):
        reader.AddVariable(name, var)
    print("Variables successfully loaded!\n")
    #reader.AddVariable("Nclusters.value", var1)
    #reader.AddVariable("(TMath::Log10(eventinfo_ALLOfflinePulseSeriesReco.tot_charge))*1000/eventinfo_ALLOfflinePulseSeriesReco.length" ,var2)
    #reader.AddVariable("MDCOGLaunches.value*1000./eventinfo_ALLOfflinePulseSeriesReco.length",var3)
    #reader.AddVariable("Nclusters.value*1000./eventinfo_ALLOfflinePulseSeriesReco.length" ,var4)
    #reader.AddVariable("NSMT8TRIGGER.value/eventinfo_ALLOfflinePulseSeriesReco.nstrings",var5)
    #reader.AddVariable("MedianCluster.value",var6)

    # book the MVA methods
    #dir    = "weights/"
    #prefix = "TMVAClassification_"
    #
    #for m in mlist:
    #    print( m + " method", dir + prefix + m + ".weights.xml")
    #    reader.BookMVA( m + " method", dir + prefix + m + ".weights.xml" )

    weight_dir = "/home/dean/capstone/TMVA-classifier/weights/"
    weights = [f for f in os.listdir(weight_dir) if ".xml" in f]
    for i, f in enumerate(weights):
        reader.BookMVA("BDT_{}".format(i),
                       os.path.join(weight_dir, f))  #only care about BDT
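
    # NOTE: the MVAs above are booked under the tags "BDT_0", "BDT_1", ..., while the
    # EvaluateMVA() calls further below build their tag from the histogram name plus
    # " method"; the two naming schemes must agree for the evaluation to succeed.
    # A minimal sketch (commented out, names are illustrative) that keeps one list of
    # booked tags and reuses it at evaluation time:
    #
    # booked_tags = []
    # for i, f in enumerate(weights):
    #     tag = "BDT_{}".format(i)
    #     reader.BookMVA(tag, os.path.join(weight_dir, f))
    #     booked_tags.append(tag)
    # scores = [reader.EvaluateMVA(tag) for tag in booked_tags]  # after setting the input variables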


#    reader.BookMVA("BDT","weights/TMVAClassification_BDT.weights.xml")

#######################################################################
# For an example how to apply your own plugin method, please see
# TMVA/macros/TMVApplication.C
#######################################################################

# Book output histograms
    nbin = 100

    histList = []
    for m in mlist:
        histList.append(TH1F(m, m, nbin, -3, 3))

    # The original code evaluated the MVAs here, before any event variables had been
    # set, filling each histogram once with a meaningless value; the histograms are
    # filled properly inside the event loop below.
    #for h in histList:
    #    h.Fill(reader.EvaluateMVA(h.GetName() + " method"))

    # Book example histogram for probability (the other methods would be done similarly)
    if "Fisher" in mlist:
        probHistFi = TH1F("PROBA_MVA_Fisher", "PROBA_MVA_Fisher", nbin, 0, 1)
        rarityHistFi = TH1F("RARITY_MVA_Fisher", "RARITY_MVA_Fisher", nbin, 0,
                            1)
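
    # A minimal sketch (commented out) of how the probability and rarity histograms
    # would typically be filled inside the event loop, assuming a method was booked
    # under the tag "Fisher method":
    #
    # if "Fisher" in mlist:
    #     probHistFi.Fill(reader.GetProba("Fisher method"))
    #     rarityHistFi.Fill(reader.GetRarity("Fisher method"))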

    filelist = glob(directory + "/" + "Level4b*.hdf")
    print 30 * "#"
    print "the filelist, ", filelist
    print 30 * "--"
    for file in filelist:

        try:
            startfile = tables.openFile(file, "a")
            #DELETE BDTs if they exist

            for var in startfile.root._v_children.keys():
                if re.match("BDT_", var):
                    startfile.removeNode("/", var)
                    startfile.removeNode("/__I3Index__", var)
            #NOW CLONE THE NODE

            for name in histList:
                startfile.copyNode("/__I3Index__/StdDCOGLaunches",
                                   "/__I3Index__", str(name.GetName()))
                startfile.copyNode("/StdDCOGLaunches", "/",
                                   str(name.GetName()))
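            # the copied tables act as placeholders; their "value" column is
            # overwritten event by event further below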

            startfile.close()

            h5 = tables.openFile(file, 'r')
            mcog_over_t          = numpy.divide(h5.root.MDCOGLaunches.cols.value[:],\
                                                    h5.root.eventinfo_ALLOfflinePulseSeriesReco.cols.length[:]/1000.)
            q_over_t             = numpy.divide(numpy.log10(h5.root.eventinfo_ALLOfflinePulseSeriesReco.cols.tot_charge[:]),\
                                                    h5.root.eventinfo_ALLOfflinePulseSeriesReco.cols.length[:]/1000.)

            ncluster_over_t      = numpy.divide(h5.root.Nclusters.cols.value[:],\
                                             h5.root.eventinfo_ALLOfflinePulseSeriesReco.cols.length[:]/1000.)
            nsmt8_over_string    = numpy.divide(h5.root.NSMT8TRIGGER.cols.value[:],\
                                                    h5.root.eventinfo_ALLOfflinePulseSeriesReco.cols.nstrings[:])

            s1 = array('f', h5.root.Nclusters.cols.value[:])
            s2 = array('f', q_over_t)
            s3 = array('f', mcog_over_t[:])
            s4 = array('f', ncluster_over_t[:])
            s5 = array('f', nsmt8_over_string[:])
            s6 = array('f', h5.root.MedianCluster.cols.value[:])

            h5.close()

            result = numpy.zeros((len(histList), len(s1)),
                                 numpy.dtype([('Classifier', numpy.double)]))
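            # result has one row per output histogram and one column per event;
            # each cell stores the score under the "Classifier" field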

            for ievt in range(len(s1)):
                #if ievt%1000 == 0:
                #    print "--- ... Processing event: %i" % ievt
                # Fill event in memory

                # Compute MVA input variables
                var1[0] = s1[ievt]
                var2[0] = s2[ievt]
                var3[0] = s3[ievt]
                var4[0] = s4[ievt]
                var5[0] = s5[ievt]
                var6[0] = s6[ievt]
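                # NOTE: only var1-var6 are updated per event; var7-var10 were
                # registered with the reader above but keep their initial value of 0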

                # Fill histograms with MVA outputs

                for j, h in enumerate(histList):
                    h.Fill(reader.EvaluateMVA(h.GetName() + " method"))
                    result[j][ievt]["Classifier"] = reader.EvaluateMVA(
                        h.GetName() + " method")

                endfile = tables.openFile(file, 'a')
                for k, name in enumerate(histList):
                    modifiedNode = endfile.getNode("/", str(name.GetName()))
                    modifiedNode.cols.value[ievt] = result[k][ievt][
                        "Classifier"]

                endfile.close()
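                # NOTE: the output file is reopened and closed once per event here;
                # moving the open/close outside the event loop would avoid the
                # repeated I/O overhead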

            print time.strftime('Elapsed time - %H:%M:%S',
                                time.gmtime(time.clock()))
            #sanity check of the MVA values written in the hdf files

            #ifile=tables.openFile(file,'r')
            #   if len(ifile.root.BDT_400_20.cols.BDT) != len(ifile.root.MPEFit.cols.Zenith):
            #       ifile.close()
            #       print "Something wrong with file: ", k, j+1
            #exit()
            #os.system("rm "+"/data/icecube01/users/redlpete/IC59L2/TableIOL3/H5FilesIncludingScores/H5%0.2d%0.2d.hd5"%(k,j+1))
            #  ifile.close()
        except Exception as exce:
            print "problem opening or processing file:", file
            print exce

    exit()
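    # NOTE: everything below this exit() call is unreachable as written; it is kept
    # as a sketch for writing the scores into a standalone PyTables file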
    ifile = tables.openFile("test.h5", mode='a')

    class Score(IsDescription):
        score = Float64Col()

    group = ifile.createGroup("/", 'Background', 'Scoreinfo')

    table = ifile.createTable(group, 'score', Score, "Example")

    particle = table.row

    for n in range(len(result)):
        particle['score'] = result[n]
        particle.append()

    print "--- End of event loop: %s" % sw.Print()

    target = TFile("TMVApp1.root", "RECREATE")
    for h in histList:
        h.Write()

    target.Close()

    print "--- Created root file: \"TMVApp.root\" containing the MVA output histograms"
    print "==> TMVApplication is done!"
Exemplo n.º 21
0
def main():

    try:
        # retrieve command line options
        shortopts  = "m:o:l:s:vh?"
        longopts   = ["methods=", "outputfile=", "lepton=", "signal=", "verbose", "help", "usage"]
        opts, args = getopt.getopt( sys.argv[1:], shortopts, longopts )

    except getopt.GetoptError:
        # print help information and exit:
        print "ERROR: unknown options in argument %s" % sys.argv[1:]
        usage()
        sys.exit(1)

    lepton = DEFAULT_LEPTON
    outfname    = DEFAULT_OUTFNAME
    methods     = DEFAULT_METHODS
    verbose     = False
    signal      = DEFAULT_SIGNAL
    for o, a in opts:
        if o in ("-?", "-h", "--help", "--usage"):
            usage()
            sys.exit(0)
        elif o in ("-m", "--methods"):
            methods = a
        elif o in ("-o", "--outputfile"):
            outfname = a
        elif o in ("-s", "--signal"):
            signal = a
        elif o in ("-v", "--verbose"):
            verbose = True
        elif o in ("-l", "--lepton"):
            if a == "electron":
                lepton = ELECTRON
            elif a == "muon":
                lepton = MUON
            else:
                print "*** Lepton must be 'electron' or 'muon ****"
                sys.exit(1)


    # Print methods
    mlist = methods.replace(' ',',').split(',')
    print "=== TMVAClassification: use method(s)..."
    for m in mlist:
        if m.strip() != '':
            print "=== - <%s>" % m.strip()

    # Import ROOT classes
    from ROOT import gSystem, gROOT, gApplication, TFile, TTree, TCut
    
    # check ROOT version, give alarm if 5.18 
    if gROOT.GetVersionCode() >= 332288 and gROOT.GetVersionCode() < 332544:
        print "*** You are running ROOT version 5.18, which has problems in PyROOT such that TMVA"
        print "*** does not run properly (function calls with enums in the argument are ignored)."
        print "*** Solution: either use CINT or a C++ compiled version (see TMVA/macros or TMVA/examples),"
        print "*** or use another ROOT version (e.g., ROOT 5.19)."
        sys.exit(1)
    
    # Logon not automatically loaded through PyROOT (logon loads TMVA library) load also GUI
    gROOT.SetMacroPath( "./" )
    gROOT.Macro       ( "./TMVAlogon.C" )    
    gROOT.LoadMacro   ( "./TMVAGui.C" )
    
    # Import TMVA classes from ROOT
    from ROOT import TMVA

    # Output file
    outputFile = TFile( outfname, 'RECREATE' )
    
    # Create instance of TMVA factory (see TMVA/macros/TMVAClassification.C for more factory options)
    # All TMVA output can be suppressed by removing the "!" (not) in 
    # front of the "Silent" argument in the option string
    factory = TMVA.Factory( "TMVAClassification", outputFile, 
                            "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" )

    # Set verbosity
    factory.SetVerbose( verbose )
    
    # let's open the input files
    if lepton == ELECTRON:

        print "Lepton is ELECTRON."
        path = "/data3/jmitrevs/lepphoton/elphoton_ntuple2/mergedFiles/"

        wino_600_200FileName = path + "wino_600_200_el.root"
        wino_600_500FileName = path + "wino_600_500_el.root"
        wino_1000_200FileName = path + "wino_1000_200_el.root"
        wino_1500_300FileName = path + "wino_1500_300_el.root"
        
        WlepnuFileName_Np0 = path + "Wenu_Np0.root"
        WlepnuFileName_Np1 = path + "Wenu_Np1.root"
        WlepnuFileName_Np2 = path + "Wenu_Np2.root"
        WlepnuFileName_Np3 = path + "Wenu_Np3.root"
        WlepnuFileName_Np4 = path + "Wenu_Np4.root"
        WlepnuFileName_Np5 = path + "Wenu_Np5.root"

        ZleplepFileName_Np0 = path + "Zee_Np0.root"
        ZleplepFileName_Np1 = path + "Zee_Np1.root"
        ZleplepFileName_Np2 = path + "Zee_Np2.root"
        ZleplepFileName_Np3 = path + "Zee_Np3.root"
        ZleplepFileName_Np4 = path + "Zee_Np4.root"
        ZleplepFileName_Np5 = path + "Zee_Np5.root"

        st_tchan_lepnuFileName   = path + "st_tchan_enu.root"
        st_schan_lepnuFileName   = path + "st_schan_enu.root"
        ZleplepgammaFileName = path + "Zeegamma.root"

    elif lepton == MUON:

        print "Lepton is MUON."
        path = "/data3/jmitrevs/lepphoton/muphoton2/mergedFiles/"

        wino_600_200FileName = path + "wino_600_200_mu.root"
        wino_600_500FileName = path + "wino_600_500_mu.root"
        wino_1000_200FileName = path + "wino_1000_200_mu.root"
        wino_1500_300FileName = path + "wino_1500_300_mu.root"

        WlepnuFileName_Np0 = path + "Wmunu_Np0.root"
        WlepnuFileName_Np1 = path + "Wmunu_Np1.root"
        WlepnuFileName_Np2 = path + "Wmunu_Np2.root"
        WlepnuFileName_Np3 = path + "Wmunu_Np3.root"
        WlepnuFileName_Np4 = path + "Wmunu_Np4.root"
        WlepnuFileName_Np5 = path + "Wmunu_Np5.root"

        ZleplepFileName_Np0 = path + "Zmumu_Np0.root"
        ZleplepFileName_Np1 = path + "Zmumu_Np1.root"
        ZleplepFileName_Np2 = path + "Zmumu_Np2.root"
        ZleplepFileName_Np3 = path + "Zmumu_Np3.root"
        ZleplepFileName_Np4 = path + "Zmumu_Np4.root"
        ZleplepFileName_Np5 = path + "Zmumu_Np5.root"

        st_tchan_lepnuFileName   = path + "st_tchan_munu.root"
        st_schan_lepnuFileName   = path + "st_schan_munu.root"
        ZleplepgammaFileName = path + "Zmumugamma.root"

    else:
        raise ValueError("Lepton has to be ELECTRON or MUON.")

    
    WtaunuFileName_Np0 = path + "Wtaunu_Np0.root"
    WtaunuFileName_Np1 = path + "Wtaunu_Np1.root"
    WtaunuFileName_Np2 = path + "Wtaunu_Np2.root"
    WtaunuFileName_Np3 = path + "Wtaunu_Np3.root"
    WtaunuFileName_Np4 = path + "Wtaunu_Np4.root"
    WtaunuFileName_Np5 = path + "Wtaunu_Np5.root"

    ZtautauFileName_Np0 = path + "Ztautau_Np0.root"
    ZtautauFileName_Np1 = path + "Ztautau_Np1.root"
    ZtautauFileName_Np2 = path + "Ztautau_Np2.root"
    ZtautauFileName_Np3 = path + "Ztautau_Np3.root"
    ZtautauFileName_Np4 = path + "Ztautau_Np4.root"
    ZtautauFileName_Np5 = path + "Ztautau_Np5.root"

    st_tchan_taunuFileName = path + "st_tchan_taunu.root"    
    st_schan_taunuFileName = path + "st_schan_taunu.root"
    st_WtFileName   = path + "st_Wt.root"    
    
    WgammaFileName_Np0 = path + "Wgamma_Np0.root"
    WgammaFileName_Np1 = path + "Wgamma_Np1.root"
    WgammaFileName_Np2 = path + "Wgamma_Np2.root"
    WgammaFileName_Np3 = path + "Wgamma_Np3.root"
    WgammaFileName_Np4 = path + "Wgamma_Np4.root"
    WgammaFileName_Np5 = path + "Wgamma_Np5.root"
    
    ttbarFileName = path + "ttbar.root"

    WWFileName = path + "WW.root"
    WZFileName = path + "WZ.root"
    ZZFileName = path + "ZZ.root"

    ZtautaugammaFileName = path + "Ztautaugamma.root"

    
    ###########################################

    
    wino_600_200File = TFile(wino_600_200FileName)
    wino_600_500File = TFile(wino_600_500FileName)
    wino_1000_200File = TFile(wino_1000_200FileName)
    wino_1500_300File = TFile(wino_1500_300FileName)

    WlepnuFile_Np0 = TFile(WlepnuFileName_Np0)
    WlepnuFile_Np1 = TFile(WlepnuFileName_Np1)
    WlepnuFile_Np2 = TFile(WlepnuFileName_Np2)
    WlepnuFile_Np3 = TFile(WlepnuFileName_Np3)
    WlepnuFile_Np4 = TFile(WlepnuFileName_Np4)
    WlepnuFile_Np5 = TFile(WlepnuFileName_Np5)
    
    WtaunuFile_Np0 = TFile(WtaunuFileName_Np0)
    WtaunuFile_Np1 = TFile(WtaunuFileName_Np1)
    WtaunuFile_Np2 = TFile(WtaunuFileName_Np2)
    WtaunuFile_Np3 = TFile(WtaunuFileName_Np3)
    WtaunuFile_Np4 = TFile(WtaunuFileName_Np4)
    WtaunuFile_Np5 = TFile(WtaunuFileName_Np5)

    ZleplepFile_Np0 = TFile(ZleplepFileName_Np0)
    ZleplepFile_Np1 = TFile(ZleplepFileName_Np1)
    ZleplepFile_Np2 = TFile(ZleplepFileName_Np2)
    ZleplepFile_Np3 = TFile(ZleplepFileName_Np3)
    ZleplepFile_Np4 = TFile(ZleplepFileName_Np4)
    ZleplepFile_Np5 = TFile(ZleplepFileName_Np5)
    
    ZtautauFile_Np0 = TFile(ZtautauFileName_Np0)
    ZtautauFile_Np1 = TFile(ZtautauFileName_Np1)
    ZtautauFile_Np2 = TFile(ZtautauFileName_Np2)
    ZtautauFile_Np3 = TFile(ZtautauFileName_Np3)
    ZtautauFile_Np4 = TFile(ZtautauFileName_Np4)
    ZtautauFile_Np5 = TFile(ZtautauFileName_Np5)
    
    WgammaFile_Np0 = TFile(WgammaFileName_Np0)
    WgammaFile_Np1 = TFile(WgammaFileName_Np1)
    WgammaFile_Np2 = TFile(WgammaFileName_Np2)
    WgammaFile_Np3 = TFile(WgammaFileName_Np3)
    WgammaFile_Np4 = TFile(WgammaFileName_Np4)
    WgammaFile_Np5 = TFile(WgammaFileName_Np5)
    
    ttbarFile = TFile(ttbarFileName)
    
    st_tchan_lepnuFile   = TFile(st_tchan_lepnuFileName)
    st_tchan_taunuFile = TFile(st_tchan_taunuFileName)
    
    st_schan_lepnuFile   = TFile(st_schan_lepnuFileName)
    st_schan_taunuFile = TFile(st_schan_taunuFileName)
    
    st_WtFile   = TFile(st_WtFileName)

    WWFile = TFile(WWFileName)
    WZFile = TFile(WZFileName)
    ZZFile = TFile(ZZFileName)
    
    ZleplepgammaFile = TFile(ZleplepgammaFileName)
    ZtautaugammaFile = TFile(ZtautaugammaFileName)

    ###########################################

    
    wino_600_200Tree = wino_600_200File.Get("GammaLepton")
    wino_600_500Tree = wino_600_500File.Get("GammaLepton")
    wino_1000_200Tree = wino_1000_200File.Get("GammaLepton")
    wino_1500_300Tree = wino_1500_300File.Get("GammaLepton")

    WlepnuTree_Np0 = WlepnuFile_Np0.Get("GammaLepton")
    WlepnuTree_Np1 = WlepnuFile_Np1.Get("GammaLepton")
    WlepnuTree_Np2 = WlepnuFile_Np2.Get("GammaLepton")
    WlepnuTree_Np3 = WlepnuFile_Np3.Get("GammaLepton")
    WlepnuTree_Np4 = WlepnuFile_Np4.Get("GammaLepton")
    WlepnuTree_Np5 = WlepnuFile_Np5.Get("GammaLepton")
    
    WtaunuTree_Np0 = WtaunuFile_Np0.Get("GammaLepton")
    WtaunuTree_Np1 = WtaunuFile_Np1.Get("GammaLepton")
    WtaunuTree_Np2 = WtaunuFile_Np2.Get("GammaLepton")
    WtaunuTree_Np3 = WtaunuFile_Np3.Get("GammaLepton")
    WtaunuTree_Np4 = WtaunuFile_Np4.Get("GammaLepton")
    WtaunuTree_Np5 = WtaunuFile_Np5.Get("GammaLepton")

    ZleplepTree_Np0 = ZleplepFile_Np0.Get("GammaLepton")
    ZleplepTree_Np1 = ZleplepFile_Np1.Get("GammaLepton")
    ZleplepTree_Np2 = ZleplepFile_Np2.Get("GammaLepton")
    ZleplepTree_Np3 = ZleplepFile_Np3.Get("GammaLepton")
    ZleplepTree_Np4 = ZleplepFile_Np4.Get("GammaLepton")
    ZleplepTree_Np5 = ZleplepFile_Np5.Get("GammaLepton")
    
    ZtautauTree_Np0 = ZtautauFile_Np0.Get("GammaLepton")
    ZtautauTree_Np1 = ZtautauFile_Np1.Get("GammaLepton")
    ZtautauTree_Np2 = ZtautauFile_Np2.Get("GammaLepton")
    ZtautauTree_Np3 = ZtautauFile_Np3.Get("GammaLepton")
    ZtautauTree_Np4 = ZtautauFile_Np4.Get("GammaLepton")
    ZtautauTree_Np5 = ZtautauFile_Np5.Get("GammaLepton")
    
    WgammaTree_Np0 = WgammaFile_Np0.Get("GammaLepton")
    WgammaTree_Np1 = WgammaFile_Np1.Get("GammaLepton")
    WgammaTree_Np2 = WgammaFile_Np2.Get("GammaLepton")
    WgammaTree_Np3 = WgammaFile_Np3.Get("GammaLepton")
    WgammaTree_Np4 = WgammaFile_Np4.Get("GammaLepton")
    WgammaTree_Np5 = WgammaFile_Np5.Get("GammaLepton")
    
    ttbarTree = ttbarFile.Get("GammaLepton")
    
    st_tchan_lepnuTree   = st_tchan_lepnuFile.Get("GammaLepton")
    st_tchan_taunuTree = st_tchan_taunuFile.Get("GammaLepton")
    
    st_schan_lepnuTree   = st_schan_lepnuFile.Get("GammaLepton")
    st_schan_taunuTree = st_schan_taunuFile.Get("GammaLepton")
    
    st_WtTree   = st_WtFile.Get("GammaLepton")

    WWTree = WWFile.Get("GammaLepton")
    WZTree = WZFile.Get("GammaLepton")
    ZZTree = ZZFile.Get("GammaLepton")
    
    ZleplepgammaTree = ZleplepgammaFile.Get("GammaLepton")
    ZtautaugammaTree = ZtautaugammaFile.Get("GammaLepton")
    
    ##############################
    # and now the weights

    # wino_600_200_scale = 7.005
    # wino_600_500_scale = 3.03021
    # wino_1000_200_scale = 4.1325
    # wino_1500_300_scale = 0.16
    # Wlepnu_Np0_scale = 12.0052623622
    # Wlepnu_Np1_scale = 3.13076456857
    # Wlepnu_Np2_scale = 0.60296853897
    # Wlepnu_Np3_scale = 0.603183318846
    # Wlepnu_Np4_scale = 0.62088
    # Wlepnu_Np5_scale = 0.600008571551
    # Wtaunu_Np0_scale = 12.1457006649
    # Wtaunu_Np1_scale = 3.12868868923
    # Wtaunu_Np2_scale = 0.602359552172
    # Wtaunu_Np3_scale = 0.602586672951
    # Wtaunu_Np4_scale = 0.62088496708
    # Wtaunu_Np5_scale = 0.638769230769
    # Zleplep_Np0_scale = 0.631361988532
    # Zleplep_Np1_scale = 0.629541167757
    # Zleplep_Np2_scale = 0.625618828688
    # Zleplep_Np3_scale = 0.634090909091
    # Zleplep_Np4_scale = 0.6
    # Zleplep_Np5_scale = 0.51875
    # Ztautau_Np0_scale = 0.631228327261
    # Ztautau_Np1_scale = 0.631347664299
    # Ztautau_Np2_scale = 0.622916409433
    # Ztautau_Np3_scale = 0.640077378243
    # Ztautau_Np4_scale = 0.581269375646
    # Ztautau_Np5_scale = 0.48125
    # Wgamma_Np0_scale = 0.0129441737417
    # Wgamma_Np1_scale = 0.0635170304401
    # Wgamma_Np2_scale = 0.140920227273
    # Wgamma_Np3_scale = 0.140622611111
    # Wgamma_Np4_scale = 0.134589
    # Wgamma_Np5_scale = 0.123308
    # ttbar_scale = 0.0384505023442
    # st_tchan_lepnu_scale = 0.200916540624
    # st_tchan_taunu_scale = 0.201132004918
    # st_schan_lepnu_scale = 0.0092735093327
    # st_schan_taunu_scale = 0.00926981472204
    # st_Wt_scale = 0.0916407781992
    # WW_scale = 0.0342151663714
    # WZ_scale = 0.110873818259
    # ZZ_scale = 0.0252773011092
    # Zleplepgamma_scale = 0.963
    # Ztautaugamma_scale = 0.941960800016

    #################ntuple_pt25
    # wino_600_200_scale = 1.401
    # wino_600_500_scale = 3.03021
    # wino_1000_200_scale = 4.1325
    # wino_1500_300_scale = 0.16
    # Wlepnu_Np0_scale = 12.0052623622
    # Wlepnu_Np1_scale = 3.13076456857
    # Wlepnu_Np2_scale = 0.60296853897
    # Wlepnu_Np3_scale = 0.603183318846
    # Wlepnu_Np4_scale = 0.62088
    # Wlepnu_Np5_scale = 0.600008571551
    # Wtaunu_Np0_scale = 12.1457006649
    # Wtaunu_Np1_scale = 3.12868868923
    # Wtaunu_Np2_scale = 0.602359552172
    # Wtaunu_Np3_scale = 0.602586672951
    # Wtaunu_Np4_scale = 0.62088496708
    # Wtaunu_Np5_scale = 0.638769230769
    # Zleplep_Np0_scale = 0.631361988532
    # Zleplep_Np1_scale = 0.629541167757
    # Zleplep_Np2_scale = 0.625618828688
    # Zleplep_Np3_scale = 0.634090909091
    # Zleplep_Np4_scale = 0.6
    # Zleplep_Np5_scale = 0.51875
    # Ztautau_Np0_scale = 0.631228327261
    # Ztautau_Np1_scale = 0.631347664299
    # Ztautau_Np2_scale = 0.622916409433
    # Ztautau_Np3_scale = 0.640077378243
    # Ztautau_Np4_scale = 0.581269375646
    # Ztautau_Np5_scale = 0.48125
    # Wgamma_Np0_scale = 1.08706263428
    # Wgamma_Np1_scale = 0.734676952566
    # Wgamma_Np2_scale = 0.733754057143
    # Wgamma_Np3_scale = 0.149752323594
    # Wgamma_Np4_scale = 0.157524392683
    # Wgamma_Np5_scale = 0.1281354
    # ttbar_scale = 0.0384505023442
    # st_tchan_lepnu_scale = 0.200916540624
    # st_tchan_taunu_scale = 0.201132004918
    # st_Wt_scale = 0.0916407781992
    # WW_scale = 0.0342151663714
    # WZ_scale = 0.110873818259
    # ZZ_scale = 0.0252773011092
    # Zleplepgamma_scale = 0.963
    # Ztautaugamma_scale = 0.941960800016
    # gamma_Np1_scale = 4.06453310851
    # gamma_Np2_scale = 3.3709968686
    # gamma_Np3_scale = 1.38728943513
    # gamma_Np4_scale = 1.41464077802
    # gamma_Np5_scale = 1.23661096137


    if lepton == ELECTRON:
        wino_600_200_scale = 0.291875
        wino_600_500_scale = 2.69352
        wino_1000_200_scale = 4.1325
        wino_1500_300_scale = 0.0093269
        wino_1000_100_scale = 69.5
        wino_800_700_scale = 0.2328
        Wlepnu_Np0_scale = 12.0052623622
        Wlepnu_Np1_scale = 3.13076456857
        Wlepnu_Np2_scale = 0.60296853897
        Wlepnu_Np3_scale = 0.603183318846
        Wlepnu_Np4_scale = 0.62088
        Wlepnu_Np5_scale = 0.600008571551
        Wtaunu_Np0_scale = 12.1457006649
        Wtaunu_Np1_scale = 3.12868868923
        Wtaunu_Np2_scale = 0.602359552172
        Wtaunu_Np3_scale = 0.602586672951
        Wtaunu_Np4_scale = 0.62088496708
        Wtaunu_Np5_scale = 0.638769230769
        Zleplep_Np0_scale = 0.631361988532
        Zleplep_Np1_scale = 0.629541167757
        Zleplep_Np2_scale = 0.625618828688
        Zleplep_Np3_scale = 0.634090909091
        Zleplep_Np4_scale = 0.6
        Zleplep_Np5_scale = 0.51875
        Ztautau_Np0_scale = 0.631228327261
        Ztautau_Np1_scale = 0.631347664299
        Ztautau_Np2_scale = 0.622916409433
        Ztautau_Np3_scale = 0.640077378243
        Ztautau_Np4_scale = 0.581269375646
        Ztautau_Np5_scale = 0.48125
        Wgamma_Np0_scale = 0.0132834003639
        Wgamma_Np1_scale = 0.0651816146862
        Wgamma_Np2_scale = 0.144613309091
        Wgamma_Np3_scale = 0.144307893333
        Wgamma_Np4_scale = 0.13811616
        Wgamma_Np5_scale = 0.12653952
        ttbar_scale = 0.0384505023442
        st_tchan_lepnu_scale = 0.200916540624
        st_tchan_taunu_scale = 0.201132004918
        st_Wt_scale = 0.0916407781992
        WW_scale = 0.0342151663714
        WZ_scale = 0.110873818259
        ZZ_scale = 0.0252773011092
        Zleplepgamma_scale = 0.963
        Ztautaugamma_scale = 0.941960800016
        gamma_Np1_scale = 4.17064063358
        gamma_Np2_scale = 3.35244054801
        gamma_Np3_scale = 1.36994217452
        gamma_Np4_scale = 1.41464077802
        gamma_Np5_scale = 1.23661096137
    elif lepton == MUON:
        wino_600_200_scale = 0.291875
        Wlepnu_Np0_scale = 11.9925371604
        Wlepnu_Np1_scale = 3.13058966
        Wlepnu_Np2_scale = 0.601616497017
        Wlepnu_Np3_scale = 0.605913424797
        Wlepnu_Np4_scale = 0.606001176701
        Wlepnu_Np5_scale = 0.593142857143
        Wtaunu_Np0_scale = 12.1457006649
        Wtaunu_Np1_scale = 3.12868868923
        Wtaunu_Np2_scale = 0.602359552172
        Wtaunu_Np3_scale = 0.602586672951
        Wtaunu_Np4_scale = 0.62088496708
        Wtaunu_Np5_scale = 0.638769230769
        Zleplep_Np0_scale = 0.631664271554
        Zleplep_Np1_scale = 0.628327597475
        Zleplep_Np2_scale = 0.62551337696
        Zleplep_Np3_scale = 0.635795454545
        Zleplep_Np4_scale = 0.572916666667
        Zleplep_Np5_scale = 0.48125
        Ztautau_Np0_scale = 0.631228327261
        Ztautau_Np1_scale = 0.631347664299
        Ztautau_Np2_scale = 0.622916409433
        Ztautau_Np3_scale = 0.640077378243
        Ztautau_Np4_scale = 0.581269375646
        Ztautau_Np5_scale = 0.48125
        Wgamma_Np0_scale = 0.0132834003639
        Wgamma_Np1_scale = 0.0651816146862
        Wgamma_Np2_scale = 0.144613309091
        Wgamma_Np3_scale = 0.144307893333
        Wgamma_Np4_scale = 0.13811616
        Wgamma_Np5_scale = 0.12653952
        ttbar_scale = 0.0384505023442
        st_tchan_lepnu_scale = 0.201919368378
        st_tchan_taunu_scale = 0.201132004918
        st_Wt_scale = 0.0916407781992
        WW_scale = 0.0342151663714
        WZ_scale = 0.110873818259
        ZZ_scale = 0.0252773011092
        Zleplepgamma_scale = 0.963963963964
        Ztautaugamma_scale = 0.941960800016
        gamma_Np1_scale = 4.08704733658
        gamma_Np2_scale = 3.35244054801
        gamma_Np3_scale = 1.36994217452
        gamma_Np4_scale = 1.41464077802
        gamma_Np5_scale = 1.23661096137
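        # NOTE: only wino_600_200_scale is defined in this MUON branch; passing any
        # other --signal together with --lepton muon raises a NameError below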
        

    if signal == "600_200":
        factory.AddSignalTree(wino_600_200Tree, wino_600_200_scale)
    elif signal == "600_500":
        factory.AddSignalTree(wino_600_500Tree, wino_600_500_scale)
    elif signal == "1000_200":
        factory.AddSignalTree(wino_1000_200Tree, wino_1000_200_scale)
    elif signal == "1500_300":
        factory.AddSignalTree(wino_1500_300Tree, wino_1500_300_scale)
    else:
        print "*** signal designation not supported: %s ****" % signal
        sys.exit(1)

    factory.AddBackgroundTree(WlepnuTree_Np0, Wlepnu_Np0_scale)
    factory.AddBackgroundTree(WlepnuTree_Np1, Wlepnu_Np1_scale)
    factory.AddBackgroundTree(WlepnuTree_Np2, Wlepnu_Np2_scale)
    factory.AddBackgroundTree(WlepnuTree_Np3, Wlepnu_Np3_scale)
    factory.AddBackgroundTree(WlepnuTree_Np4, Wlepnu_Np4_scale)
    factory.AddBackgroundTree(WlepnuTree_Np5, Wlepnu_Np5_scale)
    
    #factory.AddBackgroundTree(WtaunuTree_Np0, Wtaunu_Np0_scale)
    factory.AddBackgroundTree(WtaunuTree_Np1, Wtaunu_Np1_scale)
    factory.AddBackgroundTree(WtaunuTree_Np2, Wtaunu_Np2_scale)
    factory.AddBackgroundTree(WtaunuTree_Np3, Wtaunu_Np3_scale)
    factory.AddBackgroundTree(WtaunuTree_Np4, Wtaunu_Np4_scale)
    factory.AddBackgroundTree(WtaunuTree_Np5, Wtaunu_Np5_scale)

    # factory.AddBackgroundTree(ZleplepTree_Np0, Zleplep_Np0_scale)
    # factory.AddBackgroundTree(ZleplepTree_Np1, Zleplep_Np1_scale)
    # factory.AddBackgroundTree(ZleplepTree_Np2, Zleplep_Np2_scale)
    # factory.AddBackgroundTree(ZleplepTree_Np3, Zleplep_Np3_scale)
    # factory.AddBackgroundTree(ZleplepTree_Np4, Zleplep_Np4_scale)
    # factory.AddBackgroundTree(ZleplepTree_Np5, Zleplep_Np5_scale)
    
    # factory.AddBackgroundTree(ZtautauTree_Np0, Ztautau_Np0_scale)
    # factory.AddBackgroundTree(ZtautauTree_Np1, Ztautau_Np1_scale)
    # factory.AddBackgroundTree(ZtautauTree_Np2, Ztautau_Np2_scale)
    # factory.AddBackgroundTree(ZtautauTree_Np3, Ztautau_Np3_scale)
    # factory.AddBackgroundTree(ZtautauTree_Np4, Ztautau_Np4_scale)
    # factory.AddBackgroundTree(ZtautauTree_Np5, Ztautau_Np5_scale)
    
    factory.AddBackgroundTree(WgammaTree_Np0, Wgamma_Np0_scale)
    factory.AddBackgroundTree(WgammaTree_Np1, Wgamma_Np1_scale)
    factory.AddBackgroundTree(WgammaTree_Np2, Wgamma_Np2_scale)
    factory.AddBackgroundTree(WgammaTree_Np3, Wgamma_Np3_scale)
    factory.AddBackgroundTree(WgammaTree_Np4, Wgamma_Np4_scale)
    factory.AddBackgroundTree(WgammaTree_Np5, Wgamma_Np5_scale)
    
    factory.AddBackgroundTree(ttbarTree, ttbar_scale)
    
    factory.AddBackgroundTree(st_tchan_lepnuTree, st_tchan_lepnu_scale)
    factory.AddBackgroundTree(st_tchan_taunuTree, st_tchan_taunu_scale)
    
    # factory.AddBackgroundTree(st_schan_lepnuTree, st_schan_lepnu_scale)
    # factory.AddBackgroundTree(st_schan_taunuTree, st_schan_taunu_scale)
    
    factory.AddBackgroundTree(st_WtTree, st_Wt_scale)

    factory.AddBackgroundTree(WWTree, WW_scale)
    factory.AddBackgroundTree(WZTree, WZ_scale)
    factory.AddBackgroundTree(ZZTree, ZZ_scale)
    
    factory.AddBackgroundTree(ZleplepgammaTree, Zleplepgamma_scale)
    factory.AddBackgroundTree(ZtautaugammaTree, Ztautaugamma_scale)
    


    # note that you may also use variable expressions, such as: "3*var1/var2*abs(var3)"
    # [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
    factory.AddVariable( "MET := sqrt(Metx*Metx+Mety*Mety)", 'F' )
    # factory.AddVariable( "HT", 'F' )
    factory.AddVariable( "PhotonPt[0]", 'F' )
    #factory.AddVariable( "ElectronPt[0]", 'F' )
    if lepton == ELECTRON:
        factory.AddVariable( "mTel", 'F' )
    else:
        factory.AddVariable( "mTmu", 'F' )
    #factory.AddVariable( "abs(PhotonEta[0])", 'F' )
    #factory.AddVariable( "abs(ElectronEta[0])", 'F' )

    # Apply additional cuts on the signal and background sample. 
    # example for cut: mycut = TCut( "abs(var1)<0.5 && abs(var2-0.5)<1" )
    #mycutSig = TCut( "abs(PhotonEta[0]) < 2.01 && abs(ElectronEta[0]) < 2.01" ) 

    if lepton == ELECTRON:
        mycutSig = TCut( "sqrt((PhotonEta[0]-ElectronEta[0])*(PhotonEta[0]-ElectronEta[0]) + (PhotonPhi[0]-ElectronPhi[0])*(PhotonPhi[0]-ElectronPhi[0])) > 0.7")
    else:
        mycutSig = TCut( "sqrt((PhotonEta[0]-MuonEta[0])*(PhotonEta[0]-MuonEta[0]) + (PhotonPhi[0]-MuonPhi[0])*(PhotonPhi[0]-MuonPhi[0])) > 0.7")
    #mycutSig = TCut( "PhotonPt[0] > 85000" ) 
    mycutBkg = mycutSig 
    
    # Here, the relevant variables are copied over in new, slim trees that are
    # used for TMVA training and testing
    # "SplitMode=Random" means that the input events are randomly shuffled before
    # splitting them into training and test samples
    factory.PrepareTrainingAndTestTree( mycutSig, mycutBkg,
                                        "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" )

    # --------------------------------------------------------------------------------------------------

    # ---- Book MVA methods
    #
    # please look up the various method configuration options in the corresponding cxx files, e.g.
    # src/MethodCuts.cxx, etc., or here: http://tmva.sourceforge.net/optionRef.html
    # it is possible to preset ranges in the option string in which the cut optimisation should be done:
    # "...:CutRangeMin[2]=-1:CutRangeMax[2]=1...", where [2] is the third input variable

    # Cut optimisation
    if "Cuts" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "Cuts",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" )

    if "CutsD" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsD",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" )

    if "CutsPCA" in mlist:
        factory.BookMethod( TMVA.Types.kCuts, "CutsPCA",
                            "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" )

    # if "CutsGA" in mlist:
    #     factory.BookMethod( TMVA.Types.kCuts, "CutsGA",
    #                         "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" )

    # if "CutsSA" in mlist:
    #     factory.BookMethod( TMVA.Types.kCuts, "CutsSA",
    #                         "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" )
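
    # Only cut-based methods are booked above. A minimal sketch (commented out,
    # option values are illustrative) of how a BDT could be booked if "BDT" is
    # passed via --methods:
    #
    # if "BDT" in mlist:
    #     factory.BookMethod( TMVA.Types.kBDT, "BDT",
    #                         "!H:!V:NTrees=400:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20" )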


    # --------------------------------------------------------------------------------------------------
            
    # ---- Now you can tell the factory to train, test, and evaluate the MVAs. 

    # Train MVAs
    factory.TrainAllMethods()
    
    # Test MVAs
    factory.TestAllMethods()
    
    # Evaluate MVAs
    factory.EvaluateAllMethods()    
    
    # Save the output.
    outputFile.Close()
    
    print "=== wrote root file %s\n" % outfname
    print "=== TMVAClassification is done!\n"
    
    # open the GUI for the result macros    
    gROOT.ProcessLine( "TMVAGui(\"%s\")" % outfname )
    
    # keep the ROOT thread running
    gApplication.Run()