Python evaluate_reader示例，root_numpy.tmva.evaluate_reader Python示例

示例#1

0

显示文件

文件： _tmvaReader.py 项目： arogozhnikov/rep

def tmva_process(info, data):
    """
    Build a TMVA reader for a trained method and score ``data`` with it.

    One reader variable is registered per column of ``data`` and the method
    ``info.method_name`` is booked from ``info.xml_file``.  When the model is a
    classification model whose sigmoid-function string contains ``'sig_eff'``,
    the number following ``=`` in that string is passed to ``evaluate_reader``
    as ``aux``.

    :param rep.estimators.tmva._AdditionalInformationPredict info: additional information
    :param pandas.DataFrame data: data to predict
    :return: the value returned by ``evaluate_reader`` for ``data``

    """
    import ROOT

    tmva_reader = ROOT.TMVA.Reader()
    for column in data.columns:
        tmva_reader.AddVariable(column, array.array('f', [0.]))

    model_type, sigmoid_function = info.model_type
    tmva_reader.BookMVA(info.method_name, info.xml_file)

    uses_signal_efficiency = (
        model_type == 'classification'
        and sigmoid_function is not None
        and 'sig_eff' in sigmoid_function
    )
    if not uses_signal_efficiency:
        return evaluate_reader(tmva_reader, info.method_name, data)

    # The string is expected to look like "<name>=<value>"; take the value part.
    signal_efficiency = float(sigmoid_function.strip().split('=')[1])
    assert 0.0 <= signal_efficiency <= 1., 'signal efficiency must be in [0, 1], not {}'.format(
        signal_efficiency)
    return evaluate_reader(tmva_reader, info.method_name, data, aux=signal_efficiency)

示例#2

0

显示文件

def tmva_process(info, data):
    """
    Create a TMVA reader, book the trained method and predict ``data``.

    Every column of ``data`` becomes a reader variable.  The optional ``aux``
    argument of ``evaluate_reader`` is only supplied for classification models
    whose sigmoid-function string mentions ``'sig_eff'``; its value is the
    number found after ``=`` in that string.

    :param rep.estimators.tmva._AdditionalInformationPredict info: additional information
    :param pandas.DataFrame data: data to predict
    :return: the value returned by ``evaluate_reader`` for ``data``

    """
    import ROOT

    reader = ROOT.TMVA.Reader()
    for name in data.columns:
        reader.AddVariable(name, array.array('f', [0.]))

    model_type, sigmoid_function = info.model_type
    reader.BookMVA(info.method_name, info.xml_file)

    # Collect the optional keyword argument instead of branching on the call.
    extra_arguments = {}
    if model_type == 'classification' and sigmoid_function is not None:
        if 'sig_eff' in sigmoid_function:
            efficiency = float(sigmoid_function.strip().split('=')[1])
            assert 0.0 <= efficiency <= 1., 'signal efficiency must be in [0, 1], not {}'.format(
                efficiency)
            extra_arguments['aux'] = efficiency

    return evaluate_reader(reader, info.method_name, data, **extra_arguments)

示例#3

0

显示文件

文件： EvalTMVA.py 项目： vananiashvili/4topAnalysis

 def ImportXml(self,train,test,ModelName):
     """
     Book the XML weights of ``ModelName`` and score both samples.

     One reader variable is registered for each entry of
     ``self.DataSet.LVariables``; the weights are read from
     ``./dataset/weights/TMVAClassification_<ModelName>.weights.xml``.

     Returns a pair ``(test scores, train scores)`` -- note the order: the
     test sample comes first.
     """
     tmva_reader = ROOT.TMVA.Reader()
     # Register the input variables the reader evaluates on
     for variable_name in self.DataSet.LVariables:
         tmva_reader.AddVariable(variable_name, array('f', [0.]))

     weights_file = './dataset/weights/TMVAClassification_'+ModelName+'.weights.xml'
     tmva_reader.BookMVA(ModelName, weights_file)

     # Classification scores, evaluated test sample first and train sample second
     test_scores = evaluate_reader(tmva_reader, ModelName, test.Events)
     train_scores = evaluate_reader(tmva_reader, ModelName, train.Events)
     return test_scores, train_scores

示例#4

0

显示文件

def plot_clf_results_tmva(reader,
                          x_train,
                          y_train,
                          w_train,
                          x_test,
                          y_test,
                          w_test,
                          nbins=30,
                          figname="BDTOutput_tmva.png",
                          verbose="False"):
    """
    Evaluate the "BDT" method of ``reader`` on train and test samples and plot it.

    For the train sample and then the test sample, the weights are normalised
    to unit sum, the events are split into signal (``y > 0.5``) and background
    (``y < 0.5``), and each part is scored with ``evaluate_reader``.  The four
    score arrays and the four matching weight arrays (train signal, train
    background, test signal, test background) are handed to
    ``plot_clf_results`` together with ``nbins``, ``figname`` and ``verbose``.

    The input weight arrays are left untouched.

    NOTE(review): the default of ``verbose`` is the *string* ``"False"``,
    which is truthy; it is kept for interface stability -- confirm how
    ``plot_clf_results`` interprets it.
    """
    decisions = []
    weights = []
    for x, y, w in ((x_train, y_train, w_train), (x_test, y_test, w_test)):
        # Normalise into a new array so the caller's weights are not rescaled
        # in place (an in-place ``*=`` would silently modify w_train / w_test).
        w = w * (1. / np.sum(w))

        is_signal = y > 0.5
        is_background = y < 0.5

        dsig = evaluate_reader(reader, "BDT", x[is_signal])
        dbkg = evaluate_reader(reader, "BDT", x[is_background])

        decisions += [dsig, dbkg]
        weights += [w[is_signal], w[is_background]]

    plot_clf_results(tuple(decisions), tuple(weights), nbins, figname, verbose)

示例#5

0

显示文件

def get_tmva_test_results(directory, variables, name=''):
    """
    Score the TMVA test tree stored under ``directory`` with the trained BDT.

    ``directory`` is used as a plain string prefix (no separator is added) for
    both ``weights/TMVA_BDT.weights.xml`` and ``tmva_output.root``.

    Returns the tuple ``(y_test, y_decision, w_test, name)`` where ``y_test``
    is ``1 - classID`` from the ``TestTree``, ``y_decision`` is the output of
    ``evaluate_reader`` and ``w_test`` is the ``weight`` branch.
    """
    # Every variable is registered with a float buffer.
    tmva_reader = r.TMVA.Reader()
    for variable in variables:
        tmva_reader.AddVariable(variable, array('f', [0]))
    tmva_reader.BookMVA('BDT', directory + 'weights/TMVA_BDT.weights.xml')

    # Read features, labels and weights of the testing dataset
    test_file = directory + 'tmva_output.root'
    features = rec2array(root2array(test_file, 'TestTree', variables))
    labels = 1 - root2array(test_file, 'TestTree', 'classID')
    event_weights = root2array(test_file, 'TestTree', 'weight')

    scores = evaluate_reader(tmva_reader, "BDT", features)
    return (labels, scores, event_weights, name)

示例#6

0

显示文件

文件： training.py 项目： cms-ttH/ttH-TauRoast

def evaluate(config, tree, names, transform=None):
    """
    Evaluate the named MVAs on ``tree`` and attach the scores via ``tree.mva``.

    For every entry of ``names`` the setup is loaded with
    ``load(config, name.split("_")[1])`` and the setup's variables (optionally
    passed through ``transform``) are read from ``tree.raw()``.  Names starting
    with ``"sklearn"`` are first scored with the pickled classifier
    ``<mvadir>/<name>.pkl``; every name is then scored with the TMVA weights
    ``<mvadir>/<name>.xml`` and stored under the name with ``"sklearn"``
    replaced by ``"tmvalike"``.

    If ``mapping.root`` can be opened, a ``tmvalike_likelihood`` column is
    added by looking up each ``(tmvalike_tt, tmvalike_ttZ)`` pair in the
    histogram ``hTargetBinning``.

    :param config: mapping read for ``"mvadir"``, ``"indir"`` and ``"outdir"``
    :param tree: object providing ``raw()`` and ``mva(...)``
    :param names: iterable of MVA names
    :param transform: optional callable applied to the list of variable names
    """
    output = []
    dtype = []
    for name in names:
        setup = load(config, name.split("_")[1])
        variables = list(transform(setup["variables"])) if transform else setup["variables"]
        data = rec2array(tree2array(tree.raw(), variables))
        if name.startswith("sklearn"):
            fn = os.path.join(config["mvadir"], name + ".pkl")
            # NOTE: unpickling runs arbitrary code; only trusted model files
            # should live in the MVA directory.
            with open(fn, 'rb') as fd:
                bdt, label = pickle.load(fd)
            scores = []
            if len(data) > 0:
                scores = bdt.predict_proba(data)[:, 1]
            output += [scores]
            dtype += [(name, 'float64')]

        # The TMVA evaluation runs for every name, sklearn ones included.
        fn = os.path.join(config["mvadir"], name + ".xml")
        reader = r.TMVA.Reader("Silent")
        for var in setup['variables']:
            reader.AddVariable(var, array('f', [0.]))
        reader.BookMVA("BDT", fn)
        scores = evaluate_reader(reader, "BDT", data)
        output += [scores]
        dtype += [(name.replace("sklearn", "tmvalike"), 'float64')]

    f = r.TFile(os.path.join(config.get("mvadir", config.get("indir", config["outdir"])), "mapping.root"), "READ")
    if f.IsOpen():
        likelihood = f.Get("hTargetBinning")

        def lh(values):
            return likelihood.GetBinContent(likelihood.FindBin(*values))

        # Map each output column name to its position in ``output``.
        indices = dict((v, n) for n, (v, _) in enumerate(dtype))
        tt = output[indices['tmvalike_tt']]
        ttZ = output[indices['tmvalike_ttZ']]
        if len(tt) == 0:
            output += [[]]
        else:
            output += [np.apply_along_axis(lh, 1, np.array([tt, ttZ]).T)]
        dtype += [('tmvalike_likelihood', 'float64')]
        f.Close()

    # Materialise the rows: a lazy ``zip`` object (Python 3) cannot be turned
    # into a structured array, while a list of tuples works on both versions.
    data = np.array(list(zip(*output)), dtype)
    tree.mva(array2tree(data))

示例#7

0

显示文件

文件： root_tmva.py 项目： jemrobinson/bbyy_jet_classifier

    def test(self, test_data, classification_variables, training_sample):
        """
        Definition:
        -----------
            Testing method for RootTMVA; it books the BDT weights found in the "weights" sub-folder
            and evaluates them on the given data
        Args:
        -----
            test_data = dictionary; only its "X" entry is read here and passed to evaluate_reader
            classification_variables = list of names of variables used for classification
            training_sample = string naming the sample sub-directory that holds the trained weights
                (e.g. "SM_merged" or "X350_hh")

        Returns:
        --------
            yhat = the array of BDT outputs
            yhat_class = array of the same length as yhat, 1 where yhat >= 0 and 0 elsewhere
        """
        log = logging.getLogger("root_tmva")
        log.info("Evaluating performance...")

        # -- Construct reader and add variables to it:
        log.info("Construct TMVA reader and add variables to it")
        tmva_reader = TMVA.Reader()
        for variable in classification_variables:
            tmva_reader.AddVariable(variable, array.array("f", [0]))

        # -- Load TMVA results (the BDT weights; other weight files can be booked here instead)
        weights_xml = os.path.join(self.output_directory, training_sample, self.name, "weights", "TMVAClassification_BDT.weights.xml")
        tmva_reader.BookMVA("BDT", weights_xml)

        yhat = evaluate_reader(tmva_reader, "BDT", test_data["X"])

        # -- add binary classification labels: threshold the output at zero
        yhat_class = np.zeros(len(yhat))
        yhat_class[yhat >= 0] = 1
        return yhat, yhat_class

示例#8

0

显示文件

文件： plot_multiclass.py 项目： vvolkl/root_numpy

# Train an MLP
# Pick the BookMethod callable depending on the ROOT version.
if ROOT_VERSION >= '6.07/04':
    BookMethod = factory.BookMethod
else:
    BookMethod = TMVA.Factory.BookMethod
BookMethod(data, 'MLP', 'MLP',
           'NeuronType=tanh:NCycles=200:HiddenLayers=N+2,2:'
           'TestRate=5:EstimatorType=MSE')
factory.TrainAllMethods()

# Classify the test dataset with the MLP
# Two reader variables, f0 and f1, are registered before booking the weights.
reader = TMVA.Reader()
for n in range(2):
    reader.AddVariable('f{0}'.format(n), array('f', [0.]))
reader.BookMVA('MLP', 'weights/classifier_MLP.weights.xml')
class_proba = evaluate_reader(reader, 'MLP', X_test)

# Plot the decision boundaries
plot_colors = "rgb"
plot_step = 0.02
class_names = "ABC"
cmap = plt.get_cmap('Paired')

fig = plt.figure(figsize=(5, 5))
fig.patch.set_alpha(0)
# Grid covering the first two columns of X, padded by 1 on each side.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
                     np.arange(y_min, y_max, plot_step))

# Evaluate the MLP on every grid point (one row per point, two columns).
Z = evaluate_reader(reader, 'MLP', np.c_[xx.ravel(), yy.ravel()])

示例#9

0

显示文件

文件： testRootNumpy.py 项目： BetterWang/cmssw

# Pick the BookMethod callable depending on the ROOT version.
if ROOT_VERSION >= '6.07/04':
    BookMethod = factory.BookMethod
else:
    BookMethod = TMVA.Factory.BookMethod
# Book two BDT regressors that differ only in the number of trees (1 vs 300).
BookMethod(data, 'BDT', 'BDT1',
                   'nCuts=20:NTrees=1:MaxDepth=4:BoostType=AdaBoostR2:'
                   'SeparationType=RegressionVariance')
BookMethod(data, 'BDT', 'BDT2',
                   'nCuts=20:NTrees=300:MaxDepth=4:BoostType=AdaBoostR2:'
                   'SeparationType=RegressionVariance')
factory.TrainAllMethods()

# Predict the regression target
# A single reader with one variable 'x' holds both booked methods.
reader = TMVA.Reader()
reader.AddVariable('x', array('f', [0.]))
reader.BookMVA('BDT1', 'weights/regressor_BDT1.weights.xml')
reader.BookMVA('BDT2', 'weights/regressor_BDT2.weights.xml')
y_1 = evaluate_reader(reader, 'BDT1', X)
y_2 = evaluate_reader(reader, 'BDT2', X)

# Plot the results
plt.figure()
plt.scatter(X, y, c="k", label="training samples")
plt.plot(X, y_1, c="g", label="1 tree", linewidth=2)
plt.plot(X, y_2, c="r", label="300 trees", linewidth=2)
plt.xlabel("data")
plt.ylabel("target")
plt.title("Boosted Decision Tree Regression")
plt.legend()
# No extension is given in the file name passed to savefig.
plt.savefig('RootNumpy')

示例#10

0

显示文件

文件： plot_twoclass.py 项目： tibristo/BosonTagger

# Book a BDT classifier with the option string below.
factory.BookMethod('BDT', 'BDT',
                   'NTrees=100:nEventsMin=30:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.2:SeparationType=GiniIndex:nCuts=10:PruneStrength=2:PruneMethod=ExpectedError')
# Disabled alternative booking with Fisher-style options:
#factory.BookMethod('kBDT', 'BDT',
#                   'Fisher:VarTransform=None:CreateMVAPdfs:'
#                   'PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:'
#                   'NsmoothMVAPdf=10')

# NOTE(review): the print statements below are Python 2 syntax.
print 'added all the events and booked the method'
factory.TrainAllMethods()
print 'trained'
# Classify the test dataset with the classifier
# One reader variable f0..f<n_vars-1> is registered per input feature.
reader = TMVA.Reader()
for n in range(n_vars):
    reader.AddVariable('f{0}'.format(n), array('f', [0.]))
reader.BookMVA('BDT', 'weights/classifier_BDT.weights.xml')
twoclass_output = evaluate_reader(reader, 'BDT', X_test)

plot_colors = "br"
plot_step = 0.02
class_names = "AB"
cmap = plt.get_cmap('bwr')

plt.figure(figsize=(10, 5))

# Plot the decision boundaries
plt.subplot(121)
# Axis limits: range of the first two columns of X, padded by 1 on each side.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
'''
xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
                     np.arange(y_min, y_max, plot_step))

示例#11

0

显示文件

# The following line is necessary if events have been added individually:
factory.PrepareTrainingAndTestTree(TCut('1'), 'NormMode=EqualNumEvents')

# Train a classifier
# The option string is assembled from adjacent string literals.
factory.BookMethod('Fisher', 'Fisher',
                   'Fisher:VarTransform=None:CreateMVAPdfs:'
                   'PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:'
                   'NsmoothMVAPdf=10')
factory.TrainAllMethods()

# Classify the test dataset with the classifier
# One reader variable f0..f<n_vars-1> is registered per input feature.
reader = TMVA.Reader()
for n in range(n_vars):
    reader.AddVariable('f{0}'.format(n), array('f', [0.]))
reader.BookMVA('Fisher', 'weights/classifier_Fisher.weights.xml')
twoclass_output = evaluate_reader(reader, 'Fisher', X_test)

plot_colors = "br"
plot_step = 0.02
class_names = "AB"
cmap = plt.get_cmap('bwr')

plt.figure(figsize=(10, 5))

# Plot the decision boundaries
plt.subplot(121)
# Grid covering the first two columns of X, padded by 1 on each side.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
                     np.arange(y_min, y_max, plot_step))

示例#12

0

显示文件

文件： evaluate_and_store.py 项目： radicaleid/btagging

def main(weights, picklename, filename, treename='bTag_AntiKt2PV0TrackJets'):
    '''
    Evaluate the TMVA method stored in ``weights`` on the jets of a ROOT
    ntuple and pickle the per-event, per-jet output.

    The variable names are read from the ``Variables`` section of the XML
    file, mapped to data-frame column names via ``names_mapping``, flattened
    into one feature row per jet, scored with ``evaluate_reader`` and finally
    regrouped with ``match_shape`` against ``df['jet_pt']``.

    Args:
    -----
        weights:    .xml file out of mv2 training containing bdt parameters
        picklename: name of the output pickle to store new mv2 values
        filename:   .root file with ntuples used to evaluate the tmva method
        treename:   (optional) name of the TTree to consider
    Returns:
    --------
        status (always 0)
    Raises:
    -------
        nothing explicitly; a variable label in the XML file that is missing
        from ``names_mapping`` surfaces as a KeyError
    '''
    print('Parsing XML file...')
    # -- Load XML file
    tree = ET.parse(weights)
    root = tree.getroot()

    # -- Get list of variable names from XML file
    var_list = [
        var.attrib['Label']
        for var in root.findall('Variables')[0].findall('Variable')
    ]

    print('Loading .root file for evaluation...')
    # -- Get ntuples:
    df = pup.root2panda(
        filename,
        treename,
        branches=[
            'jet_pt', 'jet_eta', 'jet_phi', 'jet_m', 'jet_ip2d_pu',
            'jet_ip2d_pc', 'jet_ip2d_pb', 'jet_ip3d_pu', 'jet_ip3d_pc',
            'jet_ip3d_pb', 'jet_sv1_vtx_x', 'jet_sv1_vtx_y', 'jet_sv1_vtx_z',
            'jet_sv1_ntrkv', 'jet_sv1_m', 'jet_sv1_efc', 'jet_sv1_n2t',
            'jet_sv1_sig3d', 'jet_jf_n2t', 'jet_jf_ntrkAtVx', 'jet_jf_nvtx',
            'jet_jf_nvtx1t', 'jet_jf_m', 'jet_jf_efc', 'jet_jf_sig3d',
            'jet_jf_deta', 'jet_jf_dphi', 'PVx', 'PVy', 'PVz'
        ])

    # -- Insert default values, calculate MV2 variables from the branches in df
    df = transformVars(df)

    # -- Map the variable labels used in the XML file to data-frame columns
    names_mapping = {
        'pt': 'jet_pt',
        'abs(eta)': 'abs(jet_eta)',
        'ip2': 'jet_ip2',
        'ip2_c': 'jet_ip2_c',
        'ip2_cu': 'jet_ip2_cu',
        'ip3': 'jet_ip3',
        'ip3_c': 'jet_ip3_c',
        'ip3_cu': 'jet_ip3_cu',
        'sv1_ntkv': 'jet_sv1_ntrkv',
        'sv1_mass': 'jet_sv1_m',
        'sv1_efrc': 'jet_sv1_efc',
        'sv1_n2t': 'jet_sv1_n2t',
        'sv1_Lxy': 'jet_sv1_Lxy',
        'sv1_L3d': 'jet_sv1_L3d',
        'sv1_sig3': 'jet_sv1_sig3d',
        'sv1_dR': 'jet_sv1_dR',
        'jf_n2tv': 'jet_jf_n2t',
        'jf_ntrkv': 'jet_jf_ntrkAtVx',
        'jf_nvtx': 'jet_jf_nvtx',
        'jf_nvtx1t': 'jet_jf_nvtx1t',
        'jf_mass': 'jet_jf_m',
        'jf_efrc': 'jet_jf_efc',
        'jf_dR': 'jet_jf_dR',
        'jf_sig3': 'jet_jf_sig3d'
    }

    print('Initializing TMVA...')
    # -- TMVA: Initialize reader, add empty variables and weights from training
    reader = TMVA.Reader()
    for var_name in var_list:
        reader.AddVariable(var_name, array('f', [0]))
    reader.BookMVA('BDTG akt2', weights)

    print('Creating feature matrix...')
    # -- Get features for each event and store them in X_test:
    #    each event row is transposed so that X_test gets one row per jet
    X_buf = []
    for event in df[[names_mapping[var] for var in var_list]].values:
        X_buf.extend(
            np.array([normalize_type(jet) for jet in event]).T.tolist())
    X_test = np.array(X_buf)

    print('Evaluating!')
    # -- TMVA: Evaluate!
    twoclass_output = evaluate_reader(reader, 'BDTG akt2', X_test)

    # -- Reshape the MV2 output into event-jet format
    reorganized = match_shape(twoclass_output, df['jet_pt'])

    import cPickle
    print('Saving new MV2 weights in {}'.format(picklename))
    # Use a context manager so the output file is flushed and closed even if
    # pickling fails.
    with open(picklename, 'wb') as pickle_file:
        cPickle.dump(reorganized, pickle_file)

    # -- Write the new branch to the tree (currently de-activated)
    #add_branch(reorganized, filename, treename, 'jet_mv2c20_new')

    print('Done. Success!')
    return 0

示例#13

0

显示文件

文件： collections.py 项目： Pmeiring/NtupleTools

def cl3d_fixtures(clusters, tcs):
    """
    Bring a 3D-cluster data frame to the layout expected downstream.

    The frame is modified in place and also returned:

    * columns ``clusters_id`` / ``clusters_n`` are renamed to ``clusters`` /
      ``nclu`` (backward compatibility);
    * ``layer_energy`` and ``hoe`` are computed from the trigger cells ``tcs``
      when the respective column is missing;
    * ``ptem`` and ``eem`` are added as ``pt`` and ``energy`` divided by
      ``1 + hoe``.

    An empty frame is returned untouched.

    :param clusters: pandas.DataFrame of clusters; read for ``pt``, ``energy``
        and, when columns must be computed, ``clusters``
    :param tcs: pandas.DataFrame of trigger cells with ``id``, ``layer`` and
        ``energy`` columns; only accessed when a column has to be computed
    :return: the same ``clusters`` object
    """
    # for backward compatibility
    if clusters.empty:
        return clusters

    clusters.rename(columns={
        'clusters_id': 'clusters',
        'clusters_n': 'nclu'
    },
                    inplace=True)

    do_compute_hoe = 'hoe' not in clusters.columns
    do_compute_layer_energy = 'layer_energy' not in clusters.columns

    def compute_layer_energy3(cluster, do_layer_energy=True, do_hoe=False):
        # Energy-weighted histogram of the layers of the cluster's components.
        components = tcs[tcs.id.isin(cluster.clusters)]
        hist, _ = np.histogram(components.layer.values,
                               bins=range(0, 29, 2),
                               weights=components.energy.values)
        results = []
        if do_layer_energy:
            results.append(hist)
        if do_hoe:
            em_energy = np.sum(hist)
            # -1 marks clusters for which the ratio cannot be formed.
            hoe = -1
            if em_energy != 0:
                hoe = max(0, cluster.energy - em_energy) / em_energy
            results.append(hoe)
        return results

    if do_compute_hoe or do_compute_layer_energy:
        # The column order must match the order in which
        # compute_layer_energy3 appends its results.
        new_columns = []
        if do_compute_layer_energy:
            new_columns.append('layer_energy')
        if do_compute_hoe:
            new_columns.append('hoe')
        clusters[new_columns] = clusters.apply(
            lambda cl: compute_layer_energy3(cl, do_compute_layer_energy,
                                             do_compute_hoe),
            result_type='expand',
            axis=1)

    clusters['ptem'] = clusters.pt / (1 + clusters.hoe)
    clusters['eem'] = clusters.energy / (1 + clusters.hoe)

    # The BDT columns 'bdt_pu' / 'bdt_pi' are intentionally not filled; the
    # evaluation was disabled behind a constant-false condition:
    #   rnptmva.evaluate_reader(classifiers.mva_pu_classifier_builder(), 'BDT',
    #       clusters[['pt', 'eta', 'maxlayer', 'hoe', 'emaxe', 'szz']])
    #   rnptmva.evaluate_reader(classifiers.mva_pi_classifier_builder(), 'BDT',
    #       clusters[['pt', 'eta', 'maxlayer', 'hoe', 'emaxe', 'szz']])
    return clusters

示例#14

0

显示文件

    print 'Training takes ', stop - start, 's'
# NOTE(review): the print statements below are Python 2 syntax.
if not args.quiet:
    print 'Training done'
    print 'Outputs in directory : ', args.outdir

# evaluate training results
if args.evaluate:

    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    # Rebuild a reader from the weights written under args.outdir; every
    # variable is registered with a float buffer.
    reader = TMVA.Reader()
    for v in var:
        #vtype = 'i' if v in ['nJet','tau0_decaymode','tau1_decaymode','ntags','ntags_loose'] else 'f'

        reader.AddVariable(v, array('f', [0]))

    # args.outdir is used as a plain string prefix (no separator is added).
    reader.BookMVA('BDT', args.outdir + 'weights/TMVA_BDT.weights.xml')
    y_decision = evaluate_reader(reader, "BDT", x_test)
    # NOTE(review): w_train / w_test are passed here and w_test is reused by
    # plot_roc below -- confirm plot_clf_results_tmva does not modify them.
    util.plot_clf_results_tmva(reader,
                               x_train,
                               y_train,
                               w_train,
                               x_test,
                               y_test,
                               w_test,
                               figname=args.outdir + "bdtoutput.png",
                               verbose=(not args.quiet))
    util.plot_roc((y_test, y_decision, w_test),
                  figname=args.outdir + 'roc.png')

示例#15

0

显示文件

文件： testRootNumpy.py 项目： archiesharma/muonPhase2

# Pick the BookMethod callable depending on the ROOT version.
if ROOT_VERSION >= '6.07/04':
    BookMethod = factory.BookMethod
else:
    BookMethod = TMVA.Factory.BookMethod
# Book two BDT regressors that differ only in the number of trees (1 vs 300).
BookMethod(
    data, 'BDT', 'BDT1', 'nCuts=20:NTrees=1:MaxDepth=4:BoostType=AdaBoostR2:'
    'SeparationType=RegressionVariance')
BookMethod(
    data, 'BDT', 'BDT2', 'nCuts=20:NTrees=300:MaxDepth=4:BoostType=AdaBoostR2:'
    'SeparationType=RegressionVariance')
factory.TrainAllMethods()

# Predict the regression target
# A single reader with one variable 'x' holds both booked methods.
reader = TMVA.Reader()
reader.AddVariable('x', array('f', [0.]))
reader.BookMVA('BDT1', 'weights/regressor_BDT1.weights.xml')
reader.BookMVA('BDT2', 'weights/regressor_BDT2.weights.xml')
y_1 = evaluate_reader(reader, 'BDT1', X)
y_2 = evaluate_reader(reader, 'BDT2', X)

# Plot the results
plt.figure()
plt.scatter(X, y, c="k", label="training samples")
plt.plot(X, y_1, c="g", label="1 tree", linewidth=2)
plt.plot(X, y_2, c="r", label="300 trees", linewidth=2)
plt.xlabel("data")
plt.ylabel("target")
plt.title("Boosted Decision Tree Regression")
plt.legend()
# No extension is given in the file name passed to savefig.
plt.savefig('RootNumpy')

示例#16

0

显示文件

文件： test_tmva.py 项目： sznajder/hepaccelerate-cms

# Register the 15 input variables on the 2-jet reader; each call gets its own
# buffer from arr().
reader2j.AddVariable("hmmphics", arr())
reader2j.AddVariable("j1pt", arr())
reader2j.AddVariable("j1eta", arr())
reader2j.AddVariable("j2pt", arr())
reader2j.AddVariable("detajj", arr())
reader2j.AddVariable("dphijj", arr())
reader2j.AddVariable("mjj", arr())
reader2j.AddVariable("met", arr())
reader2j.AddVariable("zepen", arr())
reader2j.AddVariable("njets", arr())
reader2j.AddVariable("drmj", arr())
reader2j.AddVariable("m1ptOverMass", arr())
reader2j.AddVariable("m2ptOverMass", arr())
reader2j.AddVariable("m1eta", arr())
reader2j.AddVariable("m2eta", arr())
# Register the 7 spectators.
reader2j.AddSpectator("hmerr", arr())
reader2j.AddSpectator("weight", arr())
reader2j.AddSpectator("hmass", arr())
reader2j.AddSpectator("nbjets", arr())
reader2j.AddSpectator("bdtucsd_inclusive", arr())
reader2j.AddSpectator("bdtucsd_01jet", arr())
reader2j.AddSpectator("bdtucsd_2jet", arr())

# NOTE(review): from here on the code uses `reader` and `n_vars`, neither of
# which is defined in this excerpt, and AddVariable is called without a buffer
# argument, unlike the reader2j calls above -- confirm whether `reader2j` was
# intended.
for n in range(n_vars):
    reader.AddVariable('f{0}'.format(n), )
reader.BookMVA('TMVAClassification_BDTG.weights.2jet_bveto',
               'TMVAClassification_BDTG.weights.2jet_bveto.xml')

# NOTE(review): X has 24 columns while 15 variables and 7 spectators are
# registered above -- verify the expected feature count.
X = np.zeros((1, 24), dtype='f')
Z = evaluate_reader(reader, 'TMVAClassification_BDTG.weights.2jet_bveto', X)

示例#17

0

显示文件

文件： evaluate_and_store.py 项目： mickypaganini/TrackJetBTagging

def main(weights, picklename, filename, treename = 'bTag_AntiKt2PV0TrackJets'):
    '''
    Evaluate the TMVA method stored in ``weights`` on the jets of a ROOT
    ntuple and pickle the per-event, per-jet output.

    The variable names are read from the ``Variables`` section of the XML
    file, mapped to data-frame column names via ``names_mapping``, flattened
    into one feature row per jet, scored with ``evaluate_reader`` and finally
    regrouped with ``match_shape`` against ``df['jet_pt']``.

    Args:
    -----
        weights:    .xml file out of mv2 training containing bdt parameters
        picklename: name of the output pickle to store new mv2 values
        filename:   .root file with ntuples used to evaluate the tmva method
        treename:   (optional) name of the TTree to consider
    Returns:
    --------
        status (always 0)
    Raises:
    -------
        nothing explicitly; a variable label in the XML file that is missing
        from ``names_mapping`` surfaces as a KeyError
    '''
    print('Parsing XML file...')
    # -- Load XML file
    tree = ET.parse(weights)
    root = tree.getroot()

    # -- Get list of variable names from XML file
    var_list = [var.attrib['Label'] for var in root.findall('Variables')[0].findall('Variable')]

    print('Loading .root file for evaluation...')
    # -- Get ntuples:
    df = pup.root2panda(filename, treename, branches = ['jet_pt', 'jet_eta','jet_phi', 'jet_m', 'jet_ip2d_pu',
        'jet_ip2d_pc', 'jet_ip2d_pb', 'jet_ip3d_pu', 'jet_ip3d_pc','jet_ip3d_pb',
        'jet_sv1_vtx_x', 'jet_sv1_vtx_y', 'jet_sv1_vtx_z', 'jet_sv1_ntrkv',
        'jet_sv1_m','jet_sv1_efc','jet_sv1_n2t','jet_sv1_sig3d',
        'jet_jf_n2t','jet_jf_ntrkAtVx','jet_jf_nvtx','jet_jf_nvtx1t','jet_jf_m',
        'jet_jf_efc','jet_jf_sig3d', 'jet_jf_deta', 'jet_jf_dphi', 'PVx', 'PVy', 'PVz' ])

    # -- Insert default values, calculate MV2 variables from the branches in df
    df = transformVars(df)

    # -- Map the variable labels used in the XML file to data-frame columns
    names_mapping = {
        'pt':'jet_pt',
        'abs(eta)':'abs(jet_eta)',
        'ip2':'jet_ip2',
        'ip2_c':'jet_ip2_c',
        'ip2_cu':'jet_ip2_cu',
        'ip3':'jet_ip3',
        'ip3_c':'jet_ip3_c',
        'ip3_cu':'jet_ip3_cu',
        'sv1_ntkv':'jet_sv1_ntrkv',
        'sv1_mass':'jet_sv1_m',
        'sv1_efrc':'jet_sv1_efc',
        'sv1_n2t':'jet_sv1_n2t',
        'sv1_Lxy':'jet_sv1_Lxy',
        'sv1_L3d':'jet_sv1_L3d',
        'sv1_sig3':'jet_sv1_sig3d',
        'sv1_dR': 'jet_sv1_dR',
        'jf_n2tv':'jet_jf_n2t',
        'jf_ntrkv':'jet_jf_ntrkAtVx',
        'jf_nvtx':'jet_jf_nvtx',
        'jf_nvtx1t':'jet_jf_nvtx1t',
        'jf_mass':'jet_jf_m',
        'jf_efrc':'jet_jf_efc',
        'jf_dR':'jet_jf_dR',
        'jf_sig3':'jet_jf_sig3d'
    }

    print('Initializing TMVA...')
    # -- TMVA: Initialize reader, add empty variables and weights from training
    reader = TMVA.Reader()
    for var_name in var_list:
        reader.AddVariable(var_name, array('f', [0]))
    reader.BookMVA('BDTG akt2', weights)

    print('Creating feature matrix...')
    # -- Get features for each event and store them in X_test:
    #    each event row is transposed so that X_test gets one row per jet
    X_buf = []
    for event in df[[names_mapping[var] for var in var_list]].values:
        X_buf.extend(np.array([normalize_type(jet) for jet in event]).T.tolist())
    X_test = np.array(X_buf)

    print('Evaluating!')
    # -- TMVA: Evaluate!
    twoclass_output = evaluate_reader(reader, 'BDTG akt2', X_test)

    # -- Reshape the MV2 output into event-jet format
    reorganized = match_shape(twoclass_output, df['jet_pt'])

    import cPickle
    print('Saving new MV2 weights in {}'.format(picklename))
    # Use a context manager so the output file is flushed and closed even if
    # pickling fails.
    with open(picklename, 'wb') as pickle_file:
        cPickle.dump(reorganized, pickle_file)

    # -- Write the new branch to the tree (currently de-activated)
    #add_branch(reorganized, filename, treename, 'jet_mv2c20_new')

    print('Done. Success!')
    return 0