Exemplo n.º 1
0
def tmva_process(info, data):
    """
    Build a TMVA reader for a stored model and evaluate it on ``data``.

    A variable is declared to the reader for every column of ``data``, the
    method ``info.method_name`` is booked from ``info.xml_file`` and the
    result of ``evaluate_reader`` is returned.  For a classification model
    whose sigmoid function string contains ``sig_eff``, the number after the
    ``=`` sign is forwarded to ``evaluate_reader`` as ``aux``.

    :param rep.estimators.tmva._AdditionalInformationPredict info: additional information
    :param pandas.DataFrame data: data to predict
    :return: the value returned by ``evaluate_reader``

    """
    import ROOT

    tmva_reader = ROOT.TMVA.Reader()
    for column in data.columns:
        # a fresh one-element float array is handed over for each variable
        tmva_reader.AddVariable(column, array.array('f', [0.]))

    model_type, sigmoid_function = info.model_type
    tmva_reader.BookMVA(info.method_name, info.xml_file)

    uses_signal_efficiency = (model_type == 'classification'
                              and sigmoid_function is not None
                              and 'sig_eff' in sigmoid_function)
    if not uses_signal_efficiency:
        return evaluate_reader(tmva_reader, info.method_name, data)

    # the string is expected to look like "<something>=<float>"
    signal_efficiency = float(sigmoid_function.strip().split('=')[1])
    assert 0.0 <= signal_efficiency <= 1., 'signal efficiency must be in [0, 1], not {}'.format(
        signal_efficiency)
    return evaluate_reader(tmva_reader, info.method_name, data, aux=signal_efficiency)
Exemplo n.º 2
0
def tmva_process(info, data):
    """
    Build a TMVA reader for a stored model and evaluate it on ``data``.

    A variable is declared to the reader for every column of ``data``, the
    method ``info.method_name`` is booked from ``info.xml_file`` and the
    result of ``evaluate_reader`` is returned.  For a classification model
    whose sigmoid function string contains ``sig_eff``, the number after the
    ``=`` sign is forwarded to ``evaluate_reader`` as ``aux``.

    :param rep.estimators.tmva._AdditionalInformationPredict info: additional information
    :param pandas.DataFrame data: data to predict
    :return: the value returned by ``evaluate_reader``

    """
    import ROOT

    tmva_reader = ROOT.TMVA.Reader()
    for column in data.columns:
        # a fresh one-element float array is handed over for each variable
        tmva_reader.AddVariable(column, array.array('f', [0.]))

    model_type, sigmoid_function = info.model_type
    tmva_reader.BookMVA(info.method_name, info.xml_file)

    uses_signal_efficiency = (model_type == 'classification'
                              and sigmoid_function is not None
                              and 'sig_eff' in sigmoid_function)
    if not uses_signal_efficiency:
        return evaluate_reader(tmva_reader, info.method_name, data)

    # the string is expected to look like "<something>=<float>"
    signal_efficiency = float(sigmoid_function.strip().split('=')[1])
    assert 0.0 <= signal_efficiency <= 1., 'signal efficiency must be in [0, 1], not {}'.format(
        signal_efficiency)
    return evaluate_reader(tmva_reader,
                           info.method_name,
                           data,
                           aux=signal_efficiency)
Exemplo n.º 3
0
 def ImportXml(self,train,test,ModelName):
     """Book the stored TMVA model ``ModelName`` and score both samples.

     The weight file is looked up under ``./dataset/weights/``.  Returns a
     ``(test_scores, train_scores)`` tuple -- the test sample comes first.
     """
     tmva_reader = ROOT.TMVA.Reader()
     # Declare every dataset variable to the reader before booking the method
     for var_name in self.DataSet.LVariables:
         tmva_reader.AddVariable(var_name, array('f', [0.]))
     weights_path = './dataset/weights/TMVAClassification_' + ModelName + '.weights.xml'
     tmva_reader.BookMVA(ModelName, weights_path)
     # Classification scores, evaluated for the test sample first
     test_scores = evaluate_reader(tmva_reader, ModelName, test.Events)
     train_scores = evaluate_reader(tmva_reader, ModelName, train.Events)
     return test_scores, train_scores
Exemplo n.º 4
0
def plot_clf_results_tmva(reader,
                          x_train,
                          y_train,
                          w_train,
                          x_test,
                          y_test,
                          w_test,
                          nbins=30,
                          figname="BDTOutput_tmva.png",
                          verbose="False"):
    """Evaluate the booked "BDT" method on train/test samples and plot the outputs.

    For the train sample and then the test sample, events are split into
    signal (``y > 0.5``) and background (``y < 0.5``), scored with
    ``evaluate_reader`` and collected in the order
    (train sig, train bkg, test sig, test bkg).  The decisions and the
    matching weights, normalised to unit sum per sample, are handed to
    ``plot_clf_results``.

    Args:
        reader: TMVA reader with a method booked under the name "BDT".
        x_train, x_test: feature arrays that accept a boolean mask.
        y_train, y_test: label arrays compared against 0.5.
        w_train, w_test: per-event weights; they are not modified.
        nbins, figname, verbose: forwarded unchanged to ``plot_clf_results``.
    """
    # NOTE(review): the default verbose="False" is a non-empty string and is
    # therefore truthy; it is kept for interface compatibility -- confirm how
    # plot_clf_results interprets it.
    decisions = []
    weights = []
    for x, y, w in ((x_train, y_train, w_train), (x_test, y_test, w_test)):
        # Normalise into a new array: an in-place `w *= ...` would rescale the
        # caller's weights and fails for integer-typed weight arrays.
        w = w * (1. / np.sum(w))
        is_sig = y > 0.5
        is_bkg = y < 0.5
        dsig = evaluate_reader(reader, "BDT", x[is_sig])
        dbkg = evaluate_reader(reader, "BDT", x[is_bkg])
        decisions += [dsig, dbkg]
        weights += [w[is_sig], w[is_bkg]]

    plot_clf_results(tuple(decisions), tuple(weights), nbins, figname, verbose)
Exemplo n.º 5
0
def get_tmva_test_results(directory, variables, name=''):
    """Score the TMVA test tree with the BDT stored under ``directory``.

    The method 'BDT' is booked from ``weights/TMVA_BDT.weights.xml`` and the
    tree ``TestTree`` is read from ``tmva_output.root``.  Both paths are built
    by plain string concatenation, so ``directory`` has to end with a path
    separator.

    Returns the tuple ``(y_test, y_decision, w_test, name)`` where ``y_test``
    is ``1 - classID``.
    """
    bdt_reader = r.TMVA.Reader()
    for variable in variables:
        bdt_reader.AddVariable(variable, array('f', [0]))
    bdt_reader.BookMVA('BDT', directory + 'weights/TMVA_BDT.weights.xml')

    # Testing dataset written by TMVA
    test_file = directory + 'tmva_output.root'
    features = rec2array(root2array(test_file, 'TestTree', variables))
    # presumably flips TMVA's classID so that signal is labelled 1 -- confirm
    labels = 1 - root2array(test_file, 'TestTree', 'classID')
    event_weights = root2array(test_file, 'TestTree', 'weight')

    decisions = evaluate_reader(bdt_reader, "BDT", features)
    return (labels, decisions, event_weights, name)
Exemplo n.º 6
0
def evaluate(config, tree, names, transform=None):
    """Score ``tree`` with every classifier in ``names`` and attach the results.

    For each name the setup is loaded from the part after the first
    underscore and its variables (passed through ``transform`` when given)
    are read from ``tree.raw()``.  Names starting with "sklearn" first
    contribute the second ``predict_proba`` column of the pickled model
    ``<mvadir>/<name>.pkl``.  Every name then contributes the output of the
    TMVA "BDT" booked from ``<mvadir>/<name>.xml``, stored under the name with
    "sklearn" replaced by "tmvalike".

    If ``mapping.root`` can be opened, a ``tmvalike_likelihood`` field is
    added by looking up (tt, ttZ) score pairs in ``hTargetBinning``.  The
    combined record array is converted with ``array2tree`` and passed to
    ``tree.mva``.  Nothing is returned.
    """
    columns = []  # one sequence of scores per output field
    fields = []   # (field name, 'float64') pairs, parallel to ``columns``

    for name in names:
        setup = load(config, name.split("_")[1])
        if transform:
            branches = list(transform(setup["variables"]))
        else:
            branches = setup["variables"]
        features = rec2array(tree2array(tree.raw(), branches))

        if name.startswith("sklearn"):
            pickle_path = os.path.join(config["mvadir"], name + ".pkl")
            with open(pickle_path, 'rb') as handle:
                bdt, _label = pickle.load(handle)
            # predict_proba is only called when there is at least one event
            sk_scores = bdt.predict_proba(features)[:, 1] if len(features) > 0 else []
            columns.append(sk_scores)
            fields.append((name, 'float64'))

        xml_path = os.path.join(config["mvadir"], name + ".xml")
        tmva_reader = r.TMVA.Reader("Silent")
        for variable in setup['variables']:
            tmva_reader.AddVariable(variable, array('f', [0.]))
        tmva_reader.BookMVA("BDT", xml_path)
        columns.append(evaluate_reader(tmva_reader, "BDT", features))
        fields.append((name.replace("sklearn", "tmvalike"), 'float64'))

    mapping_dir = config.get("mvadir", config.get("indir", config["outdir"]))
    mapping_file = r.TFile(os.path.join(mapping_dir, "mapping.root"), "READ")
    if mapping_file.IsOpen():
        likelihood = mapping_file.Get("hTargetBinning")

        def lh(values):
            return likelihood.GetBinContent(likelihood.FindBin(*values))

        position = {field: idx for idx, (field, _) in enumerate(fields)}
        tt = columns[position['tmvalike_tt']]
        ttZ = columns[position['tmvalike_ttZ']]
        if len(tt) == 0:
            columns.append([])
        else:
            columns.append(np.apply_along_axis(lh, 1, np.array([tt, ttZ]).T))
        fields.append(('tmvalike_likelihood', 'float64'))
        mapping_file.Close()

    # NOTE(review): relies on zip() returning a list (Python 2 behaviour)
    records = np.array(zip(*columns), fields)
    tree.mva(array2tree(records))
Exemplo n.º 7
0
    def test(self, test_data, classification_variables, training_sample):
        """
        Definition:
        -----------
            Evaluation method for RootTMVA; it books the BDT weight file found in the "weights" sub-folder
            of <output_directory>/<training_sample>/<name> and scores the given sample with it
        Args:
        -----
            test_data = dictionary for the set to evaluate performance on; only "X" is read here, where:
                X = ndarray of dim (# examples, # features)
            classification_variables = list of names of variables used for classification
            training_sample = string that specifies the file name of the sample to use as a training (e.g. "SM_merged" or "X350_hh")

        Returns:
        --------
            yhat = the array of BDT outputs, of dimensions (n_events)
            yhat_class = array of the same length, 1 where yhat >= 0 and 0 elsewhere
        """
        log = logging.getLogger("root_tmva")
        log.info("Evaluating performance...")

        # -- Build the reader and declare one variable per classification input
        log.info("Construct TMVA reader and add variables to it")
        bdt_reader = TMVA.Reader()
        for variable in classification_variables:
            bdt_reader.AddVariable(variable, array.array("f", [0]))

        # -- Book the trained TMVA BDT
        # Alternatives that were booked here in the past:
        #   <same folder>/TMVAClassification_Fisher.weights.xml
        #   <output_directory>/<training_sample>/skl_BDT/classifier/skl_BDT_TMVA.weights.xml (testing only)
        weights_xml = os.path.join(self.output_directory, training_sample, self.name, "weights", "TMVAClassification_BDT.weights.xml")
        bdt_reader.BookMVA("BDT", weights_xml)

        scores = evaluate_reader(bdt_reader, "BDT", test_data["X"])
        # -- Binary labels: non-negative BDT output counts as class 1
        labels = np.zeros(len(scores))
        labels[scores >= 0] = 1
        return scores, labels
Exemplo n.º 8
0
# Train an MLP
# The BookMethod entry point depends on the ROOT version: from 6.07/04 on the
# bound method of the factory instance is used, otherwise the unbound
# TMVA.Factory.BookMethod is called with `data` as its first argument.
if ROOT_VERSION >= '6.07/04':
    BookMethod = factory.BookMethod
else:
    BookMethod = TMVA.Factory.BookMethod
BookMethod(data, 'MLP', 'MLP',
           'NeuronType=tanh:NCycles=200:HiddenLayers=N+2,2:'
           'TestRate=5:EstimatorType=MSE')
factory.TrainAllMethods()

# Classify the test dataset with the MLP
# Two input variables named 'f0' and 'f1' are declared to the reader.
reader = TMVA.Reader()
for n in range(2):
    reader.AddVariable('f{0}'.format(n), array('f', [0.]))
reader.BookMVA('MLP', 'weights/classifier_MLP.weights.xml')
class_proba = evaluate_reader(reader, 'MLP', X_test)

# Plot the decision boundaries
plot_colors = "rgb"
plot_step = 0.02
class_names = "ABC"
cmap = plt.get_cmap('Paired')

fig = plt.figure(figsize=(5, 5))
fig.patch.set_alpha(0)
# Grid over the first two feature columns of X, padded by 1 on each side
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
                     np.arange(y_min, y_max, plot_step))

# Evaluate the MLP on every grid point (one row per point, two columns)
Z = evaluate_reader(reader, 'MLP', np.c_[xx.ravel(), yy.ravel()])
Exemplo n.º 9
0
# Book two BDT regressors that differ only in the number of trees (1 vs 300).
# The BookMethod entry point depends on the ROOT version: from 6.07/04 on the
# bound method of the factory instance is used, otherwise the unbound
# TMVA.Factory.BookMethod is called with `data` as its first argument.
if ROOT_VERSION >= '6.07/04':
    BookMethod = factory.BookMethod
else:
    BookMethod = TMVA.Factory.BookMethod
BookMethod(data, 'BDT', 'BDT1',
                   'nCuts=20:NTrees=1:MaxDepth=4:BoostType=AdaBoostR2:'
                   'SeparationType=RegressionVariance')
BookMethod(data, 'BDT', 'BDT2',
                   'nCuts=20:NTrees=300:MaxDepth=4:BoostType=AdaBoostR2:'
                   'SeparationType=RegressionVariance')
factory.TrainAllMethods()

# Predict the regression target
# A single reader with one input variable 'x' holds both booked methods.
reader = TMVA.Reader()
reader.AddVariable('x', array('f', [0.]))
reader.BookMVA('BDT1', 'weights/regressor_BDT1.weights.xml')
reader.BookMVA('BDT2', 'weights/regressor_BDT2.weights.xml')
y_1 = evaluate_reader(reader, 'BDT1', X)
y_2 = evaluate_reader(reader, 'BDT2', X)

# Plot the results
plt.figure()
plt.scatter(X, y, c="k", label="training samples")
plt.plot(X, y_1, c="g", label="1 tree", linewidth=2)
plt.plot(X, y_2, c="r", label="300 trees", linewidth=2)
plt.xlabel("data")
plt.ylabel("target")
plt.title("Boosted Decision Tree Regression")
plt.legend()
# No extension is given; the output format is left to matplotlib's default
plt.savefig('RootNumpy')
Exemplo n.º 10
0
# Book an AdaBoost BDT classifier on the factory
factory.BookMethod('BDT', 'BDT',
                   'NTrees=100:nEventsMin=30:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.2:SeparationType=GiniIndex:nCuts=10:PruneStrength=2:PruneMethod=ExpectedError')
# Disabled alternative booking, kept for reference:
#factory.BookMethod('kBDT', 'BDT',
#                   'Fisher:VarTransform=None:CreateMVAPdfs:'
#                   'PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:'
#                   'NsmoothMVAPdf=10')

# NOTE: the print statements below use Python 2 syntax
print 'added all the events and booked the method'
factory.TrainAllMethods()
print 'trained'
# Classify the test dataset with the classifier
# One variable 'f<n>' is declared per input feature (n_vars in total)
reader = TMVA.Reader()
for n in range(n_vars):
    reader.AddVariable('f{0}'.format(n), array('f', [0.]))
reader.BookMVA('BDT', 'weights/classifier_BDT.weights.xml')
twoclass_output = evaluate_reader(reader, 'BDT', X_test)

plot_colors = "br"
plot_step = 0.02
class_names = "AB"
cmap = plt.get_cmap('bwr')

plt.figure(figsize=(10, 5))

# Plot the decision boundaries
plt.subplot(121)
# Plot range: the first two feature columns of X, padded by 1 on each side
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
'''
xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
                     np.arange(y_min, y_max, plot_step))
Exemplo n.º 11
0
# The following line is necessary if events have been added individually:
factory.PrepareTrainingAndTestTree(TCut('1'), 'NormMode=EqualNumEvents')

# Train a classifier
# A Fisher discriminant is booked under the title 'Fisher'
factory.BookMethod('Fisher', 'Fisher',
                   'Fisher:VarTransform=None:CreateMVAPdfs:'
                   'PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:'
                   'NsmoothMVAPdf=10')
factory.TrainAllMethods()

# Classify the test dataset with the classifier
# One variable 'f<n>' is declared per input feature (n_vars in total)
reader = TMVA.Reader()
for n in range(n_vars):
    reader.AddVariable('f{0}'.format(n), array('f', [0.]))
reader.BookMVA('Fisher', 'weights/classifier_Fisher.weights.xml')
twoclass_output = evaluate_reader(reader, 'Fisher', X_test)

plot_colors = "br"
plot_step = 0.02
class_names = "AB"
cmap = plt.get_cmap('bwr')

plt.figure(figsize=(10, 5))

# Plot the decision boundaries
plt.subplot(121)
# Grid over the first two feature columns of X, padded by 1 on each side
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
                     np.arange(y_min, y_max, plot_step))
Exemplo n.º 12
0
def main(weights, picklename, filename, treename='bTag_AntiKt2PV0TrackJets'):
    '''
    evaluate the tmva method after transforming input data into right format
    Args:
    -----
        weights:    .xml file out of mv2 training containing bdt parameters
        picklename: name of the output pickle to store new mv2 values
        filename:   .root file with ntuples used to evaluate the tmva method
        treename:   (optional) name of the TTree to consider
    Returns:
    --------
        0 on completion
    Raises:
    -------
        KeyError if a variable label found in the XML file has no entry in the
        ntuple name mapping; errors from the XML parser, the readers and the
        file system are propagated unchanged
    '''
    # cPickle only exists on Python 2; fall back to the standard pickle module
    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    print('Parsing XML file...')
    # -- Load XML file
    tree = ET.parse(weights)
    root = tree.getroot()

    # -- Get list of variable names from XML file
    var_list = [
        var.attrib['Label']
        for var in root.findall('Variables')[0].findall('Variable')
    ]

    print('Loading .root file for evaluation...')
    # -- Get ntuples:
    df = pup.root2panda(
        filename,
        treename,
        branches=[
            'jet_pt', 'jet_eta', 'jet_phi', 'jet_m', 'jet_ip2d_pu',
            'jet_ip2d_pc', 'jet_ip2d_pb', 'jet_ip3d_pu', 'jet_ip3d_pc',
            'jet_ip3d_pb', 'jet_sv1_vtx_x', 'jet_sv1_vtx_y', 'jet_sv1_vtx_z',
            'jet_sv1_ntrkv', 'jet_sv1_m', 'jet_sv1_efc', 'jet_sv1_n2t',
            'jet_sv1_sig3d', 'jet_jf_n2t', 'jet_jf_ntrkAtVx', 'jet_jf_nvtx',
            'jet_jf_nvtx1t', 'jet_jf_m', 'jet_jf_efc', 'jet_jf_sig3d',
            'jet_jf_deta', 'jet_jf_dphi', 'PVx', 'PVy', 'PVz'
        ])

    # -- Insert default values, calculate MV2 variables from the branches in df
    df = transformVars(df)

    # -- Map the variable labels of the XML file to column names of df
    names_mapping = {
        'pt': 'jet_pt',
        'abs(eta)': 'abs(jet_eta)',
        'ip2': 'jet_ip2',
        'ip2_c': 'jet_ip2_c',
        'ip2_cu': 'jet_ip2_cu',
        'ip3': 'jet_ip3',
        'ip3_c': 'jet_ip3_c',
        'ip3_cu': 'jet_ip3_cu',
        'sv1_ntkv': 'jet_sv1_ntrkv',
        'sv1_mass': 'jet_sv1_m',
        'sv1_efrc': 'jet_sv1_efc',
        'sv1_n2t': 'jet_sv1_n2t',
        'sv1_Lxy': 'jet_sv1_Lxy',
        'sv1_L3d': 'jet_sv1_L3d',
        'sv1_sig3': 'jet_sv1_sig3d',
        'sv1_dR': 'jet_sv1_dR',
        'jf_n2tv': 'jet_jf_n2t',
        'jf_ntrkv': 'jet_jf_ntrkAtVx',
        'jf_nvtx': 'jet_jf_nvtx',
        'jf_nvtx1t': 'jet_jf_nvtx1t',
        'jf_mass': 'jet_jf_m',
        'jf_efrc': 'jet_jf_efc',
        'jf_dR': 'jet_jf_dR',
        'jf_sig3': 'jet_jf_sig3d'
    }

    print('Initializing TMVA...')
    # -- TMVA: Initialize reader, add empty variables and weights from training
    reader = TMVA.Reader()
    for var in var_list:
        reader.AddVariable(var, array('f', [0]))
    reader.BookMVA('BDTG akt2', weights)

    print('Creating feature matrix...')
    # -- Get features for each event and store them in X_test.
    #    Each event row is transposed before being appended, so X_test ends up
    #    with one row per entry inside the event (presumably one per jet).
    X_buf = []
    for event in df[[names_mapping[var] for var in var_list]].values:
        X_buf.extend(
            np.array([normalize_type(jet) for jet in event]).T.tolist())
    X_test = np.array(X_buf)

    print('Evaluating!')
    # -- TMVA: Evaluate!
    twoclass_output = evaluate_reader(reader, 'BDTG akt2', X_test)

    # -- Reshape the MV2 output into event-jet format
    reorganized = match_shape(twoclass_output, df['jet_pt'])

    print('Saving new MV2 weights in {}'.format(picklename))
    # The context manager closes the output file even if dumping fails
    with open(picklename, 'wb') as pickle_file:
        pickle.dump(reorganized, pickle_file)

    # -- Write the new branch to the tree (currently de-activated)
    #add_branch(reorganized, filename, treename, 'jet_mv2c20_new')

    print('Done. Success!')
    return 0
Exemplo n.º 13
0
def cl3d_fixtures(clusters, tcs):
    """Add backward-compatibility and derived columns to a 3D-cluster frame.

    ``clusters`` is modified in place and also returned.  An empty frame is
    returned untouched.  Otherwise:

    * the columns ``clusters_id`` / ``clusters_n`` are renamed to
      ``clusters`` / ``nclu``;
    * ``layer_energy`` and/or ``hoe`` are computed from ``tcs`` when the
      corresponding column is missing;
    * ``ptem`` and ``eem`` are set to ``pt`` and ``energy`` divided by
      ``1 + hoe``.

    Args:
        clusters: DataFrame; the columns ``pt``, ``energy`` and (after the
            rename) ``clusters`` are read.
        tcs: DataFrame whose columns ``id``, ``layer`` and ``energy`` are read
            when a missing column has to be computed.

    Returns:
        The ``clusters`` object that was passed in.
    """
    # for backward compatibility
    if clusters.empty:
        return clusters

    clusters.rename(columns={
        'clusters_id': 'clusters',
        'clusters_n': 'nclu'
    },
                    inplace=True)

    do_compute_hoe = 'hoe' not in clusters.columns
    do_compute_layer_energy = 'layer_energy' not in clusters.columns

    def layer_profile(cluster, do_layer_energy=True, do_hoe=False):
        """Return [layer histogram] and/or [hoe] for one cluster row."""
        components = tcs[tcs.id.isin(cluster.clusters)]
        # energy-weighted layer histogram over the bin edges 0, 2, ..., 28
        hist, _ = np.histogram(components.layer.values,
                               bins=range(0, 29, 2),
                               weights=components.energy.values)
        results = []
        if do_layer_energy:
            results.append(hist)
        if do_hoe:
            em_energy = np.sum(hist)
            # -1 flags clusters without any energy in the histogram.
            # NOTE(review): a hoe of -1 makes 1 + hoe zero in the ptem/eem
            # division below -- confirm this is intended.
            hoe = -1
            if em_energy != 0:
                hoe = max(0, cluster.energy - em_energy) / em_energy
            results.append(hoe)
        return results

    if do_compute_hoe or do_compute_layer_energy:
        # The column order must match the order in which layer_profile
        # appends its results.
        new_columns = []
        if do_compute_layer_energy:
            new_columns.append('layer_energy')
        if do_compute_hoe:
            new_columns.append('hoe')
        clusters[new_columns] = clusters.apply(
            lambda cl: layer_profile(cl, do_compute_layer_energy,
                                     do_compute_hoe),
            result_type='expand',
            axis=1)

    clusters['ptem'] = clusters.pt / (1 + clusters.hoe)
    clusters['eem'] = clusters.energy / (1 + clusters.hoe)
    # BDT evaluation is switched off; flip the condition to re-enable it
    if False:
        clusters['bdt_pu'] = rnptmva.evaluate_reader(
            classifiers.mva_pu_classifier_builder(), 'BDT',
            clusters[['pt', 'eta', 'maxlayer', 'hoe', 'emaxe', 'szz']])

        clusters['bdt_pi'] = rnptmva.evaluate_reader(
            classifiers.mva_pi_classifier_builder(), 'BDT',
            clusters[['pt', 'eta', 'maxlayer', 'hoe', 'emaxe', 'szz']])
    return clusters
Exemplo n.º 14
0
    print 'Training takes ', stop - start, 's'
# NOTE: the print statements below use Python 2 syntax
if not args.quiet:
    print 'Training done'
    print 'Outputs in directory : ', args.outdir

# evaluate training results
if args.evaluate:

    factory.TestAllMethods()
    factory.EvaluateAllMethods()

    # Build a reader with one float variable per entry of `var`
    reader = TMVA.Reader()
    for v in var:
        #vtype = 'i' if v in ['nJet','tau0_decaymode','tau1_decaymode','ntags','ntags_loose'] else 'f'

        reader.AddVariable(v, array('f', [0]))

    # Paths are built by string concatenation, so args.outdir has to end with
    # a path separator
    reader.BookMVA('BDT', args.outdir + 'weights/TMVA_BDT.weights.xml')
    y_decision = evaluate_reader(reader, "BDT", x_test)
    util.plot_clf_results_tmva(reader,
                               x_train,
                               y_train,
                               w_train,
                               x_test,
                               y_test,
                               w_test,
                               figname=args.outdir + "bdtoutput.png",
                               verbose=(not args.quiet))
    util.plot_roc((y_test, y_decision, w_test),
                  figname=args.outdir + 'roc.png')
Exemplo n.º 15
0
# Book two BDT regressors that differ only in the number of trees (1 vs 300).
# The BookMethod entry point depends on the ROOT version: from 6.07/04 on the
# bound method of the factory instance is used, otherwise the unbound
# TMVA.Factory.BookMethod is called with `data` as its first argument.
if ROOT_VERSION >= '6.07/04':
    BookMethod = factory.BookMethod
else:
    BookMethod = TMVA.Factory.BookMethod
BookMethod(
    data, 'BDT', 'BDT1', 'nCuts=20:NTrees=1:MaxDepth=4:BoostType=AdaBoostR2:'
    'SeparationType=RegressionVariance')
BookMethod(
    data, 'BDT', 'BDT2', 'nCuts=20:NTrees=300:MaxDepth=4:BoostType=AdaBoostR2:'
    'SeparationType=RegressionVariance')
factory.TrainAllMethods()

# Predict the regression target
# A single reader with one input variable 'x' holds both booked methods.
reader = TMVA.Reader()
reader.AddVariable('x', array('f', [0.]))
reader.BookMVA('BDT1', 'weights/regressor_BDT1.weights.xml')
reader.BookMVA('BDT2', 'weights/regressor_BDT2.weights.xml')
y_1 = evaluate_reader(reader, 'BDT1', X)
y_2 = evaluate_reader(reader, 'BDT2', X)

# Plot the results
plt.figure()
plt.scatter(X, y, c="k", label="training samples")
plt.plot(X, y_1, c="g", label="1 tree", linewidth=2)
plt.plot(X, y_2, c="r", label="300 trees", linewidth=2)
plt.xlabel("data")
plt.ylabel("target")
plt.title("Boosted Decision Tree Regression")
plt.legend()
# No extension is given; the output format is left to matplotlib's default
plt.savefig('RootNumpy')
Exemplo n.º 16
0
# Declare the 15 input variables of the 2-jet reader; arr() is defined
# outside this excerpt and supplies the buffer for each variable.
reader2j.AddVariable("hmmphics", arr())
reader2j.AddVariable("j1pt", arr())
reader2j.AddVariable("j1eta", arr())
reader2j.AddVariable("j2pt", arr())
reader2j.AddVariable("detajj", arr())
reader2j.AddVariable("dphijj", arr())
reader2j.AddVariable("mjj", arr())
reader2j.AddVariable("met", arr())
reader2j.AddVariable("zepen", arr())
reader2j.AddVariable("njets", arr())
reader2j.AddVariable("drmj", arr())
reader2j.AddVariable("m1ptOverMass", arr())
reader2j.AddVariable("m2ptOverMass", arr())
reader2j.AddVariable("m1eta", arr())
reader2j.AddVariable("m2eta", arr())
# Spectators are declared after the variables
reader2j.AddSpectator("hmerr", arr())
reader2j.AddSpectator("weight", arr())
reader2j.AddSpectator("hmass", arr())
reader2j.AddSpectator("nbjets", arr())
reader2j.AddSpectator("bdtucsd_inclusive", arr())
reader2j.AddSpectator("bdtucsd_01jet", arr())
reader2j.AddSpectator("bdtucsd_2jet", arr())

# NOTE(review): from here on the code uses `reader`, not `reader2j`, and
# AddVariable is called with a single argument (no buffer), unlike every call
# above.  Neither `reader` nor `n_vars` is defined in this excerpt -- confirm
# whether this loop is a leftover and whether `reader2j` was meant below.
for n in range(n_vars):
    reader.AddVariable('f{0}'.format(n), )
reader.BookMVA('TMVAClassification_BDTG.weights.2jet_bveto',
               'TMVAClassification_BDTG.weights.2jet_bveto.xml')

# Evaluate on a single all-zero row with 24 columns.
# NOTE(review): 15 variables and 7 spectators are declared above, which does
# not add up to 24 -- confirm the intended width.
X = np.zeros((1, 24), dtype='f')
Z = evaluate_reader(reader, 'TMVAClassification_BDTG.weights.2jet_bveto', X)
def main(weights, picklename, filename, treename = 'bTag_AntiKt2PV0TrackJets'):
    '''
    evaluate the tmva method after transforming input data into right format
    Args:
    -----
        weights:    .xml file out of mv2 training containing bdt parameters
        picklename: name of the output pickle to store new mv2 values
        filename:   .root file with ntuples used to evaluate the tmva method
        treename:   (optional) name of the TTree to consider
    Returns:
    --------
        0 on completion
    Raises:
    -------
        KeyError if a variable label found in the XML file has no entry in the
        ntuple name mapping; errors from the XML parser, the readers and the
        file system are propagated unchanged
    '''
    # cPickle only exists on Python 2; fall back to the standard pickle module
    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    print('Parsing XML file...')
    # -- Load XML file
    tree = ET.parse(weights)
    root = tree.getroot()

    # -- Get list of variable names from XML file
    var_list = [var.attrib['Label'] for var in root.findall('Variables')[0].findall('Variable')]

    print('Loading .root file for evaluation...')
    # -- Get ntuples:
    df = pup.root2panda(filename, treename, branches = ['jet_pt', 'jet_eta','jet_phi', 'jet_m', 'jet_ip2d_pu',
        'jet_ip2d_pc', 'jet_ip2d_pb', 'jet_ip3d_pu', 'jet_ip3d_pc','jet_ip3d_pb',
        'jet_sv1_vtx_x', 'jet_sv1_vtx_y', 'jet_sv1_vtx_z', 'jet_sv1_ntrkv',
        'jet_sv1_m','jet_sv1_efc','jet_sv1_n2t','jet_sv1_sig3d',
        'jet_jf_n2t','jet_jf_ntrkAtVx','jet_jf_nvtx','jet_jf_nvtx1t','jet_jf_m',
        'jet_jf_efc','jet_jf_sig3d', 'jet_jf_deta', 'jet_jf_dphi', 'PVx', 'PVy', 'PVz' ])

    # -- Insert default values, calculate MV2 variables from the branches in df
    df = transformVars(df)

    # -- Map the variable labels of the XML file to column names of df
    names_mapping = {
        'pt':'jet_pt',
        'abs(eta)':'abs(jet_eta)',
        'ip2':'jet_ip2',
        'ip2_c':'jet_ip2_c',
        'ip2_cu':'jet_ip2_cu',
        'ip3':'jet_ip3',
        'ip3_c':'jet_ip3_c',
        'ip3_cu':'jet_ip3_cu',
        'sv1_ntkv':'jet_sv1_ntrkv',
        'sv1_mass':'jet_sv1_m',
        'sv1_efrc':'jet_sv1_efc',
        'sv1_n2t':'jet_sv1_n2t',
        'sv1_Lxy':'jet_sv1_Lxy',
        'sv1_L3d':'jet_sv1_L3d',
        'sv1_sig3':'jet_sv1_sig3d',
        'sv1_dR': 'jet_sv1_dR',
        'jf_n2tv':'jet_jf_n2t',
        'jf_ntrkv':'jet_jf_ntrkAtVx',
        'jf_nvtx':'jet_jf_nvtx',
        'jf_nvtx1t':'jet_jf_nvtx1t',
        'jf_mass':'jet_jf_m',
        'jf_efrc':'jet_jf_efc',
        'jf_dR':'jet_jf_dR',
        'jf_sig3':'jet_jf_sig3d'
    }

    print('Initializing TMVA...')
    # -- TMVA: Initialize reader, add empty variables and weights from training
    reader = TMVA.Reader()
    for var in var_list:
        reader.AddVariable(var, array('f', [0]))
    reader.BookMVA('BDTG akt2', weights)

    print('Creating feature matrix...')
    # -- Get features for each event and store them in X_test.
    #    Each event row is transposed before being appended, so X_test ends up
    #    with one row per entry inside the event (presumably one per jet).
    X_buf = []
    for event in df[[names_mapping[var] for var in var_list]].values:
        X_buf.extend(np.array([normalize_type(jet) for jet in event]).T.tolist())
    X_test = np.array(X_buf)

    print('Evaluating!')
    # -- TMVA: Evaluate!
    twoclass_output = evaluate_reader(reader, 'BDTG akt2', X_test)

    # -- Reshape the MV2 output into event-jet format
    reorganized = match_shape(twoclass_output, df['jet_pt'])

    print('Saving new MV2 weights in {}'.format(picklename))
    # The context manager closes the output file even if dumping fails
    with open(picklename, 'wb') as pickle_file:
        pickle.dump(reorganized, pickle_file)

    # -- Write the new branch to the tree (currently de-activated)
    #add_branch(reorganized, filename, treename, 'jet_mv2c20_new')

    print('Done. Success!')
    return 0