Exemplo n.º 1
0
def tmva_process(classifier, info, data, labels, sample_weight):
    """
    Create a TMVA factory, register the events and train the booked method.

    Every column of `data` is declared as a TMVA variable. The same events are
    added twice: once as the training sample and once as the test sample.
    The factory writes into the ROOT file ``info.directory/info.tmva_root``;
    that file is closed before returning, also when an error is raised.

    :param classifier: classifier to train
    :type classifier: rep.estimators.tmva.TMVAClassifier or rep.estimators.tmva.TMVARegressor
    :param rep.estimators.tmva._AdditionalInformation info: additional information
    :param pandas.DataFrame data: train data
    :param labels: array-like - targets
    :param sample_weight: array-like - weights
    :raises NotImplementedError: if `info.model_type` is neither
        "classification" nor "regression"
    """

    ROOT.TMVA.Tools.Instance()

    file_out = ROOT.TFile(os.path.join(info.directory, info.tmva_root), "RECREATE")
    try:
        print(classifier.factory_options)
        factory = ROOT.TMVA.Factory(info.tmva_job, file_out, classifier.factory_options)

        for var in data.columns:
            factory.AddVariable(var)

        # Set data
        if info.model_type == "classification":
            if classifier.method == "kCuts":
                # signal must be the first added tree, because rectangular cut
                # optimization doesn't work otherwise
                inds = numpy.argsort(labels)[::-1]
                # argsort yields positions, so select rows by position: `.iloc`
                # (`.ix` was label-based on integer indexes and is removed from
                # current pandas)
                data = data.iloc[inds, :]
                labels = labels[inds]
                sample_weight = sample_weight[inds]
            # convert once; the same events feed both the train and the test sample
            events = numpy.array(data)
            add_classification_events(factory, events, labels, weights=sample_weight)
            add_classification_events(factory, events, labels, weights=sample_weight, test=True)
        elif info.model_type == "regression":
            factory.AddTarget("target")
            events = numpy.array(data)
            add_regression_events(factory, events, labels, weights=sample_weight)
            add_regression_events(factory, events, labels, weights=sample_weight, test=True)
        else:
            raise NotImplementedError("Doesn't support type {}".format(info.model_type))

        factory.PrepareTrainingAndTestTree(ROOT.TCut("1"), "")
        # Set method: TMVA takes the options as one "key=value:key=value" string
        parameters = ":".join(
            ["{key}={value}".format(key=key, value=value) for key, value in classifier.method_parameters.items()]
        )
        factory.BookMethod(
            ROOT.TMVA.Types.__getattribute__(ROOT.TMVA.Types, classifier.method), classifier._method_name, parameters
        )

        factory.TrainAllMethods()
    finally:
        # release the output file even if any TMVA step above failed
        file_out.Close()
Exemplo n.º 2
0
def tmva_process(classifier, info, data, labels, sample_weight):
    """
    Create a TMVA factory, fill it with events and train the booked method.

    All columns of `data` become TMVA variables. The events are registered
    both as the training and as the test sample. Results go to the ROOT file
    ``info.directory/info.tmva_root``, which is always closed on exit.

    :param classifier: classifier to train
    :type classifier: rep.estimators.tmva.TMVAClassifier or rep.estimators.tmva.TMVARegressor
    :param rep.estimators.tmva._AdditionalInformation info: additional information
    :param pandas.DataFrame data: train data
    :param labels: array-like - targets
    :param sample_weight: array-like - weights
    :raises NotImplementedError: if `info.model_type` is neither
        'classification' nor 'regression'
    """

    ROOT.TMVA.Tools.Instance()

    file_out = ROOT.TFile(os.path.join(info.directory, info.tmva_root), "RECREATE")
    try:
        print(classifier.factory_options)
        factory = ROOT.TMVA.Factory(info.tmva_job, file_out, classifier.factory_options)

        for var in data.columns:
            factory.AddVariable(var)

        # Set data
        if info.model_type == 'classification':
            if classifier.method == 'kCuts':
                # signal must be the first added tree, because rectangular cut
                # optimization doesn't work otherwise
                inds = numpy.argsort(labels)[::-1]
                # `inds` are positions, hence positional `.iloc` instead of the
                # removed (and label-based on integer indexes) `.ix`
                data = data.iloc[inds, :]
                labels = labels[inds]
                sample_weight = sample_weight[inds]
            # one conversion serves both the train and the test registration
            events = numpy.array(data)
            add_classification_events(factory, events, labels, weights=sample_weight)
            add_classification_events(factory, events, labels, weights=sample_weight, test=True)
        elif info.model_type == 'regression':
            factory.AddTarget('target')
            events = numpy.array(data)
            add_regression_events(factory, events, labels, weights=sample_weight)
            add_regression_events(factory, events, labels, weights=sample_weight, test=True)
        else:
            raise NotImplementedError("Doesn't support type {}".format(info.model_type))

        factory.PrepareTrainingAndTestTree(ROOT.TCut('1'), "")
        # Set method: options are passed to TMVA as "key=value" pairs joined by ':'
        parameters = ":".join(
            ["{key}={value}".format(key=key, value=value) for key, value in classifier.method_parameters.items()])
        factory.BookMethod(ROOT.TMVA.Types.__getattribute__(ROOT.TMVA.Types, classifier.method),
                           classifier._method_name, parameters)

        factory.TrainAllMethods()
    finally:
        # close the output file on success and on failure alike
        file_out.Close()
Exemplo n.º 3
0
def tmva_process(estimator, info, data, target, sample_weight):
    """
    Create a TMVA classification/regression factory and train the booked method.

    Each column of `data` is declared as a TMVA variable and the events are
    registered twice, as training and as test sample. The factory output is
    written to the ROOT file ``info.directory/info.tmva_root``; the file is
    closed before returning, even if one of the TMVA steps raises.

    :param estimator: classifier/regressor which should be trained
    :type estimator: rep.estimators.tmva.TMVAClassifier or rep.estimators.tmva.TMVARegressor
    :param rep.estimators.tmva._AdditionalInformation info: additional information
    :param pandas.DataFrame data: training data
    :param target: array-like targets
    :param sample_weight: array-like samples weights
    :raises NotImplementedError: if `info.model_type` is neither
        'classification' nor 'regression'
    """

    ROOT.TMVA.Tools.Instance()

    file_out = ROOT.TFile(os.path.join(info.directory, info.tmva_root), "RECREATE")
    try:
        factory = ROOT.TMVA.Factory(info.tmva_job, file_out, estimator.factory_options)

        for var in data.columns:
            factory.AddVariable(var)

        # Set data
        if info.model_type == 'classification':
            if estimator.method == 'kCuts':
                # signal must be the first added to the tree, because method
                # *rectangular cut optimization* doesn't work in another way
                inds = numpy.argsort(target)[::-1]
                # positional selection: `inds` come from argsort, and `.ix` is
                # no longer available in current pandas
                data = data.iloc[inds, :]
                target = target[inds]
                sample_weight = sample_weight[inds]
            # the same array is used for the train and the test registration
            events = numpy.array(data)
            add_classification_events(factory, events, target, weights=sample_weight)
            add_classification_events(factory, events, target, weights=sample_weight, test=True)
        elif info.model_type == 'regression':
            factory.AddTarget('target')
            events = numpy.array(data)
            add_regression_events(factory, events, target, weights=sample_weight)
            add_regression_events(factory, events, target, weights=sample_weight, test=True)
        else:
            raise NotImplementedError("Doesn't support type {}".format(info.model_type))

        factory.PrepareTrainingAndTestTree(ROOT.TCut('1'), "")
        # Set method: build the "key=value:key=value" option string for TMVA
        parameters = ":".join(
            ["{key}={value}".format(key=key, value=value) for key, value in estimator.method_parameters.items()])
        factory.BookMethod(ROOT.TMVA.Types.__getattribute__(ROOT.TMVA.Types, estimator.method),
                           estimator._method_name, parameters)

        factory.TrainAllMethods()
    finally:
        # do not leave the ROOT file open when training fails
        file_out.Close()
Exemplo n.º 4
0
    np.sin(6 * X).ravel() + \
    RNG.normal(0, 0.1, X.shape[0])

# Train two BDT regressors with TMVA; the output goes to 'tmva_output.root'.
output = TFile('tmva_output.root', 'recreate')
factory_options = 'AnalysisType=Regression:!V:Silent:!DrawProgressBar'
factory = TMVA.Factory('regressor', output, factory_options)

# For ROOT_VERSION >= '6.07/04' the dataset is declared on a TMVA.DataLoader,
# otherwise directly on the factory.
use_dataloader = ROOT_VERSION >= '6.07/04'
data = TMVA.DataLoader('.') if use_dataloader else factory
data.AddVariable('x', 'F')
data.AddTarget('y', 'F')

# The same sample is registered as training set and as test set.
add_regression_events(data, X, y)
add_regression_events(data, X, y, test=True)
# Needed because the events were added one by one:
data.PrepareTrainingAndTestTree(TCut('1'), '')

# Pick the BookMethod callable that matches the object held in `data`.
BookMethod = factory.BookMethod if use_dataloader else TMVA.Factory.BookMethod
bdt_options = ('nCuts=20:NTrees={0}:MaxDepth=4:BoostType=AdaBoostR2:'
               'SeparationType=RegressionVariance')
for method_title, n_trees in (('BDT1', 1), ('BDT2', 300)):
    BookMethod(data, 'BDT', method_title, bdt_options.format(n_trees))
factory.TrainAllMethods()
Exemplo n.º 5
0
# Fixed seed so that the generated noise is reproducible.
RNG = np.random.RandomState(1)

# Example regression dataset: sum of two sines plus Gaussian noise.
X = np.linspace(0, 6, 100)[:, np.newaxis]
noise_free = np.sin(X).ravel() + np.sin(6 * X).ravel()
y = noise_free + RNG.normal(0, 0.1, X.shape[0])

# Train two BDT regressors; TMVA output goes to 'tmva_output.root'.
output = TFile('tmva_output.root', 'recreate')
factory_options = 'AnalysisType=Regression:!V:Silent:!DrawProgressBar'
factory = TMVA.Factory('regressor', output, factory_options)
factory.AddVariable('x', 'F')
factory.AddTarget('y', 'F')

# The same sample is registered as training set and as test set.
add_regression_events(factory, X, y)
add_regression_events(factory, X, y, test=True)
factory.PrepareTrainingAndTestTree(TCut('1'), 'NormMode=EqualNumEvents')

bdt_options = ('nCuts=20:NTrees={0}:MaxDepth=4:BoostType=AdaBoostR2:'
               'SeparationType=RegressionVariance')
bdt_methods = (('BDT1', 1), ('BDT2', 300))
for method_title, n_trees in bdt_methods:
    factory.BookMethod('BDT', method_title, bdt_options.format(n_trees))
factory.TrainAllMethods()

# Set up a reader on the weight files of both trained methods.
reader = TMVA.Reader()
reader.AddVariable('x', array('f', [0.]))
for method_title, _ in bdt_methods:
    reader.BookMVA(method_title,
                   'weights/regressor_{0}.weights.xml'.format(method_title))
Exemplo n.º 6
0
    RNG.normal(0, 0.1, X.shape[0])

# Book two BDT regressors with TMVA; the output goes to 'tmva_output.root'.
output = TFile('tmva_output.root', 'recreate')
factory = TMVA.Factory(
    'regressor', output, 'AnalysisType=Regression:!V:Silent:!DrawProgressBar')

# For ROOT_VERSION >= '6.07/04' a TMVA.DataLoader holds the dataset and
# BookMethod is called on the factory instance; otherwise the factory holds
# the dataset and is passed explicitly to the unbound TMVA.Factory.BookMethod.
if ROOT_VERSION >= '6.07/04':
    data, BookMethod = TMVA.DataLoader('.'), factory.BookMethod
else:
    data, BookMethod = factory, TMVA.Factory.BookMethod

data.AddVariable('x', 'F')
data.AddTarget('y', 'F')

# The same sample is registered as training set and as test set.
add_regression_events(data, X, y)
add_regression_events(data, X, y, test=True)
# Needed because the events were added one by one:
data.PrepareTrainingAndTestTree(TCut('1'), '')

shared_bdt_options = ('MaxDepth=4:BoostType=AdaBoostR2:'
                      'SeparationType=RegressionVariance')
for method_title, n_trees in (('BDT1', 1), ('BDT2', 300)):
    BookMethod(data, 'BDT', method_title,
               'nCuts=20:NTrees={0}:{1}'.format(n_trees, shared_bdt_options))
Exemplo n.º 7
0
def tmva_process(estimator, info, data, target, sample_weight):
    """
    Create a TMVA factory and data loader, register the events and train.

    Each column of `data` is declared as a variable on the data loader and
    the events are registered twice, as training and as test sample. The
    factory output is written to the ROOT file
    ``info.directory/info.tmva_root``; the file is closed before returning,
    even if one of the TMVA steps raises.

    :param estimator: classifier/regressor which should be trained
    :type estimator: rep.estimators.tmva.TMVAClassifier or rep.estimators.tmva.TMVARegressor
    :param rep.estimators.tmva._AdditionalInformation info: additional information
    :param pandas.DataFrame data: training data
    :param target: array-like targets
    :param sample_weight: array-like samples weights
    :raises NotImplementedError: if `info.model_type` is neither
        'classification' nor 'regression'
    """

    ROOT.TMVA.Tools.Instance()

    file_out = ROOT.TFile(os.path.join(info.directory, info.tmva_root),
                          "RECREATE")
    try:
        factory = ROOT.TMVA.Factory(info.tmva_job, file_out,
                                    estimator.factory_options)
        dataloader = ROOT.TMVA.DataLoader("DataLoader")

        for var in data.columns:
            dataloader.AddVariable(var)

        # Set data
        if info.model_type == 'classification':
            if estimator.method == 'kCuts':
                # signal must be the first added to the tree, because method
                # *rectangular cut optimization* doesn't work in another way
                inds = numpy.argsort(target)[::-1]
                # positional selection: `inds` come from argsort, and `.ix` is
                # no longer available in current pandas
                data = data.iloc[inds, :]
                target = target[inds]
                sample_weight = sample_weight[inds]
            # the same array is used for the train and the test registration
            events = numpy.array(data)
            add_classification_events(dataloader,
                                      events,
                                      target,
                                      weights=sample_weight)
            add_classification_events(dataloader,
                                      events,
                                      target,
                                      weights=sample_weight,
                                      test=True)
        elif info.model_type == 'regression':
            # the target belongs to the same object that holds the variables
            # and the events, i.e. the data loader, not the factory
            dataloader.AddTarget('target')
            events = numpy.array(data)
            add_regression_events(dataloader,
                                  events,
                                  target,
                                  weights=sample_weight)
            add_regression_events(dataloader,
                                  events,
                                  target,
                                  weights=sample_weight,
                                  test=True)
        else:
            raise NotImplementedError("Doesn't support type {}".format(
                info.model_type))

        dataloader.PrepareTrainingAndTestTree(ROOT.TCut('1'), "")
        # Set method: build the "key=value:key=value" option string for TMVA
        parameters = ":".join([
            "{key}={value}".format(key=key, value=value)
            for key, value in estimator.method_parameters.items()
        ])
        factory.BookMethod(
            dataloader,
            ROOT.TMVA.Types.__getattribute__(ROOT.TMVA.Types, estimator.method),
            estimator._method_name, parameters)

        factory.TrainAllMethods()
    finally:
        # do not leave the ROOT file open when training fails
        file_out.Close()
Exemplo n.º 8
0
# Example regression dataset: sum of two sines plus Gaussian noise.
X = np.linspace(0, 6, 100)[:, np.newaxis]
noise_free = np.sin(X).ravel() + np.sin(6 * X).ravel()
y = noise_free + RNG.normal(0, 0.1, X.shape[0])

# Train two BDT regressors; TMVA output goes to 'tmva_output.root'.
output = TFile('tmva_output.root', 'recreate')
factory = TMVA.Factory(
    'regressor', output, 'AnalysisType=Regression:!V:Silent:!DrawProgressBar')
factory.AddVariable('x', 'F')
factory.AddTarget('y', 'F')

# The same sample is registered as training set and as test set.
add_regression_events(factory, X, y)
add_regression_events(factory, X, y, test=True)
# Needed because the events were added one by one:
factory.PrepareTrainingAndTestTree(TCut('1'), '')

bdt_options = ('nCuts=20:NTrees={0}:MaxDepth=4:BoostType=AdaBoostR2:'
               'SeparationType=RegressionVariance')
for method_title, n_trees in (('BDT1', 1), ('BDT2', 300)):
    factory.BookMethod('BDT', method_title, bdt_options.format(n_trees))
factory.TrainAllMethods()

# Set up a reader declaring the single input variable 'x'.
reader = TMVA.Reader()
reader.AddVariable('x', array('f', [0.]))