def Init(train, test, VarList):
    """Prepare a TMVA classification job from in-memory train/test samples.

    Args:
        train: object exposing ``Events``, ``OutTrue`` and ``Weights``;
            added to the loader as training events.
        test: object exposing the same three attributes; added to the
            loader as test events (``test=True``).
        VarList: iterable of variable names registered on the loader.

    Returns:
        Tuple ``(dataloader, factory, output)``: the filled
        ``ROOT.TMVA.DataLoader``, the ``ROOT.TMVA.Factory`` and the ROOT
        file the factory was constructed with.
    """
    # Initialise TMVA and its Python-method bridge.
    ROOT.TMVA.Tools.Instance()
    ROOT.TMVA.PyMethodBase.PyInitialize()

    root_file = ROOT.TFile.Open('~/Data/NNOutput.root', 'RECREATE')
    factory_options = '!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification'
    tmva_factory = ROOT.TMVA.Factory('TMVAClassification', root_file, factory_options)

    loader = ROOT.TMVA.DataLoader('dataset')
    for name in VarList:
        loader.AddVariable(name)

    # Events whose label equals 1 are treated as signal (signal_label=1).
    add_classification_events(
        loader, train.Events, train.OutTrue,
        signal_label=1, weights=train.Weights)
    add_classification_events(
        loader, test.Events, test.OutTrue,
        signal_label=1, weights=test.Weights, test=True)

    # No cut is applied; only the split seed is set (no NormMode option).
    no_cut = ROOT.TCut('')
    loader.PrepareTrainingAndTestTree(no_cut, 'SplitSeed=100')

    return loader, tmva_factory, root_file
def train(self, train_data, classification_variables, variable_dict, sample_name, grid_search):
    """
    Definition:
    -----------
        Training method for RootTMVA; it saves the model into the "weights" sub-folder
        of <output_directory>/<sample_name>/<name>.

    Args:
    -----
        train_data = dictionary, containing "X", "y", "w" for the training set, where:
            X = ndarray of dim (# training examples, # features)
            y = array of dim (# training examples) with target values
            w = array of dim (# training examples) with event weights
        classification_variables = list of names of variables used for classification
        variable_dict = ordered dict, mapping all the branches from the TTree to their type
        sample_name = string that specifies the file name of the sample being trained on
        grid_search = not used by this method; kept for interface compatibility
    """
    logger = logging.getLogger("root_tmva")

    # Every artefact of this training run lives under this directory.
    model_dir = os.path.join(self.output_directory, sample_name, self.name)
    utils.ensure_directory(model_dir)

    f_output = TFile(os.path.join(model_dir, "TMVA_output.root"), "RECREATE")
    try:
        factory = TMVA.Factory("TMVAClassification", f_output, "AnalysisType=Classification")

        # -- Add variables to the factory:
        for v_name in classification_variables:
            factory.AddVariable(v_name, variable_dict[v_name])

        # Call root_numpy's utility functions to add events from the arrays.
        # The same events are added again as test events: some testing
        # events are needed or TMVA will complain.
        add_classification_events(
            factory, train_data["X"], train_data["y"], weights=train_data["w"])
        add_classification_events(
            factory, train_data["X"], train_data["y"], weights=train_data["w"], test=True)

        # The following line is necessary if events have been added individually:
        factory.PrepareTrainingAndTestTree(TCut("1"), "NormMode=EqualNumEvents")

        # -- Define methods:
        # Previously tried configuration:
        #   NTrees=200, MinNodeSize=0.1, MaxDepth=6, BoostType=Grad,
        #   SeparationType=GiniIndex, NegWeightTreatment=IgnoreNegWeightsInTraining
        bdt_options = [
            "NTrees=300",
            "MinNodeSize=0.01",
            "MaxDepth=8",
            "BoostType=Grad",
            "SeparationType=GiniIndex",
            "NegWeightTreatment=Pray",
        ]
        factory.BookMethod(TMVA.Types.kBDT, "BDT", ":".join(bdt_options))

        # -- Open question: a Fisher classifier could be booked here instead, e.g.
        #    factory.BookMethod(TMVA.Types.kFisher, "Fisher",
        #        "VerbosityLevel=Info:IgnoreNegWeightsInTraining=False")

        # -- Where stuff actually happens:
        logger.info("Train all methods")
        factory.TrainAllMethods()
    finally:
        # Close the ROOT file even if booking/training raised, so the
        # handle is not leaked.
        f_output.Close()

    # -- Organize output: replace any stale "weights" folder with the one
    #    found in the current working directory.
    logger.info("Organising output")
    weights_dir = os.path.join(model_dir, "weights")
    if os.path.isdir(weights_dir):
        shutil.rmtree(weights_dir)
    shutil.move("weights", model_dir)
def tmva_process(classifier, info, data, labels, sample_weight):
    """
    Create TMVA classification factory, train, test and evaluate all methods

    The same events are registered both as training and as test events.

    :param classifier: classifier to train
    :type classifier: rep.estimators.tmva.TMVAClassifier or rep.estimators.tmva.TMVARegressor
    :param rep.estimators.tmva._AdditionalInformation info: additional information
    :param pandas.DataFrame data: train data
    :param labels: array-like - targets
    :param sample_weight: array-like - weights
    :raises NotImplementedError: if ``info.model_type`` is neither
        ``"classification"`` nor ``"regression"``
    """
    ROOT.TMVA.Tools.Instance()

    file_out = ROOT.TFile(os.path.join(info.directory, info.tmva_root), "RECREATE")
    try:
        print(classifier.factory_options)
        factory = ROOT.TMVA.Factory(info.tmva_job, file_out, classifier.factory_options)

        for var in data.columns:
            factory.AddVariable(var)

        # Set data
        if info.model_type == "classification":
            if classifier.method == "kCuts":
                # Signal must be the first added to the tree, otherwise the
                # rectangular cut optimization doesn't work: sort by
                # descending label.
                labels = numpy.asarray(labels)
                inds = numpy.argsort(labels)[::-1]
                # argsort yields positions, so index positionally
                # (DataFrame.ix is gone from pandas and was label-based
                # on integer indexes).
                data = data.iloc[inds, :]
                labels = labels[inds]
                if sample_weight is not None:
                    sample_weight = numpy.asarray(sample_weight)[inds]
            add_classification_events(factory, numpy.array(data), labels, weights=sample_weight)
            add_classification_events(factory, numpy.array(data), labels, weights=sample_weight, test=True)
        elif info.model_type == "regression":
            factory.AddTarget("target")
            add_regression_events(factory, numpy.array(data), labels, weights=sample_weight)
            add_regression_events(factory, numpy.array(data), labels, weights=sample_weight, test=True)
        else:
            raise NotImplementedError("Doesn't support type {}".format(info.model_type))

        factory.PrepareTrainingAndTestTree(ROOT.TCut("1"), "")

        # Set method: TMVA takes its options as one "key=value:key=value" string.
        parameters = ":".join(
            "{key}={value}".format(key=key, value=value)
            for key, value in classifier.method_parameters.items()
        )
        factory.BookMethod(
            getattr(ROOT.TMVA.Types, classifier.method),
            classifier._method_name,
            parameters,
        )

        factory.TrainAllMethods()
    finally:
        # Always release the ROOT file, including on the error paths above.
        file_out.Close()
def tmva_process(classifier, info, data, labels, sample_weight):
    """
    Create TMVA classification factory, train, test and evaluate all methods

    Training and test sets are filled from the same ``data``/``labels``.

    :param classifier: classifier to train
    :type classifier: rep.estimators.tmva.TMVAClassifier or rep.estimators.tmva.TMVARegressor
    :param rep.estimators.tmva._AdditionalInformation info: additional information
    :param pandas.DataFrame data: train data
    :param labels: array-like - targets
    :param sample_weight: array-like - weights
    :raises NotImplementedError: for an ``info.model_type`` other than
        ``'classification'`` or ``'regression'``
    """
    ROOT.TMVA.Tools.Instance()

    file_out = ROOT.TFile(os.path.join(info.directory, info.tmva_root), "RECREATE")
    try:
        print(classifier.factory_options)
        factory = ROOT.TMVA.Factory(info.tmva_job, file_out, classifier.factory_options)

        for var in data.columns:
            factory.AddVariable(var)

        # Set data
        model_type = info.model_type
        if model_type == 'classification':
            if classifier.method == 'kCuts':
                # Rectangular cut optimization needs the signal events to
                # be added first, so reorder everything by descending label.
                labels = numpy.asarray(labels)
                order = numpy.argsort(labels)[::-1]
                # Positional indexing: `order` holds row positions, not
                # index labels (and DataFrame.ix no longer exists).
                data = data.iloc[order, :]
                labels = labels[order]
                if sample_weight is not None:
                    sample_weight = numpy.asarray(sample_weight)[order]
            features = numpy.array(data)
            add_classification_events(factory, features, labels, weights=sample_weight)
            add_classification_events(factory, features, labels, weights=sample_weight, test=True)
        elif model_type == 'regression':
            factory.AddTarget('target')
            features = numpy.array(data)
            add_regression_events(factory, features, labels, weights=sample_weight)
            add_regression_events(factory, features, labels, weights=sample_weight, test=True)
        else:
            raise NotImplementedError("Doesn't support type {}".format(model_type))

        factory.PrepareTrainingAndTestTree(ROOT.TCut('1'), "")

        # Set method: options are passed as a colon-separated key=value string.
        parameters = ":".join(
            "{key}={value}".format(key=key, value=value)
            for key, value in classifier.method_parameters.items())
        method_type = getattr(ROOT.TMVA.Types, classifier.method)
        factory.BookMethod(method_type, classifier._method_name, parameters)

        factory.TrainAllMethods()
    finally:
        # Close the output file whether or not training succeeded.
        file_out.Close()
def tmva_process(estimator, info, data, target, sample_weight):
    """
    Create a TMVA classification/regression factory; training, testing and evaluating.

    The events in ``data`` are registered twice on the factory: once as
    training events and once as test events.

    :param estimator: classifier/regressor which should be trained
    :type estimator: rep.estimators.tmva.TMVAClassifier or rep.estimators.tmva.TMVARegressor
    :param rep.estimators.tmva._AdditionalInformation info: additional information
    :param pandas.DataFrame data: training data
    :param target: array-like targets
    :param sample_weight: array-like samples weights
    :raises NotImplementedError: if ``info.model_type`` is not
        ``'classification'`` or ``'regression'``
    """
    ROOT.TMVA.Tools.Instance()

    file_out = ROOT.TFile(os.path.join(info.directory, info.tmva_root), "RECREATE")
    try:
        factory = ROOT.TMVA.Factory(info.tmva_job, file_out, estimator.factory_options)

        for var in data.columns:
            factory.AddVariable(var)

        # Set data
        if info.model_type == 'classification':
            if estimator.method == 'kCuts':
                # signal must be the first added to the tree, because method
                # *rectangular cut optimization* doesn't work in another way
                target = numpy.asarray(target)
                inds = numpy.argsort(target)[::-1]
                # `inds` are row positions, hence .iloc (the removed .ix
                # accessor would have looked them up as labels on an
                # integer index).
                data = data.iloc[inds, :]
                target = target[inds]
                if sample_weight is not None:
                    sample_weight = numpy.asarray(sample_weight)[inds]
            add_classification_events(factory, numpy.array(data), target, weights=sample_weight)
            add_classification_events(factory, numpy.array(data), target, weights=sample_weight, test=True)
        elif info.model_type == 'regression':
            factory.AddTarget('target')
            add_regression_events(factory, numpy.array(data), target, weights=sample_weight)
            add_regression_events(factory, numpy.array(data), target, weights=sample_weight, test=True)
        else:
            raise NotImplementedError("Doesn't support type {}".format(info.model_type))

        factory.PrepareTrainingAndTestTree(ROOT.TCut('1'), "")

        # Set method: serialise the parameters as "key=value:key=value".
        parameters = ":".join(
            "{key}={value}".format(key=key, value=value)
            for key, value in estimator.method_parameters.items())
        factory.BookMethod(getattr(ROOT.TMVA.Types, estimator.method),
                           estimator._method_name, parameters)

        factory.TrainAllMethods()
    finally:
        # Do not leak the ROOT file handle if anything above raised.
        file_out.Close()
# Everything from position n_events onwards forms the test split.
X_test, y_test, w_test = X[n_events:], y[n_events:], w[n_events:]

# ROOT file handed to the TMVA factory below.
output = TFile('tmva_output.root', 'recreate')
factory = TMVA.Factory('classifier', output,
                       'AnalysisType=Multiclass:'
                       '!V:Silent:!DrawProgressBar')
# From ROOT 6.07/04 on, variables and events are registered on a separate
# DataLoader; for older versions the factory itself plays that role.
if ROOT_VERSION >= '6.07/04':
    data = TMVA.DataLoader('.')
else:
    data = factory
# Two input variables, named f0 and f1, declared with type code 'F'.
for n in range(2):
    data.AddVariable('f{0}'.format(n), 'F')

# Call root_numpy's utility functions to add events from the arrays
add_classification_events(data, X_train, y_train, weights=w_train)
add_classification_events(data, X_test, y_test, weights=w_test, test=True)
# The following line is necessary if events have been added individually:
data.PrepareTrainingAndTestTree(TCut('1'), 'NormMode=EqualNumEvents')

# Train an MLP
# With a DataLoader, BookMethod is the bound factory method and `data` is its
# first argument. In the older branch `data` is the factory, so calling the
# unbound TMVA.Factory.BookMethod with it is the same as factory.BookMethod(...).
if ROOT_VERSION >= '6.07/04':
    BookMethod = factory.BookMethod
else:
    BookMethod = TMVA.Factory.BookMethod
BookMethod(data, 'MLP', 'MLP',
           'NeuronType=tanh:NCycles=200:HiddenLayers=N+2,2:'
           'TestRate=5:EstimatorType=MSE')
factory.TrainAllMethods()

# Classify the test dataset with the BDT
# NOTE(review): only an MLP is booked above, and the code this comment
# introduces is not visible here -- confirm whether "BDT" should read "MLP".
def tmva_process(estimator, info, data, target, sample_weight):
    """
    Create a TMVA classification/regression factory; training, testing and evaluating.

    Variables, the regression target and all events are registered on a
    ``ROOT.TMVA.DataLoader``; the factory only books and trains the method.
    The same events are used both as training and as test events.

    :param estimator: classifier/regressor which should be trained
    :type estimator: rep.estimators.tmva.TMVAClassifier or rep.estimators.tmva.TMVARegressor
    :param rep.estimators.tmva._AdditionalInformation info: additional information
    :param pandas.DataFrame data: training data
    :param target: array-like targets
    :param sample_weight: array-like samples weights
    :raises NotImplementedError: if ``info.model_type`` is not
        ``'classification'`` or ``'regression'``
    """
    ROOT.TMVA.Tools.Instance()

    file_out = ROOT.TFile(os.path.join(info.directory, info.tmva_root), "RECREATE")
    try:
        factory = ROOT.TMVA.Factory(info.tmva_job, file_out, estimator.factory_options)
        dataloader = ROOT.TMVA.DataLoader("DataLoader")

        for var in data.columns:
            dataloader.AddVariable(var)

        # Set data
        if info.model_type == 'classification':
            if estimator.method == 'kCuts':
                # signal must be the first added to the tree, because method
                # *rectangular cut optimization* doesn't work in another way
                target = numpy.asarray(target)
                inds = numpy.argsort(target)[::-1]
                # argsort returns positions, so select rows positionally
                # (DataFrame.ix has been removed from pandas).
                data = data.iloc[inds, :]
                target = target[inds]
                if sample_weight is not None:
                    sample_weight = numpy.asarray(sample_weight)[inds]
            add_classification_events(dataloader, numpy.array(data), target, weights=sample_weight)
            add_classification_events(dataloader, numpy.array(data), target, weights=sample_weight, test=True)
        elif info.model_type == 'regression':
            # The target belongs to the DataLoader, like the variables and
            # events; it was previously registered on the factory.
            dataloader.AddTarget('target')
            add_regression_events(dataloader, numpy.array(data), target, weights=sample_weight)
            add_regression_events(dataloader, numpy.array(data), target, weights=sample_weight, test=True)
        else:
            raise NotImplementedError("Doesn't support type {}".format(
                info.model_type))

        dataloader.PrepareTrainingAndTestTree(ROOT.TCut('1'), "")

        # Set method: options are passed as a "key=value:key=value" string.
        parameters = ":".join(
            "{key}={value}".format(key=key, value=value)
            for key, value in estimator.method_parameters.items()
        )
        factory.BookMethod(
            dataloader,
            getattr(ROOT.TMVA.Types, estimator.method),
            estimator._method_name,
            parameters)

        factory.TrainAllMethods()
    finally:
        # Release the ROOT file on success and on every error path.
        file_out.Close()