Example #1
    def save_ML_plots(self,
                      model_handler,
                      data,
                      eff_score_array,
                      cent_class,
                      pt_range,
                      ct_range,
                      split=''):
        fig_path = os.environ[f'HYPERML_FIGURES_{self.mode}']
        info_string = f'_{cent_class[0]}{cent_class[1]}_{pt_range[0]}{pt_range[1]}_{ct_range[0]}{ct_range[1]}{split}'

        bdt_score_dir = fig_path + '/TrainTest'
        bdt_eff_dir = fig_path + '/Efficiency'
        feat_imp_dir = fig_path + '/FeatureImp'

        bdt_score_plot = plot_utils.plot_output_train_test(model_handler,
                                                           data,
                                                           bins=100,
                                                           log=True)
        os.makedirs(bdt_score_dir, exist_ok=True)

        bdt_score_plot.savefig(bdt_score_dir + '/BDT_Score' + info_string +
                               '.pdf')

        bdt_eff_plot = plot_utils.plot_bdt_eff(eff_score_array[1],
                                               eff_score_array[0])
        os.makedirs(bdt_eff_dir, exist_ok=True)

        bdt_eff_plot.savefig(bdt_eff_dir + '/BDT_Eff' + info_string + '.pdf')

        feature_names = model_handler.get_original_model().get_booster().feature_names
        feat_imp = plot_utils.plot_feature_imp(data[2][feature_names], data[3],
                                               model_handler)
        os.makedirs(feat_imp_dir, exist_ok=True)

        plt.savefig(feat_imp_dir + '/FeatImp' + info_string + '.pdf')
        plt.close()

        print('ML plots saved.\n')
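The `eff_score_array` consumed by `plot_bdt_eff` above pairs a BDT-efficiency array with the score thresholds that produce it. A minimal sketch of how such an array could be built with `analysis_utils.bdt_efficiency_array` (see also the README-style snippet in Example #7); `y_true` and `y_score` are illustrative names, not part of the original:

import numpy as np
from hipe4ml import analysis_utils

# y_true: test-set labels, y_score: BDT scores from model_handler.predict(...)
# bdt_efficiency_array returns the efficiency and the matching score threshold
efficiency, threshold = analysis_utils.bdt_efficiency_array(y_true, y_score,
                                                            n_points=100)

# stack so that eff_score_array[0] is the efficiency and eff_score_array[1]
# the score, matching plot_bdt_eff(eff_score_array[1], eff_score_array[0])
eff_score_array = np.vstack([efficiency, threshold])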
Example #2
                        f'matter {split_ineq_sign} and centrality > {cent_bins[0]} and centrality < {cent_bins[1]} and ct >= {ct_bins_df[0]} and ct < {ct_bins_df[1]}'
                    )
                    train_test_data_cent[1] = train_test_data_cent[0]['y_true']
                    train_test_data_cent[3] = train_test_data_cent[2]['y_true']

                    # get predictions for training and test sets
                    test_y_score = model_hdl.predict(train_test_data_cent[2])
                    train_y_score = model_hdl.predict(train_test_data_cent[0])

                    # second condition needed because of issue with Qt libraries
                    if MAKE_TRAIN_TEST_PLOT and not MAKE_PRESELECTION_EFFICIENCY:
                        if not os.path.isdir(f'{PLOT_DIR}/train_test_out'):
                            os.mkdir(f'{PLOT_DIR}/train_test_out')
                        plot_utils.plot_output_train_test(model_hdl,
                                                          train_test_data_cent,
                                                          logscale=True,
                                                          density=True,
                                                          labels=leg_labels)
                        plt.savefig(
                            f'{PLOT_DIR}/train_test_out/{bin_df}_out.pdf')

                        plot_utils.plot_feature_imp(train_test_data_cent[0],
                                                    train_test_data_cent[1],
                                                    model_hdl)
                        plt.savefig(
                            f'{PLOT_DIR}/train_test_out/feature_imp_training_{bin_df}.pdf'
                        )
                        plot_utils.plot_roc_train_test(train_test_data_cent[3],
                                                       test_y_score,
                                                       train_test_data_cent[1],
                                                       train_y_score,
                                                       labels=leg_labels,
                                                       multi_class_opt="ovr")
                        plt.savefig(
                            f'{PLOT_DIR}/train_test_out/roc_train_test_{bin_df}.pdf')
                        plt.close('all')
Example #3
def train_test(inputCfg, PtBin, OutPutDirPt, TrainTestData, iBin):  #pylint: disable=too-many-statements, too-many-branches
    '''
    function for model training and testing
    '''
    n_classes = len(np.unique(TrainTestData[3]))
    modelClf = xgb.XGBClassifier(use_label_encoder=False)
    TrainCols = inputCfg['ml']['training_columns']
    HyperPars = inputCfg['ml']['hyper_par'][iBin]
    if not isinstance(TrainCols, list):
        print('\033[91mERROR: training columns must be defined!\033[0m')
        sys.exit()
    if not isinstance(HyperPars, dict):
        print(
            '\033[91mERROR: hyper-parameters must be defined or be an empty dict!\033[0m'
        )
        sys.exit()
    ModelHandl = ModelHandler(modelClf, TrainCols, HyperPars)

    # hyperparams optimization
    if inputCfg['ml']['hyper_par_opt']['do_hyp_opt']:
        print('Performing Bayesian optimization')

        BayesOptConfig = inputCfg['ml']['hyper_par_opt']['bayes_opt_config']
        if not isinstance(BayesOptConfig, dict):
            print('\033[91mERROR: bayes_opt_config must be defined!\033[0m')
            sys.exit()

        if n_classes > 2:
            average_method = inputCfg['ml']['roc_auc_average']
            roc_method = inputCfg['ml']['roc_auc_approach']
            if not (average_method in ['macro', 'weighted']
                    and roc_method in ['ovo', 'ovr']):
                print(
                    '\033[91mERROR: selected ROC configuration is not valid!\033[0m'
                )
                sys.exit()

            if average_method == 'weighted':
                metric = f'roc_auc_{roc_method}_{average_method}'
            else:
                metric = f'roc_auc_{roc_method}'
        else:
            metric = 'roc_auc'

        print('Performing hyper-parameters optimisation: ...', end='\r')
        OutFileHypPars = open(
            f'{OutPutDirPt}/HyperParOpt_pT_{PtBin[0]}_{PtBin[1]}.txt', 'wt')
        sys.stdout = OutFileHypPars
        ModelHandl.optimize_params_bayes(
            TrainTestData,
            BayesOptConfig,
            metric,
            nfold=inputCfg['ml']['hyper_par_opt']['nfolds'],
            init_points=inputCfg['ml']['hyper_par_opt']['initpoints'],
            n_iter=inputCfg['ml']['hyper_par_opt']['niter'],
            njobs=inputCfg['ml']['hyper_par_opt']['njobs'])
        OutFileHypPars.close()
        sys.stdout = sys.__stdout__
        print('Performing hyper-parameters optimisation: Done!')
        print(
            f'Output saved in {OutPutDirPt}/HyperParOpt_pT_{PtBin[0]}_{PtBin[1]}.txt'
        )
        print(f'Best hyper-parameters:\n{ModelHandl.get_model_params()}')
    else:
        ModelHandl.set_model_params(HyperPars)

    # train and test the model with the updated hyper-parameters
    yPredTest = ModelHandl.train_test_model(
        TrainTestData,
        True,
        output_margin=inputCfg['ml']['raw_output'],
        average=inputCfg['ml']['roc_auc_average'],
        multi_class_opt=inputCfg['ml']['roc_auc_approach'])
    yPredTrain = ModelHandl.predict(TrainTestData[0],
                                    inputCfg['ml']['raw_output'])

    # save model handler in pickle
    ModelHandl.dump_model_handler(
        f'{OutPutDirPt}/ModelHandler_pT_{PtBin[0]}_{PtBin[1]}.pickle')
    ModelHandl.dump_original_model(
        f'{OutPutDirPt}/XGBoostModel_pT_{PtBin[0]}_{PtBin[1]}.model', True)

    #plots
    LegLabels = [
        inputCfg['output']['leg_labels']['Bkg'],
        inputCfg['output']['leg_labels']['Prompt']
    ]
    if inputCfg['output']['leg_labels']['FD'] is not None:
        LegLabels.append(inputCfg['output']['leg_labels']['FD'])
    OutputLabels = [
        inputCfg['output']['out_labels']['Bkg'],
        inputCfg['output']['out_labels']['Prompt']
    ]
    if inputCfg['output']['out_labels']['FD'] is not None:
        OutputLabels.append(inputCfg['output']['out_labels']['FD'])
    #_____________________________________________
    plt.rcParams["figure.figsize"] = (10, 7)
    MLOutputFig = plot_utils.plot_output_train_test(
        ModelHandl,
        TrainTestData,
        80,
        inputCfg['ml']['raw_output'],
        LegLabels,
        inputCfg['plots']['train_test_log'],
        density=True)
    if n_classes > 2:
        for Fig, Lab in zip(MLOutputFig, OutputLabels):
            Fig.savefig(
                f'{OutPutDirPt}/MLOutputDistr{Lab}_pT_{PtBin[0]}_{PtBin[1]}.pdf'
            )
    else:
        MLOutputFig.savefig(
            f'{OutPutDirPt}/MLOutputDistr_pT_{PtBin[0]}_{PtBin[1]}.pdf')
    #_____________________________________________
    plt.rcParams["figure.figsize"] = (10, 9)
    ROCCurveFig = plot_utils.plot_roc(TrainTestData[3], yPredTest, None,
                                      LegLabels,
                                      inputCfg['ml']['roc_auc_average'],
                                      inputCfg['ml']['roc_auc_approach'])
    ROCCurveFig.savefig(
        f'{OutPutDirPt}/ROCCurveAll_pT_{PtBin[0]}_{PtBin[1]}.pdf')
    pickle.dump(
        ROCCurveFig,
        open(f'{OutPutDirPt}/ROCCurveAll_pT_{PtBin[0]}_{PtBin[1]}.pkl', 'wb'))
    #_____________________________________________
    plt.rcParams["figure.figsize"] = (10, 9)
    ROCCurveTTFig = plot_utils.plot_roc_train_test(
        TrainTestData[3], yPredTest, TrainTestData[1], yPredTrain, None,
        LegLabels, inputCfg['ml']['roc_auc_average'],
        inputCfg['ml']['roc_auc_approach'])
    ROCCurveTTFig.savefig(
        f'{OutPutDirPt}/ROCCurveTrainTest_pT_{PtBin[0]}_{PtBin[1]}.pdf')
    #_____________________________________________
    PrecisionRecallFig = plot_utils.plot_precision_recall(
        TrainTestData[3], yPredTest, LegLabels)
    PrecisionRecallFig.savefig(
        f'{OutPutDirPt}/PrecisionRecallAll_pT_{PtBin[0]}_{PtBin[1]}.pdf')
    #_____________________________________________
    plt.rcParams["figure.figsize"] = (12, 7)
    FeaturesImportanceFig = plot_utils.plot_feature_imp(
        TrainTestData[2][TrainCols], TrainTestData[3], ModelHandl, LegLabels)
    n_plot = n_classes if n_classes > 2 else 1
    for iFig, Fig in enumerate(FeaturesImportanceFig):
        if iFig < n_plot:
            label = OutputLabels[iFig] if n_classes > 2 else ''
            Fig.savefig(
                f'{OutPutDirPt}/FeatureImportance{label}_pT_{PtBin[0]}_{PtBin[1]}.pdf'
            )
        else:
            Fig.savefig(
                f'{OutPutDirPt}/FeatureImportanceAll_pT_{PtBin[0]}_{PtBin[1]}.pdf'
            )

    return ModelHandl
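For reference, a minimal sketch of the `inputCfg` structure this function reads. The keys are taken directly from the accesses above; every value is an illustrative assumption:

# hypothetical configuration covering the keys accessed in train_test above
inputCfg = {
    'ml': {
        'training_columns': ['pt_cand', 'd_len', 'cos_p'],  # assumed features
        'hyper_par': [{'max_depth': 3, 'n_estimators': 500}],  # one dict per pT bin
        'hyper_par_opt': {
            'do_hyp_opt': False,
            'bayes_opt_config': {'max_depth': (2, 6)},
            'nfolds': 5,
            'initpoints': 10,
            'niter': 20,
            'njobs': -1
        },
        'raw_output': False,
        'roc_auc_average': 'macro',
        'roc_auc_approach': 'ovo'
    },
    'output': {
        'leg_labels': {'Bkg': 'Background', 'Prompt': 'Prompt', 'FD': None},
        'out_labels': {'Bkg': 'Bkg', 'Prompt': 'Prompt', 'FD': None}
    },
    'plots': {'train_test_log': True}
}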
Example #4
def test_plot_bdt_output():
    """
    Test the test-training bdt output plot
    """
    assert isinstance(plot_utils.plot_output_train_test(
        MODEL, DATA), list)
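`MODEL` and `DATA` are fixtures defined elsewhere in the test module. A plausible sketch of how they could be prepared, assuming `train_test_generator` accepts plain DataFrames as in Example #6 (toy feature names and sample sizes are assumptions):

import numpy as np
import pandas as pd
import xgboost as xgb
from hipe4ml.model_handler import ModelHandler
from hipe4ml.analysis_utils import train_test_generator

# toy signal/background samples standing in for the real fixtures
rng = np.random.default_rng(42)
sig_df = pd.DataFrame({'x': rng.normal(1, 1, 200), 'y': rng.normal(1, 1, 200)})
bkg_df = pd.DataFrame({'x': rng.normal(-1, 1, 200), 'y': rng.normal(-1, 1, 200)})

# DATA = [train_df, y_train, test_df, y_test], as indexed throughout these examples
DATA = train_test_generator([sig_df, bkg_df], [1, 0], test_size=0.5)
MODEL = ModelHandler(xgb.XGBClassifier(), ['x', 'y'])
MODEL.train_test_model(DATA)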
Example #5
def train_test(inputCfg, PtMin, PtMax, OutPutDirPt, TrainTestData):
    '''
    function for model training and testing
    '''
    modelClf = xgb.XGBClassifier()
    TrainCols = inputCfg['ml']['training_columns']
    HyperPars = inputCfg['ml']['hyper_par']
    if not isinstance(TrainCols, list):
        print('ERROR: training columns must be defined!')
        sys.exit()
    if not isinstance(HyperPars, dict):
        print('ERROR: hyper-parameters must be defined or be an empty dict!')
        sys.exit()
    ModelHandl = ModelHandler(modelClf, TrainCols, HyperPars)

    # hyperparams optimization --> not working with multi-class classification at the moment
    #HypRanges = {
    #    # # defines the maximum depth of a single tree (regularization)
    #    'max_depth': (1, 30),
    #    'learning_rate': (0.01, 0.3),  # learning rate
    #    'n_estimators': (50, 1000)  # number of boosting trees
    #}
    #ModelHandl.optimize_params_bayes(TrainTestData, HypRanges, None)

    # train and test the model with the updated hyperparameters
    ModelHandl.train_test_model(TrainTestData)
    yPredTest = ModelHandl.predict(TrainTestData[2],
                                   inputCfg['ml']['raw_output'], True)

    # save model handler in pickle
    ModelHandl.dump_model_handler(
        f'{OutPutDirPt}/ModelHandler_pT_{PtMin}_{PtMax}.pickle')

    #plots
    LegLabels = inputCfg['output']['leg_labels']
    OutputLabels = inputCfg['output']['out_labels']
    #_____________________________________________
    plt.rcParams["figure.figsize"] = (10, 7)
    MLOutputFig = plot_utils.plot_output_train_test(
        ModelHandl,
        TrainTestData,
        80,
        inputCfg['ml']['raw_output'],
        LegLabels,
        True,
        inputCfg['plots']['train_test_log'],
        density=True)
    for Fig, Lab in zip(MLOutputFig, OutputLabels):
        Fig.savefig(f'{OutPutDirPt}/MLOutputDistr{Lab}_pT_{PtMin}_{PtMax}.pdf')
    #_____________________________________________
    plt.rcParams["figure.figsize"] = (8, 7)
    ROCCurveFig = plot_utils.plot_roc(TrainTestData[3], yPredTest, LegLabels)
    ROCCurveFig.savefig(f'{OutPutDirPt}/ROCCurveAll_pT_{PtMin}_{PtMax}.pdf')
    #_____________________________________________
    PrecisionRecallFig = plot_utils.plot_precision_recall(
        TrainTestData[3], yPredTest, LegLabels)
    PrecisionRecallFig.savefig(
        f'{OutPutDirPt}/PrecisionRecallAll_pT_{PtMin}_{PtMax}.pdf')
    #_____________________________________________
    plt.rcParams["figure.figsize"] = (12, 7)
    FeaturesImportanceFig = plot_utils.plot_feature_imp(
        TrainTestData[2][TrainCols], TrainTestData[3], ModelHandl)
    for iFig, Fig in enumerate(FeaturesImportanceFig):
        if iFig < 3:
            Fig.savefig(
                f'{OutPutDirPt}/FeatureImportance{OutputLabels[iFig]}_pT_{PtMin}_{PtMax}.pdf'
            )
        else:
            Fig.savefig(
                f'{OutPutDirPt}/FeatureImportanceAll_pT_{PtMin}_{PtMax}.pdf')

    return ModelHandl
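`TrainTestData` follows the usual hipe4ml layout `[train_df, y_train, test_df, y_test]`, which is why the function indexes it with 0-3. A minimal sketch of preparing it before calling `train_test(...)`; the parquet paths and the three-class labelling are assumptions:

import pandas as pd
from hipe4ml.analysis_utils import train_test_generator

# hypothetical inputs; class labels: 0 = background, 1 = prompt, 2 = feed-down
bkg_df = pd.read_parquet('bkg.parquet')
prompt_df = pd.read_parquet('prompt.parquet')
fd_df = pd.read_parquet('fd.parquet')

TrainTestData = train_test_generator([bkg_df, prompt_df, fd_df], [0, 1, 2],
                                     test_size=0.5)
ModelHandl = train_test(inputCfg, PtMin, PtMax, OutPutDirPt, TrainTestData)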
Example #6
def train_xgboost_model(signal,
                        background,
                        filename_dict,
                        params,
                        params_range,
                        flag_dict,
                        training_variables='',
                        testsize=0.5):
    '''
    Trains an XGBOOST model using hipe4ml and plot output distribution and feature importance
    '''

    print('Training XGBOOST model')

    training_fig_path = filename_dict['analysis_path'] + "/images/training"

    train_test_data = train_test_generator([signal, background], [1, 0],
                                           test_size=testsize)

    if training_variables == '':
        training_variables = train_test_data[0].columns.tolist()

    model_clf = xgb.XGBClassifier()
    model_hdl = ModelHandler(model_clf, training_variables)
    if not flag_dict['use_default_param']:
        model_hdl.set_model_params(params)

    if flag_dict['benchmark_opt']:

        print('Benchmarking optimizers\n')
        import time
        from sklearn.metrics import roc_auc_score
        times_sk = []
        roc_sk = []

        for i in range(1):
            start = time.time()

            model_hdl.optimize_params_bayes(train_test_data,
                                            params_range,
                                            'roc_auc',
                                            njobs=-1)
            model_hdl.train_test_model(train_test_data)

            y_pred_test = model_hdl.predict(
                train_test_data[2], True)  #used to evaluate model performance

            roc_sk.append(roc_auc_score(train_test_data[3], y_pred_test))

            times_sk.append(time.time() - start)

        print('\nBAYES OPTIMIZATION WITH SKLEARN')
        print('Mean time : ' + str(np.mean(times_sk)))
        print('Mean ROC : ' + str(np.mean(roc_sk)))
        print('--------------\n')
        print('OPTUNA')

        times_optuna = []
        roc = []

        for i in range(1):
            start = time.time()

            for key in params:
                if isinstance(params[key], str):
                    params_range[key] = params[key]

            model_hdl.optimize_params_optuna(train_test_data,
                                             params_range,
                                             'roc_auc',
                                             timeout=flag_dict['timeout'],
                                             n_jobs=flag_dict['n_jobs'])
            model_hdl.train_test_model(train_test_data)

            y_pred_test = model_hdl.predict(
                train_test_data[2], True)  #used to evaluate model performance

            roc.append(roc_auc_score(train_test_data[3], y_pred_test))
            times_optuna.append(time.time() - start)

        print('\nBAYES OPTIMIZATION WITH SKLEARN')
        print('Mean time : ' + str(np.mean(times_sk)))
        print('Mean ROC : ' + str(np.mean(roc_sk)))
        print('--------------\n')
        print('OPTUNA')
        print('Mean time : ' + str(np.mean(times_optuna)))
        print('Mean ROC : ' + str(np.mean(roc)))
        print('--------------\n')

    if flag_dict['optimize_bayes']:
        import time
        print('Doing Bayes optimization of hyperparameters\n')
        start = time.time()
        model_hdl.optimize_params_bayes(train_test_data,
                                        params_range,
                                        'roc_auc',
                                        n_iter=700,
                                        njobs=flag_dict['n_jobs'])
        print('Elapsed time: ' + str(time.time() - start))

    if flag_dict['optimize_optuna']:
        print('Doing Optuna optimization of hyperparameters\n')
        for key in params:
            if isinstance(params[key], str):
                params_range[key] = params[key]
        study = model_hdl.optimize_params_optuna(train_test_data,
                                                 params_range,
                                                 scoring='roc_auc',
                                                 timeout=flag_dict['timeout'],
                                                 n_jobs=flag_dict['n_jobs'],
                                                 n_trials=None)

        print('Parameters optimization done!\n')

        if flag_dict['plot_optim']:
            print('Saving optimization plots')
            fig = optuna.visualization.plot_slice(study)
            fig.write_image(training_fig_path + '/optuna_slice.png')
            fig = optuna.visualization.plot_optimization_history(study)
            fig.write_image(training_fig_path + '/optuna_history.png')
            '''fig = optuna.visualization.plot_param_importances(study)
            fig.write_image(training_fig_path + '/optuna_param_importance.png')
            fig = optuna.visualization.plot_contour(study)
            fig.write_image(training_fig_path + '/optuna_contour.png')'''
            print('Done\n')

        import joblib

        joblib.dump(study, filename_dict['analysis_path'] + "/model/study.pkl")

    model_hdl.train_test_model(train_test_data)
    print(model_hdl.get_model_params())

    print('Predicting values on training and test data')
    y_pred_train = model_hdl.predict(train_test_data[0], True)
    y_pred_test = model_hdl.predict(train_test_data[2],
                                    True)  #used to evaluate model performance
    print('Prediction done\n')

    plt.rcParams["figure.figsize"] = (10, 7)
    leg_labels = ['background', 'signal']

    print('Saving Output comparison plot')
    plt.figure()
    ml_out_fig = plot_utils.plot_output_train_test(model_hdl,
                                                   train_test_data,
                                                   100,
                                                   True,
                                                   leg_labels,
                                                   True,
                                                   density=False)
    plt.savefig(training_fig_path + '/output_train_test.png',
                dpi=300,
                facecolor='white')
    plt.close()
    print('Done\n')

    print('Saving ROC AUC plot')
    plt.figure()
    roc_train_test_fig = plot_utils.plot_roc_train_test(
        train_test_data[3], y_pred_test, train_test_data[1], y_pred_train,
        None, leg_labels)  #ROC AUC plot
    plt.savefig(training_fig_path + '/ROC_AUC_train_test.png',
                dpi=300,
                facecolor='white')

    import pickle
    with open(training_fig_path + '/ROC_AUC_train_test.pickle', 'wb') as f:
        pickle.dump(roc_train_test_fig, f)
    plt.close()

    print('Done\n')

    print('Saving feature importance plots')
    plt.figure()
    feat_imp_1, feat_imp_2 = plot_utils.plot_feature_imp(train_test_data[2],
                                                         train_test_data[3],
                                                         model_hdl,
                                                         approximate=True)
    feat_imp_1.savefig(training_fig_path +
                       '/feature_importance_HIPE4ML_violin.png',
                       dpi=300,
                       facecolor='white')
    feat_imp_2.savefig(training_fig_path +
                       '/feature_importance_HIPE4ML_bar.png',
                       dpi=300,
                       facecolor='white')
    plt.close()
    print('Done\n')

    efficiency_score_conversion(train_test_data, y_pred_test, filename_dict)

    return train_test_data, y_pred_test, model_hdl
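`params` and `params_range` are supplied by the caller. Judging from the string-passthrough loop above (string-valued entries of `params` are copied into `params_range` unchanged) and the `HYP_RANGES` dict in Example #7 below, a plausible shape is the following; all concrete values are assumptions:

# hypothetical fixed parameters: string values are forwarded into params_range
# by the loop above, so categorical settings survive the Optuna search
params = {'tree_method': 'hist', 'eval_metric': 'logloss'}

# numeric entries are (low, high) ranges explored by the optimizer
params_range = {
    'max_depth': (3, 10),
    'learning_rate': (0.01, 0.3),
    'n_estimators': (100, 1000),
}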
Example #7
                    train_test_data_cent[0].loc[:,'model_output_prompt'] = train_y_score[:,2]
                    train_test_data_cent[2].loc[:,'model_output_prompt'] = test_y_score[:,2]
                    train_test_data_cent[0].loc[:,'model_output_non_prompt'] = train_y_score[:,1]
                    train_test_data_cent[2].loc[:,'model_output_non_prompt'] = test_y_score[:,1]
                    train_test_data_cent[0].loc[:,'model_output_background'] = train_y_score[:,0]
                    train_test_data_cent[2].loc[:,'model_output_background'] = test_y_score[:,0]

                    # write
                    train_test_data_cent[0].to_parquet(f'df/train_data_{cent_bins[0]}_{cent_bins[1]}_{ct_bins_df[0]}_{ct_bins_df[1]}.parquet.gzip',compression='gzip')
                    train_test_data_cent[2].to_parquet(f'df/test_data_{cent_bins[0]}_{cent_bins[1]}_{ct_bins_df[0]}_{ct_bins_df[1]}.parquet.gzip',compression='gzip')
                    
                    # second condition needed because of issue with Qt libraries
                    if MAKE_TRAIN_TEST_PLOT and not MAKE_PRESELECTION_EFFICIENCY:
                        if not os.path.isdir(f'{PLOT_DIR}/train_test_out'):
                            os.mkdir(f'{PLOT_DIR}/train_test_out')
                        out_figs = plot_utils.plot_output_train_test(model_hdl, train_test_data_cent, bins=50,
                                                        logscale=True, density=True, labels=leg_labels, output_margin=False)
                        for i_label, label in enumerate(leg_labels):
                            out_figs[i_label].savefig(f'{PLOT_DIR}/train_test_out/{bin_df}_out_{label}.pdf')

                        feat_imp = plot_utils.plot_feature_imp(train_test_data_cent[0], train_test_data_cent[1], model_hdl)
                        for i_label, label in enumerate(leg_labels):
                            feat_imp[i_label].savefig(f'{PLOT_DIR}/train_test_out/feature_imp_training_{bin_df}_{label}.pdf')
                        feat_imp[3].savefig(f'{PLOT_DIR}/train_test_out/feature_imp_training_{bin_df}_all.pdf')
                        plot_utils.plot_roc_train_test(
                            train_test_data_cent[3],
                            test_y_score, train_test_data_cent[1],
                            train_y_score, labels=leg_labels, multi_class_opt="ovr")
                        plt.savefig(f'{PLOT_DIR}/train_test_out/roc_train_test_{bin_df}.pdf')
                        plt.close('all')

                    if COMPUTE_SCORES_FROM_EFF:

HYP_RANGES = {
    # # defines the maximum depth of a single tree (regularization)
    'max_depth': (5, 15),
    # 'learning_rate': (0.01, 0.3),  # learning rate
    'n_estimators': (5, 10),  # number of boosting trees
}
MODEL.optimize_params_bayes(DATA, HYP_RANGES, 'roc_auc')

# train and test the model with the updated hyperparameters
MODEL.train_test_model(DATA)
Y_PRED = MODEL.predict(DATA[2])

# Calculate the BDT efficiency as a function of the BDT score
EFFICIENCY, THRESHOLD = analysis_utils.bdt_efficiency_array(
    DATA[3], Y_PRED, n_points=10)
# --------------------------------------------


# PLOTTING
# --------------------------------------------
FEATURES_DISTRIBUTIONS_PLOT = plot_utils.plot_distr(
    [SIG_DF, BKG_DF], SIG_DF.columns)
CORRELATION_MATRIX_PLOT = plot_utils.plot_corr([SIG_DF, BKG_DF], SIG_DF.columns)
BDT_OUTPUT_PLOT = plot_utils.plot_output_train_test(MODEL, DATA)
ROC_CURVE_PLOT = plot_utils.plot_roc(DATA[3], Y_PRED)
PRECISION_RECALL_PLOT = plot_utils.plot_precision_recall(DATA[3], Y_PRED)
BDT_EFFICIENCY_PLOT = plot_utils.plot_bdt_eff(THRESHOLD, EFFICIENCY)
FEATURES_IMPORTANCE = plot_utils.plot_feature_imp(TEST_SET, Y_TEST, MODEL)
plt.show()
# ---------------------------------------------
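A common follow-up to the efficiency curve above is extracting the score cut that corresponds to a target BDT efficiency. A minimal sketch using only the arrays already defined, assuming `EFFICIENCY` decreases monotonically as `THRESHOLD` rises (the 80% working point is an arbitrary choice):

import numpy as np

# interpolate the threshold at the requested efficiency; arrays are reversed
# so that the x-coordinates passed to np.interp are increasing
TARGET_EFF = 0.80
SCORE_CUT = np.interp(TARGET_EFF, EFFICIENCY[::-1], THRESHOLD[::-1])

# apply the cut to the test set
SELECTED = DATA[2][Y_PRED > SCORE_CUT]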
    def do_hipe4mlplot(self):
        self.logger.info("Plotting hipe4ml model")

        leglabels = ["Background", "Prompt signal"]
        outputlabels = ["Bkg", "SigPrompt"]

        # _____________________________________________
        plot_utils.plot_distr([self.bkghandler, self.signalhandler],
                              self.v_train, 100, leglabels)
        plt.subplots_adjust(left=0.06,
                            bottom=0.06,
                            right=0.99,
                            top=0.96,
                            hspace=0.55,
                            wspace=0.55)
        figname = f'{self.dirmlplot}/DistributionsAll_pT_{self.p_binmin}_{self.p_binmax}.pdf'
        plt.savefig(figname)
        plt.close('all')
        # _____________________________________________
        corrmatrixfig = plot_utils.plot_corr(
            [self.bkghandler, self.signalhandler], self.v_train, leglabels)
        for figg, labb in zip(corrmatrixfig, outputlabels):
            plt.figure(figg.number)
            plt.subplots_adjust(left=0.2, bottom=0.25, right=0.95, top=0.9)
            figname = f'{self.dirmlplot}/CorrMatrix{labb}_pT_{self.p_binmin}_{self.p_binmax}.pdf'
            figg.savefig(figname)
        # _____________________________________________
        plt.rcParams["figure.figsize"] = (10, 7)
        mloutputfig = plot_utils.plot_output_train_test(
            self.p_hipe4ml_model,
            self.traintestdata,
            80,
            self.raw_output_hipe4ml,
            leglabels,
            self.train_test_log_hipe4ml,
            density=True)
        figname = f'{self.dirmlplot}/MLOutputDistr_pT_{self.p_binmin}_{self.p_binmax}.pdf'
        mloutputfig.savefig(figname)
        # _____________________________________________
        plt.rcParams["figure.figsize"] = (10, 9)
        roccurvefig = plot_utils.plot_roc(self.traintestdata[3],
                                          self.ypredtest_hipe4ml, None,
                                          leglabels,
                                          self.average_method_hipe4ml,
                                          self.roc_method_hipe4ml)
        figname = f'{self.dirmlplot}/ROCCurveAll_pT_{self.p_binmin}_{self.p_binmax}.pdf'
        roccurvefig.savefig(figname)
        # _____________________________________________
        plt.rcParams["figure.figsize"] = (10, 9)
        roccurvettfig = plot_utils.plot_roc_train_test(
            self.traintestdata[3], self.ypredtest_hipe4ml,
            self.traintestdata[1], self.ypredtrain_hipe4ml, None, leglabels,
            self.average_method_hipe4ml, self.roc_method_hipe4ml)
        figname = f'{self.dirmlplot}/ROCCurveTrainTest_pT_{self.p_binmin}_{self.p_binmax}.pdf'
        roccurvettfig.savefig(figname)
        # _____________________________________________
        precisionrecallfig = plot_utils.plot_precision_recall(
            self.traintestdata[3], self.ypredtest_hipe4ml, leglabels)
        figname = f'{self.dirmlplot}/PrecisionRecallAll_pT_{self.p_binmin}_{self.p_binmax}.pdf'
        precisionrecallfig.savefig(figname)
        # _____________________________________________
        plt.rcParams["figure.figsize"] = (12, 7)
        featuresimportancefig = plot_utils.plot_feature_imp(
            self.traintestdata[2][self.v_train], self.traintestdata[3],
            self.p_hipe4ml_model, leglabels)
        for i in range(0, len(featuresimportancefig)):
            figname = (f'{self.dirmlplot}/FeatureImportanceOpt{i}_'
                       f'pT_{self.p_binmin}_{self.p_binmax}.pdf')
            featuresimportancefig[i].savefig(figname)

    model_hdl = ModelHandler(xgb.XGBClassifier(), training_columns)
    model_hdl.set_model_params(MODEL_PARAMS)
    model_hdl.set_model_params(HYPERPARAMS)
    if optmize:
        model_hdl.optimize_params_bayes(train_test_data,
                                        params_range,
                                        'roc_auc',
                                        njobs=-1,
                                        init_points=10,
                                        n_iter=20)

    y_pred_test = model_hdl.train_test_model(train_test_data, True, True)

    bdt_out_plot = pu.plot_output_train_test(model_hdl,
                                             train_test_data,
                                             100,
                                             True, ["Signal", "Background"],
                                             True,
                                             density=True)
    bdt_out_plot.savefig(results_ml_path + "/bdt_output.png")

    if not os.path.exists(ml_model_path):
        os.makedirs(ml_model_path)
    model_hdl.dump_model_handler(ml_model_path + "/model_hndl.pkl")

    feature_importance_plot = pu.plot_feature_imp(train_test_data[2],
                                                  train_test_data[3],
                                                  model_hdl)
    feature_importance_plot[0].savefig(results_ml_path +
                                       "/feature_importance_1.png")
    feature_importance_plot[1].savefig(results_ml_path +
                                       "/feature_importance_2.png")