Example No. 1
import argparse

import yaml
import pandas as pd
import matplotlib.pyplot as plt
from hipe4ml import plot_utils


def main():
    # read config file
    parser = argparse.ArgumentParser(description='Arguments to pass')
    parser.add_argument('cfgFileName', metavar='text', default='cfgFileNameCheck.yml',
                        help='config file name for check')
    args = parser.parse_args()

    print('Loading check configuration: ...', end='\r')
    with open(args.cfgFileName, 'r') as ymlCfgFile:
        inputCfg = yaml.load(ymlCfgFile, yaml.FullLoader)
    print('Loading check configuration: Done!')

    print('Loading data files: ...', end='\r')
    DfList = []
    for filePath in inputCfg['input']['files']:
        DfList.append(pd.read_parquet(filePath))
    print('Loading data files: Done!')

    for (PtMin, PtMax) in zip(inputCfg['pt_ranges']['min'], inputCfg['pt_ranges']['max']):
        print(f'Plot variable distributions --- {PtMin} < pT < {PtMax} GeV/c')
        DfListPt = []
        for df in DfList:
            DfListPt.append(df.query(f'{PtMin} < pt_cand < {PtMax}'))
        VarsToDraw = inputCfg['plotting_columns']
        LegLabels = inputCfg['output']['leg_labels']
        OutPutDir = inputCfg['output']['dir']
        plot_utils.plot_distr(DfListPt, VarsToDraw, (12, 7), 100, True, LegLabels)
        plt.subplots_adjust(left=0.06, bottom=0.06, right=0.99, top=0.96, hspace=0.55, wspace=0.55)
        plt.savefig(f'{OutPutDir}/DistrComp_pT_{PtMin}_{PtMax}.pdf')
        plt.close('all')
        del DfListPt

    del DfList
Example No. 2
import matplotlib.pyplot as plt
from hipe4ml import plot_utils


def plot_distr_comparison(hdl1,
                          hdl2,
                          name,
                          filename_dict,
                          label_1='df1',
                          label_2='df2',
                          col_names=None,
                          nbins=100):

    print('Plotting comparison of variable distributions')

    df1 = hdl1.get_data_frame()
    df2 = hdl2.get_data_frame()

    if col_names is None:
        col_names = list(df1.columns)

    column = []

    for col in col_names:
        if col in list(df2.columns):
            column.append(col)

    plt.close()

    plot_utils.plot_distr([hdl1, hdl2],
                          alpha=0.5,
                          bins=nbins,
                          labels=[label_1, label_2],
                          figsize=(20, 20),
                          density=True,
                          column=column)
    plt.savefig(filename_dict['analysis_path'] + 'images/var_distribution/' +
                name[:-1] + '.png')
    plt.close()

    for col in col_names:
        if col in list(df2.columns):
            plt.figure()
            df1[col].hist(alpha=0.5, bins=nbins, label=label_1, density=True)
            df2[col].hist(alpha=0.5, bins=nbins, label=label_2, density=True)
            plt.legend()
            plt.savefig(filename_dict['analysis_path'] +
                        'images/var_distribution/' + name + str(col) + '.png',
                        facecolor='white')
            plt.close()

    print('Done\n')
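
A minimal usage sketch for plot_distr_comparison, under stated assumptions: the toy DataFrames, column names, labels, and the './' analysis path are invented for illustration; only the TreeHandler/set_data_frame pattern (as in Example No. 5) and the 'analysis_path' key plus the images/var_distribution/ sub-folder used inside the function come from the examples themselves.

import os

import numpy as np
import pandas as pd
from hipe4ml.tree_handler import TreeHandler

# toy handlers built the same way Example No. 5 does (TreeHandler + set_data_frame)
rng = np.random.default_rng(42)
df_a = pd.DataFrame({'pt_cand': rng.exponential(2., 1000), 'd_len': rng.normal(0.10, 0.02, 1000)})
df_b = pd.DataFrame({'pt_cand': rng.exponential(3., 1000), 'd_len': rng.normal(0.12, 0.02, 1000)})
hdl_a, hdl_b = TreeHandler(), TreeHandler()
hdl_a.set_data_frame(df_a)
hdl_b.set_data_frame(df_b)

# the function writes into <analysis_path>/images/var_distribution/, so create it first
filename_dict = {'analysis_path': './'}
os.makedirs(filename_dict['analysis_path'] + 'images/var_distribution/', exist_ok=True)

# trailing underscore in the name matches the name[:-1] / name + col convention above
plot_distr_comparison(hdl_a, hdl_b, 'toy_', filename_dict, label_1='sample A', label_2='sample B')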
Example No. 3
def plot_distributions(tree_hdl, filename_dict, name, vars=None):
    """Plot the distribution of the variables in the tree handler

    Args:
        tree_hdl (hipe4ml.tree_handler): the tree with the data
        filename_dict (dictionary): dictionary of the filenames
        name (string): name of the plot
        vars (list, optional): the variables to plot. None for all variables. Defaults to None.
    """
    plt.close()
    plots = plot_utils.plot_distr(tree_hdl, column=vars, figsize=(20, 20))
    plt.savefig(filename_dict['analysis_path'] + 'images/var_distribution/' +
                name + '.png',
                dpi=500,
                facecolor='white')
    plt.close()
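
A short call sketch for the docstring above, reusing the illustrative hdl_a and filename_dict from the sketch after Example No. 2 (the plot name and variable list are arbitrary):

plot_distributions(hdl_a, filename_dict, 'toy_all_vars', vars=['pt_cand', 'd_len'])
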
def data_prep(inputCfg, iBin, PtBin, OutPutDirPt, PromptDf, FDDf, BkgDf):  #pylint: disable=too-many-statements, too-many-branches
    '''
    function for data preparation
    '''
    nPrompt = len(PromptDf)
    nFD = len(FDDf)
    nBkg = len(BkgDf)
    if FDDf.empty:
        out = f'\n     Signal: {nPrompt}\n     Bkg: {nBkg}'
    else:
        out = f'\n     Prompt: {nPrompt}\n     FD: {nFD}\n     Bkg: {nBkg}'
    print(
        f'Number of available candidates in {PtBin[0]} < pT < {PtBin[1]} GeV/c:{out}'
    )

    dataset_opt = inputCfg['data_prep']['dataset_opt']
    seed_split = inputCfg['data_prep']['seed_split']
    test_f = inputCfg['data_prep']['test_fraction']

    if dataset_opt == 'equal':
        if FDDf.empty:
            nCandToKeep = min([nPrompt, nBkg])
            out = 'signal'
            out2 = 'signal'
        else:
            nCandToKeep = min([nPrompt, nFD, nBkg])
            out = 'prompt, FD'
            out2 = 'prompt'
        print((
            f'Keep same number of {out} and background (minimum) for training and '
            f'testing ({1 - test_f}-{test_f}): {nCandToKeep}'))
        print(
            f'Fraction of real data candidates used for ML: {nCandToKeep/nBkg:.5f}'
        )

        if nPrompt > nCandToKeep:
            print((f'Remaining {out2} candidates ({nPrompt - nCandToKeep}) '
                   'will be used for the efficiency together with test set'))
        if nFD > nCandToKeep:
            print((
                f'Remaining FD candidates ({nFD - nCandToKeep}) will be used for the '
                'efficiency together with test set'))

        TotDf = pd.concat([
            BkgDf.iloc[:nCandToKeep], PromptDf.iloc[:nCandToKeep],
            FDDf.iloc[:nCandToKeep]
        ],
                          sort=True)
        if FDDf.empty:
            LabelsArray = np.array([0] * nCandToKeep + [1] * nCandToKeep)
        else:
            LabelsArray = np.array([0] * nCandToKeep + [1] * nCandToKeep +
                                   [2] * nCandToKeep)
        if test_f < 1:
            TrainSet, TestSet, yTrain, yTest = train_test_split(
                TotDf, LabelsArray, test_size=test_f, random_state=seed_split)
        else:
            TrainSet = pd.DataFrame()
            TestSet = TotDf.copy()
            yTrain = pd.Series()
            yTest = LabelsArray.copy()

        TrainTestData = [TrainSet, yTrain, TestSet, yTest]
        PromptDfSelForEff = pd.concat([
            PromptDf.iloc[nCandToKeep:], TestSet[pd.Series(yTest).array == 1]
        ],
                                      sort=False)
        if FDDf.empty:
            FDDfSelForEff = pd.DataFrame()
        else:
            FDDfSelForEff = pd.concat([
                FDDf.iloc[nCandToKeep:], TestSet[pd.Series(yTest).array == 2]
            ],
                                      sort=False)
        del TotDf

    elif dataset_opt == 'max_signal':
        nCandBkg = round(inputCfg['data_prep']['bkg_mult'][iBin] *
                         (nPrompt + nFD))
        out = 'signal' if FDDf.empty else 'prompt and FD'
        print((
            f'Keep all {out} and use {nCandBkg} bkg candidates for training and '
            f'testing ({1 - test_f}-{test_f})'))
        if nCandBkg >= nBkg:
            nCandBkg = nBkg
            print('\033[93mWARNING: using all bkg available, not good!\033[0m')
        print(
            f'Fraction of real data candidates used for ML: {nCandBkg/nBkg:.5f}'
        )

        TotDf = pd.concat([BkgDf.iloc[:nCandBkg], PromptDf, FDDf], sort=True)
        if FDDf.empty:
            LabelsArray = np.array([0] * nCandBkg + [1] * nPrompt)
        else:
            LabelsArray = np.array([0] * nCandBkg + [1] * nPrompt + [2] * nFD)
        if test_f < 1:
            TrainSet, TestSet, yTrain, yTest = train_test_split(
                TotDf, LabelsArray, test_size=test_f, random_state=seed_split)
        else:
            TrainSet = pd.DataFrame()
            TestSet = TotDf.copy()
            yTrain = pd.Series()
            yTest = LabelsArray.copy()

        TrainTestData = [TrainSet, yTrain, TestSet, yTest]
        PromptDfSelForEff = TestSet[pd.Series(yTest).array == 1]
        FDDfSelForEff = pd.DataFrame() if FDDf.empty else TestSet[pd.Series(
            yTest).array == 2]
        del TotDf

    else:
        print(f'\033[91mERROR: {dataset_opt} is not a valid option!\033[0m')
        sys.exit()

    # plots
    VarsToDraw = inputCfg['plots']['plotting_columns']
    LegLabels = [
        inputCfg['output']['leg_labels']['Bkg'],
        inputCfg['output']['leg_labels']['Prompt']
    ]
    if inputCfg['output']['leg_labels']['FD'] is not None:
        LegLabels.append(inputCfg['output']['leg_labels']['FD'])
    OutputLabels = [
        inputCfg['output']['out_labels']['Bkg'],
        inputCfg['output']['out_labels']['Prompt']
    ]
    if inputCfg['output']['out_labels']['FD'] is not None:
        OutputLabels.append(inputCfg['output']['out_labels']['FD'])
    ListDf = [BkgDf, PromptDf] if FDDf.empty else [BkgDf, PromptDf, FDDf]
    #_____________________________________________
    plot_utils.plot_distr(ListDf,
                          VarsToDraw,
                          100,
                          LegLabels,
                          figsize=(12, 7),
                          alpha=0.3,
                          log=True,
                          grid=False,
                          density=True)
    plt.subplots_adjust(left=0.06,
                        bottom=0.06,
                        right=0.99,
                        top=0.96,
                        hspace=0.55,
                        wspace=0.55)
    plt.savefig(f'{OutPutDirPt}/DistributionsAll_pT_{PtBin[0]}_{PtBin[1]}.pdf')
    plt.close('all')
    #_____________________________________________
    CorrMatrixFig = plot_utils.plot_corr(ListDf, VarsToDraw, LegLabels)
    for Fig, Lab in zip(CorrMatrixFig, OutputLabels):
        plt.figure(Fig.number)
        plt.subplots_adjust(left=0.2, bottom=0.25, right=0.95, top=0.9)
        Fig.savefig(
            f'{OutPutDirPt}/CorrMatrix{Lab}_pT_{PtBin[0]}_{PtBin[1]}.pdf')

    return TrainTestData, PromptDfSelForEff, FDDfSelForEff
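
The [TrainSet, yTrain, TestSet, yTest] list returned here has the same layout as the DATA object consumed by hipe4ml's ModelHandler in the training snippet further down (MODEL.train_test_model(DATA), MODEL.predict(DATA[2])). A hedged sketch of that hand-off, assuming an XGBoost classifier and an illustrative list of training features:

import xgboost as xgb
from hipe4ml.model_handler import ModelHandler

# TrainTestData is the [TrainSet, yTrain, TestSet, yTest] list returned by data_prep above
training_features = ['d_len', 'cos_p']  # hypothetical column names, analysis dependent
model_hdl = ModelHandler(xgb.XGBClassifier(), training_features)
model_hdl.train_test_model(TrainTestData)
y_pred_test = model_hdl.predict(TrainTestData[2])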
Example No. 5
        background_tree_handler = TreeHandler()
        prompt_tree_handler.set_data_frame(df_prompt_ct)
        non_prompt_tree_handler.set_data_frame(df_non_prompt_ct)
        background_tree_handler.set_data_frame(df_background_ct)
        del df_prompt_ct, df_non_prompt_ct, df_background_ct

        if not os.path.isdir(f'{PLOT_DIR}/features'):
            os.mkdir(f'{PLOT_DIR}/features')

        leg_labels = ['background', 'non-prompt', 'prompt']
        plot_distr = plot_utils.plot_distr([
            background_tree_handler, non_prompt_tree_handler,
            prompt_tree_handler
        ],
                                           TRAINING_COLUMNS_LIST,
                                           bins=40,
                                           labels=leg_labels,
                                           log=True,
                                           density=True,
                                           figsize=(12, 12),
                                           alpha=0.5,
                                           grid=False)
        plt.subplots_adjust(left=0.06,
                            bottom=0.06,
                            right=0.99,
                            top=0.96,
                            hspace=0.50,
                            wspace=0.50)
        plt.tight_layout()
        plt.savefig(f'{PLOT_DIR}/features/FeaturePlots.pdf')
        bkg_corr = plot_utils.plot_corr([background_tree_handler],
                                        TRAINING_COLUMNS_LIST, ['Background'])
Example No. 6
def test_plot_distr():
    """
    Test the feature distribution plot
    """
    assert isinstance(plot_utils.plot_distr(
        [SIG_DF, BKG_DF], SIG_DF.columns), np.ndarray)
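
The test above relies on plot_distr returning a NumPy array of matplotlib Axes. A minimal self-contained sketch (the toy DataFrames and column names are invented) that uses the return value the same way the per-axis loop in Example No. 8 does:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from hipe4ml import plot_utils

rng = np.random.default_rng(0)
sig_df = pd.DataFrame({'var_a': rng.normal(0., 1., 500), 'var_b': rng.exponential(1., 500)})
bkg_df = pd.DataFrame({'var_a': rng.normal(1., 2., 500), 'var_b': rng.exponential(2., 500)})

axes = plot_utils.plot_distr([sig_df, bkg_df], sig_df.columns, bins=50,
                             labels=['signal', 'background'], density=True, alpha=0.5)
if not isinstance(axes, np.ndarray):  # single-axes case, mirroring Example No. 8
    axes = np.array([axes])
for ax in axes.flatten():
    ax.set_ylabel('Counts (arb. units)')
plt.tight_layout()
plt.savefig('toy_distributions.png')
plt.close('all')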
Example No. 7
def data_prep(inputCfg, iBin, PtMin, PtMax, OutPutDirPt, DataDf, PromptDf,
              FDDf):  #pylint: disable=too-many-statements
    '''
    function for data preparation
    '''
    DataDfPtSel = DataDf.query(f'{PtMin} < pt_cand < {PtMax}')
    BkgDfPtSel = DataDfPtSel.query(inputCfg['data_prep']['filt_bkg_mass'])
    PromptDfPtSel = PromptDf.query(f'{PtMin} < pt_cand < {PtMax}')
    FDDfPtSel = FDDf.query(f'{PtMin} < pt_cand < {PtMax}')

    nPrompt = len(PromptDfPtSel)
    nFD = len(FDDfPtSel)
    nBkg = len(BkgDfPtSel)
    print((
        f'Number of available candidates in {PtMin} < pT < {PtMax} GeV/c:\n     Prompt: {nPrompt}'
        f'\n     FD: {nFD}\n     Bkg: {nBkg}'))

    dataset_opt = inputCfg['data_prep']['dataset_opt']
    seed_split = inputCfg['data_prep']['seed_split']
    test_f = inputCfg['data_prep']['test_fraction']

    if dataset_opt == 'equal':

        nCandToKeep = min([nPrompt, nFD, nBkg])
        print((
            'Keep same number of prompt, FD, and background (minimum) for training and '
            f'testing ({1 - test_f}-{test_f}): {nCandToKeep}'))
        print(
            f'Fraction of real data candidates used for ML: {nCandToKeep/nBkg:.5f}'
        )

        if nPrompt > nCandToKeep:
            print((f'Remaining prompt candidates ({nPrompt - nCandToKeep}) '
                   'will be used for the efficiency together with test set'))
        if nFD > nCandToKeep:
            print((
                f'Remaining FD candidates ({nFD - nCandToKeep}) will be used for the '
                'efficiency together with test set'))

        TotDfPtSel = pd.concat([
            BkgDfPtSel.iloc[:nCandToKeep], PromptDfPtSel.iloc[:nCandToKeep],
            FDDfPtSel.iloc[:nCandToKeep]
        ],
                               sort=True)
        LabelsArray = [0] * nCandToKeep + [1] * nCandToKeep + [2] * nCandToKeep
        TrainSet, TestSet, yTrain, yTest = train_test_split(
            TotDfPtSel, LabelsArray, test_size=test_f, random_state=seed_split)
        TrainTestData = [TrainSet, yTrain, TestSet, yTest]
        CandTypeFlags = pd.Series(yTest)
        PromptDfPtSelForEff = pd.concat([
            PromptDfPtSel.iloc[nCandToKeep:],
            TestSet[CandTypeFlags.values == 1]
        ],
                                        sort=False)
        FDDfPtSelForEff = pd.concat(
            [FDDfPtSel.iloc[nCandToKeep:], TestSet[CandTypeFlags.values == 2]],
            sort=False)
        del TotDfPtSel

    elif dataset_opt == 'max_signal':

        nCandBkg = round(inputCfg['ml']['bkg_mult'][iBin] * (nPrompt + nFD))
        print((
            f'Keep all prompt and FD and use {nCandBkg} bkg candidates for training and '
            f'testing ({1 - test_f}-{test_f})'))
        if nCandBkg >= nBkg:
            nCandBkg = nBkg
            print('WARNING: using all bkg available, not good!')
        print(
            f'Fraction of real data candidates used for ML: {nCandBkg/nBkg:.5f}'
        )

        TotDfPtSel = pd.concat(
            [BkgDfPtSel.iloc[:nCandBkg], PromptDfPtSel, FDDfPtSel], sort=True)
        LabelsArray = [0] * nCandBkg + [1] * nPrompt + [2] * nFD
        TrainSet, TestSet, yTrain, yTest = train_test_split(
            TotDfPtSel, LabelsArray, test_size=test_f, random_state=seed_split)
        TrainTestData = [TrainSet, yTrain, TestSet, yTest]
        CandTypeFlags = pd.Series(yTest)
        PromptDfPtSelForEff = TestSet[CandTypeFlags.values == 1]
        FDDfPtSelForEff = TestSet[CandTypeFlags.values == 2]
        del TotDfPtSel

    else:
        print(f'ERROR: {dataset_opt} is not a valid option!')
        sys.exit()

    # plots
    VarsToDraw = inputCfg['ml']['plotting_columns']
    LegLabels = inputCfg['output']['leg_labels']
    OutputLabels = inputCfg['output']['out_labels']
    #_____________________________________________
    plot_utils.plot_distr([BkgDfPtSel, PromptDfPtSel, FDDfPtSel], VarsToDraw,
                          (12, 7), 100, True, LegLabels, 0.3)
    plt.subplots_adjust(left=0.06,
                        bottom=0.06,
                        right=0.99,
                        top=0.96,
                        hspace=0.55,
                        wspace=0.55)
    plt.savefig(f'{OutPutDirPt}/DistributionsAll_pT_{PtMin}_{PtMax}.pdf')
    plt.close('all')
    #_____________________________________________
    CorrMatrixFig = plot_utils.plot_corr(
        [BkgDfPtSel, PromptDfPtSel, FDDfPtSel], VarsToDraw, LegLabels)
    for Fig, Lab in zip(CorrMatrixFig, OutputLabels):
        plt.figure(Fig.number)
        plt.subplots_adjust(left=0.2, bottom=0.25, right=0.95, top=0.9)
        Fig.savefig(f'{OutPutDirPt}/CorrMatrix{Lab}_pT_{PtMin}_{PtMax}.pdf')

    del BkgDfPtSel, PromptDfPtSel, FDDfPtSel
    return TrainTestData, DataDfPtSel, PromptDfPtSelForEff, FDDfPtSelForEff
Example No. 8
import argparse

import numpy as np
import yaml
import matplotlib.pyplot as plt
from hipe4ml import plot_utils

# LoadDfFromRootOrParquet is a project-specific helper assumed to be importable from the analysis package


def main():
    # read config file
    parser = argparse.ArgumentParser(description='Arguments to pass')
    parser.add_argument('cfgFileName',
                        metavar='text',
                        default='cfgFileNameCheck.yml',
                        help='config file name for check')
    args = parser.parse_args()

    print('Loading check configuration: ...', end='\r')
    with open(args.cfgFileName, 'r') as ymlCfgFile:
        inputCfg = yaml.load(ymlCfgFile, yaml.FullLoader)
    print('Loading check configuration: Done!')

    print('Loading data files: ...', end='\r')
    DfList = []
    inDirName = inputCfg['input']['dirname']
    inTreeName = inputCfg['input']['treename']
    for filePath in inputCfg['input']['files']:
        DfList.append(LoadDfFromRootOrParquet(filePath, inDirName, inTreeName))

    print('Loading data files: Done!')

    print('Applying simple pre-filtering: ...', end='\r')
    DfListSel = []
    for df, query in zip(DfList, inputCfg['queries']):
        DfListSel.append(df.query(query))
    print('Applying simple pre-filtering: Done!')
    del DfList

    VarsToDraw = inputCfg['plotting_columns']
    LegLabels = inputCfg['output']['leg_labels']
    Colors = inputCfg['output']['colors']
    OutPutDir = inputCfg['output']['dir']

    for PtMin, PtMax, LimMin, LimMax in zip(inputCfg['pt_ranges']['min'],
                                            inputCfg['pt_ranges']['max'],
                                            inputCfg['plot_lim_min'],
                                            inputCfg['plot_lim_max']):
        print(f'Plot variable distributions --- {PtMin} < pT < {PtMax} GeV/c')
        DfListPt = []
        for df in DfListSel:
            DfListPt.append(df.query(f'{PtMin} < pt_cand < {PtMax}'))
        #print(len(DfListPt), len(Colors))
        DistrPlot = plot_utils.plot_distr(DfListPt,
                                          VarsToDraw,
                                          1000,
                                          LegLabels,
                                          figsize=(6, 6),
                                          density=True,
                                          histtype='stepfilled',
                                          grid=False,
                                          log=True,
                                          colors=Colors,
                                          alpha=0.3)
        plt.subplots_adjust(left=0.1,
                            bottom=0.05,
                            right=0.95,
                            top=0.95,
                            hspace=0.4)
        if not isinstance(DistrPlot, np.ndarray):
            DistrPlot = np.array([DistrPlot])
        print(len(DistrPlot), len(LimMin), len(LimMax),
              len(inputCfg['xaxes_label']))
        for ax, minVar, maxVar, xLabel in zip(DistrPlot, LimMin, LimMax,
                                              inputCfg['xaxes_label']):
            ax.set_xlim(minVar, maxVar)

            ax.set_xlabel(xLabel, fontsize=10, ha='right', position=(1, 20))
            ax.set_ylabel('Counts (arb. units)',
                          fontsize=10,
                          ha='right',
                          position=(20, 1))
            plt.legend(frameon=False, fontsize=10, loc='best')

            ax.set_title('')
            '''
             textstr = r'pp, $\sqrt{s}$ = 5.02 TeV'
             textstr2 = r'$3 < p_{\mathrm{T}} < 4~\mathrm{GeV}/c$'

             ax.text(0.56, 0.75, textstr, transform=ax.transAxes, fontsize=15,
                    verticalalignment='top')
             ax.text(0.56, 0.69, textstr2, transform=ax.transAxes, fontsize=15,
                    verticalalignment='top')
            '''
            plt.tight_layout()
        plt.savefig(f'{OutPutDir}/NsigzoomDistrComp_pT_{PtMin}_{PtMax}.pdf')
        plt.close('all')
        del DfListPt

    del DfListSel
LegLabels = ['before selection', 'after selection']
varsToRemove = ['pt_B'] # HARD CODED

for (cuts, ptMin, ptMax) in zip(selToApply, cutVars['Pt']['min'], cutVars['Pt']['max']):
    print(f'Projecting distributions for {ptMin:.1f} < pT < {ptMax:.1f} GeV/c')
    if isMC:
        dfPromptList = [dfPrompt.query(f'{ptMin} < pt_cand < {ptMax}'), dfPrompt.astype(float).query(cuts)]
        dfFDList = [dfFD.query(f'{ptMin} < pt_cand < {ptMax}'), dfFD.astype(float).query(cuts)]

        varsToDraw = list(dfPromptList[0].columns)
        for varToRemove in varsToRemove:
            if varToRemove in varsToDraw:
                varsToDraw.remove(varToRemove)


        plot_utils.plot_distr(dfPromptList, varsToDraw, 100, LegLabels, figsize=(12, 7), density=True)
        plt.subplots_adjust(left=0.06, bottom=0.06, right=0.99, top=0.96, hspace=0.55, wspace=0.55)
        plt.savefig(f'{args.outputDir}/PromptDistrCompBeforeAfterSel_pT_{ptMin}_{ptMax}.pdf')
        plt.close('all')
        del dfPromptList

        plot_utils.plot_distr(dfFDList, varsToDraw, 100, LegLabels, figsize=(12, 7), density=True)
        plt.subplots_adjust(left=0.06, bottom=0.06, right=0.99, top=0.96, hspace=0.55, wspace=0.55)
        plt.savefig(f'{args.outputDir}/FDDistrCompBeforeAfterSel_pT_{ptMin}_{ptMax}.pdf')
        plt.close('all')
        del dfFDList

    else:
        dfAllList = [dfAll.query(f'{ptMin} < pt_cand < {ptMax}'), dfAll.astype(float).query(cuts)]

        varsToDraw = list(dfAllList[0].columns)
HYP_RANGES = {
    # defines the maximum depth of a single tree (regularization)
    'max_depth': (5, 15),
    # 'learning_rate': (0.01, 0.3),  # learning rate
    'n_estimators': (5, 10),  # number of boosting trees
}
MODEL.optimize_params_bayes(DATA, HYP_RANGES, 'roc_auc')

# train and test the model with the updated hyperparameters
MODEL.train_test_model(DATA)
Y_PRED = MODEL.predict(DATA[2])

# Calculate the BDT efficiency as a function of the BDT score
EFFICIENCY, THRESHOLD = analysis_utils.bdt_efficiency_array(
    DATA[3], Y_PRED, n_points=10)
# --------------------------------------------


# PLOTTING
# --------------------------------------------
FEATURES_DISTRIBUTIONS_PLOT = plot_utils.plot_distr(
    [SIG_DF, BKG_DF], SIG_DF.columns)
CORRELATION_MATRIX_PLOT = plot_utils.plot_corr([SIG_DF, BKG_DF], SIG_DF.columns)
BDT_OUTPUT_PLOT = plot_utils.plot_output_train_test(MODEL, DATA)
ROC_CURVE_PLOT = plot_utils.plot_roc(DATA[3], Y_PRED)
PRECISION_RECALL_PLOT = plot_utils.plot_precision_recall(DATA[3], Y_PRED)
BDT_EFFICIENCY_PLOT = plot_utils.plot_bdt_eff(THRESHOLD, EFFICIENCY)
FEATURES_IMPORTANCE = plot_utils.plot_feature_imp(TEST_SET, Y_TEST, MODEL)
plt.show()
# ---------------------------------------------
Example No. 11
                    size=int(0.8 * signal_tree_handler.get_n_cand()),
                    rndm_state=RANDOM_STATE)
                del background_tree_handler_full

                # features plot
                leg_labels = ['background', 'signal']
                # second condition needed because of issue with Qt libraries
                if MAKE_FEATURES_PLOTS and not MAKE_PRESELECTION_EFFICIENCY:
                    if not os.path.isdir(f'{PLOT_DIR}/features'):
                        os.mkdir(f'{PLOT_DIR}/features')

                    plot_utils.plot_distr(
                        [background_tree_handler, signal_tree_handler],
                        TRAINING_COLUMNS_LIST,
                        bins=50,
                        labels=leg_labels,
                        log=True,
                        density=True,
                        figsize=(12, 7),
                        alpha=0.3,
                        grid=False)
                    plt.subplots_adjust(left=0.06,
                                        bottom=0.06,
                                        right=0.99,
                                        top=0.96,
                                        hspace=0.55,
                                        wspace=0.55)
                    plt.savefig(f'{PLOT_DIR}/features/FeaturePlots_{bin}')
                    plot_utils.plot_corr([background_tree_handler],
                                         TRAINING_COLUMNS_LIST, ['background'])
                    plt.savefig(
                        f'{PLOT_DIR}/features/BackgroundCorrelationMatrix_{bin}')
    def do_hipe4mlplot(self):
        self.logger.info("Plotting hipe4ml model")

        leglabels = ["Background", "Prompt signal"]
        outputlabels = ["Bkg", "SigPrompt"]

        # _____________________________________________
        plot_utils.plot_distr([self.bkghandler, self.signalhandler],
                              self.v_train, 100, leglabels)
        plt.subplots_adjust(left=0.06,
                            bottom=0.06,
                            right=0.99,
                            top=0.96,
                            hspace=0.55,
                            wspace=0.55)
        figname = f'{self.dirmlplot}/DistributionsAll_pT_{self.p_binmin}_{self.p_binmax}.pdf'
        plt.savefig(figname)
        plt.close('all')
        # _____________________________________________
        corrmatrixfig = plot_utils.plot_corr(
            [self.bkghandler, self.signalhandler], self.v_train, leglabels)
        for figg, labb in zip(corrmatrixfig, outputlabels):
            plt.figure(figg.number)
            plt.subplots_adjust(left=0.2, bottom=0.25, right=0.95, top=0.9)
            figname = f'{self.dirmlplot}/CorrMatrix{labb}_pT_{self.p_binmin}_{self.p_binmax}.pdf'
            figg.savefig(figname)
        # _____________________________________________
        plt.rcParams["figure.figsize"] = (10, 7)
        mloutputfig = plot_utils.plot_output_train_test(
            self.p_hipe4ml_model,
            self.traintestdata,
            80,
            self.raw_output_hipe4ml,
            leglabels,
            self.train_test_log_hipe4ml,
            density=True)
        figname = f'{self.dirmlplot}/MLOutputDistr_pT_{self.p_binmin}_{self.p_binmax}.pdf'
        mloutputfig.savefig(figname)
        # _____________________________________________
        plt.rcParams["figure.figsize"] = (10, 9)
        roccurvefig = plot_utils.plot_roc(self.traintestdata[3],
                                          self.ypredtest_hipe4ml, None,
                                          leglabels,
                                          self.average_method_hipe4ml,
                                          self.roc_method_hipe4ml)
        figname = f'{self.dirmlplot}/ROCCurveAll_pT_{self.p_binmin}_{self.p_binmax}.pdf'
        roccurvefig.savefig(figname)
        # _____________________________________________
        plt.rcParams["figure.figsize"] = (10, 9)
        roccurvettfig = plot_utils.plot_roc_train_test(
            self.traintestdata[3], self.ypredtest_hipe4ml,
            self.traintestdata[1], self.ypredtrain_hipe4ml, None, leglabels,
            self.average_method_hipe4ml, self.roc_method_hipe4ml)
        figname = f'{self.dirmlplot}/ROCCurveTrainTest_pT_{self.p_binmin}_{self.p_binmax}.pdf'
        roccurvettfig.savefig(figname)
        # _____________________________________________
        precisionrecallfig = plot_utils.plot_precision_recall(
            self.traintestdata[3], self.ypredtest_hipe4ml, leglabels)
        figname = f'{self.dirmlplot}/PrecisionRecallAll_pT_{self.p_binmin}_{self.p_binmax}.pdf'
        precisionrecallfig.savefig(figname)
        # _____________________________________________
        plt.rcParams["figure.figsize"] = (12, 7)
        featuresimportancefig = plot_utils.plot_feature_imp(
            self.traintestdata[2][self.v_train], self.traintestdata[3],
            self.p_hipe4ml_model, leglabels)
        for i in range(0, len(featuresimportancefig)):
            figname = (f'{self.dirmlplot}/FeatureImportanceOpt{i}_'
                       f'pT_{self.p_binmin}_{self.p_binmax}.pdf')
            featuresimportancefig[i].savefig(figname)
        ]
    else:
        training_columns = [
            'TPCnSigmaHe3', 'ct', 'V0CosPA', 'ProngsDCA', 'He3ProngPvDCA',
            'PiProngPvDCA', 'He3ProngPvDCAXY', 'PiProngPvDCAXY',
            'NpidClustersHe3', 'TPCnSigmaPi'
        ]

    if not os.path.exists(results_ml_path):
        os.makedirs(results_ml_path)

    distr = pu.plot_distr([bkgH, signalH],
                          training_columns,
                          bins=63,
                          labels=['Background', 'Signal'],  # label order follows the [bkgH, signalH] list above
                          colors=["blue", "red"],
                          log=True,
                          density=True,
                          figsize=(18, 13),
                          alpha=0.3,
                          grid=False)
    plt.subplots_adjust(left=0.06,
                        bottom=0.06,
                        right=0.99,
                        top=0.96,
                        hspace=0.55,
                        wspace=0.55)
    plt.savefig(results_ml_path + "/features_distributions.png",
                bbox_inches='tight')
    corr = pu.plot_corr([signalH, bkgH], training_columns + ["m"],
                        ['Signal', "Background"])
    corr[0].savefig(results_ml_path + "/correlations.png", bbox_inches='tight')