def generate_plots_for_report():
    """ Generate all plots that are used in the report.

    Notes:
        - Some hyperparameters were tuned on Euler and are used by default. If you want to tune them
          manually/on your computer, use the flag -u (see main.py). The number of iterations used for
          RandomizedSearchCV can be set in classifiers.py
        - Some plots were modified manually for the report, such as removing titles
    """

    # Plot ROC curves of all classifiers
    model_factory.plot_roc_curves(True, True, with_lstm=False)

    _plot_heartrate_change()
    _plot_difficulties()
    _plot_mean_value_of_heartrate_at_crash()
    _plot_feature_correlation_matrix(reduced_features=False)
    _plot_heartrate_and_events()

    X, y = f_factory.get_feature_matrix_and_label(
        verbose=False,
        use_cached_feature_matrix=True,
        save_as_pickle_file=True,
        reduced_features=False,
        use_boxcox=False
    )

    # Plot an example of a Decision Tree by taking the first tree of the tuned Random Forest
    decision_tree_clf = classifiers.get_cclassifier_with_name('Random Forest', X, y).tuned_clf
    model_factory.get_performance(decision_tree_clf, 'Random Forest', X, y, None,
                                  verbose=False, create_curves=False)

    # Plot ROC curve of the Nearest Neighbor classifier (J-Index in the report was added manually...)
    print('Plotting ROC curve of Nearest Neighbor classifier...')
    nearest_neighbor_clf = classifiers.get_cclassifier_with_name('Nearest Neighbor', X, y).tuned_clf
    model_factory.get_performance(nearest_neighbor_clf, 'Nearest Neighbor', X, y, None,
                                  verbose=False, create_curves=True)

    # The following plots take a little longer, so only uncomment them if you really want them
    '''
def _test_clf_with_timedelta_only():
    """ (Debugging purposes only). Calculates the timedelta feature without using any other features.
    Since this also gives a good score, the timedelta feature really is a good predictor!
    """
    print("\n################# Testing classifier using timedelta feature only #################\n")

    df_list = random.sample(sd.df_list, len(sd.df_list))

    # Compute y_true for each logfile
    y_list = []
    for df in df_list:
        y_true = []
        for _, row in df.iterrows():
            if (row['Logtype'] == 'EVENT_CRASH') | (row['Logtype'] == 'EVENT_OBSTACLE'):
                y_true.append(1 if row['Logtype'] == 'EVENT_CRASH' else 0)
        y_list.append(y_true)

    # Compute the feature matrix for each logfile
    X_matrices = []
    for df in df_list:
        X = []
        for _, row in df.iterrows():
            if (row['Logtype'] == 'EVENT_CRASH') | (row['Logtype'] == 'EVENT_OBSTACLE'):
                last_obstacles = df[(df['Time'] < row['Time']) &
                                    ((df['Logtype'] == 'EVENT_OBSTACLE') | (df['Logtype'] == 'EVENT_CRASH'))]
                if last_obstacles.empty:
                    X.append(2)
                else:
                    X.append(row['Time'] - last_obstacles.iloc[-1]['Time'])
        X_matrices.append(X)

    x_train = np.hstack(X_matrices).reshape(-1, 1)  # Reshape because there is only one feature
    y_train = np.hstack(y_list).reshape(-1, 1)

    clf = classifiers.get_cclassifier_with_name('Decision Tree', x_train, y_train).clf
    score_dict = cross_validate(clf, x_train, y_train, scoring='roc_auc', cv=10)
    print('Mean roc_auc score with cross_validate: ' + str(np.mean(score_dict['test_score'])))
'''
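# A self-contained sketch of the "time since the previous obstacle/crash" feature computed in the
# (commented-out) test above, using a vectorized pandas diff on a tiny made-up logfile instead of the
# row-wise loop. It assumes, as the loop does, that the logfile is sorted by 'Time'; the value 2 is the
# same default the test uses when there is no previous event.
def _example_timedelta_feature():
    import pandas as pd

    log = pd.DataFrame({
        'Time':    [1.0, 3.5, 4.0, 7.2, 9.1],
        'Logtype': ['EVENT_OBSTACLE', 'EVENT_CRASH', 'EVENT_OBSTACLE', 'EVENT_CRASH', 'EVENT_OBSTACLE'],
    })
    events = log[log['Logtype'].isin(['EVENT_OBSTACLE', 'EVENT_CRASH'])]
    timedelta = events['Time'].diff().fillna(2)                 # time since the previous obstacle/crash
    y_true = (events['Logtype'] == 'EVENT_CRASH').astype(int)   # 1 = crash, 0 = obstacle passed
    print(pd.DataFrame({'timedelta': timedelta, 'y_true': y_true}))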
def get_tuned_clf_and_tuned_hyperparameters(X, y, clf_name='svm', verbose=True, pre_set=True):
    """ Optimizes hyperparameters with cross-validation using RandomizedSearchCV and returns the optimized
    classifier together with the tuned hyperparameters.

    :param X: Feature matrix
    :param y: labels
    :param clf_name: Name of the classifier as given in classifiers.py
    :param verbose: Whether scores of the top hyperparameter configurations should be printed out
    :param pre_set: Some classifiers have pre-tuned parameters (tuned on Euler). Take those instead of tuning

    :return: optimized classifier, dictionary of tuned_params
    """
    c_classifier = classifiers.get_cclassifier_with_name(clf_name, X, y)

    if clf_name == 'Naive Bayes':  # Naive Bayes doesn't have any hyperparameters to tune
        if synthesized_data.synthesized_data_enabled:
            X_n, y_n = f_factory.get_feature_matrix_and_label(False, False, True, True, False)
        else:
            X_n, y_n = f_factory.get_feature_matrix_and_label(True, True, True, True, False)

        c_classifier.clf.fit(X_n, y_n)
        return c_classifier.clf, []
    else:
        if pre_set and hasattr(c_classifier, 'tuned_clf'):
            print('Hyperparameters for ' + clf_name + ' were already tuned, taking those pre-set parameters')
            return c_classifier.tuned_clf, model_factory.get_tuned_params_dict(
                c_classifier.tuned_clf, list(c_classifier.tuned_params.keys()))
        else:
            print('Doing RandomizedSearchCV with n_iter=' + str(c_classifier.num_iter) + ' for ' + clf_name + '...')
            start = time.time()

            scaler = MinMaxScaler(feature_range=(0, 1))
            corr = FindCorrelation(threshold=0.9)
            p = make_pipeline(scaler, corr, c_classifier.clf)

            params = dict((c_classifier.estimator_name + '__' + key, value)
                          for (key, value) in c_classifier.tuned_params.items())

            clf = RandomizedSearchCV(p, params, cv=3, scoring='roc_auc', n_iter=c_classifier.num_iter)
            clf.fit(X, y)

            end = time.time()
            print("Time elapsed for hyperparameter tuning: " + str(end - start))

            if verbose:
                _report(clf.cv_results_)

            clf = clf.best_estimator_.steps[2][1]  # Unwrap the estimator from the pipeline object

            return clf, model_factory.get_tuned_params_dict(clf, list(c_classifier.tuned_params.keys()))
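# A minimal usage sketch, assuming the module-level imports used above (f_factory, classifiers, etc.)
# are available and 'SVM' is one of the names defined in classifiers.py. Setting pre_set=False forces
# a fresh RandomizedSearchCV run instead of reusing the parameters that were pre-tuned on Euler.
def _example_tune_single_classifier():
    X, y = f_factory.get_feature_matrix_and_label(
        verbose=False, use_cached_feature_matrix=True, save_as_pickle_file=True,
        reduced_features=False, use_boxcox=False)
    clf, tuned_params = get_tuned_clf_and_tuned_hyperparameters(X, y, clf_name='SVM',
                                                                verbose=True, pre_set=False)
    print(tuned_params)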
def main(args):
    """ Call '$ python main.py -h' to see how to use this module

    :param args: ArgumentParser
    """
    start = time.time()

    assert (not (args.use_synthesized_data and args.leave_one_group_out)), \
        'Can\'t do leave_one_group_out with synthesized data'

    if args.use_synthesized_data:
        print('Creating synthesized data...')
        synthesized_data.init_with_testdata_events_const_hr_const()
        X, y = f_factory.get_feature_matrix_and_label(
            verbose=True,
            use_cached_feature_matrix=False,
            save_as_pickle_file=False,
            reduced_features=False)
    else:
        setup_dataframes.setup(
            fewer_data=args.debugging,  # Specify if we want fewer data (for debugging purposes...)
            normalize_heartrate=(not args.do_not_normalize_heartrate),
            remove_tutorials=False)
        X, y = f_factory.get_feature_matrix_and_label(
            verbose=True,
            use_cached_feature_matrix=True,
            save_as_pickle_file=True,
            reduced_features=False,
            use_boxcox=False)

    if args.print_keynumbers_logfiles:
        print("\n################# Printing keynumbers #################\n")
        setup_dataframes.print_keynumbers_logfiles()

    if args.test_windows:
        print("\n################# Window optimization #################\n")
        window_optimization.performance_score_for_windows(
            args.test_windows[0],
            args.test_windows[1],
            args.test_windows[2],
            verbose=True,
            write_to_file=True,
        )

    if args.performance_without_tuning or args.performance_with_tuning:
        pre_set = not args.do_not_use_pre_tuned_hyperparameters

        if args.performance_with_tuning:
            print("\n################# Calculating performance with hyperparameter tuning #################\n")
        else:
            print("\n################# Calculating performance without hyperparameter tuning #################\n")

        # Note: The number of iterations in RandomizedSearchCV can be set in classifiers.py
        if args.performance_without_tuning == 'all' or args.performance_with_tuning == 'all':
            model_factory.calculate_performance_of_classifiers(
                X, y, tune_hyperparameters=args.performance_with_tuning, reduced_clfs=True, pre_set=pre_set)
        else:
            X_old = X
            y_old = y
            if (args.performance_with_tuning == 'Naive Bayes') or (args.performance_without_tuning == 'Naive Bayes'):
                X, y = f_factory.get_feature_matrix_and_label(
                    verbose=False,
                    use_cached_feature_matrix=True,
                    save_as_pickle_file=True,
                    use_boxcox=True,
                    reduced_features=False)

            if args.performance_with_tuning:
                clf, tuned_params = hyperparameter_optimization.get_tuned_clf_and_tuned_hyperparameters(
                    X, y, clf_name=args.performance_with_tuning, pre_set=pre_set,
                )
                _, _, _, _, _, _, _, _, _, _, _, report = model_factory.get_performance(
                    clf, args.performance_with_tuning, X, y, tuned_params,
                    verbose=True, do_write_to_file=False)
            else:
                model = classifiers.get_cclassifier_with_name(args.performance_without_tuning, X, y)
                _, _, _, _, _, _, _, _, _, _, _, report = model_factory.get_performance(
                    model.clf, args.performance_without_tuning, X, y,
                    verbose=True, do_write_to_file=False)

            X = X_old
            y = y_old

            print(report)

    if args.leave_one_group_out:
        print("\n################# Leave one out #################\n")
        leave_one_group_out_cv.clf_performance_with_user_left_out_vs_normal(
            X, y, True, reduced_features=False, reduced_classifiers=True)

    if args.evaluate_lstm:
        print("\n################# Get trained LSTM #################\n")
        LSTM.get_performance_of_lstm_classifier(X, y, n_epochs=args.evaluate_lstm[0])
        # LSTM.get_finalscore(X, y, n_epochs=args.evaluate_lstm[0])

    if args.generate_plots_about_features:
        print("\n################# Generate plots about features #################\n")
        plot_features(X, y)

    if args.generate_plots_about_logfiles:
        print("\n################# Generate plots about logfiles #################\n")
        plot_logfiles(args)

    if args.generate_plots_for_report:
        print("\n################# Generate plots for report #################\n")
        plots_report.generate_plots_for_report()

    end = time.time()
    print("Time elapsed: " + str(end - start))
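# A sketch of driving main() programmatically. The actual command-line flags are defined by the
# ArgumentParser elsewhere (not shown here); this Namespace only mirrors the attributes that main()
# reads above, with values chosen for a quick debugging run.
def _example_run_main():
    import argparse

    args = argparse.Namespace(
        use_synthesized_data=False,
        leave_one_group_out=False,
        debugging=True,                          # fewer data, as passed to setup_dataframes.setup(fewer_data=...)
        do_not_normalize_heartrate=False,
        print_keynumbers_logfiles=True,
        test_windows=None,
        performance_with_tuning=None,
        performance_without_tuning='Random Forest',
        do_not_use_pre_tuned_hyperparameters=False,
        evaluate_lstm=None,
        generate_plots_about_features=False,
        generate_plots_about_logfiles=False,
        generate_plots_for_report=False,
    )
    main(args)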
def test_all_windows():
    """ Keeps one window fixed and changes the other two. Calculates the roc_auc of the classifier given by
    model_name ('Nearest Neighbor' per default, with pre-tuned parameters) for each window combination and
    plots the scores as a heatmap.
    """
    print("\n################# Testing all window sizes #################\n")

    const_window = 'cw'
    const_w = 10
    list_1 = [5, 10, 20, 30, 50, 60]
    list_2 = list_1[::-1]

    if const_window == 'hw':
        name1 = 'Crash window (s)'
        name2 = 'Gradient window (s)'
        filename = 'windows_const_hw.pdf'
    elif const_window == 'cw':
        name1 = 'Default window (s)'
        name2 = 'Gradient window (s)'
        filename = 'windows_const_cw.pdf'
    else:
        name1 = 'Crash window'
        name2 = 'Default window'
        filename = 'windows_const_gradient_w.pdf'

    mean_scores = np.zeros((len(list_1), len(list_2)))
    model_name = 'Nearest Neighbor'

    for idx_w1, w1 in enumerate(list_1):
        for idx_w2, w2 in enumerate(list_2):
            # Assign the two varying windows depending on which window is kept constant
            if const_window == 'hw':
                h_window, c_window, gradient_window = const_w, w1, w2
            elif const_window == 'cw':
                h_window, c_window, gradient_window = w1, const_w, w2
            else:
                h_window, c_window, gradient_window = w1, w2, const_w

            X, y = f_factory.get_feature_matrix_and_label(
                verbose=True,
                use_cached_feature_matrix=True,
                save_as_pickle_file=True,
                h_window=h_window,
                c_window=c_window,
                gradient_window=gradient_window,
                reduced_features=False)

            model = classifiers.get_cclassifier_with_name(model_name, X, y).tuned_clf
            roc_auc_mean, roc_auc_std, _, _, _, _, _, _, _, _, _, _ = model_factory.get_performance(
                model, model_name, X, y, tuned_params_keys=None, verbose=False, create_curves=False)
            mean_scores[idx_w1][idx_w2] = roc_auc_mean

    mean_scores = np.fliplr(np.flipud(mean_scores))  # Flip to plot it correctly

    # Plot elements
    plt.subplot()
    plt.imshow(mean_scores, cmap='RdYlGn')
    plt.title('Average classifier performance when using constant ' + const_window)
    ax = plt.gca()
    ax.set_xticks(np.arange(0, len(list_1), 1))
    ax.set_yticks(np.arange(0, len(list_2), 1))
    ax.set_xticklabels(list_1)
    ax.set_yticklabels(list_2)
    ax.set_ylabel(name1)
    ax.set_xlabel(name2)
    plt.colorbar()

    plots_helpers.save_plot(plt, 'Performance/Windows/', filename)
def plot_roc_curves(hyperparameter_tuning=False, pre_set=True, with_lstm=False):
    """ Plots the roc_curves of all classifiers in one single plot

    :param hyperparameter_tuning: Do hyperparameter tuning
    :param pre_set: Some classifiers have pre-tuned parameters (tuned on Euler). Take those instead of tuning
    :param with_lstm: Also include the ROC of the LSTM network (takes a little time...)

    Folder: Report/
    Plot name: roc_curves.pdf
    """
    X, y = f_factory.get_feature_matrix_and_label(
        verbose=False,
        use_cached_feature_matrix=True,
        save_as_pickle_file=True,
        reduced_features=False,
        use_boxcox=False
    )

    clf_names = ['SVM', 'Nearest Neighbor', 'Random Forest', 'Naive Bayes']
    if pre_set:
        clf_list = [classifiers.get_cclassifier_with_name(name, X, y).tuned_clf for name in clf_names]
    else:
        clf_list = [classifiers.get_cclassifier_with_name(name, X, y).clf for name in clf_names]

    tprs = []
    fprs = []
    roc_aucs = []

    for idx, classifier in enumerate(clf_list):
        if hyperparameter_tuning:
            classifier, _ = hyperparameter_optimization.get_tuned_clf_and_tuned_hyperparameters(
                X, y, clf_name=clf_names[idx], verbose=False, pre_set=True
            )

        # clf = CalibratedClassifierCV(classifier)
        clf = classifier

        kf = KFold(n_splits=10)
        predicted_probas_list = []
        y = np.array(y)

        for train_index, test_index in kf.split(X):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]

            # Fit and transform the scaler on the training set, then transform the test set too
            scaler = MinMaxScaler(feature_range=(0, 1))
            X_train = scaler.fit_transform(X_train)
            X_test = scaler.transform(X_test)

            corr = FindCorrelation(threshold=0.9)
            X_train = corr.fit(X_train).transform(X_train)
            X_test = corr.transform(X_test)

            clf.fit(X_train, y_train)
            predicted_probas = clf.predict_proba(X_test)
            predicted_probas_list.append(predicted_probas[:, 1])

        # KFold without shuffling yields the test folds in the original index order, so the
        # concatenated per-fold probabilities line up with y
        fpr, tpr, _ = roc_curve(y, list(itertools.chain.from_iterable(predicted_probas_list)))
        roc_auc = auc(fpr, tpr)
        fprs.append(fpr)
        tprs.append(tpr)
        roc_aucs.append(roc_auc)

    # Also add LSTM scores:
    if with_lstm:
        clf_names.append("LSTM")
        fpr, tpr, roc_auc = LSTM.create_roc_curve(X, y, 130)
        fprs.append(fpr)
        tprs.append(tpr)
        roc_aucs.append(roc_auc)

    plt.figure()
    for idx, name in enumerate(clf_names):
        plt.plot(fprs[idx], tprs[idx], label=name + ' (AUC = %0.2f)' % roc_aucs[idx])

    plt.title('Roc curves')
    plt.legend(loc='lower right')
    plt.plot([0, 1], [0, 1], c='gray', ls='--')
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')

    plots_helpers.save_plot(plt, 'Report/', 'roc_curves.pdf')
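# A self-contained sketch of the cross-validated ROC pattern used above, run on synthetic data so it
# works without the project's feature pipeline (the FindCorrelation step is left out here). As noted
# above, stitching the per-fold probabilities back together only lines up with y because KFold without
# shuffling yields the test folds in the original index order.
def _example_cross_validated_roc():
    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import auc, roc_curve
    from sklearn.model_selection import KFold
    from sklearn.preprocessing import MinMaxScaler

    X, y = make_classification(n_samples=500, n_features=10, random_state=0)

    predicted_probas_list = []
    for train_index, test_index in KFold(n_splits=10).split(X):
        scaler = MinMaxScaler(feature_range=(0, 1))
        X_train = scaler.fit_transform(X[train_index])   # fit the scaler on the training fold only
        X_test = scaler.transform(X[test_index])
        clf = RandomForestClassifier(random_state=0).fit(X_train, y[train_index])
        predicted_probas_list.append(clf.predict_proba(X_test)[:, 1])

    fpr, tpr, _ = roc_curve(y, np.concatenate(predicted_probas_list))
    print('AUC = %0.2f' % auc(fpr, tpr))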
def calculate_performance_of_classifiers(X, y, tune_hyperparameters=False, reduced_clfs=True, create_barchart=True,
                                         create_curves=True, do_write_to_file=True, pre_set=False):
    """Computes the performance (roc_auc, recall, specificity, precision, confusion matrix) of either all or only the
    reduced set of classifiers, optionally writes it into a file and plots the roc_auc scores in a barchart.

    :param X: Feature matrix
    :param y: labels
    :param tune_hyperparameters: Whether or not hyperparameters should be tuned
    :param reduced_clfs: All classifiers, or only SVM, Nearest Neighbor, Random Forest and Naive Bayes
    :param create_barchart: Create a barchart consisting of the roc_auc scores
    :param create_curves: Create roc_curves and precision_recall curve
    :param do_write_to_file: Write summary of performance into a file (optional)
    :param pre_set: Some classifiers have pre-tuned parameters (tuned on Euler). Take those instead of tuning

    :return: list of roc_aucs, list of roc_auc_stds (one score for each classifier) and formatted string of scores
    """
    if reduced_clfs:
        clf_names = classifiers.reduced_names
    else:
        clf_names = classifiers.names

    clf_list = [classifiers.get_cclassifier_with_name(name, X, y).clf for name in clf_names]

    if tune_hyperparameters or pre_set:
        clf_list = [hyperparameter_optimization.get_tuned_clf_and_tuned_hyperparameters(
                        X, y, name, verbose=False, pre_set=pre_set)[0]
                    for name in clf_names]

    scores_mean = []
    scores_std = []
    names = []
    tuned_params = []
    conf_mats = []

    windows = str(f_factory.hw) + '_' + str(f_factory.cw) + '_' + str(f_factory.gradient_w)
    filename = 'clf_performances_with_hp_tuning_' + windows if tune_hyperparameters \
        else 'clf_performances_without_hp_tuning_' + windows

    for idx, clf in enumerate(clf_list):
        tuned_parameters = classifiers.get_cclassifier_with_name(clf_names[idx], X, y).tuned_params
        clf_name = clf_names[idx]
        names.append(clf_name)

        if clf_name == 'Naive Bayes':  # Naive Bayes doesn't have any hyperparameters to tune
            X_n, y_n = f_factory.get_feature_matrix_and_label(True, True, True, True, False)
            roc_auc, roc_auc_std, recall, recall_std, specificity, specificity_std, precision, precision_std, \
                f1, f1_std, conf_mat, _ = get_performance(clf, clf_name, X_n, y_n, create_curves=create_curves)
        else:
            roc_auc, roc_auc_std, recall, recall_std, specificity, specificity_std, precision, precision_std, f1, \
                f1_std, conf_mat, _ = get_performance(clf, clf_name, X, y, tuned_parameters,
                                                      create_curves=create_curves)

        scores_mean.append([roc_auc, recall, specificity, precision, f1])
        scores_std.append([roc_auc_std, recall_std, specificity_std, precision_std, f1_std])
        tuned_params.append(get_tuned_params_dict(clf, tuned_parameters))
        conf_mats.append(conf_mat)

    if create_barchart:
        title = 'Scores by classifier with hyperparameter tuning' if tune_hyperparameters \
            else 'Scores by classifier without hyperparameter tuning'
        _plot_barchart_scores(names, [s[0] for s in scores_mean], [s[0] for s in scores_std], title,
                              filename + '.pdf')

    s = ''

    roc_scores = [s[0] for s in scores_mean]
    roc_scores_std = [s[0] for s in scores_std]
    recall_scores = [s[1] for s in scores_mean]
    recall_scores_std = [s[1] for s in scores_std]
    specificity_scores = [s[2] for s in scores_mean]
    specificity_scores_std = [s[2] for s in scores_std]
    precision_scores = [s[3] for s in scores_mean]
    precision_scores_std = [s[3] for s in scores_std]
    f1_scores = [s[4] for s in scores_mean]
    f1_scores_std = [s[4] for s in scores_std]

    for i, name in enumerate(names):
        s += create_string_from_scores(name, roc_scores[i], roc_scores_std[i], recall_scores[i],
                                       recall_scores_std[i], specificity_scores[i], specificity_scores_std[i],
                                       precision_scores[i], precision_scores_std[i], f1_scores[i], f1_scores_std[i],
                                       conf_mats[i], tuned_params[i])

    if do_write_to_file:
        write_to_file(s, 'Performance/', filename + '.txt', 'w+')

    return roc_scores, roc_scores_std, s
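# A usage sketch, assuming the module-level import of f_factory used above is available: compare the
# reduced classifier set with the pre-tuned (Euler) hyperparameters and write the summary to disk.
def _example_classifier_comparison():
    X, y = f_factory.get_feature_matrix_and_label(
        verbose=False, use_cached_feature_matrix=True, save_as_pickle_file=True,
        reduced_features=False, use_boxcox=False)
    roc_aucs, roc_auc_stds, summary = calculate_performance_of_classifiers(
        X, y, tune_hyperparameters=False, reduced_clfs=True, create_barchart=True,
        do_write_to_file=True, pre_set=True)
    print(summary)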
def clf_performance_with_user_left_out_vs_normal(X, y, plot_auc_score_per_user=True, reduced_features=False,
                                                 reduced_classifiers=True, pre_set=True):
    """ Plots a barchart with the mean roc_auc score of each classifier in two scenarios:

    1. Normal cross-validation to get the roc_auc (part of a user's logfile can thus end up in the training set
       AND in the test set. This can influence the performance on the test set, as the model has already seen
       part of this user's data/behavior in the training set)

    2. For the training data, use all but one user, then predict the score on the one user that was NOT used
       in the training phase!

    :param X: Feature matrix
    :param y: labels
    :param plot_auc_score_per_user: Whether or not we should create a plot for each user left out with the
                                    auc_score of each classifier when using LeaveOneGroupOut cross validation
    :param reduced_features: Whether we should use all features or do feature selection first
    :param reduced_classifiers: Only use reduced classifiers (see classifiers.py)
    :param pre_set: Some classifiers have pre-tuned parameters (tuned on Euler). Take those instead of tuning

    Folder: Report/
    Plot name: clf_performance_with_user_left_out_vs_normal.pdf
    """
    if reduced_classifiers:
        clf_names = classifiers.reduced_names
    else:
        clf_names = classifiers.names

    if pre_set:
        clf_list = [classifiers.get_cclassifier_with_name(name, X, y).tuned_clf for name in clf_names]
    else:
        clf_list = [classifiers.get_cclassifier_with_name(name, X, y).clf for name in clf_names]

    # Get scores for scenario 1 (normal crossvalidation)
    print('\n***** Scenario 1 (normal crossvalidation) *****\n')
    auc_scores_scenario_1, auc_stds_scenario_1, s = model_factory.calculate_performance_of_classifiers(
        X, y, tune_hyperparameters=True, reduced_clfs=reduced_classifiers, create_curves=False,
        do_write_to_file=False, pre_set=True)

    # Get scores for scenario 2 (leave one user out in the training phase)
    print('\n***** Scenario 2 (Leave one user out in training phase) ***** \n')

    auc_scores_scenario_2 = []
    auc_stds_scenario_2 = []
    for name, classifier in zip(clf_names, clf_list):
        print('Calculating performance of %s with doing LeaveOneGroupOut ...' % name)

        # If the Naive Bayes classifier is used, apply a Boxcox transformation since its features must be
        # gaussian distributed
        if name == 'Naive Bayes':
            feature_selection = 'selected' if reduced_features else 'all'
            X_nb, y_nb = f_factory.get_feature_matrix_and_label(
                verbose=False,
                use_cached_feature_matrix=feature_selection,
                save_as_pickle_file=True,
                use_boxcox=True,
                reduced_features=False)
            classifier.fit(X_nb, y_nb)
            auc_mean, auc_std = _apply_cv_per_user_model(classifier, name, X_nb, y_nb, plot_auc_score_per_user)
        else:
            classifier.fit(X, y)
            auc_mean, auc_std = _apply_cv_per_user_model(classifier, name, X, y, plot_auc_score_per_user)

        auc_scores_scenario_2.append(auc_mean)
        auc_stds_scenario_2.append(auc_std)

    _plot_scores_normal_cv_vs_leaveone_group_out_cv(clf_names, auc_scores_scenario_1, auc_stds_scenario_1,
                                                    auc_scores_scenario_2, auc_stds_scenario_2)
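# The helper _apply_cv_per_user_model() is defined elsewhere in this module; the self-contained sketch
# below illustrates the leave-one-user-out idea behind scenario 2 with sklearn's LeaveOneGroupOut on
# synthetic data: each "user" (group) is held out entirely, so no part of that user's data can leak
# into the training set.
def _example_leave_one_user_out():
    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import roc_auc_score
    from sklearn.model_selection import LeaveOneGroupOut

    X, y = make_classification(n_samples=600, n_features=10, random_state=0)
    groups = np.repeat(np.arange(6), 100)      # six synthetic "users" with 100 samples each

    aucs = []
    for train_index, test_index in LeaveOneGroupOut().split(X, y, groups):
        clf = RandomForestClassifier(random_state=0).fit(X[train_index], y[train_index])
        aucs.append(roc_auc_score(y[test_index], clf.predict_proba(X[test_index])[:, 1]))

    print('Mean roc_auc with one user left out: %0.2f (+/- %0.2f)' % (np.mean(aucs), np.std(aucs)))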