Example #1
def run_epoch_weight_init(X_train, X_test, y_train, y_test, layers,
                          epochs, weight_inits,
                          figure_folder="../fig",
                          try_get_pickle=True,
                          **kwargs):
    """Compares two weight inits."""
    param_dict = {"weight_inits": weight_inits}
    param_dict.update(kwargs)
    print_parameters(**param_dict)

    # Loop over all weight initializations
    pickle_fname = "mlp_epoch_weight_inits_results.pkl"

    if os.path.isfile(pickle_fname) and try_get_pickle:
        data = load_pickle(pickle_fname)
    else:
        data = {wi: {} for wi in weight_inits}
        for i, wi in enumerate(weight_inits):
            print("Weight init: {}".format(wi))
            res_ = nn_core(X_train, X_test, y_train, y_test, layers,
                           weight_init=wi, return_weights=True,
                           epochs=epochs, **kwargs)

            data[wi] = convert_nn_core_to_dict(res_)
            data[wi]["label"] = wi.capitalize()
            data[wi]["x"] = np.arange(epochs)
            data[wi]["y"] = \
                np.array(data[wi]["epoch_evaluations"]) / X_test.shape[0]

        save_pickle(pickle_fname, data)

    figname = "mlp_epoch_weight_inits.pdf"

    plot_epoch_accuracy(data, r"Epoch", r"Accuracy",
                        figname, vmin=0.0, vmax=1.0)
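
A minimal usage sketch for the driver above, assuming synthetic placeholder data. The array
shapes, layer sizes, weight_inits names and extra keyword arguments are illustrative guesses,
not the original run configuration.

import numpy as np

rng = np.random.RandomState(42)
X_train, X_test = rng.rand(800, 1600), rng.rand(200, 1600)  # placeholder design matrices
y_train = rng.randint(0, 2, size=(800, 1))                  # placeholder targets
y_test = rng.randint(0, 2, size=(200, 1))

layers = [X_train.shape[1], 10, 1]  # input, hidden and output layer sizes (assumed)
run_epoch_weight_init(X_train, X_test, y_train, y_test, layers,
                      epochs=100,
                      weight_inits=["default", "large"],  # hypothetical init names
                      learning_rate=0.01,
                      mini_batch_size=20)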
Example #2
def run_epoch_cost_functions(X_train, X_test, y_train, y_test, layers,
                             epochs, cost_functions,
                             try_get_pickle=True,
                             figure_folder="../fig", **kwargs):
    """Compares cost functions over epochs inits: mse, log-loss"""
    param_dict = {"cost_functions": cost_functions}
    param_dict.update(kwargs)
    print_parameters(**param_dict)

    # Loop over all cost functions
    pickle_fname = "mlp_epoch_cost_functions_results.pkl"

    if os.path.isfile(pickle_fname) and try_get_pickle:
        data = load_pickle(pickle_fname)
    else:
        data = {cf: {} for cf in cost_functions}
        for i, cf in enumerate(cost_functions):
            print("Cost function: {}".format(cf))
            res_ = nn_core(X_train, X_test, y_train, y_test, layers,
                           cost_function=cf, return_weights=True,
                           epochs=epochs, **kwargs)

            data[cf] = convert_nn_core_to_dict(res_)
            data[cf]["label"] = cf.capitalize()
            data[cf]["x"] = np.arange(epochs)
            data[cf]["y"] = \
                np.array(data[cf]["epoch_evaluations"]) / X_test.shape[0]

        save_pickle(pickle_fname, data)

    figname = "mlp_epoch_cost_functions.pdf"

    plot_epoch_accuracy(data, r"Epoch", r"Accuracy",
                        figname, vmin=0.0, vmax=1.0)
Example #3
def run_epoch_activations(X_train, X_test, y_train, y_test, layers,
                          epochs, activations,
                          try_get_pickle=True,
                          figure_folder="../fig", **kwargs):
    """Compares different layer activations."""
    param_dict = {"activations": activations}
    param_dict.update(kwargs)
    print_parameters(**param_dict)

    # Loop over all activation functions
    pickle_fname = "mlp_epoch_activations_{}_results.pkl".format(kwargs["cost_function"])

    if os.path.isfile(pickle_fname) and try_get_pickle:
        data = load_pickle(pickle_fname)
    else:
        data = {act: {} for act in activations}
        for i, act in enumerate(activations):
            print("Activation: {}".format(act))
            res_ = nn_core(X_train, X_test, y_train, y_test, layers,
                           activation=act, return_weights=True,
                           epochs=epochs, **kwargs)

            data[act] = convert_nn_core_to_dict(res_)
            data[act]["label"] = act.capitalize()
            data[act]["x"] = np.arange(epochs)
            data[act]["y"] = \
                np.array(data[act]["epoch_evaluations"]) / X_test.shape[0]

        save_pickle(pickle_fname, data)

    figname = "mlp_epoch_activations_{}_optimal_20mb_10neurons.pdf".format(kwargs["cost_function"])

    plot_epoch_accuracy(data, r"Epoch", r"Accuracy",
                        figname, vmin=0.0, vmax=1.0)
Example #4
def run_lambda_learning_rate_comparison(X_train,
                                        X_test,
                                        y_train,
                                        y_test,
                                        lmbdas=None,
                                        learning_rates=None,
                                        try_get_pickle=True,
                                        figure_folder="../fig",
                                        **kwargs):
    """Compares accuracy over lambda values for different learning rates."""

    param_dict = {
        "lmbdas": lmbdas,
        "sklearn": True,
        "figure_folder": figure_folder,
        "learning_rates": learning_rates
    }
    param_dict.update(kwargs)
    print_parameters(**param_dict)

    test_accuracy_values = []
    train_accuracy_values = []

    for lr in learning_rates:
        print("Learning rate: ", lr)

        pickle_fname = ("lambda_lr_accuracy_penalty{}_actsigmoid"
                        "_solver{}_lr{}_mom0.0_tol1e-06."
                        "pkl".format(kwargs["solver"], kwargs["penalty"],
                                     str(lr)))

        if os.path.isfile(pickle_fname) and try_get_pickle:
            res_ = load_pickle(pickle_fname)
        else:
            res_ = logreg_core(X_train,
                               X_test,
                               y_train,
                               y_test,
                               use_sk_learn=False,
                               lmbdas=lmbdas,
                               learning_rate=lr,
                               store_pickle=True,
                               pickle_fname=pickle_fname,
                               **kwargs)

        train_accuracy, test_accuracy, critical_accuracy = res_

        test_accuracy_values.append(test_accuracy)
        train_accuracy_values.append(train_accuracy)

    lr_labels = [r"Optimized"]
    lr_labels += [r"$\eta={0:.2f}$".format(lr) for lr in learning_rates[1:]]
    plot_accuracy_scores(lmbdas, train_accuracy_values, test_accuracy_values,
                         lr_labels, "accuracy_learning_rate_scores",
                         r"$\lambda$", r"Accuracy")
Example #5
def run_lambda_solver(X_train,
                      X_test,
                      y_train,
                      y_test,
                      lmbdas=None,
                      solvers=None,
                      try_get_pickle=True,
                      figure_folder="../fig",
                      **kwargs):
    """Compares accuracy over lambda values for different solvers."""

    param_dict = {
        "lmbdas": lmbdas,
        "sklearn": True,
        "figure_folder": figure_folder,
        "solver": solvers
    }
    param_dict.update(kwargs)
    print_parameters(**param_dict)

    test_accuracy_values = []
    train_accuracy_values = []

    for solver in solvers:
        print("Solver:", solver)

        pickle_fname = ("lambda_solver_accuracy_penalty{}_actsigmoid"
                        "_solver{}_lr{}_mom0.0_tol1e-06."
                        "pkl".format(kwargs["penalty"], solver,
                                     kwargs["learning_rate"]))

        if os.path.isfile(pickle_fname) and try_get_pickle:
            res_ = load_pickle(pickle_fname)
        else:
            res_ = logreg_core(X_train,
                               X_test,
                               y_train,
                               y_test,
                               use_sk_learn=False,
                               lmbdas=lmbdas,
                               solver=solver,
                               store_pickle=True,
                               pickle_fname=pickle_fname,
                               **kwargs)

        train_accuracy, test_accuracy, critical_accuracy = res_

        test_accuracy_values.append(test_accuracy)
        train_accuracy_values.append(train_accuracy)

    plot_accuracy_scores(
        lmbdas, train_accuracy_values, test_accuracy_values,
        [r"Optimized Gradient Descent", r"Conjugate Gradient", r"Newtons-CG"],
        "accuracy_solver_scores", r"$\lambda$", r"Accuracy")
Example #6
def run_lambda_penalty(X_train,
                       X_test,
                       y_train,
                       y_test,
                       lmbdas=None,
                       penalties=None,
                       try_get_pickle=True,
                       figure_folder="../fig",
                       **kwargs):
    """Compares accuracy over lambda values for different penalties."""

    param_dict = {
        "lmbdas": lmbdas,
        "sklearn": True,
        "figure_folder": figure_folder,
        "penalties": penalties
    }
    param_dict.update(kwargs)
    print_parameters(**param_dict)

    test_accuracy_values = []
    train_accuracy_values = []

    for penalty in penalties:
        print("Regularisation:", penalty)

        pickle_fname = ("lambda_penalty_accuracy_penalty{}_actsigmoid"
                        "_solver{}_lrinverse_mom0.0_tol1e-06."
                        "pkl".format(penalty, kwargs["solver"]))

        if os.path.isfile(pickle_fname) and try_get_pickle:
            res_ = load_pickle(pickle_fname)
        else:
            res_ = logreg_core(X_train,
                               X_test,
                               y_train,
                               y_test,
                               lmbdas=lmbdas,
                               penalty=penalty,
                               store_pickle=True,
                               pickle_fname=pickle_fname,
                               **kwargs)

        train_accuracy, test_accuracy, critical_accuracy = res_

        test_accuracy_values.append(test_accuracy)
        train_accuracy_values.append(train_accuracy)

    plot_accuracy_scores(lmbdas, train_accuracy_values, test_accuracy_values,
                         [r"$L^1$", r"$L^2$", r"Elastic net"],
                         "accuracy_regularisation_scores", r"$\lambda$",
                         r"Accuracy")
Example #7
def run_lambda_momentum(X_train,
                        X_test,
                        y_train,
                        y_test,
                        lmbdas=None,
                        momentums=None,
                        try_get_pickle=True,
                        figure_folder="../fig",
                        **kwargs):
    """Compares accuracy over lambda values for different momentum values."""

    param_dict = {
        "lmbdas": lmbdas,
        "sklearn": True,
        "figure_folder": figure_folder,
        "momentums": momentums
    }
    param_dict.update(kwargs)
    print_parameters(**param_dict)

    test_accuracy_values = []
    train_accuracy_values = []

    for momentum in momentums:
        print("Momentum: ", momentum)
        pickle_fname = ("lambda_mom_accuracy_penalty{}_actsigmoid"
                        "_solverlr-gd_lrinverse_mom{}_tol1e-06."
                        "pkl".format(kwargs["penalty"], str(momentum)))

        if os.path.isfile(pickle_fname) and try_get_pickle:
            res_ = load_pickle(pickle_fname)
        else:
            res_ = logreg_core(X_train,
                               X_test,
                               y_train,
                               y_test,
                               use_sk_learn=False,
                               lmbdas=lmbdas,
                               momentum=momentum,
                               store_pickle=True,
                               pickle_fname=pickle_fname,
                               **kwargs)

        train_accuracy, test_accuracy, critical_accuracy = res_

        test_accuracy_values.append(test_accuracy)
        train_accuracy_values.append(train_accuracy)

    plot_accuracy_scores(lmbdas, train_accuracy_values, test_accuracy_values,
                         [r"$\gamma={0:.1e}$".format(m) for m in momentums],
                         "accuracy_momentum_scores", r"$\lambda$", r"Accuracy")
Example #8
def run_lambda_mini_batches(X_train, X_test, y_train, y_test, layers,
                            lmbdas=None, mini_batch_sizes=None,
                            try_get_pickle=True,
                            figure_folder="../fig", **kwargs):
    """Compares mini batch sizes for lambda values."""

    param_dict = {"lmbdas": lmbdas, "mini_batch_sizes": mini_batch_sizes}
    param_dict.update(kwargs)
    print_parameters(**param_dict)

    # Double for-loop for all results
    pickle_fname = "mlp_lambda_mini_batch_sizes_results.pkl"

    if os.path.isfile(pickle_fname) and try_get_pickle:
        data = load_pickle(pickle_fname)
    else:
        data = {lmbda: {mb: {} for mb in mini_batch_sizes} for lmbda in lmbdas}
        for i, lmbda in enumerate(lmbdas):
            for j, mb in enumerate(mini_batch_sizes):
                print("Lambda: {} MB: {}".format(lmbda, mb))
                res_ = nn_core(X_train, X_test, y_train, y_test, layers,
                               lmbda=lmbda,
                               mini_batch_size=mb,
                               return_weights=True,
                               **kwargs)
                data[lmbda][mb] = res_

        save_pickle(pickle_fname, data)

    # Maps values to matrix
    plot_data = np.empty((len(lmbdas), len(mini_batch_sizes)))

    # Populates plot data
    for i, lmbda in enumerate(lmbdas):
        for j, mb in enumerate(mini_batch_sizes):
            plot_data[i, j] = data[lmbda][mb][1]

    heatmap_plotter(lmbdas, mini_batch_sizes, plot_data.T,
                    "mlp_lambda_mini_batch_size.pdf",
                    tick_param_fs=8, label_fs=10,
                    vmin=0.0, vmax=1.0, xlabel=r"$\lambda$",
                    ylabel=r"$N_\mathrm{MB}$",
                    cbartitle=r"Accuracy",
                    x_tick_mode="exp", y_tick_mode="int")
Example #9
def run_neurons_training_size(layers, neurons, training_sizes,
                              data_size, data_path, try_get_pickle=True,
                              figure_folder="../fig", **kwargs):
    """Compares different neurons for different training sizese."""
    param_dict = {"neurons": neurons, "training_sizes": training_sizes}
    param_dict.update(kwargs)
    print_parameters(**param_dict)

    # Double for-loop for all results
    pickle_fname = "mlp_neurons_training_size_results.pkl"

    if os.path.isfile(pickle_fname) and try_get_pickle:
        data = load_pickle(pickle_fname)
    else:
        data = {n: {ts: {} for ts in training_sizes} for n in neurons}
        for i, neuron in enumerate(neurons):
            for j, ts in enumerate(training_sizes):
                inlay, outlay, X_train, X_test, y_train, y_test = \
                    retrieve_2d_data_formatted(data_path, data_size, ts)

                print("Neurons: {} Training size: {}".format(neuron, ts))
                layers[1] = neuron
                print(X_train.shape, X_test.shape)
                res_ = nn_core(X_train, X_test, y_train, y_test, layers,
                               return_weights=True, **kwargs)
                data[neuron][ts] = res_

        save_pickle(pickle_fname, data)

    # Maps values to matrix
    plot_data = np.empty((len(neurons), len(training_sizes)))

    # Populates plot data
    for i, n in enumerate(neurons):
        for j, ts in enumerate(training_sizes):
            plot_data[i, j] = data[n][ts][1]

    heatmap_plotter(neurons, training_sizes, plot_data.T,
                    "mlp_neurons_training_size.pdf",
                    tick_param_fs=8, label_fs=10,
                    xlabel=r"Neurons", ylabel=r"Training size",
                    cbartitle=r"Accuracy",  vmin=0.0, vmax=1.0,
                    x_tick_mode="int", y_tick_mode="float")
Example #10
def run_neurons_eta(X_train, X_test, y_train, y_test, layers,
                    neurons=None, learning_rates=None,
                    try_get_pickle=True,
                    figure_folder="../fig", **kwargs):
    """Compares different neuron sizes for different etas."""

    param_dict = {"neurons": neurons, "learning_rates": learning_rates}
    param_dict.update(kwargs)
    print_parameters(**param_dict)

    # Double for-loop for all results
    pickle_fname = "mlp_neurons_eta_results.pkl"

    if os.path.isfile(pickle_fname) and try_get_pickle:
        data = load_pickle(pickle_fname)
    else:
        data = {n: {eta: {} for eta in learning_rates} for n in neurons}
        for i, neuron in enumerate(neurons):
            for j, eta in enumerate(learning_rates):
                print("Neuron: {} Eta: {}".format(neuron, eta))
                layers[1] = neuron
                res_ = nn_core(X_train, X_test, y_train, y_test, layers,
                               return_weights=True,
                               learning_rate=eta, **kwargs)
                data[neuron][eta] = res_

        save_pickle(pickle_fname, data)

    # Maps values to matrix
    plot_data = np.empty((len(neurons), len(learning_rates)))

    # Populates plot data
    for i, n in enumerate(neurons):
        for j, eta in enumerate(learning_rates):
            plot_data[i, j] = data[n][eta][1]

    heatmap_plotter(neurons, learning_rates, plot_data.T,
                    "mlp_neurons_eta.pdf",
                    tick_param_fs=8, label_fs=10,
                    xlabel=r"Neurons", ylabel=r"$\eta$",
                    cbartitle=r"Accuracy", vmin=0.0, vmax=1.0,
                    x_tick_mode="int", y_tick_mode="exp")
Example #11
def run_lambda_neurons(X_train, X_test, y_train, y_test, layers,
                       lmbdas=None, neurons=None,
                       try_get_pickle=True,
                       figure_folder="../fig", **kwargs):
    """Compares different lambdas for different neuron sizes."""

    param_dict = {"lmbdas": lmbdas, "neurons": neurons}
    param_dict.update(kwargs)
    print_parameters(**param_dict)

    # Double for-loop for all results
    pickle_fname = "mlp_lambda_neurons_results.pkl"

    if os.path.isfile(pickle_fname) and try_get_pickle:
        data = load_pickle(pickle_fname)
    else:
        data = {lmbda: {neuron: {} for neuron in neurons} for lmbda in lmbdas}
        for i, lmbda in enumerate(lmbdas):
            for j, neuron in enumerate(neurons):
                print("Lambda: {} Neuron: {}".format(lmbda, neuron))
                layers[1] = neuron
                res_ = nn_core(X_train, X_test, y_train, y_test, layers,
                               lmbda=lmbda, return_weights=True,
                               **kwargs)
                data[lmbda][neuron] = res_

        save_pickle(pickle_fname, data)

    # Maps values to matrix
    plot_data = np.empty((len(lmbdas), len(neurons)))

    # Populates plot data
    for i, lmbda in enumerate(lmbdas):
        for j, n in enumerate(neurons):
            plot_data[i, j] = data[lmbda][n][1]

    heatmap_plotter(lmbdas, neurons, plot_data.T, "mlp_lambda_neurons.pdf",
                    tick_param_fs=8, label_fs=10,
                    xlabel=r"$\lambda$",
                    ylabel=r"Neurons", vmin=0.0, vmax=1.0,
                    cbartitle=r"Accuracy", x_tick_mode="exp",
                    y_tick_mode="int")
Example #12
def run_lambda_eta(X_train, X_test, y_train, y_test, layers,
                   lmbdas=None, learning_rates=None, try_get_pickle=True,
                   figure_folder="../fig", **kwargs):
    """Runs NN for different lambdas and etas."""
    param_dict = {"lmbdas": lmbdas, "learning_rates": learning_rates}
    param_dict.update(kwargs)
    print_parameters(**param_dict)

    # Double for-loop for all results
    pickle_fname = "mlp_lambda_eta_results.pkl"

    if os.path.isfile(pickle_fname) and try_get_pickle:
        data = load_pickle(pickle_fname)
    else:
        data = {lmbda: {eta: {} for eta in learning_rates} for lmbda in lmbdas}
        for i, lmbda in enumerate(lmbdas):
            for j, eta in enumerate(learning_rates):
                print("Lambda: {} Eta: {}".format(lmbda, eta))
                res_ = nn_core(X_train, X_test, y_train, y_test, layers,
                               lmbda=lmbda, return_weights=True,
                               learning_rate=eta, **kwargs)
                data[lmbda][eta] = res_

        save_pickle(pickle_fname, data)

    # Maps values to matrix
    plot_data = np.empty((len(lmbdas), len(learning_rates)))

    # Populates plot data
    for i, lmbda in enumerate(lmbdas):
        for j, eta in enumerate(learning_rates):
            plot_data[i, j] = data[lmbda][eta][1]

    heatmap_plotter(lmbdas, learning_rates, plot_data.T, "mlp_lambda_eta.pdf",
                    tick_param_fs=8, label_fs=10,
                    xlabel=r"$\lambda$", ylabel=r"$\eta$",
                    cbartitle=r"Accuracy", vmin=0.0, vmax=1.0,
                    x_tick_mode="exp", y_tick_mode="exp")
Example #13
def task1b_bias_variance_analysis(pickle_fname, figure_folder="../fig"):
    """Plot different bias/variance values"""
    lambda_values = np.logspace(-4, 4, 9)
    data = load_pickle(pickle_fname)

    def select_value(input_list, data_to_select, with_train=False):
        """Small function moving selected values to list."""
        if with_train:
            return {
                "train": [e["train"][data_to_select] for e in input_list],
                "test": [e["test"][data_to_select] for e in input_list]
            }
        else:
            return [e[data_to_select] for e in input_list]

    # OLS values
    ols_r2 = {
        "train": data["ols"]["train"]["r2"],
        "test": data["ols"]["test"]["r2"]
    }
    ols_mse = {
        "train": data["ols"]["train"]["mse"],
        "test": data["ols"]["test"]["mse"]
    }
    ols_bias = {
        "train": data["ols"]["train"]["bias"],
        "test": data["ols"]["test"]["bias"]
    }
    # Bootstrap OLS
    ols_bs_r2 = data["ols_bs"]["r2"]
    ols_bs_mse = data["ols_bs"]["mse"]
    ols_bs_bias = data["ols_bs"]["bias"]
    ols_bs_var = data["ols_bs"]["var"]
    # k-fold CV OLS
    ols_cv_r2 = data["ols_cv"]["r2"]
    ols_cv_mse = data["ols_cv"]["mse"]
    ols_cv_bias = data["ols_cv"]["bias"]
    ols_cv_var = data["ols_cv"]["var"]
    heatmap_data = data["heatmap_data"]
    L_system_size = data["L_system_size"]

    # Plots the heatmap data
    for i, lmbda in enumerate(lambda_values):
        J_leastsq, J_ridge, J_lasso = heatmap_data[i]

        plot_heatmap(J_leastsq, J_ridge, J_lasso, L_system_size, lmbda,
                     figure_folder,
                     "regression_ising_1d_heatmap_lambda{}.pdf".format(lmbda))

    # General Ridge values
    ridge_r2 = select_value(data["ridge"], "r2", with_train=True)
    ridge_mse = select_value(data["ridge"], "mse", with_train=True)
    ridge_bias = select_value(data["ridge"], "bias", with_train=True)
    # Bootstrap Ridge values
    ridge_bs_mse = select_value(data["ridge_bs"], "mse")
    ridge_bs_bias = select_value(data["ridge_bs"], "bias")
    ridge_bs_var = select_value(data["ridge_bs"], "var")
    # k-fold CV Ridge values
    ridge_cv_mse = select_value(data["ridge_cv"], "mse")
    ridge_cv_bias = select_value(data["ridge_cv"], "bias")
    ridge_cv_var = select_value(data["ridge_cv"], "var")

    # General Lasso values
    lasso_r2 = select_value(data["lasso"], "r2", with_train=True)
    lasso_mse = select_value(data["lasso"], "mse", with_train=True)
    lasso_bias = select_value(data["lasso"], "bias", with_train=True)
    # Bootstrap Lasso
    lasso_bs_mse = select_value(data["lasso_bs"], "mse")
    lasso_bs_bias = select_value(data["lasso_bs"], "bias")
    lasso_bs_var = select_value(data["lasso_bs"], "var")
    # k-fold CV Lasso
    lasso_cv_mse = select_value(data["lasso_cv"], "mse")
    lasso_cv_bias = select_value(data["lasso_cv"], "bias")
    lasso_cv_var = select_value(data["lasso_cv"], "var")

    plot_dual_values(lambda_values,
                     ridge_r2["test"],
                     lambda_values,
                     lasso_r2["test"],
                     lambda_values,
                     ols_r2["test"],
                     r"Ridge",
                     r"Lasso",
                     r"OLS",
                     "ols_ridge_lasso_lambda_r2",
                     r"$\lambda$",
                     r"$R^2$",
                     figure_folder=figure_folder)
    plot_dual_values(lambda_values,
                     ridge_mse["test"],
                     lambda_values,
                     lasso_mse["test"],
                     lambda_values,
                     ols_mse["test"],
                     r"Ridge",
                     r"Lasso",
                     r"OLS",
                     "ols_ridge_lasso_lambda_mse",
                     r"$\lambda$",
                     r"$\mathrm{MSE}$",
                     figure_folder=figure_folder)
    plot_dual_values(lambda_values,
                     ridge_bias["test"],
                     lambda_values,
                     lasso_bias["test"],
                     lambda_values,
                     ols_bias["test"],
                     r"Ridge",
                     r"Lasso",
                     r"OLS",
                     "ols_ridge_lasso_lambda_bias",
                     r"$\lambda$",
                     r"$\mathrm{Bias}$",
                     figure_folder=figure_folder)

    # Plots Bootstrap analysis
    # plot_dual_values(lambda_values, ridge_bs_r2,
    #                  lambda_values, lasso_bs_r2,
    #                  lambda_values, ols_bs_r2,
    #                  r"Ridge", r"Lasso", r"OLS",
    #                  "ols_ridge_lasso_lambda_bs_r2",
    #                  r"$\lambda$", r"$R^2$", figure_folder=figure_folder)
    plot_dual_values(lambda_values,
                     ridge_bs_mse,
                     lambda_values,
                     lasso_bs_mse,
                     lambda_values,
                     ols_bs_mse,
                     r"Ridge",
                     r"Lasso",
                     r"OLS",
                     "ols_ridge_lasso_lambda_bs_mse",
                     r"$\lambda$",
                     r"$\mathrm{MSE}$",
                     figure_folder=figure_folder)
    plot_dual_values(lambda_values,
                     ridge_bs_bias,
                     lambda_values,
                     lasso_bs_bias,
                     lambda_values,
                     ols_bs_bias,
                     r"Ridge",
                     r"Lasso",
                     r"OLS",
                     "ols_ridge_lasso_lambda_bs_bias",
                     r"$\lambda$",
                     r"$\mathrm{Bias}$",
                     figure_folder=figure_folder)
    plot_dual_values(lambda_values,
                     ridge_bs_var,
                     lambda_values,
                     lasso_bs_var,
                     lambda_values,
                     ols_bs_var,
                     r"Ridge",
                     r"Lasso",
                     r"OLS",
                     "ols_ridge_lasso_lambda_bs_var",
                     r"$\lambda$",
                     r"$R^2$",
                     figure_folder=figure_folder)

    # Plots Cross validation analysis
    # plot_dual_values(lambda_values, ridge_cv_r2,
    #                  lambda_values, lasso_cv_r2,
    #                  lambda_values, ols_cv_r2,
    #                  r"Ridge", r"Lasso", r"OLS",
    #                  "ols_ridge_lasso_lambda_cv_r2",
    #                  r"$\lambda$", r"$R^2$", figure_folder=figure_folder)
    plot_dual_values(lambda_values,
                     ridge_cv_mse,
                     lambda_values,
                     lasso_cv_mse,
                     lambda_values,
                     ols_cv_mse,
                     r"Ridge",
                     r"Lasso",
                     r"OLS",
                     "ols_ridge_lasso_lambda_cv_mse",
                     r"$\lambda$",
                     r"$\mathrm{MSE}$",
                     figure_folder=figure_folder)
    plot_dual_values(lambda_values,
                     ridge_cv_bias,
                     lambda_values,
                     lasso_cv_bias,
                     lambda_values,
                     ols_cv_bias,
                     r"Ridge",
                     r"Lasso",
                     r"OLS",
                     "ols_ridge_lasso_lambda_cv_bias",
                     r"$\lambda$",
                     r"$\mathrm{Bias}$",
                     figure_folder=figure_folder)
    plot_dual_values(lambda_values,
                     ridge_cv_var,
                     lambda_values,
                     lasso_cv_var,
                     lambda_values,
                     ols_cv_var,
                     r"Ridge",
                     r"Lasso",
                     r"OLS",
                     "ols_ridge_lasso_lambda_cv_var",
                     r"$\lambda$",
                     r"$R^2$",
                     figure_folder=figure_folder)

    # Plots Bias-Variance for OLS
    plot_bias_variance(lambda_values,
                       ols_bs_bias,
                       ols_bs_var,
                       ols_bs_mse,
                       "ols_bs_bias_variance_analysis",
                       figure_folder,
                       x_hline=True)
    plot_bias_variance(lambda_values,
                       ols_cv_bias,
                       ols_cv_var,
                       ols_cv_mse,
                       "ols_cv_bias_variance_analysis",
                       figure_folder,
                       x_hline=True)

    # Plots Bias-Variance for Ridge
    plot_bias_variance(lambda_values, ridge_bs_bias, ridge_bs_var,
                       ridge_bs_mse, "ridge_bs_bias_variance_analysis",
                       figure_folder)
    plot_bias_variance(lambda_values, ridge_cv_bias, ridge_cv_var,
                       ridge_cv_mse, "ridge_cv_bias_variance_analysis",
                       figure_folder)

    # Plots Bias-Variance for Lasso
    plot_bias_variance(lambda_values, lasso_bs_bias, lasso_bs_var,
                       lasso_bs_mse, "lasso_bs_bias_variance_analysis",
                       figure_folder)
    plot_bias_variance(lambda_values, lasso_cv_bias, lasso_cv_var,
                       lasso_cv_mse, "lasso_cv_bias_variance_analysis",
                       figure_folder)

    # Plots R2 scores
    plot_all_r2(lambda_values, ols_r2["test"], ols_r2["train"],
                ridge_r2["test"], ridge_r2["train"], lasso_r2["test"],
                lasso_r2["train"], "r2_ols_ridge_lasso", figure_folder)
Example #14
def run_sk_comparison(X_train,
                      X_test,
                      y_train,
                      y_test,
                      lmbdas=None,
                      penalty=None,
                      activation=None,
                      solver=None,
                      learning_rate=None,
                      momentum=None,
                      mini_batch_size=None,
                      max_iter=None,
                      tolerance=None,
                      verbose=False,
                      try_get_pickle=True,
                      figure_folder="../fig"):
    """Runs a comparison between sk learn and our method."""

    param_dict = {
        "lmbdas": lmbdas,
        "sklearn": True,
        "penalty": penalty,
        "activation": activation,
        "solver": solver,
        "learning_rate": learning_rate,
        "momentum": momentum,
        "mini_batch_size": mini_batch_size,
        "max_iter": max_iter,
        "tolerance": tolerance
    }
    print_parameters(**param_dict)

    pickle_fname = ("sk_comparison_accuracy_sklearn_penalty{}_actsigmoid"
                    "_solverlr-gd_lrinverse_mom0.0_tol1e-06"
                    ".pkl".format(penalty))

    if os.path.isfile(pickle_fname) and try_get_pickle:
        res_ = load_pickle(pickle_fname)
    else:
        res_ = logreg_core(X_train,
                           X_test,
                           y_train,
                           y_test,
                           use_sk_learn=True,
                           lmbdas=lmbdas,
                           penalty=penalty,
                           activation=activation,
                           solver=solver,
                           learning_rate=learning_rate,
                           momentum=momentum,
                           mini_batch_size=mini_batch_size,
                           max_iter=max_iter,
                           tolerance=tolerance,
                           store_pickle=True,
                           pickle_fname=pickle_fname,
                           verbose=verbose)

    # Retrieves results
    train_accuracy, test_accuracy, critical_accuracy, \
        train_accuracy_SK, test_accuracy_SK, critical_accuracy_SK, \
        train_accuracy_SGD, test_accuracy_SGD, critical_accuracy_SGD = res_

    print('Mean accuracy: train, test')

    print("HomeMade: {0:0.4f} +/- {1:0.2f}, {2:0.4f} +/- {3:0.2f}".format(
        np.mean(train_accuracy), np.std(train_accuracy),
        np.mean(test_accuracy), np.std(test_accuracy)))

    print("SK: {0:0.4f} +/- {1:0.2f}, {2:0.4f} +/- {3:0.2f}".format(
        np.mean(train_accuracy_SK), np.std(train_accuracy_SK),
        np.mean(test_accuracy_SK), np.std(test_accuracy_SK)))

    print("SGD: {0:0.4f} +/- {1:0.2f}, {2:0.4f} +/- {3:0.2f}".format(
        np.mean(train_accuracy_SGD), np.std(train_accuracy_SGD),
        np.mean(test_accuracy_SGD), np.std(test_accuracy_SGD)))

    plot_accuracy_comparison(lmbdas, train_accuracy, test_accuracy,
                             train_accuracy_SK, test_accuracy_SK,
                             train_accuracy_SGD, test_accuracy_SGD,
                             "logistic_accuracy_sklearn_comparison",
                             figure_folder)
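
A hedged usage sketch for the comparison above. The activation, solver, learning rate, momentum
and tolerance values mirror the hard-coded pickle filename ("actsigmoid", "solverlr-gd",
"lrinverse", "mom0.0", "tol1e-06"); the data, penalty, mini-batch size and max_iter values are
placeholders.

import numpy as np

rng = np.random.RandomState(1)
X_train, X_test = rng.rand(800, 1600), rng.rand(200, 1600)  # placeholder design matrices
y_train = rng.randint(0, 2, size=800)                       # placeholder labels
y_test = rng.randint(0, 2, size=200)

run_sk_comparison(X_train, X_test, y_train, y_test,
                  lmbdas=np.logspace(-4, 4, 9),
                  penalty="l2",            # placeholder
                  activation="sigmoid",
                  solver="lr-gd",
                  learning_rate="inverse",
                  momentum=0.0,
                  mini_batch_size=20,      # placeholder
                  max_iter=100,            # placeholder
                  tolerance=1e-6)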