示例#1
0
def compare_on_adults(c=5, eps=1e-3, epochs=1):
    """Run the SGD / SDCA comparison on a subsample of the adults data set.

    The first 500 samples returned by ``load_adults_dataset`` are
    normalized with a ``Normalizer`` built from that same subsample and
    then handed to ``plot_learning`` together with freshly built
    optimizers.

    Args:
        c: value passed as ``c`` to both ``LogisticSGD`` and ``LogisticSDCA``.
        eps: value passed as ``eps`` to ``LogisticSGD``.
        epochs: value passed as ``nb_epochs`` to ``plot_learning``.
    """
    sample_count = 500

    features, labels = load_adults_dataset()
    features, labels = features[:sample_count], labels[:sample_count]

    # Normalization statistics come from the subsample itself.
    features = Normalizer(features).normalize(features)

    # The identity projection is used here; a gaussian projection
    # (projections.build_gaussian_projection(x, sampling_rate=0.01)) was
    # tried previously and is kept as a reference only.
    chosen_projection = projections.identity_projection

    sgd_optimizer = LogisticSGD(c=c, eps=eps)
    sdca_optimizer = LogisticSDCA(c=c)

    learning_options = dict(
        chosen_sgd=sgd_optimizer,
        chosen_sdca=sdca_optimizer,
        nb_epochs=epochs,
        comp_sgd=True,
        comp_sdca=True,
        is_malaptool=False,
        projection=chosen_projection,
    )
    plot_learning(features, labels, **learning_options)
def plot_gausian_arr():
    """Train SGD and SDCA on gaussian-projected Arrhythmia data and plot.

    Produces three matplotlib figures: the SGD loss history, the SDCA loss
    history, and the test accuracy of both optimizers computed by
    ``get_hist_accuracy`` from the recorded weight histories.

    Note: the projection base and the normalizer are both built from the
    full data set, i.e. before the train/test split is made.
    """
    dataset_label = "Arrhythmia"
    epoch_count = 40
    h_value = 10
    base_prop = 0.1

    features, labels = import_data_arrhythmia()

    # Gaussian projection onto a base built from the data itself.
    proj_base = create_base(features, prop=base_prop)
    features = gaussian_proj(features, proj_base, h_value)

    features = Normalizer(features).normalize(features)

    train_x, test_x, train_y, test_y = train_test_split(features,
                                                        labels,
                                                        test_size=0.15)

    sgd_model = LogisticRegression(optimizer=LogisticSGD(c=10**3, eps=10**-6))
    sdca_model = LogisticRegression(optimizer=LogisticSDCA(c=10**-1))

    # fit(..., save_hist=True) is unpacked as (weight history, loss history).
    fit_options = dict(epochs=epoch_count, save_hist=True)

    sgd_weights, sgd_losses = sgd_model.fit(train_x, train_y, **fit_options)
    sgd_weights = np.array(sgd_weights)

    sdca_weights, sdca_losses = sdca_model.fit(train_x, train_y,
                                               **fit_options)
    sdca_weights = np.array(sdca_weights)

    # One loss figure per optimizer.
    loss_figures = (
        ("SGD learning loss vs. iteration\non data set ", sgd_losses),
        ("SDCA learning loss vs. iteration\non data set ", sdca_losses),
    )
    for title_prefix, losses in loss_figures:
        plt.figure()
        plt.plot(losses)
        plt.title(title_prefix + dataset_label)
        plt.xlabel("Iteration")
        plt.ylabel("Loss")

    sgd_accuracy = get_hist_accuracy(test_x, test_y, sgd_weights, sgd_model)
    sdca_accuracy = get_hist_accuracy(test_x, test_y, sdca_weights,
                                      sdca_model)

    # Both accuracy curves share a single figure.
    plt.figure()
    for curve, colour, legend_label in ((sgd_accuracy, 'b', "SGD"),
                                        (sdca_accuracy, 'g', "SDCA")):
        plt.plot(curve, c=colour, label=legend_label)
    plt.title("Test accuracy vs. iteration\non data set " + dataset_label)
    plt.xlabel("Iteration")
    plt.ylabel("Accuracy")
    plt.legend()
def eval_eps(data, labels, vect_param, nb_epoch, data_name, param_c=10**1):
    """Plot SGD train/test accuracy for each candidate value of ``eps``.

    The data is split once (15% held out); for every value in
    ``vect_param`` a new ``LogisticSGD``-backed ``LogisticRegression`` is
    fitted on the training part and scored on both parts. The two accuracy
    curves are drawn on one figure with a logarithmic x axis.

    Args:
        data: samples passed to ``train_test_split``.
        labels: labels passed to ``train_test_split``.
        vect_param: values tried as ``eps``; also used as the x axis.
        nb_epoch: value passed as ``epochs`` to ``fit``.
        data_name: text appended to the figure title.
        param_c: value passed as ``c`` to every ``LogisticSGD``.
    """
    train_x, test_x, train_y, test_y = train_test_split(data,
                                                        labels,
                                                        test_size=0.15)

    train_scores, test_scores = [], []

    for eps_value in vect_param:
        model = LogisticRegression(
            optimizer=LogisticSGD(c=param_c, eps=eps_value))

        # No history is needed here, only the final fitted model.
        model.fit(train_x, train_y, epochs=nb_epoch, save_hist=False)

        train_scores.append(model.score_accuracy(train_x, train_y))
        test_scores.append(model.score_accuracy(test_x, test_y))

    plt.figure()
    for scores, line_format, legend_label in ((train_scores, 'b', "train"),
                                              (test_scores, 'r', "test")):
        plt.semilogx(vect_param, scores, line_format, label=legend_label)
    plt.title("Accuracy of SGD vs. hyperparameter epsilon \non data set " +
              data_name)
    plt.xlabel("Epsilon")
    plt.ylabel("Accuracy")
    plt.legend()
 def __init__(
     self,
     optimizer: BaseOptimizer = None,
     projection: Callable[[np.ndarray],
                          np.ndarray] = projections.identity_projection):
     """Initialize the estimator with an optimizer and a projection.

     Args:
         optimizer: optimizer stored on the instance as ``self.optimizer``.
             When omitted (``None``), a new ``LogisticSGD(2, 1e-4)`` is
             created for this instance.
         projection: callable forwarded to the parent initializer.
     """
     super().__init__(projection)
     # A default written directly in the signature is evaluated once, when
     # the function is defined, so all instances would share that single
     # optimizer object. Building it here gives each instance its own.
     if optimizer is None:
         optimizer = LogisticSGD(2, 1e-4)
     self.optimizer = optimizer
     # Placeholder weight vector: a one-element array of zeros.
     self.w = np.zeros(1)
 def setUp(self):
     """Create the estimator under test and a small two-cluster data set.

     ``self.x`` holds 20 two-dimensional points: 10 drawn around (-1, -1)
     followed by 10 drawn around (1, 1), each with unit scale.
     ``self.y`` holds the matching labels, -1 for the first cluster and
     +1 for the second. The NumPy global seed is fixed to 0 before drawing.
     """
     self.sgd = LogisticSGD(1, 1e-3)
     self.estimator = LogisticRegression(optimizer=self.sgd)

     # Seed right before sampling so the drawn points are reproducible.
     np.random.seed(0)
     points_per_cluster = 10
     cluster_shape = (points_per_cluster, 2)
     unit_scale = (1, 1)

     negatives = np.random.normal(loc=(-1, -1),
                                  scale=unit_scale,
                                  size=cluster_shape)
     positives = np.random.normal(loc=(1, 1),
                                  scale=unit_scale,
                                  size=cluster_shape)

     self.x = np.concatenate([negatives, positives])
     # -1.0 repeated for the first cluster, then +1.0 for the second.
     self.y = np.repeat([-1.0, 1.0], points_per_cluster)
示例#6
0
def compare_on_sklearn():
    """Run the SGD / SDCA comparison on the "lfw" sklearn data set.

    Loads the data through ``load_sklearn_dataset`` (``n=1000``), builds
    one optimizer of each kind with ``c=0.1`` and passes everything to
    ``plot_learning`` for a single epoch.
    """
    features, labels = load_sklearn_dataset(data_set_name="lfw", n=1000)

    # Both optimizers use the same value for ``c``.
    shared_c = 0.1
    sgd_optimizer = LogisticSGD(c=shared_c, eps=1e-3)
    sdca_optimizer = LogisticSDCA(c=shared_c)

    learning_options = dict(
        chosen_sgd=sgd_optimizer,
        chosen_sdca=sdca_optimizer,
        nb_epochs=1,
        comp_sgd=True,
        comp_sdca=True,
        is_malaptool=False,
    )
    plot_learning(features, labels, **learning_options)
def eval_c(data, labels, vect_param, nb_epoch, data_name, eps_base=10**-6):
    """Plot SGD and SDCA train/test accuracy for each candidate ``c``.

    The data is split once (15% held out). For every value in
    ``vect_param`` one SGD-backed and one SDCA-backed
    ``LogisticRegression`` are fitted on the training part and scored on
    both parts. Two figures are produced (SGD, then SDCA), each with a
    logarithmic x axis.

    Args:
        data: samples passed to ``train_test_split``.
        labels: labels passed to ``train_test_split``.
        vect_param: values tried as ``c``; also used as the x axis.
        nb_epoch: value passed as ``epochs`` to ``fit``.
        data_name: text appended to the figure titles.
        eps_base: value passed as ``eps`` to every ``LogisticSGD``.
    """
    train_x, test_x, train_y, test_y = train_test_split(data,
                                                        labels,
                                                        test_size=0.15)

    sgd_train_scores, sgd_test_scores = [], []
    sdca_train_scores, sdca_test_scores = [], []

    for c_value in vect_param:
        sgd_model = LogisticRegression(
            optimizer=LogisticSGD(c=c_value, eps=eps_base))
        sdca_model = LogisticRegression(optimizer=LogisticSDCA(c=c_value))

        # Each entry: (model, where its train score goes, where its test
        # score goes). SGD is always handled before SDCA.
        runs = (
            (sgd_model, sgd_train_scores, sgd_test_scores),
            (sdca_model, sdca_train_scores, sdca_test_scores),
        )

        # Fit without recording any history.
        for model, _, _ in runs:
            model.fit(train_x, train_y, epochs=nb_epoch, save_hist=False)

        for model, train_scores, _ in runs:
            train_scores.append(model.score_accuracy(train_x, train_y))

        for model, _, test_scores in runs:
            test_scores.append(model.score_accuracy(test_x, test_y))

    figures = (
        ("SGD accuracy vs. hyperparameter C\n on data set ",
         sgd_train_scores, sgd_test_scores),
        ("SDCA accuracy vs. hyperparameter C\n on data set ",
         sdca_train_scores, sdca_test_scores),
    )
    for title_prefix, train_scores, test_scores in figures:
        plt.figure()
        plt.semilogx(vect_param, train_scores, 'b', label="train")
        plt.semilogx(vect_param, test_scores, 'r', label="test")
        plt.title(title_prefix + data_name)
        plt.xlabel("C")
        plt.ylabel("Accuracy")
        plt.legend()
def eval_h(data, labels, vect_param, nb_epoch, data_name, prop_base, c_sgd,
           c_sdca, eps_sgd):
    """Plot SGD and SDCA train/test accuracy for each candidate ``h``.

    The data is split once (15% held out) and a projection base is built
    from the training part only. For every value in ``vect_param`` both
    parts are projected with ``gaussian_proj``, normalized with a
    ``Normalizer`` built from the projected training part, and used to fit
    and score one SGD-backed and one SDCA-backed ``LogisticRegression``.
    Two figures are produced (SGD, then SDCA), each with a logarithmic
    x axis.

    Args:
        data: samples passed to ``train_test_split``.
        labels: labels passed to ``train_test_split``.
        vect_param: values passed as third argument to ``gaussian_proj``;
            also used as the x axis.
        nb_epoch: value passed as ``epochs`` to ``fit``.
        data_name: text appended to the figure titles.
        prop_base: value passed as ``prop`` to ``create_base``.
        c_sgd: value passed as ``c`` to ``LogisticSGD``.
        c_sdca: value passed as ``c`` to ``LogisticSDCA``.
        eps_sgd: value passed as ``eps`` to ``LogisticSGD``.
    """
    raw_train, raw_test, y_train, y_test = train_test_split(data,
                                                            labels,
                                                            test_size=0.15)

    proj_base = create_base(raw_train, prop=prop_base)
    # First component of the base's shape; shown in the figure titles.
    dim, _ = proj_base.shape
    print("dim :", dim)

    sgd_train_scores, sgd_test_scores = [], []
    sdca_train_scores, sdca_test_scores = [], []

    for h_value in vect_param:
        train_x = gaussian_proj(raw_train, proj_base, h_value)
        test_x = gaussian_proj(raw_test, proj_base, h_value)

        # The normalizer is built from the training part and applied to both.
        scaler = Normalizer(train_x)
        train_x = scaler.normalize(train_x)
        test_x = scaler.normalize(test_x)

        sgd_model = LogisticRegression(
            optimizer=LogisticSGD(c=c_sgd, eps=eps_sgd))
        sdca_model = LogisticRegression(optimizer=LogisticSDCA(c=c_sdca))

        # Each entry: (model, where its train score goes, where its test
        # score goes). SGD is always handled before SDCA.
        runs = (
            (sgd_model, sgd_train_scores, sgd_test_scores),
            (sdca_model, sdca_train_scores, sdca_test_scores),
        )

        # Fit without recording any history.
        for model, _, _ in runs:
            model.fit(train_x, y_train, epochs=nb_epoch, save_hist=False)

        for model, train_scores, _ in runs:
            train_scores.append(model.score_accuracy(train_x, y_train))

        for model, _, test_scores in runs:
            test_scores.append(model.score_accuracy(test_x, y_test))

    figures = (
        ("SGD accuracy vs. hyperparameter h\nfor gaussian projection (dim = {})\n on data set ",
         sgd_train_scores, sgd_test_scores),
        ("SDCA accuracy vs. hyperparameter h\nfor gaussian projection (dim = {})\n on data set ",
         sdca_train_scores, sdca_test_scores),
    )
    for title_template, train_scores, test_scores in figures:
        plt.figure()
        plt.semilogx(vect_param, train_scores, 'b', label="train")
        plt.semilogx(vect_param, test_scores, 'r', label="test")
        # Only the template is formatted; data_name is appended verbatim.
        plt.title(title_template.format(dim) + data_name)
        plt.xlabel("h")
        plt.ylabel("Accuracy")
        plt.legend()
def plot_training(data, labels, nb_epoch, data_name, c_sgd, c_sdca, eps_sgd):
    """Train SGD and SDCA with history and plot loss and test accuracy.

    The data is split once (15% held out). Each optimizer is fitted in
    turn (SGD first); after each fit its loss history is plotted on a new
    figure and its final test accuracy is printed. A last figure shows the
    test accuracy of both optimizers computed by ``get_hist_accuracy``
    from the recorded weight histories.

    Args:
        data: samples passed to ``train_test_split``.
        labels: labels passed to ``train_test_split``.
        nb_epoch: value passed as ``epochs`` to ``fit``.
        data_name: text appended to the figure titles.
        c_sgd: value passed as ``c`` to ``LogisticSGD``.
        c_sdca: value passed as ``c`` to ``LogisticSDCA``.
        eps_sgd: value passed as ``eps`` to ``LogisticSGD``.
    """
    train_x, test_x, train_y, test_y = train_test_split(data,
                                                        labels,
                                                        test_size=0.15)

    sgd_model = LogisticRegression(
        optimizer=LogisticSGD(c=c_sgd, eps=eps_sgd))
    sdca_model = LogisticRegression(optimizer=LogisticSDCA(c=c_sdca))

    # Each entry: (model, loss-figure title prefix, label of the printed
    # final accuracy). SGD runs to completion before SDCA starts.
    runs = (
        (sgd_model, "SGD learning loss vs. iteration\non data set ",
         "final accuracy SGD :"),
        (sdca_model, "SDCA learning loss vs. iteration\non data set ",
         "final accuracy SDCA :"),
    )

    weight_histories = []
    for model, title_prefix, report_label in runs:
        # fit(..., save_hist=True) is unpacked as (weights, losses).
        weights, losses = model.fit(train_x,
                                    train_y,
                                    epochs=nb_epoch,
                                    save_hist=True)
        weight_histories.append(np.array(weights))

        # Disabled: a per-dimension plot of the weight history, titled
        # "Evolution of the weights : SGD" / "... : SDCA":
        #     plt.figure()
        #     for d in range(hist_w.shape[1]):
        #         plt.plot(hist_w[:, d])

        plt.figure()
        plt.plot(losses)
        plt.title(title_prefix + data_name)
        plt.xlabel("Iteration")
        plt.ylabel("Loss")

        print(report_label, model.score_accuracy(test_x, test_y))

    sgd_weights, sdca_weights = weight_histories

    sgd_accuracy = get_hist_accuracy(test_x, test_y, sgd_weights, sgd_model)
    sdca_accuracy = get_hist_accuracy(test_x, test_y, sdca_weights,
                                      sdca_model)

    # Both accuracy curves share a single figure.
    plt.figure()
    for curve, colour, legend_label in ((sgd_accuracy, 'b', "SGD"),
                                        (sdca_accuracy, 'g', "SDCA")):
        plt.plot(curve, c=colour, label=legend_label)
    plt.title("Test accuracy vs. iteration\non data set " + data_name)
    plt.xlabel("Iteration")
    plt.ylabel("Accuracy")
    plt.legend()
from typing import Callable

import matplotlib.pyplot as plt
import numpy as np

import engine.utils.malaptools as malaptools
from engine.estimators.logistic_regression import LogisticRegression
from engine.optimizers.sdca_logistic import LogisticSDCA
from engine.optimizers.sgd_logistic import LogisticSGD
from engine.utils import projections

# Module-level optimizer instances used as the default values of the
# ``chosen_sgd`` and ``chosen_sdca`` parameters of ``plot_learning``.
# They are created once, when this module is imported, so every call that
# relies on the defaults receives these same two objects.
DEFAULT_SGD = LogisticSGD(c=1, eps=1e-3)
DEFAULT_SDCA = LogisticSDCA(c=1)


def plot_learning(
    x,
    y,
    chosen_sgd=DEFAULT_SGD,
    chosen_sdca=DEFAULT_SDCA,
    nb_epochs=1,
    comp_sgd=True,
    comp_sdca=True,
    is_malaptool=False,
    verbose_all=False,
    projection: Callable[[np.ndarray],
                         np.ndarray] = projections.identity_projection):
    # make estimator
    if comp_sgd:
        sgd = chosen_sgd
        sgd_clf = LogisticRegression(optimizer=sgd, projection=projection)
示例#11
0
        Z[:, k + 1] = np.multiply(X[:, i], X[:, i])
        k += 2
        for j in range(i + 1, dim):
            Z[:, k] = np.multiply(X[:, i], X[:, j])
            k += 1
    return Z


# Disabled experiment (the ``if False`` guard means it never runs):
# apply ``proj_degr2`` to X, then normalize the result with a
# ``Normalizer`` built from that projected data.
if False:
    X_poly = proj_degr2(X)
    normalizer = Normalizer(X_poly)
    Xnorm_poly = normalizer.normalize(X_poly)

# Disabled experiment (the ``if False`` guard means it never runs):
# fit an SGD-backed estimator, with history, on the ``proj_degr2``
# projection of X.
if False:
    # make estimator
    # NOTE(review): eps=1e-38 is extremely small — confirm it is intended.
    sgd = LogisticSGD(c=10, eps=1e-38)
    sgd_clf = LogisticRegression(optimizer=sgd)

    # The SDCA estimator is built here but not used in the visible lines.
    sdca = LogisticSDCA(c=10)
    sdca_clf = LogisticRegression(optimizer=sdca)

    nb_epoch = 5

    X_proj = proj_degr2(X)
    # NOTE(review): this calls a bare ``normalize`` function, whereas the
    # preceding disabled block goes through ``Normalizer(...).normalize``
    # — confirm ``normalize`` is defined in this module.
    X_proj_norm = normalize(X_proj)
    # train estimator with history
    # fit(..., save_hist=True) is unpacked as (weight history, loss history)
    sgd_hist_w_proj, sgd_hist_loss_proj = sgd_clf.fit(X_proj_norm,
                                                      Y,
                                                      epochs=nb_epoch,
                                                      save_hist=True)
    sgd_hist_w_proj = np.array(sgd_hist_w_proj)