def experiment_acc(dataset, loss, penalty, lmbda, n_repeats=3):
    """Measure and save the mean test score of one estimator configuration.

    The dataset is loaded once through ``load_experiment``.  Then
    ``n_repeats`` random 80/20 train/test splits are drawn; on each split an
    estimator is fitted with ``fit_estimator`` (``mu=1``, no intercept,
    ``max_iter=10000``) and scored on the held-out part.  The mean of those
    scores is stored in a one-entry dict keyed by
    ``'<dataset>_<loss>_<penalty>_<lmbda>'`` and written with ``np.save`` to
    ``RESULTS_PATH/accuracies/<key>``.

    Args:
        dataset: Dataset name; one of ``'mnist'``, ``'svhn'`` or ``'rcv1'``.
        loss: Loss identifier forwarded to ``fit_estimator``.
        penalty: Penalty identifier forwarded to ``fit_estimator``.
        lmbda: Regularization parameter forwarded to ``fit_estimator``.
        n_repeats: Number of random splits to average over (default 3, the
            value that used to be hard-coded).

    Returns:
        None.  The result is only printed and saved to disk.

    Raises:
        ValueError: If ``dataset`` is not a supported name or if
            ``n_repeats`` is smaller than 1.
    """
    # Sample count requested from ``load_experiment`` for each dataset.
    dataset_sizes = {'mnist': 60000, 'svhn': 604388, 'rcv1': 781265}
    if dataset not in dataset_sizes:
        # Fail fast with a readable message: an unknown name would otherwise
        # leave the size undefined and crash later with an unrelated error.
        raise ValueError('Unknown dataset {!r}; expected one of {}'.format(
            dataset, sorted(dataset_sizes)))
    if n_repeats < 1:
        raise ValueError(
            'n_repeats must be at least 1, got {!r}'.format(n_repeats))

    # Seed both global RNGs before any split is drawn.
    random.seed(0)
    np.random.seed(0)

    X, y = load_experiment(dataset=dataset,
                           synth_params=None,
                           size=dataset_sizes[dataset],
                           redundant=0,
                           noise=0,
                           classification=True)

    total_score = 0
    for _ in range(n_repeats):
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.2)
        estimator = fit_estimator(X_train,
                                  y_train,
                                  loss=loss,
                                  penalty=penalty,
                                  mu=1,
                                  lmbda=lmbda,
                                  intercept=False,
                                  max_iter=10000)
        total_score += estimator.score(X_test, y_test)

    # The same identifier is used as dict key, log label and file name.
    run_name = '{}_{}_{}_{}'.format(dataset, loss, penalty, lmbda)
    acc = {run_name: total_score / n_repeats}
    print(run_name, ' : Done !')

    save_dataset_folder = os.path.join(RESULTS_PATH, 'accuracies')
    os.makedirs(save_dataset_folder, exist_ok=True)
    # NOTE: np.save stores a dict as a pickled object array, so reading it
    # back requires np.load(..., allow_pickle=True).
    np.save(os.path.join(save_dataset_folder, run_name), acc)
    print('RESULTS SAVED!')
            self.scores = rank_dataset_accelerated(X, y, self.z, self.scaling,
                                                   self.L, self.I_k_vec,
                                                   self.g, self.mu,
                                                   self.classification,
                                                   self.intercept, self.cut)
        return self.scores


if __name__ == "__main__":
    # simple test
    from sklearn.model_selection import train_test_split
    from utils.loaders import load_experiment

    X, y = load_experiment(dataset='cifar10_kernel',
                           synth_params=None,
                           size=10000,
                           redundant=0,
                           noise=None,
                           classification=True)

    #random.seed(0)
    #np.random.seed(0)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    z_init = np.random.rand(X_train.shape[1])
    screener = EllipsoidScreener(lmbda=0,
                                 mu=0,
                                 loss='safe_logistic',
                                 penalty='l2',
                                 intercept=False,
                                 classification=True,
                                 n_ellipsoid_steps=2000,
示例#3
0
def experiment_reg(dataset, synth_params, size, scale_data, redundant, noise,
                   nb_delete_steps, lmbda, mu, loss, penalty, intercept,
                   n_ellipsoid_steps, better_init, better_radius, cut,
                   get_ell_from_subset, clip_ell, use_sphere, guarantee,
                   nb_exp, nb_test, plot, zoom, dontsave):
    """Compare score-ordered sample deletion with in-order deletion.

    For each of ``nb_exp`` seeded experiments (seeds 1..nb_exp):

    1. Draw an 80/20 train/test split, optionally standardizing the features
       with a ``StandardScaler`` fitted on the training part.
    2. Fit an ``EllipsoidScreener`` on the training data, or on a random
       subset of ``get_ell_from_subset`` rows when that value is non-zero,
       then score every training sample with ``screen``.
    3. If ``guarantee`` is set, fit one estimator on the full training set
       and one on the samples whose score exceeds ``-mu``, and accumulate
       both training-set scores.
    4. If ``nb_delete_steps`` is non-zero, for an increasing number of
       removed samples fit estimators on (a) the training set minus the
       lowest-scored samples and (b) the training set minus its first rows,
       averaging their test scores over ``nb_test`` fits.  The full-data
       baseline is only fitted at the first step and its value is repeated
       for the later steps.

    The collected results are gathered in a dict, saved with ``np.save``
    under ``RESULTS_PATH/<dataset>/<exp_title>`` unless ``dontsave`` is set,
    and optionally passed to ``plot_experiment``.

    Args:
        dataset, synth_params, size, redundant, noise: Forwarded to
            ``load_experiment``.
        scale_data: Standardize features when truthy.
        nb_delete_steps: Number of deletion levels; 0 disables the sweep.
        lmbda, mu, loss, penalty, intercept: Forwarded to the screener and
            to ``fit_estimator``.
        n_ellipsoid_steps, better_init, better_radius, cut, clip_ell,
            use_sphere: Forwarded to ``EllipsoidScreener``.
        get_ell_from_subset: Subset size used to fit the screener; 0 means
            the whole training set.
        guarantee: Run the whole-vs-screened comparison when truthy.
        nb_exp: Number of experiments.
        nb_test: Number of fits averaged at each deletion level.
        plot, zoom: Plot the results with ``plot_experiment`` when ``plot``
            is truthy.
        dontsave: Skip writing the results file when truthy.

    Returns:
        None.
    """
    print('START')

    title_fields = (size, get_ell_from_subset, loss, lmbda, n_ellipsoid_steps,
                    intercept, mu, redundant, noise, better_init,
                    better_radius, cut, clip_ell, use_sphere, nb_delete_steps)
    exp_title = 'X_size_{}_ell_subset_{}_loss_{}_lmbda_{}_n_ellipsoid_{}_intercept_{}_mu_{}_redundant_{}_noise_{}_better_init_{}_better_radius_{}_cut_ell_{}_clip_ell_{}_use_sphere_{}_nds_{}'.format(
        *title_fields)
    print(exp_title)

    X, y = load_experiment(dataset,
                           synth_params,
                           size,
                           redundant,
                           noise,
                           classification=True)

    # Accumulators shared by all experiments.
    safe_guarantee = np.array([0., 0.])
    nb_safe_ell_all = 0
    scores_regular_all, scores_ell_all, scores_r_all = [], [], []

    # Options shared by every keyword-style call to fit_estimator.
    fit_options = dict(loss=loss,
                       penalty=penalty,
                       mu=mu,
                       lmbda=lmbda,
                       intercept=intercept)

    for seed in range(1, nb_exp + 1):
        # One seed per experiment, applied to both global RNGs.
        random.seed(seed)
        np.random.seed(seed)

        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.2)
        print('Ellipsoid steps to be done : ', n_ellipsoid_steps)
        screener_ell = EllipsoidScreener(lmbda=lmbda,
                                         mu=mu,
                                         loss=loss,
                                         penalty=penalty,
                                         intercept=intercept,
                                         classification=True,
                                         n_ellipsoid_steps=n_ellipsoid_steps,
                                         better_init=better_init,
                                         better_radius=better_radius,
                                         cut=cut,
                                         clip_ell=clip_ell,
                                         use_sphere=use_sphere)

        if scale_data:
            # Fit the scaler on the training part only, then reuse it.
            scaler = StandardScaler()
            X_train = scaler.fit_transform(X_train)
            X_test = scaler.transform(X_test)

        n_train = X_train.shape[0]

        if get_ell_from_subset == 0:
            screener_ell.fit(X_train, y_train)
        else:
            random_subset = random.sample(range(0, n_train),
                                          get_ell_from_subset)
            screener_ell.fit(X_train[random_subset], y_train[random_subset])

        scores_screenell = screener_ell.screen(X_train, y_train)
        # Ascending order: the lowest-scored samples are removed first.
        idx_screenell = np.argsort(scores_screenell)

        print('SCORES_ELL', scores_screenell[:10])

        nb_safe_ell_all += get_nb_safe(scores_screenell,
                                       mu,
                                       classification=True)

        scores_regular, scores_ell, scores_r = [], [], []
        nb_to_del_table = None

        if guarantee:
            idx_safeell = np.where(scores_screenell > -mu)[0]
            if len(idx_safeell) > 0:
                estimator_whole = fit_estimator(X_train, y_train, loss,
                                                penalty, mu, lmbda, intercept)
                estimator_screened = fit_estimator(X_train[idx_safeell],
                                                   y_train[idx_safeell], loss,
                                                   penalty, mu, lmbda,
                                                   intercept)
                # Both estimators are scored on the full training set.
                temp = np.array([
                    estimator_whole.score(X_train, y_train),
                    estimator_screened.score(X_train, y_train)
                ])
                safe_guarantee += temp
                print('SAFE GUARANTEE : ', temp)

        if nb_delete_steps != 0:
            # Square-root-spaced grid, rescaled so its last entry is n_train.
            sqrt_grid = np.sqrt(
                np.linspace(1, n_train, nb_delete_steps, dtype='int'))
            nb_to_del_table = np.ceil(
                sqrt_grid * (n_train / sqrt_grid[-1])).astype(int)

            # Only accumulated during the first step, reported at every step.
            score_regular = 0

            for step, nb_to_delete in enumerate(nb_to_del_table):
                first_step = step == 0
                score_ell = 0
                score_r = 0

                kept_by_score = idx_screenell[nb_to_delete:]
                X_screenell = X_train[kept_by_score]
                y_screenell = y_train[kept_by_score]
                X_r, y_r = X_train[nb_to_delete:], y_train[nb_to_delete:]

                r_has_both = dataset_has_both_labels(y_r)
                ell_has_both = dataset_has_both_labels(y_screenell)
                if not r_has_both:
                    print(
                        'Warning, only one label in randomly screened dataset')
                if not ell_has_both:
                    print('Warning, only one label in screenell dataset')
                if not (r_has_both and ell_has_both):
                    # Stop the sweep once either reduced set is degenerate.
                    break

                print('X_train :', X_train.shape, 'X_screenell :',
                      X_screenell.shape, 'X_random : ', X_r.shape)

                for _ in range(nb_test):
                    if first_step:
                        estimator_regular = fit_estimator(
                            X_train, y_train, **fit_options)
                    estimator_screenell = fit_estimator(
                        X_screenell, y_screenell, **fit_options)
                    estimator_r = fit_estimator(X_r, y_r, **fit_options)

                    if first_step:
                        score_regular += estimator_regular.score(
                            X_test, y_test)
                    score_ell += estimator_screenell.score(X_test, y_test)
                    score_r += estimator_r.score(X_test, y_test)

                scores_regular.append(score_regular / nb_test)
                scores_ell.append(score_ell / nb_test)
                scores_r.append(score_r / nb_test)

            scores_regular_all.append(scores_regular)
            scores_ell_all.append(scores_ell)
            scores_r_all.append(scores_r)

    mean_nb_safe = nb_safe_ell_all / nb_exp
    print('Number of datapoints we can safely screen with ellipsoid method:',
          mean_nb_safe)

    # nb_to_del_table and X_train come from the last experiment.
    data = {
        'nb_to_del_table': nb_to_del_table,
        'scores_regular': scores_regular_all,
        'scores_ell': scores_ell_all,
        'scores_r': scores_r_all,
        'nb_safe_ell': nb_safe_ell_all / nb_exp,
        'train_set_size': X_train.shape[0],
        'safe_guarantee': safe_guarantee / nb_exp
    }
    save_dataset_folder = os.path.join(RESULTS_PATH, dataset)
    os.makedirs(save_dataset_folder, exist_ok=True)
    if not dontsave:
        np.save(os.path.join(save_dataset_folder, exp_title), data)
        print('RESULTS SAVED!')

    if plot:
        plot_experiment(data, zoom=zoom)

    print('END')
def experiment_tradeoff(dataset, synth_params, size, scale_data, redundant,
                        noise, lmbda, mu, loss, penalty, intercept, acc,
                        rescale, n_ellipsoid_steps, better_init, cut,
                        get_ell_from_subset, clip_ell, use_sphere, guarantee,
                        nb_exp, plot, zoom, dontsave):
    """Track screening (or accuracy) as a function of the epoch budget.

    The number of budget levels is
    ``int(better_init + n_ellipsoid_steps * get_ell_from_subset / (0.8 * n))``
    where ``n`` is the number of loaded samples (0.8 is the training
    fraction of the split).  For each of ``nb_exp`` random 80/20 splits:

    * ``acc`` truthy: for every budget ``i`` a ``LinearSVC`` is fitted with
      ``max_iter=i`` and its test score is accumulated.
    * otherwise: budgets up to ``better_init`` use a ``DualityGapScreener``
      with ``n_epochs=i``; larger budgets use an ``EllipsoidScreener`` fitted
      on a random subset of ``get_ell_from_subset`` training rows and started
      from the latest duality-gap centre and squared radius.  The number of
      safe samples reported by ``get_nb_safe`` is accumulated per budget.
      With ``guarantee`` truthy, an estimator fitted on the whole training
      set is compared with one fitted on the samples whose last score exceeds
      ``-mu``; both training-set scores are accumulated.

    Results are averaged over experiments, printed, saved with ``np.save``
    under ``RESULTS_PATH/<dataset>/<exp_title>`` unless ``dontsave`` is set,
    and optionally passed to ``plot_experiment``.

    Args:
        dataset, synth_params, size, redundant, noise: Forwarded to
            ``load_experiment``.
        scale_data: Accepted for signature parity with the other experiment
            functions; it is not used by this function.
        lmbda, mu, loss, penalty, intercept: Model settings forwarded to the
            screeners and to ``fit_estimator``.
        acc: Select the accuracy-vs-budget mode.
        rescale: Multiply ``lmbda`` by ``n_train / subset_size`` when fitting
            on a subset (ellipsoid screener) or on the kept samples
            (guarantee check).
        n_ellipsoid_steps, better_init: Define the budget grid (see above).
        cut, clip_ell, use_sphere: Forwarded to ``EllipsoidScreener``.
        get_ell_from_subset: Size of the random subset given to the
            ellipsoid screener.
        guarantee: Run the whole-vs-screened comparison (screening mode
            only).
        nb_exp: Number of experiments; must be at least 1.
        plot, zoom: Plot the results when ``plot`` is truthy.
        dontsave: Skip writing the results file when truthy.

    Returns:
        None.

    Raises:
        ValueError: If ``nb_exp`` is smaller than 1 (the averages and the
            training-set size would be undefined).
    """
    if nb_exp < 1:
        raise ValueError('nb_exp must be at least 1, got {!r}'.format(nb_exp))

    print('START')

    X, y = load_experiment(dataset,
                           synth_params,
                           size,
                           redundant,
                           noise,
                           classification=True)

    # The three run modes share one title template and differ by suffix only.
    if acc:
        title_suffix = '_acc'
    elif rescale:
        title_suffix = '_tradeoff_rescale'
    else:
        title_suffix = '_tradeoff'
    exp_title = 'X_size_{}_ell_subset_{}_loss_{}_lmbda_{}_n_ellipsoid_{}_mu_{}_better_init_{}_cut_ell_{}_clip_ell_{}_use_sphere_{}'.format(
        size, get_ell_from_subset, loss, lmbda, n_ellipsoid_steps, mu,
        better_init, cut, clip_ell, use_sphere) + title_suffix
    print(exp_title)

    nb_epochs = int(better_init + n_ellipsoid_steps * get_ell_from_subset /
                    (0.8 * X.shape[0]))
    scores_screening_all = np.zeros(nb_epochs)
    safe_guarantee = np.array([0., 0.])

    # NOTE: the global RNGs are not seeded here, so splits and subsets vary
    # from run to run.
    for _ in range(nb_exp):
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.2)
        n_train = X_train.shape[0]

        if acc:
            for epoch in range(nb_epochs):
                estimator = LinearSVC(loss='squared_hinge',
                                      dual=False,
                                      C=1 / lmbda,
                                      fit_intercept=False,
                                      max_iter=epoch + 1,
                                      tol=1.0e-20).fit(X_train, y_train)
                scores_screening_all[epoch] += estimator.score(X_test, y_test)
                print(scores_screening_all[epoch])
            print('SCORES', scores_screening_all)
        else:
            scores = None  # scores produced by the most recent screener
            dg_init = None  # init/rad taken from the latest duality-gap fit

            for epoch in range(1, nb_epochs + 1):
                if epoch <= better_init:
                    screener_dg = DualityGapScreener(lmbda=lmbda,
                                                     n_epochs=epoch).fit(
                                                         X_train, y_train)
                    dg_init = {
                        'init': screener_dg.z,
                        'rad': screener_dg.squared_radius
                    }
                    scores = screener_dg.screen(X_train, y_train)
                    print('SCREEN DG RADIUS', screener_dg.squared_radius)
                else:
                    if rescale:
                        lmbda_subset = lmbda * n_train / get_ell_from_subset
                    else:
                        lmbda_subset = lmbda
                    random_subset = random.sample(range(0, n_train),
                                                  get_ell_from_subset)
                    # Without any duality-gap epoch (better_init == 0) there
                    # is no centre/radius to start from, so fit() is called
                    # with its own defaults, as experiment_reg does.
                    fit_kwargs = dg_init if dg_init is not None else {}
                    screener_ell = EllipsoidScreener(
                        lmbda=lmbda_subset,
                        mu=mu,
                        loss=loss,
                        penalty=penalty,
                        intercept=intercept,
                        classification=True,
                        n_ellipsoid_steps=int(
                            (epoch - better_init) * n_train /
                            get_ell_from_subset),
                        better_init=0,
                        better_radius=0,
                        cut=cut,
                        clip_ell=clip_ell,
                        use_sphere=use_sphere).fit(X_train[random_subset],
                                                   y_train[random_subset],
                                                   **fit_kwargs)
                    scores = screener_ell.screen(X_train, y_train)
                    if use_sphere:
                        print('SCREEN ELL RADIUS', screener_ell.squared_radius)

                scores_screening_all[epoch - 1] += get_nb_safe(
                    scores, mu, classification=True)

            if guarantee and scores is None:
                # nb_epochs == 0: no screener was run, nothing to compare.
                print('No screening epoch was run, skipping safe guarantee')
            elif guarantee:
                idx_safeell = np.where(scores > -mu)[0]
                print('SCORES ', scores)
                print('NB TO KEEP', len(idx_safeell))
                if len(idx_safeell) != 0:
                    estimator_whole = fit_estimator(X_train, y_train, loss,
                                                    penalty, mu, lmbda,
                                                    intercept)
                    # Always defined here, whichever screener branch ran.
                    if rescale:
                        lmbda_screened = lmbda * n_train / len(idx_safeell)
                    else:
                        lmbda_screened = lmbda
                    estimator_screened = fit_estimator(X_train[idx_safeell],
                                                       y_train[idx_safeell],
                                                       loss, penalty, mu,
                                                       lmbda_screened,
                                                       intercept)
                    # Both estimators are scored on the full training set.
                    temp = np.array([
                        estimator_whole.score(X_train, y_train),
                        estimator_screened.score(X_train, y_train)
                    ])
                    print('SAFE GUARANTEE : ', temp)
                    safe_guarantee += temp

    if acc:
        # Cancels the division by the training-set size below, so that
        # 'scores_screening' is a plain mean accuracy in this mode.
        scores_screening_all = scores_screening_all * X_train.shape[0]
    data = {
        'step_table':
        better_init + n_ellipsoid_steps *
        (get_ell_from_subset / X_train.shape[0]),
        'scores_screening':
        scores_screening_all / (X_train.shape[0] * nb_exp),
        'safe_guarantee':
        safe_guarantee / nb_exp
    }
    print(data)
    save_dataset_folder = os.path.join(RESULTS_PATH, dataset)
    os.makedirs(save_dataset_folder, exist_ok=True)
    if not dontsave:
        np.save(os.path.join(save_dataset_folder, exp_title), data)
        print('RESULTS SAVED!')

    if plot:
        plot_experiment(data, zoom=zoom)

    print('END')
def experiment_regpath(dataset, synth_params, size, scale_data, redundant,
                       noise, lmbda_grid_start, lmbda_grid_end, lmbda_grid_num,
                       mu, loss, penalty, intercept, n_ellipsoid_steps,
                       n_epochs, n_epochs_ell_path, cut, get_ell_from_subset,
                       clip_ell, use_sphere, nb_exp, dontsave):
    """Compare solver budgets along a regularization path, with and without
    ellipsoid screening.

    The path is ``np.logspace(lmbda_grid_start, lmbda_grid_end,
    num=lmbda_grid_num)``.  For each of ``nb_exp`` seeded experiments
    (seeds 1..nb_exp) and each ``lmbda`` on the path:

    * At the first grid value two ``BinaryClassifier`` solvers are created
      and fitted on the whole training split; a duality-gap screener and an
      ellipsoid screener are fitted as well.  Both budgets stay at 0.
    * At later values the unscreened solver is refitted with ``restart=True``
      and its budget is the value read at ``[0, -1]`` of the fit output times
      the training-set size.  The screened solver is first run for
      ``n_epochs_ell_path`` epochs, an ellipsoid screener is fitted on a
      random subset starting from the solver weights, the samples with score
      above ``-mu`` are kept and the solver is refitted on them.  Its budget
      adds the initial epochs, the ellipsoid steps and the final fit.

    Budgets and training-set scores are summed per ``lmbda``, averaged over
    the experiments, saved with ``np.save`` under
    ``RESULTS_PATH/<dataset>/<exp_title>`` unless ``dontsave`` is set, and
    printed.

    Args:
        dataset, synth_params, size, redundant, noise: Forwarded to
            ``load_experiment``.
        scale_data: Not used by this function.
        lmbda_grid_start, lmbda_grid_end, lmbda_grid_num: ``np.logspace``
            arguments defining the path.
        mu, loss, penalty, intercept: Model settings forwarded to the
            screeners and solvers.
        n_ellipsoid_steps, cut, clip_ell, use_sphere: Forwarded to
            ``EllipsoidScreener``.
        n_epochs: Epochs of the initial ``DualityGapScreener``.
        n_epochs_ell_path: Solver epochs run before each ellipsoid fit.
        get_ell_from_subset: Size of the random subset given to the
            ellipsoid screener.
        nb_exp: Number of experiments.
        dontsave: Skip writing the results file when truthy.

    Returns:
        None.
    """
    print('START')

    exp_title = 'X_size_{}_ell_subset_{}_loss_{}_n_ell_{}_mu_{}_cut_ell_{}_n_epochs_{}_n_ell_path_{}_use_sphere_{}_start_{}_end_{}_num_{}_regpath'.format(
        size, get_ell_from_subset, loss, n_ellipsoid_steps, mu, cut, n_epochs,
        n_epochs_ell_path, use_sphere, lmbda_grid_start, lmbda_grid_end,
        lmbda_grid_num)
    print(exp_title)

    X, y = load_experiment(dataset,
                           synth_params,
                           size,
                           redundant,
                           noise,
                           classification=True)

    lmbda_grid = np.logspace(lmbda_grid_start,
                             lmbda_grid_end,
                             num=lmbda_grid_num)

    # One accumulator per (metric, lmbda) pair, in this metric order.
    key_templates = ('budget_ell_lmbda_{}', 'budget_noscreen_lmbda_{}',
                     'score_ell_lmbda_{}', 'score_noscreen_lmbda_{}')
    data = {
        template.format(lmbda): 0
        for lmbda in lmbda_grid for template in key_templates
    }

    def new_ellipsoid_screener(lmbda, n_train):
        # lmbda is rescaled by the ratio full-set size / subset size.
        return EllipsoidScreener(lmbda=lmbda * n_train / get_ell_from_subset,
                                 mu=mu,
                                 loss=loss,
                                 penalty=penalty,
                                 intercept=intercept,
                                 classification=True,
                                 n_ellipsoid_steps=n_ellipsoid_steps,
                                 cut=cut,
                                 clip_ell=clip_ell,
                                 use_sphere=use_sphere,
                                 ars=True)

    def new_solver():
        return BinaryClassifier(loss='sqhinge',
                                penalty=penalty,
                                fit_intercept=intercept)

    for seed in range(1, nb_exp + 1):
        # One seed per experiment, applied to both global RNGs.
        random.seed(seed)
        np.random.seed(seed)
        X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2)
        n_train = X_train.shape[0]

        for lmbda in lmbda_grid:
            print('---------- LMBDA ---------: ', lmbda)
            budget_ell = 0
            budget_noscreen = 0
            starts_path = lmbda == lmbda_grid[0]

            if starts_path:
                # First grid value: create and warm up both solvers.
                screener_ell = new_ellipsoid_screener(lmbda, n_train)
                screener_dg = DualityGapScreener(lmbda=lmbda,
                                                 n_epochs=n_epochs,
                                                 ars=True)
                screener_dg.fit(X_train, y_train)
                print('Init radius : ', screener_dg.squared_radius)
                random_subset = random.sample(range(0, n_train),
                                              get_ell_from_subset)
                screener_ell.fit(X_train[random_subset],
                                 y_train[random_subset],
                                 init=screener_dg.z,
                                 rad=screener_dg.squared_radius)

                svc = new_solver()
                svc.fit(X_train,
                        y_train,
                        solver='qning-svrg',
                        lambd=lmbda,
                        verbose=False)

                svc_ell = new_solver()
                svc_ell.fit(X_train,
                            y_train,
                            solver='qning-svrg',
                            lambd=lmbda,
                            verbose=False)
            else:
                # --- reference: refit on the whole training set ---
                noscreen_info = svc.fit(X_train,
                                        y_train,
                                        solver='qning-svrg',
                                        it0=1,
                                        lambd=lmbda,
                                        restart=True,
                                        verbose=False)
                budget_fit_solver_noscreen = noscreen_info[0, -1]
                print('Epoch fit solver no screen :',
                      budget_fit_solver_noscreen)
                budget_noscreen += budget_fit_solver_noscreen * n_train
                print('Budget solver no screen :', budget_noscreen)

                # --- screened: a few solver epochs, then screen, then refit ---
                info = svc_ell.fit(X_train,
                                   y_train,
                                   solver='qning-svrg',
                                   lambd=lmbda,
                                   verbose=False,
                                   max_epochs=n_epochs_ell_path,
                                   it0=1,
                                   restart=True)
                # presumably the duality gap (rows 1 and 2 of the solver
                # output at the last iterate) — confirm against the solver
                dg = info[1, -1] - info[2, -1]
                init_rad = 2 * dg / lmbda

                screener_ell = new_ellipsoid_screener(lmbda, n_train)
                random_subset = random.sample(range(0, n_train),
                                              get_ell_from_subset)
                print('Init rad : ', init_rad)
                screener_ell.fit(X_train[random_subset],
                                 y_train[random_subset],
                                 init=svc_ell.w,
                                 rad=init_rad)
                if use_sphere and n_ellipsoid_steps > 0:
                    print('Final rad : ', screener_ell.squared_radius)

                scores_ell = screener_ell.screen(X_train, y_train)
                tokeep = np.where(scores_ell > -mu)[0]
                nb_kept = len(tokeep)
                print('To keep : ', nb_kept)

                screened_info = svc_ell.fit(X_train[tokeep],
                                            y_train[tokeep],
                                            solver='qning-svrg',
                                            it0=1,
                                            lambd=lmbda * (n_train / nb_kept),
                                            restart=True,
                                            verbose=False)
                budget_fit_solver = screened_info[0, -1]

                budget_init_ell = (n_epochs_ell_path) * n_train
                budget_fit_ell = n_ellipsoid_steps * get_ell_from_subset
                if cut:
                    budget_fit_ell += get_ell_from_subset
                budget_refit = budget_fit_solver * nb_kept
                budget_ell += budget_init_ell + budget_fit_ell + budget_refit

                print('Epoch fit solver screen', budget_fit_solver)
                print('Budget solver screen : ', budget_init_ell,
                      budget_fit_ell, budget_refit)

            score_ell = svc_ell.score(X_train, y_train)
            score_noscreen = svc.score(X_train, y_train)

            print('Score on screened : ', score_ell, 'Score on whole : ',
                  score_noscreen)

            measured = (budget_ell, budget_noscreen, score_ell, score_noscreen)
            for template, value in zip(key_templates, measured):
                data[template.format(lmbda)] += value

    # Turn the per-key sums into plain-float means over the experiments.
    data = {key: float(total / nb_exp) for key, total in data.items()}
    save_dataset_folder = os.path.join(RESULTS_PATH, dataset)
    os.makedirs(save_dataset_folder, exist_ok=True)
    if not dontsave:
        np.save(os.path.join(save_dataset_folder, exp_title), data)
        print('RESULTS SAVED!')

    print('END')

    print(data)
                                        g=None,
                                        mu=1,
                                        classification=True,
                                        intercept=False,
                                        cut=False)


if __name__ == "__main__":
    # simple test
    from sklearn.model_selection import train_test_split
    from utils.loaders import load_experiment
    import random

    X, y = load_experiment(dataset='mnist',
                           synth_params=None,
                           size=60000,
                           redundant=0,
                           noise=None,
                           classification=True)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    prop = np.unique(y_test, return_counts=True)[1]
    print('BASELINE : ', 1 - prop[1] / prop[0])

    screener = DualityGapScreener(lmbda=1e-5, n_epochs=9,
                                  ars=True).fit(X_train, y_train)
    print('Squared Radius : ', 2 * screener.dg / 1e-5)
    print('Score : ', screener.score(X_test, y_test))

    svc_ell = BinaryClassifier(loss='sqhinge', penalty='l2')
    budget_fit_solver = svc_ell.fit(