Пример #1
0
def parallel_function(
        dataset_name, method, tol=1e-8, n_outer=15):

    # load data
    X, y = fetch_libsvm(dataset_name)
    # subsample the samples and the features
    n_samples, n_features = dict_subsampling[dataset_name]
    t_max = dict_t_max[dataset_name]
    # t_max = 3600

    X, y = clean_dataset(X, y, n_samples, n_features)
    alpha_max, n_classes = get_alpha_max(X, y)
    log_alpha_max = np.log(alpha_max)  # maybe to change alpha max value

    algo = ImplicitForward(None, n_iter_jac=2000)
    estimator = LogisticRegression(
        C=1, fit_intercept=False, warm_start=True, max_iter=30, verbose=False)

    model = SparseLogreg(estimator=estimator)
    idx_train, idx_val, idx_test = get_splits(X, y)

    logit_multiclass = LogisticMulticlass(
        idx_train, idx_val, algo, idx_test=idx_test)

    monitor = Monitor()
    if method == "implicit_forward":
        log_alpha0 = np.ones(n_classes) * np.log(0.1 * alpha_max)
        optimizer = LineSearch(n_outer=100)
        grad_search(
            algo, logit_multiclass, model, optimizer, X, y, log_alpha0,
            monitor)
    elif method.startswith(('random', 'bayesian')):
        max_evals = dict_max_eval[dataset_name]
        log_alpha_min = np.log(alpha_max) - 7
        hyperopt_wrapper(
            algo, logit_multiclass, model, X, y, log_alpha_min, log_alpha_max,
            monitor, max_evals=max_evals, tol=tol, t_max=t_max, method=method,
            size_space=n_classes)
    elif method == 'grid_search':
        n_alphas = 20
        p_alphas = np.geomspace(1, 0.001, n_alphas)
        p_alphas = np.tile(p_alphas, (n_classes, 1))
        for i in range(n_alphas):
            log_alpha_i = np.log(alpha_max * p_alphas[:, i])
            logit_multiclass.get_val(
                model, X, y, log_alpha_i, None, monitor, tol)

    monitor.times = np.array(monitor.times).copy()
    monitor.objs = np.array(monitor.objs).copy()
    monitor.acc_vals = np.array(monitor.acc_vals).copy()
    monitor.acc_tests = np.array(monitor.acc_tests).copy()
    monitor.log_alphas = np.array(monitor.log_alphas).copy()
    return (
        dataset_name, method, tol, n_outer, monitor.times, monitor.objs,
        monitor.acc_vals, monitor.acc_tests, monitor.log_alphas, log_alpha_max,
        n_samples, n_features, n_classes)
Пример #2
0
from sparse_ho.optimizers import GradientDescent

from sparse_ho.ho import grad_search, hyperopt_wrapper
from sparse_ho.utils import Monitor
from sparse_ho.datasets.utils_datasets import (
    alpha_max_multiclass, clean_dataset, get_splits)


# load data
n_samples = 1_000
n_features = 1_000
# n_samples = 1_100
# n_features = 3_200
# X, y = fetch_libsvm('sensit')
# X, y = fetch_libsvm('usps')
X, y = fetch_libsvm('rcv1_multiclass')
# X, y = fetch_libsvm('sector_scale')
# X, y = fetch_libsvm('sector')
# X, y = fetch_libsvm('smallNORB')
# X, y = fetch_libsvm('mnist')


# clean data and subsample
X, y = clean_dataset(X, y, n_samples, n_features)
idx_train, idx_val, idx_test = get_splits(X, y)
n_samples, n_features = X.shape

algo = ImplicitForward(n_iter_jac=1000)
estimator = LogisticRegression(
    C=1, fit_intercept=False, warm_start=True, max_iter=2000, verbose=False)
Пример #3
0
from sparse_ho.ho import grad_search
from sparse_ho.utils import Monitor
from sparse_ho.models import SparseLogreg
from sparse_ho.criterion import HeldOutLogistic
from sparse_ho import ImplicitForward
from sparse_ho import Forward
from sparse_ho.grid_search import grid_search

print(__doc__)

dataset = 'rcv1_train'
# dataset = 'simu'

if dataset != 'simu':
    X, y = fetch_libsvm(dataset)
    X = X[:, :100]
else:
    X, y = make_classification(n_samples=100,
                               n_features=1_000,
                               random_state=42,
                               flip_y=0.02)

n_samples = X.shape[0]
idx_train = np.arange(0, n_samples // 2)
idx_val = np.arange(n_samples // 2, n_samples)

print("Starting path computation...")
n_samples = len(y[idx_train])
alpha_max = np.max(np.abs(X[idx_train, :].T.dot(y[idx_train])))
Пример #4
0
from sparse_ho.criterion import HeldOutMSE
from sparse_ho import Forward
from sparse_ho import ImplicitForward
from sparse_ho.utils import Monitor
from sparse_ho.ho import grad_search
from sparse_ho.grid_search import grid_search

from libsvmdata.datasets import fetch_libsvm

print(__doc__)

dataset = 'rcv1'
# dataset = 'simu'

if dataset == 'rcv1':
    X, y = fetch_libsvm('rcv1_train')
else:
    X, y = make_regression(n_samples=1000, n_features=1000, noise=40)

n_samples = X.shape[0]
idx_train = np.arange(0, n_samples // 2)
idx_val = np.arange(n_samples // 2, n_samples)

print("Starting path computation...")
n_samples = len(y[idx_train])
alpha_max = np.max(np.abs(X[idx_train, :].T.dot(
    y[idx_train]))) / len(idx_train)
log_alpha0 = np.log(alpha_max / 10)

n_alphas = 10
p_alphas = np.geomspace(1, 0.0001, n_alphas)
Пример #5
0
import sklearn
from sklearn.linear_model import LogisticRegression

from libsvmdata.datasets import fetch_libsvm

from sparse_ho.models import SparseLogreg
from sparse_ho.criterion import LogisticMulticlass
from sparse_ho import ImplicitForward
from sparse_ho.utils import Monitor
from sparse_ho.datasets.utils_datasets import (alpha_max_multiclass,
                                               clean_dataset)

# load data
n_samples = 1000
n_features = 10
X, y = fetch_libsvm('mnist')
my_bool = np.logical_or(np.logical_or(y == 0, y == 1), y == 2)

X = X[my_bool, :]
y = y[my_bool]
# clean data and subsample
X, y = clean_dataset(X, y, n_samples, n_features)
idx_train = np.arange(len(y) // 2)
idx_val = np.arange(len(y) // 2, len(y))

alpha_max, n_classes = alpha_max_multiclass(X, y)
tol = 1e-8

n_classes = np.unique(y).shape[0]

max_iter = 10000
Пример #6
0
from sparse_ho.ho import grad_search
from sparse_ho.utils import Monitor
from sparse_ho.utils_plot import discrete_cmap
from sparse_ho.optimizers import GradientDescent

# dataset = "rcv1"
dataset = 'simu'

##############################################################################
# Load some data

# dataset = 'rcv1'
dataset = 'simu'

if dataset == 'rcv1':
    X, y = fetch_libsvm('rcv1.binary')
    y -= y.mean()
    y /= np.linalg.norm(y)
else:
    X, y, _ = make_correlated_data(n_samples=200,
                                   n_features=400,
                                   snr=5,
                                   random_state=0)

n_samples = X.shape[0]
idx_train = np.arange(0, n_samples // 2)
idx_val = np.arange(n_samples // 2, n_samples)

print("Starting path computation...")
alpha_max = np.max(np.abs(X[idx_train, :].T @ y[idx_train])) / len(idx_train)