def mapper(key, output_collector):
    """
    # debug mapper
    config = json.load(open(os.path.join(WD, "config_cv_largerange.json"), "r"))
    load_globals(config)
    resample(config, 'refit/refit')
    key = ('enettv', 0.01, 0.1, 0.3)
    """
    import mapreduce as GLOBAL
    Xtr = GLOBAL.DATA_RESAMPLED["X"][0]
    Xte = GLOBAL.DATA_RESAMPLED["X"][1]
    ytr = GLOBAL.DATA_RESAMPLED["y"][0]
    yte = GLOBAL.DATA_RESAMPLED["y"][1]

    # key = 'enettv_0.01_0.1_0.2'.split("_")
    algo, alpha = key[0], float(key[1])
    l1l2ratio, tvratio = float(key[2]), float(key[3])

    # Split the global penalty alpha: a tvratio fraction goes to TV, the
    # remainder is shared between l1 and l2 according to l1l2ratio.
    tv = alpha * tvratio
    l1 = alpha * (1 - tvratio) * l1l2ratio
    l2 = alpha * (1 - tvratio) * (1 - l1l2ratio)

    print(key, algo, alpha, l1, l2, tv)

    scaler = preprocessing.StandardScaler().fit(Xtr)
    Xtr = scaler.transform(Xtr)
    Xte = scaler.transform(Xte)

    if algo == 'enettv':
        conesta = algorithms.proximal.CONESTA(max_iter=10000)
        mod = estimators.LinearRegressionL1L2TV(l1,
                                                l2,
                                                tv,
                                                GLOBAL.Atv,
                                                algorithm=conesta)
        mod.fit(Xtr, ytr.ravel())
    elif algo == 'enetgn':
        fista = algorithms.proximal.FISTA(max_iter=5000)
        mod = estimators.LinearRegressionL1L2GraphNet(l1,
                                                      l2,
                                                      tv,
                                                      GLOBAL.Agn,
                                                      algorithm=fista)
        mod.fit(Xtr, ytr.ravel())

    elif algo == 'enet':
        fista = algorithms.proximal.FISTA(max_iter=5000)
        mod = estimators.ElasticNet(l1l2ratio, algorithm=fista)
        mod.fit(Xtr, ytr.ravel())
    else:
        raise ValueError('Algo %s not handled' % algo)

    y_pred = mod.predict(Xte)
    ret = dict(y_pred=y_pred, y_true=yte, beta=mod.beta)
    if output_collector:
        output_collector.collect(key, ret)
    else:
        return ret
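# Worked example of the penalty split above (arithmetic only, using the debug
# key from the docstring): key = ('enettv', 0.01, 0.1, 0.3) gives
#   tv = 0.01 * 0.3       = 0.003
#   l1 = 0.01 * 0.7 * 0.1 = 0.0007
#   l2 = 0.01 * 0.7 * 0.9 = 0.0063
tv = 0.01 * 0.3
l1 = 0.01 * (1 - 0.3) * 0.1
l2 = 0.01 * (1 - 0.3) * (1 - 0.1)
assert abs(tv + l1 + l2 - 0.01) < 1e-12  # the three penalties exhaust alpha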
Example #2
def mapper(key, output_collector):
    import mapreduce as GLOBAL  # access to global variables
    # key: list of parameters
    alpha, l1_ratio = key[0], key[1]
    Xtr = GLOBAL.DATA_RESAMPLED["X"][0]
    Xte = GLOBAL.DATA_RESAMPLED["X"][1]
    ztr = GLOBAL.DATA_RESAMPLED["z"][0]
    zte = GLOBAL.DATA_RESAMPLED["z"][1]
    print(key, "Data shape:", Xtr.shape, Xte.shape, ztr.shape, zte.shape)
    # penalty_start=11 since we residualize BMI on 2 categorical covariates
    # (8 columns) and 2 ordinal variables
    mod = estimators.ElasticNet(alpha * l1_ratio, penalty_start=11, mean=True)
    z_pred = mod.fit(Xtr, ztr).predict(Xte)
    ret = dict(z_pred=z_pred, z_true=zte, beta=mod.beta)
    output_collector.collect(key, ret)
Example #3
def mapper(key, output_collector):
    import mapreduce as GLOBAL  # access to global variables (GLOBAL.DATA)
    alpha, l1_ratio = key[0], key[1]
    print("worker running, key:", key)
    # penalty_start=11 since we residualize BMI on 2 categorical covariates
    # (8 columns) and 2 ordinal variables
    mod = estimators.ElasticNet(alpha * l1_ratio, penalty_start=11, mean=True)
    z_pred = mod.fit(GLOBAL.DATA_RESAMPLED["X"][0],
                     GLOBAL.DATA_RESAMPLED["z"][0]).predict(
                         GLOBAL.DATA_RESAMPLED["X"][1])
    # beta belongs inside the collected dict: collect() takes (key, value),
    # as in the other examples on this page.
    ret = dict(z_pred=z_pred,
               z_true=GLOBAL.DATA_RESAMPLED["z"][1],
               beta=mod.beta)
    output_collector.collect(key, ret)
Example #4
def mapper(key, output_collector):
    import mapreduce as GLOBAL  # access to global variables:
    # key: list of parameters
    alpha, l1_ratio = key[0], key[1]
    Xtr = GLOBAL.DATA_RESAMPLED["X"][0]
    Xte = GLOBAL.DATA_RESAMPLED["X"][1]
    ztr = GLOBAL.DATA_RESAMPLED["z"][0]
    zte = GLOBAL.DATA_RESAMPLED["z"][1]
    print key, "Data shape:", Xtr.shape, Xte.shape, ztr.shape, zte.shape
    #
    mod_PP = estimators.ElasticNet(l1_ratio,
                                   alpha=alpha,
                                   penalty_start=1,
                                   mean=True)
    z_pred_PP = mod_PP.fit(Xtr, ztr).predict(Xte)
    ret = dict(z_pred=z_pred_PP, z_true=zte, beta=mod_PP.beta, model=mod_PP)
    output_collector.collect(key, ret)
Example #5
def mapper(key, output_collector):
    import mapreduce as GLOBAL
    # key: list of parameters
    alpha, l1_ratio = key[0], key[1]
    Xtr = GLOBAL.DATA_RESAMPLED['X'][0]
    Xte = GLOBAL.DATA_RESAMPLED['X'][1]
    ztr = GLOBAL.DATA_RESAMPLED['z'][0]
    zte = GLOBAL.DATA_RESAMPLED['z'][1]
    print key, "Data shape:", Xtr.shape, Xte.shape, ztr.shape, zte.shape
    # penalty_start since we residualized BMI with 2 categorical covariables
    # (Gender and ImagingCentreCity - 8 columns) and 3 ordinal variables
    # (tiv_gaser, tiv_gaser² and mean_pds - 3 columns)
    penalty_start = 12
    mod = estimators.ElasticNet(l1_ratio,
                                alpha,
                                penalty_start=penalty_start,
                                mean=True)
    z_pred = mod.fit(Xtr, ztr).predict(Xte)
    ret = dict(z_pred=z_pred, z_true=zte, beta=mod.beta)
    output_collector.collect(key, ret)
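# The recurring penalty_start argument tells Parsimony how many leading design
# columns to leave unpenalized. A minimal sketch with hypothetical shapes,
# assuming the leading column is an intercept as in Example #8 below:
import numpy as np

n, p_cov, p_img = 100, 11, 500
X = np.hstack([np.ones((n, 1)),             # intercept (column 0)
               np.random.randn(n, p_cov),   # covariate columns 1..11
               np.random.randn(n, p_img)])  # penalized feature columns
penalty_start = 1 + p_cov  # = 12 here: only the feature columns are penalized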
Example #6
    # Initialize beta_map
    beta_map = np.zeros(X.shape[1])

    # Elasticnet algorithm via Pylearn-Parsimony
    print("Elasticnet algorithm")
    alpha = 0.006
    l1_ratio = 0.8

    # Since we residualized BMI on 2 categorical covariates (Gender and
    # ImagingCentreCity - 8 columns) and 2 ordinal variables (tiv_gaser and
    # mean_pds - 2 columns)
    penalty_start = 11
    mod = estimators.ElasticNet(l1_ratio,
                                alpha,
                                penalty_start=penalty_start,
                                mean=True)
    mod.fit(X, z)
    print("Compute beta values")
    beta_map = mod.beta
    print("Compute R2")
    r2 = r2_score(z, mod.predict(X))
    print(r2)

    # Use mask
    template_for_size_img = ni.load(MASK_PATH)
    mask_data = template_for_size_img.get_data()
    masked_data_index = (mask_data != 0.0)

    # Draw beta map
    print "Draw beta map"
    if not os.path.exists(SHARED_DIR):
        os.makedirs(SHARED_DIR)

    # Load data
    print "Load data"
    X_init, X_res, z = load_residualized_bmi_data(cache=False)
    np.save(os.path.join(WD, 'X_res.npy'), X_res)
    np.save(os.path.join(WD, "z.npy"), z)

    #
    (n, p) = X_init.shape
    gamma = 1
    groups = [[j] for j in range(0, p)]
    print "Compute ElasticNet algorithm"
    enet_PP = estimators.ElasticNet(l=0.8,
                                    alpha=0.006,
                                    penalty_start=11,
                                    mean=True)
    enet_PP.fit(X_res, z)
    print "Compute beta values"
    beta = enet_PP.beta
    beta = beta[11:]  #do not consider covariates
    print "Compute the weights using Parsimony's ElasticNet algorithm."
    weights = [
        math.pow(abs(beta[j[0]]) + 1 / float(n), -gamma) for j in groups
    ]

    # Adaptive Elasticnet algorithm
    adaptive_enet = estimators.LinearRegressionL1L2GL(
        l1=0,
        l2=0.8,
        gl=0.006,
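# The snippet above breaks off in the middle of the LinearRegressionL1L2GL
# call. A hedged reconstruction of how such a call is typically completed with
# Parsimony's group-lasso linear operator; every argument beyond the visible
# l1/l2/gl values is an assumption, not recovered source:
import parsimony.functions.nesterov.gl as gl

Agl = gl.linear_operator_from_groups(p, groups=groups, weights=weights)
adaptive_enet = estimators.LinearRegressionL1L2GL(l1=0,
                                                  l2=0.8,
                                                  gl=0.006,
                                                  A=Agl,
                                                  penalty_start=11,  # assumed
                                                  mean=True)
adaptive_enet.fit(X_res, z)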
Example #8
    FOLD = 0
    TRAIN = 0
    TEST = 1
    Xtrain = X[cv[FOLD][TRAIN], ...]
    Xtest = X[cv[FOLD][TEST], ...]
    ztrain = z[cv[FOLD][TRAIN], ...]
    ztest = z[cv[FOLD][TEST], ...]

    # alpha l1_ratio
    alpha = 1.0
    l1_ratio = 0.9

    #parsimony
    XtrainPP = np.hstack((np.ones((ztrain.shape[0], 1)), Xtrain))
    XtestPP = np.hstack((np.ones((ztest.shape[0], 1)), Xtest))
    mod_PP = estimators.ElasticNet(l1_ratio,
                                   alpha=alpha,
                                   penalty_start=1,
                                   mean=True)

    time_curr = time.time()
    z_pred_PP = mod_PP.fit(XtrainPP, ztrain).predict(XtestPP)
    print "Parsimony elapsed time: ", time.time() - time_curr
    print "Parsimony r2:", r2_score(ztest, z_pred_PP)

    time_curr = time.time()
    mod_SL = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, fit_intercept=True)
    z_pred_SL = mod_SL.fit(Xtrain, ztrain).predict(Xtest)
    print("Scikit elapsed time:", time.time() - time_curr)
    print("Scikit r2:", r2_score(ztest, z_pred_SL))
Example #9
if has_sklearn:
    MODELS["l1_inter__sklearn"] = \
        sklearn.linear_model.Lasso(alpha=alpha / n_train,
                                   fit_intercept=True)

MODELS["l1_inter__fista"] = \
    estimators.Lasso(l=alpha,
                     mean=False,
                     penalty_start=1)

## Enet + fista
if has_sklearn:
    MODELS["l1l2__sklearn"] = \
        sklearn.linear_model.ElasticNet(alpha=alpha,
                                        l1_ratio=.5,
                                        fit_intercept=False)
MODELS["l1l2__fista"] = \
    estimators.ElasticNet(alpha=alpha, l=.5)

if has_sklearn:
    MODELS["l1l2_inter__sklearn"] = \
        sklearn.linear_model.ElasticNet(alpha=alpha,
                                        l1_ratio=.5,
                                        fit_intercept=True)

MODELS["l1l2_inter__fista"] = \
    estimators.ElasticNet(alpha=alpha, l=.5,
                          penalty_start=1)

## LinearRegressionL1L2TV, Parsimony only
# Minimize:
# f(beta, X, y) = (1 / (2 * n)) * ||X beta - y||²_2
#              + l1 * ||beta||_1
#              + (l2 / 2) * ||beta||²_2
#              + tv * TV(beta)

def mapper(key, output_collector):

    import mapreduce as GLOBAL
    Xtr = GLOBAL.DATA_RESAMPLED["X"][0]
    Xte = GLOBAL.DATA_RESAMPLED["X"][1]
    ytr = GLOBAL.DATA_RESAMPLED["y"][0]
    yte = GLOBAL.DATA_RESAMPLED["y"][1]

    # key = 'enettv_0.01_0.1_0.2'.split("_")
    algo, alpha = key[0], float(key[1])
    l1l2ratio, tvratio = float(key[2]), float(key[3])

    # Split the global penalty alpha: a tvratio fraction goes to TV, the
    # remainder is shared between l1 and l2 according to l1l2ratio.
    tv = alpha * tvratio
    l1 = alpha * (1 - tvratio) * l1l2ratio
    l2 = alpha * (1 - tvratio) * (1 - l1l2ratio)

    print(key, algo, alpha, l1, l2, tv)

    # Standardize every column except the leading unpenalized one.
    scaler = preprocessing.StandardScaler().fit(Xtr[:, 1:])
    Xtr[:, 1:] = scaler.transform(Xtr[:, 1:])
    Xte[:, 1:] = scaler.transform(Xte[:, 1:])

    # NB: penalty_start (number of leading unpenalized columns) is expected to
    # be defined at module level.
    if algo == 'enettv':
        conesta = algorithms.proximal.CONESTA(max_iter=10000)
        mod = estimators.LinearRegressionL1L2TV(l1,
                                                l2,
                                                tv,
                                                GLOBAL.Atv,
                                                algorithm=conesta,
                                                penalty_start=penalty_start)
        mod.fit(Xtr, ytr.ravel())
        beta = mod.beta

    elif algo == 'enetgn':
        fista = algorithms.proximal.FISTA(max_iter=5000)
        mod = estimators.LinearRegressionL1L2GraphNet(
            l1,
            l2,
            tv,
            GLOBAL.Agn,
            algorithm=fista,
            penalty_start=penalty_start)
        mod.fit(Xtr, ytr.ravel())
        beta = mod.beta

    elif algo == 'enet':
        fista = algorithms.proximal.FISTA(max_iter=5000)
        mod = estimators.ElasticNet(l1l2ratio,
                                    algorithm=fista,
                                    penalty_start=penalty_start)
        mod.fit(Xtr, ytr.ravel())
        beta = mod.beta

    elif algo == 'Ridge':
        mod = estimators.RidgeRegression(l1l2ratio,
                                         penalty_start=penalty_start)
        mod.fit(Xtr, ytr.ravel())
        beta = mod.beta

    elif algo == 'RidgeAGD':
        mod = estimators.RidgeRegression(
            l1l2ratio,
            algorithm=gradient.GradientDescent(max_iter=1000),
            penalty_start=penalty_start)
        mod.fit(Xtr, ytr.ravel())
        beta = mod.beta

    elif algo == 'linearSklearn':
        mod = linear_model.LinearRegression(fit_intercept=False)
        mod.fit(Xtr, ytr.ravel())
        beta = mod.coef_
        beta = beta.reshape(beta.shape[0], 1)

    elif algo == 'SkRidge':
        mod = linear_model.Ridge(alpha=l1l2ratio, fit_intercept=False)
        mod.fit(Xtr, ytr.ravel())
        beta = mod.coef_
        beta = beta.reshape(beta.shape[0], 1)

    elif algo == 'SkRidgeInt':
        mod = linear_model.Ridge(alpha=l1l2ratio, fit_intercept=True)
        mod.fit(Xtr, ytr.ravel())
        beta = mod.coef_
        beta = beta.reshape(beta.shape[0], 1)
    else:
        raise ValueError('Algo %s not handled' % algo)

    y_pred = mod.predict(Xte)
    ret = dict(y_pred=y_pred, y_true=yte, beta=beta)
    if output_collector:
        output_collector.collect(key, ret)
    else:
        return ret
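# Illustrative keys this mapper accepts (values hypothetical). The tuple layout
# is (algo, alpha, l1l2ratio, tvratio); note that the ridge branches reuse the
# l1l2ratio slot as the ridge penalty:
keys = [
    ('enettv', 0.01, 0.1, 0.3),  # ElasticNet + TV, solved with CONESTA
    ('enetgn', 0.01, 0.1, 0.3),  # ElasticNet + GraphNet, solved with FISTA
    ('enet',   0.01, 0.1, 0.0),  # plain ElasticNet (FISTA)
    ('Ridge',  0.0,  1.0, 0.0),  # third slot = ridge strength here
]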
X = X3d.reshape((n_samples, np.prod(shape)))
n_train = 100
Xtr = X[:n_train, :]
ytr = y[:n_train]
Xte = X[n_train:, :]
yte = y[n_train:]
alpha = 1.  # global penalty

###########################################################################
## Elasticnet
# Min: (1 / (2 * n)) * ||X * beta - y||²_2
#              + alpha * l * ||beta||_1
#              + alpha * ((1.0 - l) / 2) * ||beta||²_2
# Parsimony's ElasticNet is based on FISTA and is therefore slower than
# scikit-learn's.
l1_ratio = .5
enet = estimators.ElasticNet(alpha=alpha, l=l1_ratio)
yte_pred_enet = enet.fit(Xtr, ytr).predict(Xte)

###########################################################################
## Fit LinearRegressionL1L2TV
# Min: (1 / (2 * n)) * ||Xbeta - y||^2_2
#    + l1 * ||beta||_1
#    + (l2 / 2) * ||beta||^2_2
#    + tv * TV(beta)
#
l1, l2, tv = alpha * np.array((.33, .33, .33))  # l1, l2, tv penalties
A = nesterov_tv.linear_operator_from_shape(shape)
algo = algorithms.proximal.CONESTA(max_iter=500)
enettv = estimators.LinearRegressionL1L2TV(l1, l2, tv, A, algorithm=algo)
yte_pred_enettv = enettv.fit(Xtr, ytr).predict(Xte)
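# A natural follow-up, mirroring the r2_score checks in the examples above:
# score both fits on the held-out block.
from sklearn.metrics import r2_score

print("enet   r2:", r2_score(yte, yte_pred_enet))
print("enettv r2:", r2_score(yte, yte_pred_enettv))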