Exemplo n.º 1
0
 model_name, global_pen, tv_ratio, l1_ratio = params
 if model_name == 'pca':
     model = sklearn.decomposition.PCA(n_components=N_COMP)
     model.components_ = components.T
     model.mean_ = X.mean(axis=0)
     computed_projections = model.transform(X)
 if model_name == 'struct_pca':
     # We don't really need those parameters here (the components are set
     # explicitly below), but keep the penalty split coherent so that
     # ll1 + ll2 + ltv == global_pen.
     ltv = global_pen * tv_ratio
     ll1 = l1_ratio * global_pen * (1 - tv_ratio)
     ll2 = (1 - l1_ratio) * global_pen * (1 - tv_ratio)
     model = pca_tv.PCA_SmoothedL1_L2_TV(n_components=N_COMP,
                                         l1=ll1, l2=ll2, ltv=ltv,
                                         Atv=Atv,
                                         Al1=Al1,
                                         criterion="frobenius",
                                         eps=1e-6,
                                         max_iter=100,
                                         inner_max_iter=int(1e4),
                                         output=False)
     # Inject the previously computed components instead of refitting.
     model.V = components
     computed_projections, _ = model.transform(X)
 # Projections
 projections_file = os.path.join(input_dir, "X_train_transform.npz")
 projections = np.load(projections_file)['arr_0']
 # Compare both
 assert (np.allclose(projections, computed_projections))
 # Create df for this model
 index = pd.MultiIndex.from_arrays([subjects_id,
                                    np.asarray([model_name] * n),
                                    np.asarray([global_pen] * n),
Exemplo n.º 2
0
alpha = 0.1
l1, l2, ltv = alpha * np.array((.4, 1/alpha, .001))
#k =0.5
#"l = 2*k
#g = 4

pca_sklearn = sklearn.decomposition.PCA(n_components=N_COMP)
pca_sklearn.fit(X)
V_pca_sklearn = pca_sklearn.components_.transpose()
del pca_sklearn

# ExcessiveGap
e_eg_sparse = pca_tv.PCA_SmoothedL1_L2_TV(l1, l2, ltv, Atv, Al1,
                                          n_components=N_COMP,
                                          criterion="frobenius",
                                          eps=1e-6,
                                          inner_max_iter=int(1e5),
                                          use_eg=True,
                                          output=True)
t = timeit.timeit(stmt='e_eg_sparse.fit(X)', setup="from __main__ import e_eg_sparse, X", number=1)
print "EG:", t
V_sparse_eg = e_eg_sparse.V
del e_eg_sparse

# CONESTA
e_con_sparse = pca_tv.PCA_SmoothedL1_L2_TV(50*l1, 50*l2, 50*ltv, Atv, Al1,
                                           n_components=N_COMP,
                                           criterion="frobenius",
                                           eps=1e-6,
#                                           inner_eps=1e-8,
                                           inner_max_iter=int(1e5),
Exemplo n.º 3
0
def mapper(key, output_collector):
    """Fit one PCA variant on the resampled data and collect its metrics.

    Parameters
    ----------
    key : sequence of 4 items
        ``(model_name, global_pen, tv_ratio, l1_ratio)``. ``model_name``
        must be ``'pca'``, ``'sparse_pca'`` or ``'struct_pca'``:

        * ``'pca'``: the three penalty values are ignored;
        * ``'sparse_pca'``: ``global_pen`` is used as the ``alpha`` of
          ``SparsePCA``; the two ratios are ignored;
        * ``'struct_pca'``: ``global_pen`` is split into TV, l1 and l2
          penalties according to ``tv_ratio`` and ``l1_ratio``.
    output_collector : object
        Results are handed over with ``output_collector.collect(key, ret)``
        where ``ret`` is a dict holding the components, projections,
        reconstructions, Frobenius errors, explained variance ratios,
        per-component norms and the fit time.

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the supported models.
    """
    import mapreduce as GLOBAL  # access to global variables:
    # GLOBAL.DATA_RESAMPLED["X"] is indexed below as [X_train, X_test]
    # GLOBAL.Atv is the linear operator used for the TV penalty

    # key: list of parameters
    model_name, global_pen, tv_ratio, l1_ratio = key
    if model_name not in ('pca', 'sparse_pca', 'struct_pca'):
        # Fail early instead of hitting an unbound ``model`` further down.
        raise ValueError("Unknown model name: %r" % (model_name,))
    is_struct = (model_name == 'struct_pca')

    if is_struct:
        # Split the global penalty; the three parts must sum to global_pen.
        ltv = global_pen * tv_ratio
        ll1 = l1_ratio * global_pen * (1 - tv_ratio)
        ll2 = (1 - l1_ratio) * global_pen * (1 - tv_ratio)
        assert (np.allclose(ll1 + ll2 + ltv, global_pen))

    X_train = GLOBAL.DATA_RESAMPLED["X"][0]
    _, p = X_train.shape
    X_test = GLOBAL.DATA_RESAMPLED["X"][1]

    # TV operator: needed to fit struct_pca and to score every model
    Atv = GLOBAL.Atv

    # Build the model
    if model_name == 'pca':
        # Penalties are meaningless for plain PCA
        model = sklearn.decomposition.PCA(n_components=N_COMP)
    elif model_name == 'sparse_pca':
        # Only the l1 penalty exists: it receives the whole global penalty
        model = sklearn.decomposition.SparsePCA(n_components=N_COMP,
                                                alpha=global_pen)
    else:
        Al1 = scipy.sparse.eye(p, p)
        model = pca_tv.PCA_SmoothedL1_L2_TV(n_components=N_COMP,
                                            l1=ll1,
                                            l2=ll2,
                                            ltv=ltv,
                                            Atv=Atv,
                                            Al1=Al1,
                                            criterion="frobenius",
                                            eps=1e-6,
                                            max_iter=100,
                                            inner_max_iter=int(1e4),
                                            output=False)

    # Fit model and time it
    # NOTE(review): time.clock was removed in Python 3.8; switch to another
    # timer if this script is ever ported to Python 3.
    t0 = time.clock()
    model.fit(X_train)
    _time = time.clock() - t0

    # Projectors, projections and reconstructions of train & test data
    if is_struct:
        V = model.V
        # transform returns a pair; only the projections are kept
        X_train_transform, _ = model.transform(X_train)
        X_test_transform, _ = model.transform(X_test)
        # Reconstruction is implemented in the predict method
        X_train_predict = model.predict(X_train)
        X_test_predict = model.predict(X_test)
    else:
        V = model.components_.T
        X_train_transform = model.transform(X_train)
        X_test_transform = model.transform(X_test)
        # For SparsePCA or PCA, the formula is: UV^t (U given by transform)
        X_train_predict = np.dot(X_train_transform, V.T)
        X_test_predict = np.dot(X_test_transform, V.T)

    # Compute Frobenius norm between original and reconstructed datasets
    frobenius_train = np.linalg.norm(X_train - X_train_predict, 'fro')
    frobenius_test = np.linalg.norm(X_test - X_test_predict, 'fro')

    # Compute explained variance ratio
    evr_train = metrics.adjusted_explained_variance(X_train_transform)
    evr_train /= np.var(X_train, axis=0).sum()
    evr_test = metrics.adjusted_explained_variance(X_test_transform)
    evr_test /= np.var(X_test, axis=0).sum()

    # Compute geometric metrics and norms of components
    TV = parsimony.functions.nesterov.tv.TotalVariation(1, A=Atv)
    l0 = np.zeros((N_COMP, ))
    l1 = np.zeros((N_COMP, ))
    l2 = np.zeros((N_COMP, ))
    tv = np.zeros((N_COMP, ))
    for i in range(N_COMP):
        component = V[:, i]
        # Norms
        l0[i] = np.linalg.norm(component, 0)
        l1[i] = np.linalg.norm(component, 1)
        l2[i] = np.linalg.norm(component, 2)
        tv[i] = TV.f(component)

    ret = dict(frobenius_train=frobenius_train,
               frobenius_test=frobenius_test,
               components=V,
               X_train_predict=X_train_predict,
               X_test_predict=X_test_predict,
               X_train_transform=X_train_transform,
               X_test_transform=X_test_transform,
               evr_train=evr_train,
               evr_test=evr_test,
               l0=l0,
               l1=l1,
               l2=l2,
               tv=tv,
               time=_time)

    # The original (unmodified) key identifies the result
    output_collector.collect(key, ret)
Exemplo n.º 4
0
                  number=1)
print "Sparse PCA:", t
V_sparsepca_sklearn = sparsepca_sklearn.components_.transpose()
del sparsepca_sklearn

# Struct PCA with few TV
print "Fitting StructPCA"
l2 = 1
ltv = alpha * .001
e1 = pca_tv.PCA_SmoothedL1_L2_TV(
    l1,
    l2,
    ltv,
    Atv,
    Al1,
    n_components=N_COMP,
    criterion="frobenius",
    eps=1e-6,
    #                                inner_eps=1e-8,
    inner_max_iter=int(1e5),
    use_eg=False,
    output=False)
t = timeit.timeit(stmt='e1.fit(X)',
                  setup="from __main__ import e1, X",
                  number=1)
print "Fitting StructPCA:", t
V1 = e1.V

# Struct PCA with more TV
print "Fitting StructPCA with more TV"
ltv = alpha * .01