Example #1
def mapper(key, output_collector):
    # Assumes module-level imports: numpy as np, sklearn.decomposition,
    # pca_tv and metrics.
    import mapreduce as GLOBAL  # access to global variables
    model_name, global_pen, tv_ratio, l1_ratio = key
    if model_name == 'pca':
        # Force the key
        global_pen = tv_ratio = l1_ratio = 0

    if model_name == 'sparse_pca':
        global_pen = tv_ratio = 0
        ll1 = l1_ratio

    if model_name == 'struct_pca':
        ltv = global_pen * tv_ratio
        ll1 = l1_ratio * global_pen * (1 - tv_ratio)
        ll2 = (1 - l1_ratio) * global_pen * (1 - tv_ratio)
        assert np.allclose(ll1 + ll2 + ltv, global_pen)

    X_train = GLOBAL.DATA_RESAMPLED["X"][0]
    n, p = X_train.shape
    X_test = GLOBAL.DATA_RESAMPLED["X"][1]
    # A matrices
    Atv = GLOBAL.A
    N_COMP = GLOBAL.N_COMP

    # Fit model
    if model_name == 'pca':
        model = sklearn.decomposition.PCA(n_components=N_COMP)

    if model_name == 'sparse_pca':
        model = sklearn.decomposition.SparsePCA(n_components=N_COMP, alpha=ll1)

    if model_name == 'struct_pca':
        model = pca_tv.PCA_L1_L2_TV(n_components=N_COMP,
                                    l1=ll1,
                                    l2=ll2,
                                    ltv=ltv,
                                    Atv=Atv,
                                    criterion="frobenius",
                                    eps=1e-6,
                                    max_iter=100,
                                    inner_max_iter=int(1e4),
                                    output=False)
    model.fit(X_train)

    # Save the projectors
    if (model_name == 'pca') or (model_name == 'sparse_pca'):
        V = model.components_.T
    if model_name == 'struct_pca':
        V = model.V

    # Project train & test data
    if (model_name == 'pca') or (model_name == 'sparse_pca'):
        X_train_transform = model.transform(X_train)
        X_test_transform = model.transform(X_test)

    if (model_name == 'struct_pca'):
        X_train_transform, _ = model.transform(X_train)
        X_test_transform, _ = model.transform(X_test)

    # Reconstruct train & test data
    # For SparsePCA or PCA, the formula is: UV^t (U is given by transform)
    # For StructPCA this is implemented in the predict method (which uses
    # transform)
    if (model_name == 'pca') or (model_name == 'sparse_pca'):
        X_train_predict = np.dot(X_train_transform, V.T)
        X_test_predict = np.dot(X_test_transform, V.T)

    if (model_name == 'struct_pca'):
        X_train_predict = model.predict(X_train)
        X_test_predict = model.predict(X_test)

    # Compute Frobenius norm between original and reconstructed datasets
    frobenius_train = np.linalg.norm(X_train - X_train_predict, 'fro')
    frobenius_test = np.linalg.norm(X_test - X_test_predict, 'fro')
    print(frobenius_test)

    # Compute explained variance ratio
    evr_train = metrics.adjusted_explained_variance(X_train_transform)
    evr_train /= np.var(X_train, axis=0).sum()
    evr_test = metrics.adjusted_explained_variance(X_test_transform)
    evr_test /= np.var(X_test, axis=0).sum()

    # Remove predicted values (they are huge)
    del X_train_predict, X_test_predict

    ret = dict(frobenius_train=frobenius_train,
               frobenius_test=frobenius_test,
               components=V,
               X_train_transform=X_train_transform,
               X_test_transform=X_test_transform,
               evr_train=evr_train,
               evr_test=evr_test)

    output_collector.collect(key, ret)
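
Note: the (ll1, ll2, ltv) decomposition above is reused in every mapper, and later examples call it through a compute_coefs_from_ratios helper. A minimal sketch of that helper, assuming exactly the formulas used in the mappers:

import numpy as np

def compute_coefs_from_ratios(global_pen, tv_ratio, l1_ratio):
    # Split a global penalty into l1/l2/tv terms whose sum is global_pen.
    ltv = global_pen * tv_ratio
    ll1 = l1_ratio * global_pen * (1 - tv_ratio)
    ll2 = (1 - l1_ratio) * global_pen * (1 - tv_ratio)
    assert np.allclose(ll1 + ll2 + ltv, global_pen)
    return ll1, ll2, ltv

# e.g. compute_coefs_from_ratios(0.01, 0.5, 0.5) == (0.0025, 0.0025, 0.005)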
Example #2
ltv = global_pen * tv_ratio
ll1 = l1_ratio * global_pen * (1 - tv_ratio)
ll2 = (1 - l1_ratio) * global_pen * (1 - tv_ratio)
assert np.allclose(ll1 + ll2 + ltv, global_pen)

# Compute A and mask
# (assumes module-level: numpy as np, parsimony.functions.nesterov.tv as nesterov_tv)
masks = []
INPUT_OBJECT_MASK_FILE_FORMAT = "mask_{o}.npy"
for i in range(3):
    filename = INPUT_OBJECT_MASK_FILE_FORMAT.format(o=i)
    masks.append(np.load(filename))
im_shape = config["im_shape"]
Atv = nesterov_tv.A_from_shape(im_shape)

########################################
snapshot = AlgorithmSnapshot(
    '/neurospin/brainomics/2014_pca_struct/lambda_max/',
    saving_period=1).save_conesta
mod = pca_tv.PCA_L1_L2_TV(n_components=3,
                          l1=ll1,
                          l2=ll2,
                          ltv=ltv,
                          Atv=Atv,
                          criterion="frobenius",
                          eps=1e-4,
                          max_iter=100,
                          inner_max_iter=int(1e4),
                          output=True,
                          callback=snapshot)
mod.fit(X[:250, :])
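
Note: pca_tv.PCA_L1_L2_TV accepts a callback that the solver invokes during fitting; here the bound method snapshot.save_conesta is passed so intermediate CONESTA state gets persisted. A hypothetical stand-in for AlgorithmSnapshot (not shown in this listing), assuming the solver hands the callback a dict of arrays:

import os
import numpy as np

class SimpleSnapshot:
    # Hypothetical stand-in for AlgorithmSnapshot.
    def __init__(self, output_dir, saving_period=1):
        self.output_dir = output_dir
        self.saving_period = saving_period
        self.n_calls = 0

    def save_conesta(self, state):
        # Persist the solver state every `saving_period` calls
        # (assumes `state` is a dict of arrays).
        self.n_calls += 1
        if self.n_calls % self.saving_period == 0:
            np.savez(os.path.join(self.output_dir,
                                  "conesta_%06d.npz" % self.n_calls),
                     **state)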
def mapper(key, output_collector):
    # Assumes module-level imports: numpy as np, sklearn.decomposition,
    # scipy.sparse as sparse, pca_tv and pca_struct.
    import mapreduce as GLOBAL  # access to global variables
    model_name, global_pen, struct_ratio, l1_ratio = key
    if model_name == 'pca':
        global_pen = struct_ratio = l1_ratio = 0

    if model_name == 'sparse_pca':
        global_pen = struct_ratio = 0
        ll1 = l1_ratio

    if model_name == 'struct_pca':
        ltv = global_pen * struct_ratio
        ll1 = l1_ratio * global_pen * (1 - struct_ratio)
        ll2 = (1 - l1_ratio) * global_pen * (1 - struct_ratio)
        assert np.allclose(ll1 + ll2 + ltv, global_pen)

    if model_name == 'graphNet_pca':
        lgn = global_pen * struct_ratio
        ll1 = l1_ratio * global_pen * (1 - struct_ratio)
        ll2 = (1 - l1_ratio) * global_pen * (1 - struct_ratio)
        assert np.allclose(ll1 + ll2 + lgn, global_pen)

    X_train = GLOBAL.DATA_RESAMPLED["X"][0]
    n, p = X_train.shape
    X_test = GLOBAL.DATA_RESAMPLED["X"][1]
    # A matrices
    A = GLOBAL.A
    N_COMP = GLOBAL.N_COMP

    # Fit model
    if model_name == 'pca':
        model = sklearn.decomposition.PCA(n_components=N_COMP)

    if model_name == 'sparse_pca':
        model = sklearn.decomposition.SparsePCA(n_components=N_COMP, alpha=ll1)

    if model_name == 'struct_pca':
        model = pca_tv.PCA_L1_L2_TV(n_components=N_COMP,
                                    l1=ll1,
                                    l2=ll2,
                                    ltv=ltv,
                                    Atv=A,
                                    criterion="frobenius",
                                    eps=1e-6,
                                    max_iter=100,
                                    inner_max_iter=int(1e3),
                                    output=False)
    if model_name == 'graphNet_pca':
        A = sparse.vstack(A)
        model = pca_struct.PCAGraphNet(n_components=N_COMP,
                                       l1=ll1,
                                       l2=ll2,
                                       lgn=lgn,
                                       Agn=A,
                                       criterion="frobenius",
                                       eps=1e-6,
                                       max_iter=500,
                                       output=False)
    model.fit(X_train)

    # Save the projectors
    if (model_name == 'pca') or (model_name == 'sparse_pca'):
        V = model.components_.T
    if (model_name == 'struct_pca') or (model_name == 'graphNet_pca'):
        V = model.V

    # Project train & test data
    if (model_name == 'pca') or (model_name == 'sparse_pca'):
        X_train_transform = model.transform(X_train)
        X_test_transform = model.transform(X_test)

    if (model_name == 'struct_pca') or (model_name == 'graphNet_pca'):
        X_train_transform, _ = model.transform(X_train)
        X_test_transform, _ = model.transform(X_test)

    # Reconstruct train & test data
    # For SparsePCA or PCA, the formula is: UV^t (U is given by transform)
    # For StructPCA this is implemented in the predict method (which uses
    # transform)
    if (model_name == 'pca') or (model_name == 'sparse_pca'):
        X_train_predict = np.dot(X_train_transform, V.T)
        X_test_predict = np.dot(X_test_transform, V.T)

    if (model_name == 'struct_pca') or (model_name == 'graphNet_pca'):
        X_train_predict = model.predict(X_train)
        X_test_predict = model.predict(X_test)

    # Compute Frobenius norm between original and reconstructed datasets
    frobenius_train = np.linalg.norm(X_train - X_train_predict, 'fro')
    frobenius_test = np.linalg.norm(X_test - X_test_predict, 'fro')
    print(frobenius_test)

    del X_train_predict, X_test_predict

    ret = dict(frobenius_train=frobenius_train,
               frobenius_test=frobenius_test,
               components=V,
               X_train_transform=X_train_transform,
               X_test_transform=X_test_transform)

    output_collector.collect(key, ret)
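
Note: the UV^t reconstruction used for the sklearn models never adds a mean back, so it assumes the data are (approximately) centered. A self-contained sketch of the same test-set Frobenius computation on synthetic data:

import numpy as np
import sklearn.decomposition

rng = np.random.RandomState(0)
X_train = rng.randn(100, 50)
X_test = rng.randn(50, 50)
X_train -= X_train.mean(axis=0)  # center, so pca.mean_ is ~0
X_test -= X_test.mean(axis=0)

pca = sklearn.decomposition.PCA(n_components=3)
pca.fit(X_train)
V = pca.components_.T                                # projectors, as above
X_test_predict = np.dot(pca.transform(X_test), V.T)  # UV^t reconstruction
print(np.linalg.norm(X_test - X_test_predict, 'fro'))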
Example #4
def mapper(key, output_collector):
    # Assumes module-level imports: numpy as np, sklearn.decomposition,
    # pca_tv, parsimony, metrics and time.
    import mapreduce as GLOBAL  # access to global variables
    model_name, global_pen, tv_ratio, l1_ratio = key
    if model_name == 'pca':
        # Force the key
        global_pen = tv_ratio = l1_ratio = 0

    if model_name == 'sparse_pca':
        global_pen = tv_ratio = 0
        ll1 = l1_ratio

    if model_name == 'struct_pca':
        ltv = global_pen * tv_ratio
        ll1 = l1_ratio * global_pen * (1 - tv_ratio)
        ll2 = (1 - l1_ratio) * global_pen * (1 - tv_ratio)
        assert np.allclose(ll1 + ll2 + ltv, global_pen)

    X_train = GLOBAL.DATA_RESAMPLED["X"][0]
    print(X_train.shape)
    n, p = X_train.shape
    X_test = GLOBAL.DATA_RESAMPLED["X"][1]

    # A matrices
    Atv = GLOBAL.Atv
    N_COMP = GLOBAL.N_COMP

    # Fit model
    if model_name == 'pca':
        model = sklearn.decomposition.PCA(n_components=N_COMP)

    if model_name == 'sparse_pca':
        model = sklearn.decomposition.SparsePCA(n_components=N_COMP, alpha=ll1)

    if model_name == 'struct_pca':
        model = pca_tv.PCA_L1_L2_TV(n_components=N_COMP,
                                    l1=ll1, l2=ll2, ltv=ltv,
                                    Atv=Atv,
                                    criterion="frobenius",
                                    eps=1e-6,
                                    max_iter=100,
                                    inner_max_iter=int(1e4),
                                    output=True)
    t0 = time.perf_counter()  # time.clock() was removed in Python 3.8
    model.fit(X_train)
    t1 = time.perf_counter()
    _time = t1 - t0
    # print("X_test", GLOBAL.DATA["X"][1].shape)

    # Save the projectors
    if (model_name == 'pca') or (model_name == 'sparse_pca'):
        V = model.components_.T
    if model_name == 'struct_pca':
        V = model.V

    # Project train & test data
    if (model_name == 'pca') or (model_name == 'sparse_pca'):
        X_train_transform = model.transform(X_train)
        X_test_transform = model.transform(X_test)

    if model_name == 'struct_pca':
        X_train_transform, _ = model.transform(X_train)
        X_test_transform, _ = model.transform(X_test)

    # Reconstruct train & test data
    # For SparsePCA or PCA, the formula is: UV^t (U is given by transform)
    # For StructPCA this is implemented in the predict method (which uses
    # transform)
    if (model_name == 'pca') or (model_name == 'sparse_pca'):
        X_train_predict = np.dot(X_train_transform, V.T)
        X_test_predict = np.dot(X_test_transform, V.T)

    if model_name == 'struct_pca':
        X_train_predict = model.predict(X_train)
        X_test_predict = model.predict(X_test)

    # Compute Frobenius norm between original and reconstructed datasets
    frobenius_train = np.linalg.norm(X_train - X_train_predict, 'fro')
    frobenius_test = np.linalg.norm(X_test - X_test_predict, 'fro')
    print(frobenius_test)

    # Compute explained variance ratio
    evr_train = metrics.adjusted_explained_variance(X_train_transform)
    evr_train /= np.var(X_train, axis=0).sum()
    evr_test = metrics.adjusted_explained_variance(X_test_transform)
    evr_test /= np.var(X_test, axis=0).sum()

    # Remove predicted values (they are huge)
    del X_train_predict, X_test_predict

    # Compute geometric metrics and norms of components
    TV = parsimony.functions.nesterov.tv.TotalVariation(1, A=Atv)
    l0 = np.zeros((N_COMP,))
    l1 = np.zeros((N_COMP,))
    l2 = np.zeros((N_COMP,))
    tv = np.zeros((N_COMP,))
    for i in range(N_COMP):
        # Norms
        l0[i] = np.linalg.norm(V[:, i], 0)
        l1[i] = np.linalg.norm(V[:, i], 1)
        l2[i] = np.linalg.norm(V[:, i], 2)
        tv[i] = TV.f(V[:, i])

    ret = dict(frobenius_train=frobenius_train,
               frobenius_test=frobenius_test,
               components=V,
               X_train_transform=X_train_transform,
               X_test_transform=X_test_transform,
               evr_train=evr_train,
               evr_test=evr_test,
               l0=l0,
               l1=l1,
               l2=l2,
               tv=tv,
               time=_time)

    output_collector.collect(key, ret)
global_pen = 0.01
l1_ratio = 0.5
tv_ratio = 0.5
ll1, ll2, ltv = compute_coefs_from_ratios(global_pen, tv_ratio, l1_ratio)
start_vector = start_vectors.RandomStartVector(seed=24)


MODELS["SparsePCA"] = \
   sklearn.decomposition.SparsePCA(n_components=N_COMP,alpha=1)   


MODELS["ElasticNetPCA"] = \
   pca_tv.PCA_L1_L2_TV(n_components=N_COMP,
                                    l1=ll1, l2=ll2, ltv=1e-6,
                                    Atv=Atv,
                                    criterion="frobenius",
                                    eps=1e-6,
                                    max_iter=100,
                                    inner_max_iter=int(1e4),
                                    output=False,start_vector=start_vector)

MODELS["SPCATV"] = \
    pca_tv.PCA_L1_L2_TV(n_components=N_COMP,
                                    l1=ll1, l2=ll2, ltv=ltv,
                                    Atv=Atv,
                                    criterion="frobenius",
                                    eps=1e-6,
                                    max_iter=100,
                                    inner_max_iter=int(1e4),
                                    output=False,start_vector=start_vector)

###############################################################################
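
Note: a usage sketch for the MODELS registry above, assuming X is a centered (n, p) array and N_COMP is set as in the surrounding script; the transform/V conventions follow the mappers:

import numpy as np

for name, model in MODELS.items():
    model.fit(X)
    if hasattr(model, "components_"):   # sklearn estimators
        U = model.transform(X)
        V = model.components_.T
    else:                               # pca_tv estimators
        U, _ = model.transform(X)
        V = model.V
    print(name, np.linalg.norm(X - np.dot(U, V.T), 'fro'))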
Example #6
# pending
ll1, ll2, ltv = 0.05 * 0.025937425654559931, 1, 0.003
key_pca_enettv = "pca_enettv_%.4f_%.3f_%.3f" % (ll1, ll2, ltv)

## key_pca_enettv = CHOICE
key = key_pca_enettv
print(OUTPUT_DIR.format(key=key))

if not (os.path.exists(OUTPUT_DIR.format(key=key))):
    os.makedirs(OUTPUT_DIR.format(key=key))

model = pca_tv.PCA_L1_L2_TV(n_components=N_COMP,
                            l1=ll1,
                            l2=ll2,
                            ltv=ltv,
                            Atv=A,
                            criterion="frobenius",
                            eps=1e-6,
                            max_iter=100,
                            inner_max_iter=inner_max_iter,
                            verbose=True)

t0 = time.perf_counter()  # time.clock() was removed in Python 3.8
model.fit(X)
model.l1_max(X)
t1 = time.perf_counter()
_time = t1 - t0
print("Time TOT(s)", _time)

# Save results
#model.U, model.d, model.V = m["U"], m["d"], m["V"]
PC, d = model.transform(X)
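
Note: the commented line above names the fitted attributes (model.U, model.d, model.V). A hedged saving sketch; the .npz filename is hypothetical:

import os
import numpy as np

# Persist the fitted decomposition and the projected data.
np.savez(os.path.join(OUTPUT_DIR.format(key=key), "pca_enettv_fit.npz"),
         U=model.U, d=model.d, V=model.V, PC=PC)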
Example #7
def mapper(key, output_collector):
    import mapreduce as GLOBAL  # access to global variables:
    # GLOBAL.DATA ::= {"X": [Xtrain, Xtest], "y": [ytrain, ytest]}
    # key: list of parameters
    # Assumes module-level imports: numpy as np, sklearn.decomposition,
    # pca_tv, parsimony, array_utils, metrics and time.
    model_name, global_pen, tv_ratio, l1_ratio = key
    if model_name == 'pca':
        # Force the key
        global_pen = tv_ratio = l1_ratio = 0
    if model_name == 'struct_pca':
        ll1, ll2, ltv = compute_coefs_from_ratios(global_pen, tv_ratio,
                                                  l1_ratio)
        # This should not happen
        if ll1 > GLOBAL.l1_max:
            raise ValueError("l1 penalty exceeds l1_max")

    X_train = GLOBAL.DATA_RESAMPLED["X"][0]
    n, p = X_train.shape
    X_test = GLOBAL.DATA_RESAMPLED["X"][1]

    # A matrices
    Atv = GLOBAL.Atv

    # Fit model
    if model_name == 'pca':
        model = sklearn.decomposition.PCA(n_components=GLOBAL.N_COMP)
    if model_name == 'struct_pca':
        model = pca_tv.PCA_L1_L2_TV(n_components=GLOBAL.N_COMP,
                                    l1=ll1,
                                    l2=ll2,
                                    ltv=ltv,
                                    Atv=Atv,
                                    criterion="frobenius",
                                    eps=1e-6,
                                    max_iter=100,
                                    inner_max_iter=int(1e4),
                                    output=False)
    t0 = time.perf_counter()  # time.clock() was removed in Python 3.8
    model.fit(X_train)
    t1 = time.perf_counter()
    _time = t1 - t0
    # print("X_test", GLOBAL.DATA["X"][1].shape)

    # Save the projectors
    if (model_name == 'pca'):
        components = model.components_.T
    if model_name == 'struct_pca':
        components = model.V

    # Threshold components
    thresh_components = np.empty(components.shape)
    thresholds = np.empty((GLOBAL.N_COMP, ))
    for k in range(GLOBAL.N_COMP):
        thresh_comp, t = array_utils.arr_threshold_from_norm2_ratio(
            components[:, k], .99)
        thresh_components[:, k] = thresh_comp
        thresholds[k] = t

    # Project train & test data
    if (model_name == 'pca'):
        X_train_transform = model.transform(X_train)
        X_test_transform = model.transform(X_test)
    if (model_name == 'struct_pca'):
        X_train_transform, _ = model.transform(X_train)
        X_test_transform, _ = model.transform(X_test)

    # Reconstruct train & test data
    # For PCA, the formula is: UV^t (U is given by transform)
    # For StructPCA this is implemented in the predict method (which uses
    # transform)
    if (model_name == 'pca'):
        X_train_predict = np.dot(X_train_transform, components.T)
        X_test_predict = np.dot(X_test_transform, components.T)
    if (model_name == 'struct_pca'):
        X_train_predict = model.predict(X_train)
        X_test_predict = model.predict(X_test)

    # Compute Frobenius norm between original and reconstructed datasets
    frobenius_train = np.linalg.norm(X_train - X_train_predict, 'fro')
    frobenius_test = np.linalg.norm(X_test - X_test_predict, 'fro')

    # Compute geometric metrics and norms of components
    TV = parsimony.functions.nesterov.tv.TotalVariation(1, A=Atv)
    l0 = np.zeros((GLOBAL.N_COMP, ))
    l1 = np.zeros((GLOBAL.N_COMP, ))
    l2 = np.zeros((GLOBAL.N_COMP, ))
    tv = np.zeros((GLOBAL.N_COMP, ))
    recall = np.zeros((GLOBAL.N_COMP, ))
    precision = np.zeros((GLOBAL.N_COMP, ))
    fscore = np.zeros((GLOBAL.N_COMP, ))
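    # NOTE: recall/precision/fscore stay at zero in this snippet; only the
    # norms below are filled in before they are returned.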
    for i in range(GLOBAL.N_COMP):
        # Norms
        l0[i] = np.linalg.norm(components[:, i], 0)
        l1[i] = np.linalg.norm(components[:, i], 1)
        l2[i] = np.linalg.norm(components[:, i], 2)
        tv[i] = TV.f(components[:, i])

    # Compute explained variance ratio
    evr_train = metrics.adjusted_explained_variance(X_train_transform)
    evr_train /= np.var(X_train, axis=0).sum()
    evr_test = metrics.adjusted_explained_variance(X_test_transform)
    evr_test /= np.var(X_test, axis=0).sum()

    ret = dict(frobenius_train=frobenius_train,
               frobenius_test=frobenius_test,
               components=components,
               thresh_components=thresh_components,
               thresholds=thresholds,
               X_train_transform=X_train_transform,
               X_test_transform=X_test_transform,
               X_train_predict=X_train_predict,
               X_test_predict=X_test_predict,
               recall=recall,
               precision=precision,
               fscore=fscore,
               evr_train=evr_train,
               evr_test=evr_test,
               l0=l0,
               l1=l1,
               l2=l2,
               tv=tv,
               time=_time)

    output_collector.collect(key, ret)
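
Note: arr_threshold_from_norm2_ratio is used above without its definition. A hypothetical re-implementation, assuming it zeros the smallest-magnitude entries while keeping at least the requested fraction of the component's l2 norm:

import numpy as np

def threshold_from_norm2_ratio(v, ratio=.99):
    # Zero the smallest-|v| entries while keeping >= ratio * ||v||_2
    # (assumed semantics; ties at the threshold are also zeroed).
    v = np.asarray(v, dtype=float)
    mags = np.sort(np.abs(v))                # ascending magnitudes
    lost = np.sqrt(np.cumsum(mags ** 2))     # norm lost dropping k smallest
    budget = np.sqrt(1 - ratio ** 2) * np.linalg.norm(v)
    k = np.searchsorted(lost, budget, side='right')
    t = mags[k - 1] if k > 0 else 0.0
    return np.where(np.abs(v) > t, v, 0.0), t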
Example #8

########################################################################
# explained variance
import pca_tv
N_COMP = 5
import parsimony.functions.nesterov.tv
Atv = parsimony.functions.nesterov.tv.A_from_mask(babel_mask.get_data())
X = T_hallu
print("# explained variance #############################################")
#fh = open(os.path.join(OUTPUT_DIR.format(key=key), "pca_enettv_info.txt"), "a")
mod = pca_tv.PCA_L1_L2_TV(n_components=N_COMP,
                          l1=0.1,
                          l2=0.1,
                          ltv=0.1,
                          Atv=Atv,
                          criterion="frobenius",
                          eps=1e-6,
                          max_iter=100,
                          inner_max_iter=1000)

mod.U, mod.V = projections, components

rsquared = np.zeros(N_COMP)
for j in range(N_COMP):
    mod.n_components = j + 1
    X_predict = mod.predict(X)
    sse = np.sum((X - X_predict)**2)
    ssX = np.sum(X**2)
    rsquared[j] = 1 - sse / ssX
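
Note: rsquared[j] is cumulative over the first j+1 components, so each component's marginal contribution is the successive difference; a short usage sketch (assumes the variables above):

import numpy as np

marginal = np.diff(np.concatenate([[0.0], rsquared]))
for j in range(N_COMP):
    print("component %d: cumulative R2 = %.4f (+%.4f)"
          % (j + 1, rsquared[j], marginal[j]))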