model_name, global_pen, tv_ratio, l1_ratio = params
if model_name == 'pca':
    model = sklearn.decomposition.PCA(n_components=N_COMP)
    model.components_ = components.T
    model.mean_ = X.mean(axis=0)
    computed_projections = model.transform(X)
if model_name == 'struct_pca':
    # We don't really need those parameters here
    ltv = global_pen * tv_ratio
    ll1 = l1_ratio * global_pen * (1 - tv_ratio)
    ll2 = (1 - l1_ratio) * global_pen * (1 - tv_ratio)
    model = pca_tv.PCA_SmoothedL1_L2_TV(n_components=N_COMP,
                                        l1=ll1, l2=ll2, ltv=ltv,
                                        Atv=Atv, Al1=Al1,
                                        criterion="frobenius",
                                        eps=1e-6,
                                        max_iter=100,
                                        inner_max_iter=int(1e4),
                                        output=False)
    model.V = components
    computed_projections, _ = model.transform(X)

# Projections
projections_file = os.path.join(input_dir, "X_train_transform.npz")
projections = np.load(projections_file)['arr_0']

# Compare both
assert np.allclose(projections, computed_projections)

# Create df for this model
index = pd.MultiIndex.from_arrays([subjects_id,
                                   np.asarray([model_name] * n),
                                   np.asarray([global_pen] * n),
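# Side note: with whiten=False, sklearn's PCA.transform(X) only centers by
# mean_ and projects onto components_, which is why restoring those two
# attributes above is enough to recompute the projections. A minimal
# standalone check of that equivalence (random data; the *_demo names are
# illustrative and not part of this script):
import numpy as np
import sklearn.decomposition

_rng = np.random.RandomState(0)
X_demo = _rng.randn(20, 10)
# p x k loadings taken from an SVD of the centered data
V_demo = np.linalg.svd(X_demo - X_demo.mean(axis=0),
                       full_matrices=False)[2][:3].T
pca_demo = sklearn.decomposition.PCA(n_components=3)
pca_demo.components_ = V_demo.T
pca_demo.mean_ = X_demo.mean(axis=0)
assert np.allclose(pca_demo.transform(X_demo),
                   np.dot(X_demo - X_demo.mean(axis=0), V_demo))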
alpha = 0.1
l1, l2, ltv = alpha * np.array((.4, 1 / alpha, .001))

pca_sklearn = sklearn.decomposition.PCA(n_components=N_COMP)
pca_sklearn.fit(X)
V_pca_sklearn = pca_sklearn.components_.transpose()
del pca_sklearn

# ExcessiveGap
e_eg_sparse = pca_tv.PCA_SmoothedL1_L2_TV(l1, l2, ltv, Atv, Al1,
                                          n_components=N_COMP,
                                          criterion="frobenius",
                                          eps=1e-6,
                                          inner_max_iter=int(1e5),
                                          use_eg=True,
                                          output=True)
t = timeit.timeit(stmt='e_eg_sparse.fit(X)',
                  setup="from __main__ import e_eg_sparse, X",
                  number=1)
print "EG:", t
V_sparse_eg = e_eg_sparse.V
del e_eg_sparse

# CONESTA
e_con_sparse = pca_tv.PCA_SmoothedL1_L2_TV(50 * l1, 50 * l2, 50 * ltv,
                                           Atv, Al1,
                                           n_components=N_COMP,
                                           criterion="frobenius",
                                           eps=1e-6,
                                           # inner_eps=1e-8,
                                           inner_max_iter=int(1e5),
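# Note on the timing pattern: timeit.timeit() with a string statement runs
# in a fresh namespace, hence the setup="from __main__ import ..." dance.
# The standard library also accepts a zero-argument callable, which avoids
# the re-import; an equivalent sketch, reusing this script's objects once
# they are defined:
#
#   t = timeit.timeit(lambda: e_con_sparse.fit(X), number=1)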
def mapper(key, output_collector):
    import mapreduce as GLOBAL  # access to global variables
    # GLOBAL.DATA_RESAMPLED ::= {"X": [X_train, X_test]}
    # key: list of parameters
    model_name, global_pen, tv_ratio, l1_ratio = key
    if model_name == 'pca':
        # Force the key
        global_pen = tv_ratio = l1_ratio = 0
    if model_name == 'sparse_pca':
        # Force the key
        tv_ratio = 0
        l1_ratio = 1
        ll1 = global_pen
    if model_name == 'struct_pca':
        ltv = global_pen * tv_ratio
        ll1 = l1_ratio * global_pen * (1 - tv_ratio)
        ll2 = (1 - l1_ratio) * global_pen * (1 - tv_ratio)
        assert np.allclose(ll1 + ll2 + ltv, global_pen)

    X_train = GLOBAL.DATA_RESAMPLED["X"][0]
    n, p = X_train.shape
    X_test = GLOBAL.DATA_RESAMPLED["X"][1]

    # A matrices
    Atv = GLOBAL.Atv
    Al1 = scipy.sparse.eye(p, p)

    # Fit model
    if model_name == 'pca':
        model = sklearn.decomposition.PCA(n_components=N_COMP)
    if model_name == 'sparse_pca':
        model = sklearn.decomposition.SparsePCA(n_components=N_COMP,
                                                alpha=ll1)
    if model_name == 'struct_pca':
        model = pca_tv.PCA_SmoothedL1_L2_TV(n_components=N_COMP,
                                            l1=ll1, l2=ll2, ltv=ltv,
                                            Atv=Atv, Al1=Al1,
                                            criterion="frobenius",
                                            eps=1e-6,
                                            max_iter=100,
                                            inner_max_iter=int(1e4),
                                            output=False)
    t0 = time.clock()
    model.fit(X_train)
    t1 = time.clock()
    _time = t1 - t0

    # Save the projectors
    if (model_name == 'pca') or (model_name == 'sparse_pca'):
        V = model.components_.T
    if model_name == 'struct_pca':
        V = model.V

    # Project train & test data
    if (model_name == 'pca') or (model_name == 'sparse_pca'):
        X_train_transform = model.transform(X_train)
        X_test_transform = model.transform(X_test)
    if model_name == 'struct_pca':
        X_train_transform, _ = model.transform(X_train)
        X_test_transform, _ = model.transform(X_test)

    # Reconstruct train & test data.
    # For PCA and SparsePCA the reconstruction is UV^t (U is given by
    # transform); for StructPCA it is implemented in the predict method
    # (which uses transform).
    if (model_name == 'pca') or (model_name == 'sparse_pca'):
        X_train_predict = np.dot(X_train_transform, V.T)
        X_test_predict = np.dot(X_test_transform, V.T)
    if model_name == 'struct_pca':
        X_train_predict = model.predict(X_train)
        X_test_predict = model.predict(X_test)

    # Compute the Frobenius norm between the original and reconstructed
    # datasets
    frobenius_train = np.linalg.norm(X_train - X_train_predict, 'fro')
    frobenius_test = np.linalg.norm(X_test - X_test_predict, 'fro')

    # Compute the explained variance ratio
    evr_train = metrics.adjusted_explained_variance(X_train_transform)
    evr_train /= np.var(X_train, axis=0).sum()
    evr_test = metrics.adjusted_explained_variance(X_test_transform)
    evr_test /= np.var(X_test, axis=0).sum()

    # Compute geometric metrics and norms of components
    TV = parsimony.functions.nesterov.tv.TotalVariation(1, A=Atv)
    l0 = np.zeros((N_COMP,))
    l1 = np.zeros((N_COMP,))
    l2 = np.zeros((N_COMP,))
    tv = np.zeros((N_COMP,))
    for i in range(N_COMP):
        # Norms
        l0[i] = np.linalg.norm(V[:, i], 0)
        l1[i] = np.linalg.norm(V[:, i], 1)
        l2[i] = np.linalg.norm(V[:, i], 2)
        tv[i] = TV.f(V[:, i])

    ret = dict(frobenius_train=frobenius_train,
               frobenius_test=frobenius_test,
               components=V,
               X_train_predict=X_train_predict,
               X_test_predict=X_test_predict,
               X_train_transform=X_train_transform,
               X_test_transform=X_test_transform,
               evr_train=evr_train,
               evr_test=evr_test,
               l0=l0, l1=l1, l2=l2, tv=tv,
               time=_time)
    output_collector.collect(key, ret)
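# Worked example of the struct_pca penalty split used in mapper() above:
# ltv takes a tv_ratio share of global_pen, and the remainder is divided
# between l1 and l2 by l1_ratio, so the three terms always sum back to
# global_pen, as the assert checks (the values below are illustrative only).
_global_pen, _tv_ratio, _l1_ratio = 1.0, 0.2, 0.5
_ltv = _global_pen * _tv_ratio                           # 0.2
_ll1 = _l1_ratio * _global_pen * (1 - _tv_ratio)         # 0.4
_ll2 = (1 - _l1_ratio) * _global_pen * (1 - _tv_ratio)   # 0.4
assert np.allclose(_ltv + _ll1 + _ll2, _global_pen)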
                  number=1)
print "Sparse PCA:", t
V_sparsepca_sklearn = sparsepca_sklearn.components_.transpose()
del sparsepca_sklearn

# Struct PCA with little TV
print "Fitting StructPCA"
l2 = 1
ltv = alpha * .001
e1 = pca_tv.PCA_SmoothedL1_L2_TV(l1, l2, ltv, Atv, Al1,
                                 n_components=N_COMP,
                                 criterion="frobenius",
                                 eps=1e-6,
                                 # inner_eps=1e-8,
                                 inner_max_iter=int(1e5),
                                 use_eg=False,
                                 output=False)
t = timeit.timeit(stmt='e1.fit(X)',
                  setup="from __main__ import e1, X",
                  number=1)
print "Fitting StructPCA:", t
V1 = e1.V

# Struct PCA with more TV
print "Fitting StructPCA with more TV"
ltv = alpha * .01
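# To quantify the effect of the larger TV penalty, the TV functional from
# parsimony (used the same way, via TV.f, in the mapper() earlier in this
# document) can be evaluated on each component; the higher-ltv fit should
# yield smaller values. Sketch only: e2/V2 are hypothetical names for the
# model fitted with this larger ltv.
#
#   TV = parsimony.functions.nesterov.tv.TotalVariation(1, A=Atv)
#   tv_low = [TV.f(V1[:, i]) for i in range(N_COMP)]
#   tv_high = [TV.f(V2[:, i]) for i in range(N_COMP)]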