class SparsePCAImpl():
    """Thin wrapper that adapts the SKLModel (SparsePCA) estimator.

    Stores the full hyperparameter set on construction and delegates
    fit/transform to the wrapped scikit-learn model.
    """

    def __init__(self, n_components=None, alpha=1, ridge_alpha=0.01,
                 max_iter=1000, tol=1e-08, method='lars', n_jobs=None,
                 U_init=None, V_init=None, verbose=False, random_state=None,
                 normalize_components=False):
        # Keep every hyperparameter in one dict so the wrapped model can be
        # rebuilt or the configuration inspected later.
        self._hyperparams = {
            'n_components': n_components,
            'alpha': alpha,
            'ridge_alpha': ridge_alpha,
            'max_iter': max_iter,
            'tol': tol,
            'method': method,
            'n_jobs': n_jobs,
            'U_init': U_init,
            'V_init': V_init,
            'verbose': verbose,
            'random_state': random_state,
            'normalize_components': normalize_components}
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model on X (and y when supplied); returns self."""
        if y is None:
            self._wrapped_model.fit(X)
        else:
            self._wrapped_model.fit(X, y)
        return self

    def transform(self, X):
        """Project X with the fitted wrapped model."""
        return self._wrapped_model.transform(X)
def SPCA(model_data, components=None, transform_data=None):
    """Fit SparsePCA on model_data and return a projection.

    When transform_data is None, returns the projection of model_data
    itself (fit_transform); otherwise fits on model_data and projects
    transform_data. Prints the elapsed wall-clock time.
    """
    t0 = time()
    spca = SparsePCA(n_components=components)
    # Bug fix: compare against None with `is`, not `==`. With ndarray
    # inputs `transform_data == None` broadcasts elementwise and raises
    # a truth-value ambiguity error instead of selecting a branch.
    if transform_data is None:
        projection = spca.fit_transform(model_data)
    else:
        spca.fit(model_data)
        projection = spca.transform(transform_data)
    # Parenthesized print works identically as a statement (Py2) and a
    # function call (Py3), unlike the original bare print statement.
    print("Sparse PCA Time: %0.3f" % (time() - t0))
    return projection
def fit(self, X, y=None):
    """Build the underlying SKLModel from the stored hyperparameters and
    fit it on X (and y when given); returns self for chaining."""
    # The estimator is (re)created on every fit call from self._hyperparams.
    self._sklearn_model = SKLModel(**self._hyperparams)
    if y is None:
        self._sklearn_model.fit(X)
    else:
        self._sklearn_model.fit(X, y)
    return self
def __init__(self, n_components=None, alpha=1, ridge_alpha=0.01,
             max_iter=1000, tol=1e-08, method='lars', n_jobs=None,
             U_init=None, V_init=None, verbose=False, random_state=None,
             normalize_components=False):
    """Record the SparsePCA hyperparameters and build the wrapped model."""
    # One dict holds the complete configuration so it can be passed to
    # the underlying estimator and inspected later.
    self._hyperparams = {
        'n_components': n_components,
        'alpha': alpha,
        'ridge_alpha': ridge_alpha,
        'max_iter': max_iter,
        'tol': tol,
        'method': method,
        'n_jobs': n_jobs,
        'U_init': U_init,
        'V_init': V_init,
        'verbose': verbose,
        'random_state': random_state,
        'normalize_components': normalize_components}
    self._wrapped_model = SKLModel(**self._hyperparams)
def SPCA(model_data, components=None, transform_data=None):
    """Fit SparsePCA on model_data and return a projection.

    When transform_data is None, returns the projection of model_data
    itself (fit_transform); otherwise fits on model_data and projects
    transform_data. Prints the elapsed wall-clock time.
    """
    t0 = time()
    spca = SparsePCA(n_components=components)
    # Bug fix: `is None`, not `== None` — equality against an ndarray
    # argument broadcasts elementwise and breaks the truth test.
    if transform_data is None:
        projection = spca.fit_transform(model_data)
    else:
        spca.fit(model_data)
        projection = spca.transform(transform_data)
    # Parenthesized print is valid in both Python 2 and Python 3.
    print("Sparse PCA Time: %0.3f" % (time() - t0))
    return projection
'RandomizedPCA':RandomizedPCA(), 'Ridge':Ridge(), 'RidgeCV':RidgeCV(), 'RidgeClassifier':RidgeClassifier(), 'RidgeClassifierCV':RidgeClassifierCV(), 'RobustScaler':RobustScaler(), 'SGDClassifier':SGDClassifier(), 'SGDRegressor':SGDRegressor(), 'SVC':SVC(), 'SVR':SVR(), 'SelectFdr':SelectFdr(), 'SelectFpr':SelectFpr(), 'SelectFwe':SelectFwe(), 'SelectKBest':SelectKBest(), 'SelectPercentile':SelectPercentile(), 'ShrunkCovariance':ShrunkCovariance(), 'SkewedChi2Sampler':SkewedChi2Sampler(), 'SparsePCA':SparsePCA(), 'SparseRandomProjection':SparseRandomProjection(), 'SpectralBiclustering':SpectralBiclustering(), 'SpectralClustering':SpectralClustering(), 'SpectralCoclustering':SpectralCoclustering(), 'SpectralEmbedding':SpectralEmbedding(), 'StandardScaler':StandardScaler(), 'TSNE':TSNE(), 'TheilSenRegressor':TheilSenRegressor(), 'VBGMM':VBGMM(), 'VarianceThreshold':VarianceThreshold(),}
# PREPROCESSING # SCALING minMaxScaler = MinMaxScaler(feature_range=(0.0, 1.0)) #normalizer = skprep.Normalizer() columnDeleter = fs.FeatureDeleter() # FEATURE SELECTION varianceThresholdSelector = VarianceThreshold(threshold=(0)) percentileSelector = SelectPercentile(score_func=f_classif, percentile=20) kBestSelector = SelectKBest(f_classif, 1000) # FEATURE EXTRACTION #rbmPipe = skpipe.Pipeline(steps=[('scaling', minMaxScaler), ('rbm', rbm)]) nmf = NMF(n_components=150) pca = PCA(n_components=80) sparse_pca = SparsePCA(n_components=700, max_iter=3, verbose=2) kernel_pca = KernelPCA(n_components=150) # Costs huge amounts of ram randomized_pca = RandomizedPCA(n_components=500) # REGRESSORS random_forest_regressor = RandomForestRegressor(n_estimators=256) gradient_boosting_regressor = GradientBoostingRegressor(n_estimators=60) support_vector_regressor = svm.SVR() # CLASSIFIERS support_vector_classifier = svm.SVC(probability=True, verbose=True) linear_support_vector_classifier = svm.LinearSVC(dual=False) nearest_neighbor_classifier = KNeighborsClassifier() extra_trees_classifier = ExtraTreesClassifier(n_estimators=256) bagging_classifier = BaggingClassifier( base_estimator=GradientBoostingClassifier(n_estimators=200,
df = pd.DataFrame(d)
# Feature matrix: one row per sample, taken from the 'features' column.
X = list(df['features'])
X = np.array(X)
from scipy import sparse
# Fix: sklearn.decomposition.truncated_svd / .sparse_pca are private
# modules that were removed in scikit-learn 0.24 — import the classes
# from the public sklearn.decomposition package instead.
from sklearn.decomposition import TruncatedSVD, SparsePCA
from sklearn.decomposition import dict_learning_online
# Reduce the feature matrix to 200 sparse components before modeling.
sparsepca = SparsePCA(n_components=200)
X = sparsepca.fit_transform(X)
pca = TruncatedSVD(n_components=2)
# Target labels come from the 'tag' column.
Y = df['tag']
from sklearn.model_selection import train_test_split
# 80/20 split with a fixed seed for reproducibility (unstratified).
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)