def test_kernel_pca():
    """Smoke-test KernelPCA across solvers and kernels, including a
    user-supplied callable kernel (histogram intersection)."""
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    def histogram(x, y, **kwargs):
        # Histogram kernel implemented as a callable.
        assert_equal(kwargs, {})  # no kernel_params that we didn't ask for
        return np.minimum(x, y).sum()

    for eigen_solver in ("auto", "dense", "arpack"):
        for kernel in ("linear", "rbf", "poly", histogram):
            # histogram kernel produces singular matrix inside linalg.solve
            # XXX use a least-squares approximation?
            # -> inverse transform is only enabled for the built-in kernels
            inv = not callable(kernel)

            # transform fit data
            kpca = KernelPCA(4, kernel=kernel, eigen_solver=eigen_solver,
                             fit_inverse_transform=inv)
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            # fit_transform and fit().transform may differ by component sign
            assert_array_almost_equal(np.abs(X_fit_transformed),
                                      np.abs(X_fit_transformed2))

            # non-regression test: previously, gamma would be 0 by default,
            # forcing all eigenvalues to 0 under the poly kernel
            assert_not_equal(X_fit_transformed.size, 0)

            # transform new data
            X_pred_transformed = kpca.transform(X_pred)
            assert_equal(X_pred_transformed.shape[1],
                         X_fit_transformed.shape[1])

            # inverse transform (only when fit_inverse_transform was set)
            if inv:
                X_pred2 = kpca.inverse_transform(X_pred_transformed)
                assert_equal(X_pred2.shape, X_pred.shape)
def Kernel_PCA(HE_MI_train_test, kernel, invTran, degree):
    """Apply Kernel PCA to the four HE/MI train/test sets.

    Each of the four data sets is embedded with its own independent
    fit_transform call (matching the original behaviour), and only the
    first two kernel principal components of every sample are kept.

    Returns [HE_train_2d, MI_train_2d, HE_test_2d, MI_test_2d], each a
    list of (pc1, pc2) tuples.
    """
    kpca = KernelPCA(kernel=kernel, fit_inverse_transform=invTran,
                     degree=degree)
    parts = (HE_MI_train_test[0], HE_MI_train_test[1],
             HE_MI_train_test[2], HE_MI_train_test[3])
    # one independent KPCA fit per data set; keep the leading 2 components
    return [[(point[0], point[1]) for point in kpca.fit_transform(part)]
            for part in parts]
class RegionSplitter_PCA_KMean():
    """Splits a region by projecting samples with PCA and clustering the
    projected samples into two groups with k-means."""

    def __init__(self, data, label):
        # keep the full dimensionality (at least one component)
        data_dim_num = len(data[0])
        self.n_comp = max(1, data_dim_num)

        self.pca = PCA(n_components=self.n_comp)
        data = self.pca.fit_transform(data)

        # k-means cluster on the PCA-projected samples; the original
        # zipped the data twice, which just reconstructs the rows
        self.clusterer = KMeans(n_clusters=2, init='k-means++')
        self.clusterer.fit(data)

    def classify(self, data):
        """Return True when the sample falls into cluster 0.

        Raises TypeError for non-tuple input.
        """
        if not isinstance(data, tuple):
            # BUG FIX: the original `raise(TypeError, "...")` raised a
            # tuple expression instead of a TypeError instance
            raise TypeError("data must be a tuple")

        # sklearn transform/predict expect a 2-D (n_samples, n_features)
        # array, so wrap the single sample and unwrap the single result
        projected = self.pca.transform([data])[0]
        group = self.clusterer.predict([projected])[0]
        return group == 0
def test_kernel_pca():
    """Smoke-test KernelPCA across solvers and built-in kernels, including
    the inverse transform round-trip."""
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    for eigen_solver in ("auto", "dense", "arpack"):
        for kernel in ("linear", "rbf", "poly"):
            # transform fit data
            kpca = KernelPCA(4, kernel=kernel, eigen_solver=eigen_solver,
                             fit_inverse_transform=True)
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            # fit_transform and fit().transform may differ by component sign
            assert_array_almost_equal(np.abs(X_fit_transformed),
                                      np.abs(X_fit_transformed2))

            # non-regression test: previously, gamma would be 0 by default,
            # forcing all eigenvalues to 0 under the poly kernel
            # BUG FIX: comparing an ndarray against [] is element-wise /
            # ambiguous; check the array size instead (as the sibling test
            # with the callable kernel already does)
            assert_not_equal(X_fit_transformed.size, 0)

            # transform new data
            X_pred_transformed = kpca.transform(X_pred)
            assert_equal(X_pred_transformed.shape[1],
                         X_fit_transformed.shape[1])

            # inverse transform
            X_pred2 = kpca.inverse_transform(X_pred_transformed)
            assert_equal(X_pred2.shape, X_pred.shape)
def kPCA_visualization1d(X, y):
    """Embed X with linear-kernel KPCA and plot per-class histograms of
    the first component.

    y is an indicator vector; samples with y[i] == 1 go to 'class 1',
    everything else to 'class 0'. Shows a matplotlib figure.
    """
    kpca = KernelPCA(kernel="linear", fit_inverse_transform=True, gamma=10,
                     n_components=2)
    X_kpca = kpca.fit_transform(X)
    X_back = kpca.inverse_transform(X_kpca)
    pca = PCA(n_components=1)
    X_pca = pca.fit_transform(X)

    # first kernel principal component of each sample, split by class
    class_1 = []
    class_0 = []
    for label, row in zip(y, X_kpca):
        if label == 1:
            class_1.append(list(row)[0])
        else:
            class_0.append(list(row)[0])

    # BUG FIX: the original used Python-2-only print statements, which are
    # a SyntaxError under Python 3; print() works on both
    print("check")
    print(class_1[:10])

    import numpy
    from matplotlib import pyplot
    pyplot.hist(class_1, 50, alpha=0.5, label='class 1')
    pyplot.hist(class_0, 50, alpha=0.5, label='class 0')
    pyplot.legend(loc='upper right')
    pyplot.show()
def test_compare_clinical_kernel(self):
    # Regression test: FastKernelSurvivalSVM with a clinical kernel must
    # match a FastSurvivalSVM trained on the explicit KPCA feature map of
    # the same kernel (same predictions up to concordance index).
    x_full, y, _, _ = load_arff_file(WHAS500_FILE, ['fstat', 'lenfol'], '1',
                                     standardize_numeric=False,
                                     to_numeric=False)

    trans = ClinicalKernelTransform()
    trans.fit(x_full)

    x = encode_categorical(standardize(x_full))

    # explicit feature map of the clinical kernel via kernel PCA
    kpca = KernelPCA(kernel=trans.pairwise_kernel)
    xt = kpca.fit_transform(x)

    # linear survival SVM on the explicit features
    nrsvm = FastSurvivalSVM(optimizer='rbtree', tol=1e-8, max_iter=1000,
                            random_state=0)
    nrsvm.fit(xt, y)

    # kernelized survival SVM on the raw features
    rsvm = FastKernelSurvivalSVM(optimizer='rbtree',
                                 kernel=trans.pairwise_kernel,
                                 tol=1e-8, max_iter=1000, random_state=0)
    rsvm.fit(x, y)

    pred_nrsvm = nrsvm.predict(kpca.transform(x))
    pred_rsvm = rsvm.predict(x)

    self.assertEqual(len(pred_nrsvm), len(pred_rsvm))

    # both models must yield the same concordance index
    c1 = concordance_index_censored(y['fstat'], y['lenfol'], pred_nrsvm)
    c2 = concordance_index_censored(y['fstat'], y['lenfol'], pred_rsvm)

    self.assertAlmostEqual(c1[0], c2[0])
    self.assertTupleEqual(c1[1:], c2[1:])
def MyPCA():
    """Embed the circle data set with RBF kernel PCA and return the
    embedding.

    A plain PCA is also fit on the same data (its result is discarded),
    matching the original behaviour.
    """
    X, y = circle_data()
    rbf_kpca = KernelPCA(kernel='rbf', fit_inverse_transform=True, gamma=10)
    embedding = rbf_kpca.fit_transform(X)
    linear_pca = PCA()
    x_pca = linear_pca.fit_transform(X)
    return embedding
def perform_pca(self): """consider principle components as covariates, will be appended to self.X num_pcs : int Number of principle components to use as covariates K = self._centerer.fit_transform(K) # compute eigenvectors if self.eigen_solver == 'auto': if K.shape[0] > 200 and n_components < 10: eigen_solver = 'arpack' else: eigen_solver = 'dense' else: eigen_solver = self.eigen_solver if eigen_solver == 'dense': self.lambdas_, self.alphas_ = linalg.eigh( K, eigvals=(K.shape[0] - n_components, K.shape[0] - 1)) elif eigen_solver == 'arpack': self.lambdas_, self.alphas_ = eigsh(K, n_components, which="LA", tol=self.tol, maxiter=self.max_iter) # sort eigenvectors in descending order indices = self.lambdas_.argsort()[::-1] self.lambdas_ = self.lambdas_[indices] self.alphas_ = self.alphas_[:, indices] # remove eigenvectors with a zero eigenvalue if self.remove_zero_eig or self.n_components is None: self.alphas_ = self.alphas_[:, self.lambdas_ > 0] self.lambdas_ = self.lambdas_[self.lambdas_ > 0] X_transformed = self.alphas_ * np.sqrt(self.lambdas_) """ #TODO: implement numerics code directly, based on above template logging.info("performing PCA, keeping %i principle components" % (self.num_pcs)) tt0 = time.time() if False: pca = KernelPCA(n_components=self.num_pcs) pca._fit_transform(self.K) self.pcs = pca.alphas_ * np.sqrt(pca.lambdas_) else: import scipy.linalg as la [s,u]=la.eigh(self.K) s=s[::-1] u=u[:,::-1] self.pcs = u[:,0:self.num_pcs] assert self.pcs.shape[1] == self.num_pcs self.X = sp.hstack((self.X, self.pcs)) logging.info("...done. PCA time %.2f s" % (float(time.time() - tt0)))
def pca(X, gamma1):
    """RBF kernel PCA embedding of X (gamma=gamma1); prints the input and
    dual-coefficient shapes for debugging and returns the embedding."""
    model = KernelPCA(kernel='rbf', fit_inverse_transform=False,
                      gamma=gamma1)
    embedding = model.fit_transform(X)
    print('X', X.shape)
    print('alphas', model.alphas_.shape)
    print('lambdas', model.lambdas_.shape)
    return embedding
def isomap(self, num_dims=None, directed=None):
    '''Isomap embedding.

    num_dims : dimension of embedded coordinates, defaults to input dimension
    directed : used for .shortest_path() calculation
    '''
    # geodesic distances -> Gram-like matrix, then precomputed-kernel PCA
    geodesic = self.shortest_path(directed=directed)
    gram = -0.5 * geodesic ** 2
    embedder = KernelPCA(n_components=num_dims, kernel='precomputed')
    return embedder.fit_transform(gram)
def __init__(self, corpus, n_components=2, kernel=None):
    """Fit a (kernel) PCA on the standardized stylometric features.

    With kernel=None a plain PCA with n_components is used; otherwise a
    KernelPCA with the given kernel (gamma=10).
    """
    StyloClassifier.__init__(self, corpus)
    features = self.data_frame[self.cols].values
    self.n_components = n_components
    self.kernel = kernel
    if kernel:
        self.pca = KernelPCA(kernel=kernel, gamma=10)
    else:
        self.pca = PCA(n_components=self.n_components)
    scaled = StandardScaler().fit_transform(features)
    self.pca_data = self.pca.fit_transform(scaled)
def getProjectionMatrixKPCA(dim=50):
    """ Kernel PCA : see paper for detailed description"""
    # Hierarchy indicator matrix: X[child][ancestor] = 1 for every node
    # on the path from each label up to the root.
    n_labels = len(labelDict)
    X = np.zeros((n_labels, n_labels))
    for item in labelDict:
        for ancestor in getPathToRoot(item):
            X[labelIndex[item]][labelIndex[ancestor]] = 1
    kpca = KernelPCA(n_components=dim, fit_inverse_transform=True)
    kpca.fit(X)
    return kpca, kpca.alphas_
def main():
    """Load stored embedding vectors, reduce them with KernelPCA followed
    by t-SNE, and plot the result."""
    definition = load_definition()
    vectors = np.load(os.path.join(ROOT, definition.embedding))
    uuids = np.load(os.path.join(ROOT, definition.uuids))
    reducer = KernelPCA(**definition.pca)
    embedder = TSNE(**definition.tsne)
    reduced = reducer.fit_transform(vectors)
    embedded = embedder.fit_transform(reduced)
    plot_vectors(embedded, uuids, definition.sources, definition.output)
def test_kernel_pca_n_components():
    """The transformed output must have exactly the requested number of
    components for both eigensolvers."""
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    for solver in ("dense", "arpack"):
        for n in [1, 2, 4]:
            model = KernelPCA(n_components=n, eigen_solver=solver)
            projected = model.fit(X_fit).transform(X_pred)
            assert_equal(projected.shape, (2, n))
def test_kernel_pca_consistent_transform():
    """transform() must rely on the stored X_fit_, which has to be an
    unmodified copy of the training data (mutating the caller's array
    after fit must not change the transform)."""
    rng = np.random.RandomState(0)
    X = rng.rand(10, 10)
    model = KernelPCA(random_state=rng).fit(X)
    before = model.transform(X)

    pristine = X.copy()
    X[:, 0] = 666  # mutate the caller's array in place
    after = model.transform(pristine)
    assert_array_almost_equal(before, after)
def fit(self, X, num, method='dijkstra'):
    """Isomap-style embedding: k-NN graph -> all-pairs shortest paths ->
    kernel PCA as the multidimensional-scaling step."""
    # construct the k-nearest-neighbour graph
    knn_graph = KNN(num).fit(X)

    # geodesic (shortest-path) distances on the graph
    if method == 'dijkstra':
        distances = dijkstra(knn_graph)
    else:
        distances = shortest_path(knn_graph, method=method)

    # multidimensional scaling via kernel PCA
    embedder = KernelPCA(n_components=num)
    return embedder.fit_transform(distances)
def kernelPCA(data, labels, new_dimension):
    """Reduce `data` to `new_dimension` components with kernel PCA.

    Sparse input is densified first. `labels` is accepted for interface
    compatibility but unused.

    Returns (reduced_data, elapsed_seconds).
    """
    # BUG FIX: Python-2-only print statement was a SyntaxError under
    # Python 3; print() works on both
    print("start kernel pca...")

    if hasattr(data, "toarray"):
        data = data.toarray()

    start = time.time()
    pca = KernelPCA(fit_inverse_transform=True, gamma=10,
                    n_components=new_dimension, alpha=2)
    reduced = pca.fit_transform(data)
    end = time.time()
    return (reduced, end - start)
def isomap(X, n_neighbors, metric):
    """
    Isomap embedding of X into 2 components.

    Based on sklearn, Author: Jake Vanderplas -- <*****@*****.**>
    License: BSD, (C) 2011
    """
    # BUG FIX: the k-NN graph must be built on the input X; the original
    # referenced an undefined name `D` (NameError at runtime)
    kng = kneighbors_graph(X, n_neighbors=n_neighbors, metric=metric)
    dist_matrix_ = graph_shortest_path(kng, method='auto', directed=False)
    kernel_pca_ = KernelPCA(n_components=2, kernel="precomputed",
                            eigen_solver='auto')
    # Isomap kernel: G = -0.5 * D^2 of the geodesic distances
    G = dist_matrix_ ** 2
    G *= -0.5
    return kernel_pca_.fit_transform(G)
def reduce_kpca(X, kern, retall=False):
    """Reduce dimensionality with kernel PCA using kernel `kern`.

    Returns (X_kpca, X_back), or (X_kpca, X_back, kpca) when retall is
    true, where X_back is the inverse-transformed reconstruction.
    """
    model = KernelPCA(kernel=kern, fit_inverse_transform=True)
    embedded = model.fit_transform(X)
    reconstructed = model.inverse_transform(embedded)
    if retall:
        return embedded, reconstructed, model
    return embedded, reconstructed
def test_kernel_pca_deterministic_output():
    """Repeated fits on the same data must give identical embeddings for
    both eigensolvers."""
    rng = np.random.RandomState(0)
    X = rng.rand(10, 10)

    for solver in ('arpack', 'dense'):
        outputs = np.zeros((20, 2))
        for trial in range(20):
            model = KernelPCA(n_components=2, eigen_solver=solver,
                              random_state=rng)
            outputs[trial, :] = model.fit_transform(X)[0]
        # every row must equal the first one
        assert_allclose(outputs,
                        np.tile(outputs[0, :], 20).reshape(20, 2))
def RunKPCAScikit(q): totalTimer = Timer() # Load input dataset. Log.Info("Loading dataset", self.verbose) data = np.genfromtxt(self.dataset, delimiter=',') with totalTimer: # Get the new dimensionality, if it is necessary. dimension = re.search('-d (\d+)', options) if not dimension: d = data.shape[1] else: d = int(dimension.group(1)) if (d > data.shape[1]): Log.Fatal("New dimensionality (" + str(d) + ") cannot be greater " + "than existing dimensionality (" + str(data.shape[1]) + ")!") q.put(-1) return -1 # Get the kernel type and make sure it is valid. kernel = re.search("-k ([^\s]+)", options) try: if not kernel: Log.Fatal("Choose kernel type, valid choices are 'linear'," + " 'hyptan' and 'polynomial'.") q.put(-1) return -1 elif kernel.group(1) == "linear": model = KernelPCA(n_components=d, kernel="linear") elif kernel.group(1) == "hyptan": model = KernelPCA(n_components=d, kernel="sigmoid") elif kernel.group(1) == "polynomial": degree = re.search('-D (\d+)', options) degree = 1 if not degree else int(degree.group(1)) model = KernelPCA(n_components=d, kernel="poly", degree=degree) else: Log.Fatal("Invalid kernel type (" + kernel.group(1) + "); valid " + "choices are 'linear', 'hyptan' and 'polynomial'.") q.put(-1) return -1 out = model.fit_transform(data) except Exception as e: q.put(-1) return -1 time = totalTimer.ElapsedTime() q.put(time) return time
def generate_kpca_compression(X, n_components=16):
    """
    Compresses the data using sklearn KernelPCA implementation.

    :param X: Data (n_samples, n_features)
    :param n_components: Number of dimensions for PCA to keep
    :return: X_prime (the compressed representation), pca
    """
    model = KernelPCA(n_components=n_components, kernel='rbf',
                      eigen_solver='arpack', fit_inverse_transform=False)
    model.fit(X)
    compressed = model.transform(X)
    return compressed, model
def reduceDataset(self, nr=3, method='PCA'):
    '''It reduces the dimensionality of a given dataset using different techniques provided by Sklearn library
    Methods available:
        'PCA'
        'FactorAnalysis'
        'KPCArbf','KPCApoly'
        'KPCAcosine','KPCAsigmoid'
        'IPCA'
        'FastICADeflation'
        'FastICAParallel'
        'Isomap'
        'LLE'
        'LLEmodified'
        'LLEltsa'
    '''
    dataset = self.ModelInputs['Dataset']
    #dataset=self.dataset[Model.in_columns]
    #dataset=self.dataset[['Humidity','TemperatureF','Sea Level PressureIn','PrecipitationIn','Dew PointF','Value']]
    #PCA
    if method == 'PCA':
        sklearn_pca = sklearnPCA(n_components=nr)
        reduced = sklearn_pca.fit_transform(dataset)
    #Factor Analysis
    elif method == 'FactorAnalysis':
        fa = FactorAnalysis(n_components=nr)
        reduced = fa.fit_transform(dataset)
    #kernel pca with rbf kernel
    elif method == 'KPCArbf':
        kpca = KernelPCA(nr, kernel='rbf')
        reduced = kpca.fit_transform(dataset)
    #kernel pca with poly kernel
    elif method == 'KPCApoly':
        kpca = KernelPCA(nr, kernel='poly')
        reduced = kpca.fit_transform(dataset)
    #kernel pca with cosine kernel
    elif method == 'KPCAcosine':
        kpca = KernelPCA(nr, kernel='cosine')
        reduced = kpca.fit_transform(dataset)
    #kernel pca with sigmoid kernel
    elif method == 'KPCAsigmoid':
        kpca = KernelPCA(nr, kernel='sigmoid')
        reduced = kpca.fit_transform(dataset)
    #ICA
    elif method == 'IPCA':
        ipca = IncrementalPCA(nr)
        reduced = ipca.fit_transform(dataset)
    #Fast ICA
    elif method == 'FastICAParallel':
        fip = FastICA(nr, algorithm='parallel')
        reduced = fip.fit_transform(dataset)
    elif method == 'FastICADeflation':
        fid = FastICA(nr, algorithm='deflation')
        reduced = fid.fit_transform(dataset)
    elif method == 'All':
        # delegate to the sibling that computes every technique at once
        self.dimensionalityReduction(nr=nr)
        return self

    # NOTE(review): an unrecognized `method` reaches this point with
    # `reduced` unbound and raises NameError — confirm whether a
    # ValueError would be the intended behaviour.
    self.ModelInputs.update({method: reduced})
    self.datasetsAvailable.append(method)
    return self
def dimensionalityReduction(self, nr=5):
    '''It applies all the dimensionality reduction techniques available in this class:
    Techniques available:
        'PCA'
        'FactorAnalysis'
        'KPCArbf','KPCApoly'
        'KPCAcosine','KPCAsigmoid'
        'IPCA'
        'FastICADeflation'
        'FastICAParallel'

    Each result is stored in self.ModelInputs under its technique name and
    the name is appended to self.datasetsAvailable. Returns self.
    (Isomap/LLE variants remain disabled, as in the original.)
    '''
    dataset = self.ModelInputs['Dataset']

    sklearn_pca = sklearnPCA(n_components=nr)
    p_components = sklearn_pca.fit_transform(dataset)

    fa = FactorAnalysis(n_components=nr)
    factors = fa.fit_transform(dataset)

    kpca = KernelPCA(nr, kernel='rbf')
    rbf = kpca.fit_transform(dataset)
    kpca = KernelPCA(nr, kernel='poly')
    poly = kpca.fit_transform(dataset)
    kpca = KernelPCA(nr, kernel='cosine')
    cosine = kpca.fit_transform(dataset)
    kpca = KernelPCA(nr, kernel='sigmoid')
    sigmoid = kpca.fit_transform(dataset)

    ipca = IncrementalPCA(nr)
    i_components = ipca.fit_transform(dataset)

    fip = FastICA(nr, algorithm='parallel')
    fid = FastICA(nr, algorithm='deflation')
    # BUG FIX: the original assigned ficaD from `fip` (parallel) and ficaP
    # from `fid` (deflation), so the 'FastICADeflation' key received the
    # parallel-algorithm result and vice versa
    ficaP = fip.fit_transform(dataset)
    ficaD = fid.fit_transform(dataset)

    values = [p_components, factors, rbf, poly, cosine, sigmoid,
              i_components, ficaD, ficaP]
    keys = ['PCA', 'FactorAnalysis', 'KPCArbf', 'KPCApoly', 'KPCAcosine',
            'KPCAsigmoid', 'IPCA', 'FastICADeflation', 'FastICAParallel']
    self.ModelInputs.update(dict(zip(keys, values)))
    # plain loop instead of a side-effecting list comprehension
    for key in keys:
        self.datasetsAvailable.append(key)

    #debug
    #dataset=pd.DataFrame(self.ModelInputs['Dataset'])
    #dataset['Output']=self.ModelOutput
    #self.debug['Dimensionalityreduction']=dataset
    ###
    return self
def project(X, kde=False, kernel=False, gamma=10):
    """Project X to 2-D with PCA (or RBF kernel PCA when `kernel` is
    true) and plot the result; optionally add a seaborn KDE joint plot.

    Returns the reduced 2-D data.
    """
    if kernel:
        kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True,
                         gamma=gamma)
        reduced_data = kpca.fit_transform(X)
    else:
        pca = PCA(n_components=2).fit(X)
        # BUG FIX: Python-2-only print statements were a SyntaxError
        # under Python 3; print() works on both
        print(pca.explained_variance_ratio_)
        print(pca.components_)
        reduced_data = pca.transform(X)
    if kde:
        with sns.axes_style("white"):
            sns.jointplot(reduced_data[:, 0], reduced_data[:, 1], kind="kde")
        plt.show()
    plt.plot(reduced_data[:, 0], reduced_data[:, 1], 'k.', markersize=2)
    return reduced_data
def gogo_kpca( fxpath, mpath ):
    # Fit one RBF KernelPCA per subject (5 dogs + 2 humans) on stacked
    # interictal/ictal/test features and pickle each fitted model under
    # mpath. Python 2 code (print statement, cPickle).
    kpca_params = {'n_components':256, 'kernel':'rbf', 'gamma':None,
                   'degree':3, 'coef0':1, 'kernel_params':None, 'alpha':1.0,
                   'fit_inverse_transform':False, 'eigen_solver':'auto',
                   'tol':0, 'max_iter':None, 'remove_zero_eig':True}
    # '%s' fills the directory now; {0}/{1} are filled per subject below
    kpca_fname = '%s/kpca_rbf_{0}_{1}.pkl' % mpath
    for i in range(7):
        # subjects 0-4 are dogs 1-5; subjects 5-6 are humans 1-2
        if i < 5:
            nbreed = 1
            sbreed = 'dog'
            nsubject = i+1
        else:
            nbreed = 2
            sbreed = 'human'
            nsubject = 1 + abs(5-i)
        print 'breed%d.subject%d..' % ( nbreed, nsubject )
        # stack interictal (2), ictal (1) and test (3) feature blocks;
        # free intermediates aggressively to bound memory
        X_ictal = load_features( fxpath, nbreed, nsubject, 1 )
        X_inter = load_features( fxpath, nbreed, nsubject, 2 )
        X = vstack((X_inter, X_ictal))
        del X_inter, X_ictal; gc.collect()
        X_test = load_features( fxpath, nbreed, nsubject, 3 )
        X = vstack((X, X_test))
        del X_test; gc.collect()
        kpca = KernelPCA(**kpca_params)
        # subsample rows to keep the kernel matrix tractable
        skip_interval = get_skip_interval(X)
        X = kpca_preprocess_features(X)
        kpca.fit(X[::skip_interval])
        with open(kpca_fname.format(sbreed,nsubject),'wb') as f:
            cPickle.dump(kpca,f)
        del X, kpca; gc.collect()
def test_kernel_pca_sparse():
    """KernelPCA on sparse CSR input: fit_transform and fit().transform
    must agree up to sign, and transform must keep the component count."""
    rng = np.random.RandomState(0)
    X_fit = sp.csr_matrix(rng.random_sample((5, 4)))
    X_pred = sp.csr_matrix(rng.random_sample((2, 4)))

    for solver in ("auto", "arpack"):
        for kern in ("linear", "rbf", "poly"):
            # transform fit data
            model = KernelPCA(4, kernel=kern, eigen_solver=solver,
                              fit_inverse_transform=False)
            first = model.fit_transform(X_fit)
            second = model.fit(X_fit).transform(X_fit)
            # embeddings may differ by component sign only
            assert_array_almost_equal(np.abs(first), np.abs(second))

            # transform new data
            projected = model.transform(X_pred)
            assert_equal(projected.shape[1], first.shape[1])
def _fit_transform(self, X):
    # Distributed Isomap: k-NN graph, shortest paths computed per tile by
    # workers, then precomputed-kernel PCA as the MDS step.
    self.nbrs_.fit(X)
    self.training_data_ = self.nbrs_._fit_X
    self.kernel_pca_ = KernelPCA(n_components=self.n_components,
                                 kernel="precomputed",
                                 eigen_solver=self.eigen_solver,
                                 tol=self.tol, max_iter=self.max_iter)

    kng = kneighbors_graph(self.nbrs_, self.n_neighbors, mode="distance")

    # one tile per worker (or per point when there are more workers than
    # points); NOTE(review): `/` is integer division only on Python 2 —
    # confirm the intended Python version
    n_points = X.shape[0]
    n_workers = blob_ctx.get().num_workers
    if n_points < n_workers:
        tile_hint = (1, )
    else:
        tile_hint = (n_points / n_workers, )

    """
    task_array is used for deciding the idx of starting points and idx of
    endding points that each tile needs to find the shortest path among.
    """
    task_array = expr.ndarray((n_points,), tile_hint=tile_hint)
    task_array = task_array.force()

    #dist matrix is used to hold the result
    dist_matrix = expr.ndarray((n_points, n_points),
                               reduce_fn=lambda a,b:a+b).force()

    # each tile computes shortest paths for its slice of start points and
    # accumulates into the shared dist_matrix
    results = task_array.foreach_tile(mapper_fn = _shortest_path_mapper,
                                      kw = {'kng' : kng,
                                            'directed' : False,
                                            'dist_matrix' : dist_matrix})
    self.dist_matrix_ = dist_matrix.glom()

    # Isomap kernel: G = -0.5 * D^2 fed to the precomputed-kernel PCA
    G = self.dist_matrix_ ** 2
    G *= -0.5

    self.embedding_ = self.kernel_pca_.fit_transform(G)
def main():
    # MNIST benchmark: arc-cosine kernel features via precomputed-kernel
    # KPCA (Nystroem-style, anchored on 1000 training rows), then an SVM
    # with the same kernel. Python 2 code (print statements, fetch_mldata).

    #set the timer
    start = time.time()

    #load the data
    mnist = fetch_mldata('MNIST original')
    mnist.target = mnist.target.astype(np.int32)

    # random 30k/40k train/test split (indices drawn with replacement)
    seed = np.random.randint(1,30000)
    rand = np.random.RandomState(seed)
    items = len(mnist.target)
    indices = rand.randint(items, size = 70000)
    trindex = indices[0:30000]
    tsindex = indices[30000:]

    #scale down features to the range [0, 1]
    mnist.data = mnist.data/255.0
    mnist.data = mnist.data.astype(np.float32)

    trainX = mnist.data[trindex]
    testX = mnist.data[tsindex]
    trainY = mnist.target[trindex]
    testY = mnist.target[tsindex]

    #extract the features using KPCA
    kpca = KernelPCA(kernel='precomputed')
    # anchor set: first 1000 training rows define the kernel columns
    kpca_train = arc_cosine(trainX[0:1000], trainX[0:1000])
    #Fit the model from data in X
    kpca.fit(kpca_train)

    kernel_train = arc_cosine(trainX, trainX[0:1000])
    kernel_test = arc_cosine(testX, trainX[0:1000])

    trainX_kpca = kpca.transform(kernel_train)
    testX_kpca = kpca.transform(kernel_test)
    print testX_kpca.shape

    #fit the svm model and compute accuaracy measure
    clf = svm.SVC(kernel=arc_cosine)
    clf.fit(trainX_kpca, trainY)

    pred = clf.predict(testX_kpca)
    print accuracy_score(testY, pred)

    print('total : %d, correct : %d, incorrect : %d\n' %(len(pred), np.sum(pred == testY), np.sum(pred != testY)))
    print('Test Time : %f Minutes\n' %((time.time()-start)/60))
def __init__(self, data, label):
    """Find the PCA-projected dimension and cut value that minimize the
    within-group, sample-weighted label variance over random candidate
    cuts; stores the result in self.cut_dim / self.cut_val."""
    self.cut_dim = 0
    self.cut_val = 0
    num_candidates = 50

    data_dim_num = len(data[0])
    self.n_comp = max(1, data_dim_num)
    # BUG FIX: sklearn's PCA has no 'kernel' parameter — passing
    # kernel='linear' raises TypeError at construction. Plain PCA (as in
    # the sibling PCA_KMean splitter) is the intended behaviour.
    self.pca = PCA(n_components=self.n_comp)
    # self.ica = ICA(n_components=self.n_comp)
    data = self.pca.fit_transform(data)
    #data = self.ica.fit_transform(data)
    data_zipped = list(zip(*data))

    data_dim_num = len(data[0])
    label_dim_num = len(label[0])

    # search each projected dimension for the best random cut
    dim_min = float("inf")
    for i in range(data_dim_num):
        for k in range(num_candidates):
            # pick a random value between the dimension's min and max
            max_val = max(data_zipped[i])
            min_val = min(data_zipped[i])
            cut_val = random.choice(np.linspace(min_val, max_val, num=500))

            # partition the labels by the candidate cut
            groups = [[label[j] for j in range(len(data_zipped[i]))
                       if data_zipped[i][j] <= cut_val],
                      [label[j] for j in range(len(data_zipped[i]))
                       if data_zipped[i][j] > cut_val]]

            # skip degenerate cuts that leave one side empty
            if len(groups[0]) == 0 or len(groups[1]) == 0:
                continue

            weighted_avg_variance = []
            for group in groups:
                num_sample = len(group)
                group = zip(*group)  # iterate label dimensions
                variance = []
                for group_k in group:
                    mean = math.fsum(group_k) / len(group_k)
                    # normalize by mean squared magnitude (floor of 1)
                    norm = max(math.fsum([x**2 for x in group_k]) / len(group_k), 1)
                    variance.append(math.fsum([((x - mean)**2) / norm
                                               for x in group_k]))
                weighted_avg_variance.append(
                    math.fsum(variance) / len(variance) * num_sample)

            in_group_variance = math.fsum(weighted_avg_variance)
            if in_group_variance < dim_min:
                dim_min = in_group_variance
                self.cut_dim = i
                self.cut_val = cut_val
PCA_S = pca.explained_variance_ratio_ # Percentage of variance that each component explains (eigenvectors) PCA_mean = pca.mean_ Xtrain_PCA = pca.fit_transform(Xtrain) # It obtains the features of the components.PCA Xtest_PCA = pca.transform(Xtest) # Kernel PCA if (FE_kPCA == 1): from sklearn.decomposition import KernelPCA import sklearn.metrics.pairwise as pair # Get proper value for the gamma of the gaussian projection d = pair.pairwise_distances(Xtrain,Xtrain) aux = np.triu(d) sigma = np.sqrt(np.mean(np.power(aux[aux!=0],2)*0.5)) gamma = 1/(2*sigma**2) kpca = KernelPCA(n_components = n_comp,kernel = "rbf",gamma = gamma) kpca.fit(Xtrain) kPCA_hyperplanes = kpca.alphas_ # Components of the descomposition (hyperplanes) (eigenvesctors) (eigenfaces) kPCA_S = kpca.lambdas_ # Percentage of variance that each component explains (eigenvectors) Xtrain_kPCA = kpca.transform(Xtrain) Xtest_kPCA = kpca.transform(Xtest) # ICA Independen Component Analisis if (FE_ICA == 1): from sklearn.decomposition import FastICA ica = FastICA(n_components = 15, max_iter = 10000, tol=0.00001 ) ica.fit(Xtrain) # Reconstruct signals ICA_components = ica.components_ # Get components
# Arcene data preparation: attach labels to the feature frame (_file1 is
# loaded earlier, outside this chunk), split, and set up SVMs and KPCA.
_file2 = pd.read_csv('arcene_train.labels')
_file1['Class'] = (_file2['1']).astype(int)
#print _file1.isnull()
#print _file1.info()
# cleaning missing values
# NOTE(review): fillna(value=<lambda>) fills NaNs with the function
# object itself, not the median — likely meant
# _file1 = _file1.fillna(_file1.median()); confirm and fix.
_file1 = _file1.fillna(lambda x: x.median())
#print _file1.info()
#print _file1

train, test = train_test_split(_file1, test_size=0.4)

# linear SVM
linear_svm = svm.SVC( kernel='linear' )
# rbf SVM
rbf_svm = svm.SVC( kernel='rbf' )
# KPCA
kpca = KernelPCA(n_components=55, kernel="rbf", fit_inverse_transform=True, gamma=10)
#print train,test

train = train.values.tolist()
test = test.values.tolist()
#print len(train[:][:]),len(test)

# split features (all but the last two columns) from the label (last column)
x = []
y = []
for i in train:
    x.append(i[:-2])
    y.append(i[-1])
# Dimensionality-reduction feature block: each technique is fit on the
# training features (target column "y" dropped) and applied to the test
# set with the fitted transformer.
tsvd = TruncatedSVD(n_components=n_comp, random_state=420)
tsvd_results_train = tsvd.fit_transform(train.drop(["y"], axis=1))
tsvd_results_test = tsvd.transform(test)

# PCA
# pca = PCA(n_components=n_comp, random_state=420)
# pca2_results_train = pca.fit_transform(train.drop(["y"], axis=1))
# pca2_results_test = pca.transform(test)

#sparse PCA
spca = SparsePCA(n_components=n_comp, random_state=420)
spca2_results_train = spca.fit_transform(train.drop(["y"], axis=1))
spca2_results_test = spca.transform(test)

#Kernel PCA (default linear kernel)
kpca = KernelPCA(n_components=n_comp, random_state=420)
kpca2_results_train = kpca.fit_transform(train.drop(["y"], axis=1))
kpca2_results_test = kpca.transform(test)

# ICA
ica = FastICA(n_components=n_comp, random_state=420)
ica2_results_train = ica.fit_transform(train.drop(["y"], axis=1))
ica2_results_test = ica.transform(test)

# GRP
grp = GaussianRandomProjection(n_components=n_comp, eps=0.1, random_state=420)
grp_results_train = grp.fit_transform(train.drop(["y"], axis=1))
grp_results_test = grp.transform(test)

# SRP — NOTE: this statement is cut off at the end of this chunk
srp = SparseRandomProjection(n_components=n_comp,
def pipe_main(pipe=None):
    '''pipeline construction using sklearn estimators, final step support only
    classifiers currently

    .. note::
        data flows through a pipeline consisting of steps as below:
        raw data --> clean --> encoding --> scaling --> feature construction
        --> feature selection --> resampling --> final estimator
        see scikit-learn preprocess & estimators

    parameter
    ----
    pipe - str
        - in the format of 'xx_xx' of which 'xx' means steps in pipeline,
          default None

    return
    ----
        1) pipeline instance of chosen steps
        2) if pipe is None, a dict indicating possible choice of 'steps'
    '''
    # data-cleaning steps (missing-value strategies)
    clean = {
        'clean': Split_cls(dtype_filter='not_datetime', na1='null', na2=-999),
        'cleanNA': Split_cls(dtype_filter='not_datetime', na1=None, na2=None),
        'cleanMean': Split_cls(dtype_filter='not_datetime',
                               na1='most_frequent', na2='mean'),
    }
    # categorical encoders
    encode = {
        'woe': Woe_encoder(max_leaf_nodes=5),
        'oht': Oht_encoder(),
        'ordi': Ordi_encoder(),
    }

    # imbalanced-learn resamplers
    resample = {
        # over_sampling
        'rover': RandomOverSampler(),
        'smote': SMOTE(),
        'bsmote': BorderlineSMOTE(),
        'adasyn': ADASYN(),
        # under sampling controlled methods
        'runder': RandomUnderSampler(),
        'nearmiss': NearMiss(version=3),
        'pcart': InstanceHardnessThreshold(),
        # under sampling cleaning methods
        'tlinks': TomekLinks(n_jobs=-1),
        'oside': OneSidedSelection(n_jobs=-1),
        'cleanNN': NeighbourhoodCleaningRule(n_jobs=-1),
        'enn': EditedNearestNeighbours(n_jobs=-1),
        'ann': AllKNN(n_jobs=-1),
        'cnn': CondensedNearestNeighbour(n_jobs=-1),
        # clean outliers
        'inlierForest': FunctionSampler(outlier_rejection,
                                        kw_args={'method': 'IsolationForest'}),
        'inlierLocal': FunctionSampler(outlier_rejection,
                                       kw_args={'method': 'LocalOutlierFactor'}),
        'inlierEllip': FunctionSampler(outlier_rejection,
                                       kw_args={'method': 'EllipticEnvelope'}),
        'inlierOsvm': FunctionSampler(outlier_rejection,
                                      kw_args={'method': 'OneClassSVM'}),
        # combine
        'smoteenn': SMOTEENN(),
        'smotelink': SMOTETomek(),
    }

    # feature scalers
    scale = {
        'stdscale': StandardScaler(),
        'maxscale': MinMaxScaler(),
        'rscale': RobustScaler(quantile_range=(10, 90)),
        'qauntile': QuantileTransformer(),  # uniform distribution
        'power': PowerTransformer(),  # Gaussian distribution
        'norm': Normalizer(),  # default L2 norm
        # scale sparse data
        'maxabs': MaxAbsScaler(),
        'stdscalesp': StandardScaler(with_mean=False),
    }

    # feature construction
    feature_c = {
        'pca': PCA(whiten=True),
        'spca': SparsePCA(normalize_components=True, n_jobs=-1),
        'ipca': IncrementalPCA(whiten=True),
        'kpca': KernelPCA(kernel='rbf', n_jobs=-1),
        'poly': PolynomialFeatures(degree=2),
        'rtembedding': RandomTreesEmbedding(n_estimators=10),
        'LDA': LinearDiscriminantAnalysis(),
        'QDA': QuadraticDiscriminantAnalysis(),
    }

    # select from model (model-based feature selection)
    feature_m = {
        'fwoe': SelectFromModel(Woe_encoder(max_leaf_nodes=5)),
        'flog': SelectFromModel(
            LogisticRegressionCV(penalty='l1', solver='saga',
                                 scoring='roc_auc')),
        'fsgd': SelectFromModel(SGDClassifier(penalty="l1")),
        'fsvm': SelectFromModel(LinearSVC('l1', dual=False, C=1e-2)),
        'fxgb': SelectFromModel(XGBClassifier(n_jobs=-1)),
        'frf': SelectFromModel(ExtraTreesClassifier(n_estimators=100,
                                                    max_depth=5)),
        'fRFExgb': RFE(XGBClassifier(n_jobs=-1), step=0.1,
                       n_features_to_select=20),
        'fRFErf': RFE(ExtraTreesClassifier(n_estimators=100, max_depth=5),
                      step=0.3, n_features_to_select=20),
        'fRFElog': RFE(LogisticRegressionCV(penalty='l1', solver='saga',
                                            scoring='roc_auc'),
                       step=0.3, n_features_to_select=20)
    }

    # Univariate feature selection
    feature_u = {
        'fchi2': GenericUnivariateSelect(chi2, 'percentile', 25),
        'fMutualclf': GenericUnivariateSelect(mutual_info_classif,
                                              'percentile', 25),
        'fFclf': GenericUnivariateSelect(f_classif, 'percentile', 25),
    }

    # sklearn estimator: every registered classifier that can be
    # constructed with default arguments
    t = all_estimators(type_filter=['classifier'])
    estimator = {}
    for i in t:
        try:
            estimator.update({i[0]: i[1]()})
        except Exception:
            # skip classifiers whose constructor needs arguments
            continue

    estimator.update(
        dummy=DummyClassifier(),
        XGBClassifier=XGBClassifier(n_jobs=-1),
        LogisticRegressionCV=LogisticRegressionCV(scoring='roc_auc'),
        EasyEnsembleClassifier=EasyEnsembleClassifier(),
        BalancedRandomForestClassifier=BalancedRandomForestClassifier(),
        RUSBoostClassifier=RUSBoostClassifier(),
        SVC=SVC(C=0.01, gamma='auto'))

    if pipe is None:
        # no pipe requested: report the available step names per category
        feature_s = {}
        feature_s.update(**feature_m, **feature_u)
        return {
            'clean': clean.keys(),
            'encoding': encode.keys(),
            'resample': resample.keys(),
            'scale': scale.keys(),
            'feature_c': feature_c.keys(),
            'feature_s': feature_s.keys(),
            'classifier': estimator.keys()
        }
    elif isinstance(pipe, str):
        # build a Pipeline from the underscore-separated step keys
        l = pipe.split('_')
        all_keys_dict = {}
        all_keys_dict.update(**clean, **encode, **scale, **feature_c,
                             **feature_m, **feature_u, **estimator,
                             **resample)
        steps = []
        for i in l:
            if all_keys_dict.get(i) is not None:
                steps.append((i, all_keys_dict.get(i)))
            else:
                raise KeyError(
                    "'{}' invalid key for sklearn estimators".format(i))
        return Pipeline(steps)
    else:
        raise ValueError("input pipe must be a string in format 'xx[_xx]'")
# Evaluation of LDA- and kPCA-projected features with logistic regression
# and a linear SVM (lr, X_*_lda, X_*_std, y_* are defined earlier,
# outside this chunk).
print('Accuracy of LDA transform test: %.2f' % accuracy_score(y_test, y_pred))
print('Accuracy of LDA transform train: %.2f' % accuracy_score(y_train, y_pred_train))

#svm
svm = SVC(kernel='linear', C=1.0, random_state=1)
svm.fit(X_train_lda, y_train)
# BUG FIX: the predictions must come from the freshly fitted SVM, not the
# logistic-regression model fitted earlier (copy-paste error)
y_pred = svm.predict(X_test_lda)
y_pred_train = svm.predict(X_train_lda)
print('Accuracy of LDA transform SVM test: %.2f' % accuracy_score(y_test, y_pred))
print('Accuracy of LDA transform SVM train: %.2f' % accuracy_score(y_train, y_pred_train))

#kPCA transform
scikit_kpca = KernelPCA(n_components=2, kernel='rbf', gamma=15)
X_train_kpca = scikit_kpca.fit_transform(X_train_std)
X_test_kpca = scikit_kpca.transform(X_test_std)

#Logistic
lr = lr.fit(X_train_kpca, y_train)
y_pred = lr.predict(X_test_kpca)
y_pred_train = lr.predict(X_train_kpca)
print('Accuracy of kPCA transform test: %.2f' % accuracy_score(y_test, y_pred))
print('Accuracy of kPCA transform train: %.2f' % accuracy_score(y_train, y_pred_train))

#svm
svm = SVC(kernel='linear', C=1.0, random_state=1)
svm.fit(X_train_kpca, y_train)
# BUG FIX: same copy-paste error as above — use the SVM's predictions
y_pred = svm.predict(X_test_kpca)
y_pred_train = svm.predict(X_train_kpca)
from load_dataset import load_data
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV

# Load the dataset and standardize it; the scaler is fitted on the training
# split only and then applied to the test split.
data_dir = '/home/kangle/dataset/PedBicCarData'
train_data, train_label, test_data, test_label = load_data(data_dir, 2, 2)
scaler = StandardScaler()
train_data = scaler.fit_transform(train_data)
test_data = scaler.transform(test_data)

print('\nbegin PCA process.')
#pca = PCA(n_components=1000, svd_solver='randomized', whiten=True).fit(train_data)
pca = KernelPCA(n_components=30,
                kernel='cosine',
                eigen_solver='arpack',
                n_jobs=8,
                fit_inverse_transform=True).fit(train_data)
#pca = SparsePCA(n_components=50,n_jobs=4).fit(train_data)
train_feature = pca.transform(train_data)
print(train_feature.shape)

# Relative reconstruction error of the kernel-PCA projection.
# BUG FIX: inverse_transform maps back to the ORIGINAL feature space, so the
# reconstruction must be compared against train_data, not against the 30-dim
# train_feature (the shapes do not even match).
train_feature_inverse = pca.inverse_transform(train_feature)
print(
    np.linalg.norm(train_feature_inverse - train_data) /
    np.linalg.norm(train_data))

#plt.plot(pca.explained_variance_ratio_)
#plt.plot(np.cumsum(pca.explained_variance_ratio_))
#plt.show()
# Preprocess both raw datasets: scale the numeric columns and encode the
# categorical ones.
# NOTE(review): data_preprocessing1, scaler, enc and the X_*/y_train arrays
# are defined elsewhere in this file — assumed to be in scope here.
X_train1, X_test1 = data_preprocessing1(
    dataset=X_train1,
    test_dataset=X_test1,
    pairs=[[["carat", "depth", "table", "price", "x", "y", "z"], [scaler]],
           [["color", "clarity"], [enc]]])
X_train2, X_test2 = data_preprocessing1(
    dataset=X_train2,
    test_dataset=X_test2,
    pairs=[[[
        "fixed acidity", "volatile acidity", "citric acid", "residual sugar",
        "chlorides", "free sulfur dioxide", "total sulfur dioxide", "density",
        "pH", "sulphates", "alcohol"
    ], [scaler]], [[], [enc]]])

# RBF-kernel PCA on the base dataset (5 components kept; 3 and 6 were tried).
#kpca = KernelPCA(kernel="rbf", n_components=3)
kpca = KernelPCA(kernel="rbf", n_components=5)
#kpca = KernelPCA(kernel="rbf", n_components=6)
kpca.fit(X_train)
X_kpca = kpca.transform(X_train)
X_kpca_test = kpca.transform(X_test)

# Supervised LDA on top of the kPCA projection (uses y_train).
lda = LinearDiscriminantAnalysis()
lda.fit(X_kpca, y_train)
X_lda = lda.transform(X_kpca)
X_lda_test = lda.transform(X_kpca_test)

# Separate RBF kPCA for dataset 1 (14 components kept; 5 and 22 were tried).
#kpca1 = KernelPCA(kernel="rbf", n_components=5)
kpca1 = KernelPCA(kernel="rbf", n_components=14)
#kpca1 = KernelPCA(kernel="rbf", n_components=22)
kpca1.fit(X_train1)
X_kpca1 = kpca1.transform(X_train1)
X_kpca_test1 = kpca1.transform(X_test1)
def DecomposedFeatures(train,
                       test,
                       val,
                       total,
                       addtrain,
                       addtest,
                       use_pca=0.0,
                       use_tsvd=0.0,
                       use_ica=0.0,
                       use_fa=0.0,
                       use_grp=0.0,
                       use_srp=0.0,
                       use_KPCA=0.0,
                       kernal="rbf"):
    """Fit a set of decomposition models on `total` and project the
    train/test/val splits onto each, concatenating all projections.

    Each use_* argument is a fraction of train.shape[1]; when > 0 the
    corresponding method contributes int(use_* * n_features) + 1 components.
    use_grp < 0 switches GaussianRandomProjection to n_components="auto"
    with eps=abs(use_grp). `kernal` (sic — name kept for callers) is the
    KernelPCA kernel.

    addtrain/addtest are pre-computed feature matrices prepended to the
    outputs. NOTE(review): the val output reuses addtrain — there is no
    addval parameter; confirm this is intended.

    Returns (train_df, test_df, val_df) as pandas DataFrames with NaNs
    replaced by 0.
    """
    print("\nStart decomposition process...")
    train_decomposed = []
    test_decomposed = []
    val_decomposed = []
    if addtrain is not None:
        train_decomposed = [addtrain]
        val_decomposed = [addtrain]
    if addtest is not None:
        test_decomposed = [addtest]

    def _project(model):
        # Fit once on the combined matrix, then project every split with the
        # SAME fitted model and collect the results.
        model.fit(total)
        train_decomposed.append(model.transform(train))
        test_decomposed.append(model.transform(test))
        val_decomposed.append(model.transform(val))

    if use_pca > 0.0:
        print("PCA")
        N_COMP = int(use_pca * train.shape[1]) + 1
        _project(PCA(n_components=N_COMP, whiten=True, svd_solver="full",
                     random_state=42))
    if use_tsvd > 0.0:
        print("tSVD")
        N_COMP = int(use_tsvd * train.shape[1]) + 1
        _project(TruncatedSVD(n_components=N_COMP, random_state=42))
    if use_ica > 0.0:
        print("ICA")
        N_COMP = int(use_ica * train.shape[1]) + 1
        _project(FastICA(n_components=N_COMP, random_state=42))
    if use_fa > 0.0:
        print("FA")
        N_COMP = int(use_fa * train.shape[1]) + 1
        _project(FactorAnalysis(n_components=N_COMP, random_state=42))
    if use_grp != 0.0:
        print("GRP")
        if use_grp > 0.0:
            N_COMP = int(use_grp * train.shape[1]) + 1
            eps = 10
        else:
            # Negative value: let the projection size itself via the
            # Johnson-Lindenstrauss bound with eps = |use_grp|.
            N_COMP = "auto"
            eps = abs(use_grp)
        _project(GaussianRandomProjection(n_components=N_COMP, eps=eps,
                                          random_state=42))
    if use_srp > 0.0:
        print("SRP")
        N_COMP = int(use_srp * train.shape[1]) + 1
        # BUG FIX: the validation split was previously projected with `pca`
        # (a NameError when use_pca == 0.0, and the wrong model otherwise);
        # all three splits now use the fitted SparseRandomProjection.
        _project(SparseRandomProjection(n_components=N_COMP, dense_output=True,
                                        random_state=42))
    if use_KPCA > 0.0:
        print("KPCA")
        N_COMP = int(use_KPCA * train.shape[1]) + 1
        #N_COMP = None
        _project(KernelPCA(n_components=N_COMP, kernel=kernal))

    gc.collect()
    print("Append decomposition components together...")
    train_decomposed = np.concatenate(train_decomposed, axis=1)
    test_decomposed = np.concatenate(test_decomposed, axis=1)
    val_decomposed = np.concatenate(val_decomposed, axis=1)
    train_with_only_decomposed_features = pd.DataFrame(train_decomposed)
    test_with_only_decomposed_features = pd.DataFrame(test_decomposed)
    val_with_only_decomposed_features = pd.DataFrame(val_decomposed)
    #for agg_col in ['sum', 'var', 'mean', 'median', 'std', 'weight_count', 'count_non_0', 'num_different', 'max', 'min']:
    #    train_with_only_decomposed_features[col] = train[col]
    #    test_with_only_decomposed_features[col] = test[col]
    # Remove any NA
    train_with_only_decomposed_features = train_with_only_decomposed_features.fillna(
        0)
    test_with_only_decomposed_features = test_with_only_decomposed_features.fillna(
        0)
    val_with_only_decomposed_features = val_with_only_decomposed_features.fillna(
        0)
    return train_with_only_decomposed_features, test_with_only_decomposed_features, val_with_only_decomposed_features
def make():
    """Segment each EDF recording into windows, extract five per-channel
    features, reduce with a linear KernelPCA, add delta / delta-delta
    features, and save each segment as an .npy file.

    Saved file names are also appended to EEG_filenames.txt. Relies on
    module-level configuration: file_name, PATH, save_PATH, len_data,
    sampling_stride, len_window, len_window_stride, num_window,
    final_data_shape, and the feature helpers (root_mean_square,
    zero_crossing_rate, moving_window_average, kurtosis, spectral_entropy,
    delta).
    """
    for i, name in enumerate(file_name):
        data = mne.io.read_raw_edf(os.path.join(PATH, name))
        new_data = data.get_data()
        num_channel, data_len = new_data.shape
        j = len_data
        m = -1
        with open(save_PATH + 'EEG_filenames/' + 'EEG_filenames.txt', 'a') as f:
            while True:
                # Random gap between consecutive segments.
                k = np.random.randint(5, 15)
                j += sampling_stride + k + len_data
                # (e.g. 9760 samples) --> yields about 3 EEG segments in total
                if j >= new_data.shape[1]:
                    break
                EEG_segmented = new_data[:, j - len_data * 2 - sampling_stride -
                                         k:j - len_data - sampling_stride - k]
                print('first')
                print(EEG_segmented.shape)  # (31,2200)
                n = len_window
                # 5 features per channel, one column per analysis window.
                apply_5f = np.zeros((EEG_segmented.shape[0] * 5,
                                     int(EEG_segmented.shape[1] / 50) - int(1)))
                final_data = np.zeros((final_data_shape[0], final_data_shape[1]))
                print(apply_5f.shape)  # (155,44)
                m += 1
                # Fill apply_5f; final shape (155,43)
                for u in range(num_window):  # range(43)
                    n += len_window_stride
                    window = EEG_segmented[:, n - len_window -
                                           len_window_stride:n - len_window_stride]
                    print(window.shape)  # (31,100)
                    for p in range(window.shape[0]):  # window.shape[0] = 31
                        apply_5f[5 * p, u] = root_mean_square(window[p, :])
                        apply_5f[5 * p + 1, u] = zero_crossing_rate(window[p, :])
                        apply_5f[5 * p + 2, u] = moving_window_average(window[p, :])
                        apply_5f[5 * p + 3, u] = kurtosis(window[p, :])
                        apply_5f[5 * p + 4, u] = spectral_entropy(window[p, :],
                                                                  1000,
                                                                  method='fft')
                #kpca — KernelPCA expects (n_samples, n_features), so transpose.
                apply_5f_new = np.transpose(apply_5f)
                print(apply_5f_new.shape)  # (43, 155)
                # BUG FIX: gamma was the string 'none', which is not a valid
                # value (gamma must be a float or None); it only went unnoticed
                # because the linear kernel ignores gamma.
                kpca = KernelPCA(n_components=30, kernel='linear', gamma=None)
                post_kpca = kpca.fit_transform(apply_5f_new)
                post_kpca = np.transpose(post_kpca)
                print(post_kpca.shape)  # (30,43)
                # delta, delta-delta
                for r in range(post_kpca.shape[0]):
                    final_data[3 * r, :] = post_kpca[r, :]
                    final_data[3 * r + 1, :] = delta(post_kpca[r, :])
                    final_data[3 * r + 2, :] = delta(delta(post_kpca[r, :]))
                print('final_data shape')
                print(final_data.shape)
                np.save(save_PATH + 'EEG_datas/' + 'EEG_{0}_{1}.npy'.format(i, m),
                        final_data)
                f.write('EEG_{0}_{1}.npy\n'.format(i, m))
def btnConvert_click(self):
    """Run Kernel/SVD hyperalignment over the selected fold range.

    Reads hyperparameters and .mat variable names from the global `ui`
    widgets, validates them, loads each fold's input file, reduces each
    subject/view with PCA, Kernel PCA or Incremental PCA, runs regularized
    hyperalignment (RHA), and saves the aligned data with io.savemat.

    Returns False on any validation error; shows a message box on completion.
    NOTE(review): reconstructed from a whitespace-mangled source — statement
    grouping inside the fold loop should be confirmed against history.
    """
    totalTime = 0
    msgBox = QMessageBox()
    # Batch
    try:
        Batch = np.int32(ui.txtBatch.text())
    except:
        msgBox.setText("Size of batch is wrong!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    if Batch == 0:
        Batch = None
    # Kernel
    Kernel = ui.cbKernel.currentText()
    # Method
    Method = ui.cbMethod.currentText()
    # Gamma
    # BUG FIX: np.float was removed in NumPy 1.24 (it was a plain alias for
    # the builtin float); with modern NumPy every parse below raised inside
    # the bare except and reported valid input as "wrong". Builtin float is
    # used instead here and at Coef0/Tol/Regularization.
    try:
        Gamma = float(ui.txtGamma.text())
    except:
        msgBox.setText("Gamma is wrong!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    # Degree
    try:
        Degree = np.int32(ui.txtDegree.text())
    except:
        msgBox.setText("Degree is wrong!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    # Coef0
    try:
        Coef0 = float(ui.txtCoef0.text())
    except:
        msgBox.setText("Coef0 is wrong!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    # Alpha
    try:
        Alpha = np.int32(ui.txtAlpha.text())
    except:
        msgBox.setText("Alpha is wrong!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    # Tol
    try:
        Tol = float(ui.txtTole.text())
    except:
        msgBox.setText("Tolerance is wrong!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    # MaxIte
    try:
        MaxIter = np.int32(ui.txtMaxIter.text())
    except:
        msgBox.setText("Maximum number of iterations is wrong!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    if MaxIter <= 0:
        MaxIter = None
    # Number of Job
    try:
        NJob = np.int32(ui.txtJobs.text())
    except:
        msgBox.setText("The number of parallel jobs is wrong!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    if NJob < -1 or NJob == 0:
        msgBox.setText("The number of parallel jobs must be -1 or greater than 0!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    TrFoldErr = list()
    TeFoldErr = list()
    try:
        FoldFrom = np.int32(ui.txtFoldFrom.text())
        FoldTo = np.int32(ui.txtFoldTo.text())
    except:
        print("Please check fold parameters!")
        return
    if FoldTo < FoldFrom:
        print("Please check fold parameters!")
        return
    # Process each fold independently; $FOLD$ in file names is substituted.
    for fold_all in range(FoldFrom, FoldTo + 1):
        tic = time.time()
        # Regularization
        try:
            Regularization = float(ui.txtRegularization.text())
        except:
            msgBox.setText("Regularization value is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        # OutFile
        OutFile = ui.txtOutFile.text()
        OutFile = OutFile.replace("$FOLD$", str(fold_all))
        if not len(OutFile):
            msgBox.setText("Please enter out file!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        # InFile
        InFile = ui.txtInFile.text()
        InFile = InFile.replace("$FOLD$", str(fold_all))
        if not len(InFile):
            msgBox.setText("Please enter input file!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        if not os.path.isfile(InFile):
            msgBox.setText("Input file not found!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        InData = io.loadmat(InFile)
        OutData = dict()
        OutData["imgShape"] = InData["imgShape"]
        # Data
        if not len(ui.txtITrData.currentText()):
            msgBox.setText("Please enter Input Train Data variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        if not len(ui.txtITeData.currentText()):
            msgBox.setText("Please enter Input Test Data variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        if not len(ui.txtOTrData.text()):
            msgBox.setText("Please enter Output Train Data variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        if not len(ui.txtOTeData.text()):
            msgBox.setText("Please enter Output Test Data variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        try:
            XTr = InData[ui.txtITrData.currentText()]
            XTe = InData[ui.txtITeData.currentText()]
            # Global scaling (as opposed to the per-view scaling done later).
            if ui.cbScale.isChecked() and not ui.rbScale.isChecked():
                XTr = preprocessing.scale(XTr)
                XTe = preprocessing.scale(XTe)
                print("Whole of data is scaled X~N(0,1).")
        except:
            print("Cannot load data")
            return
        # NComponent
        try:
            NumFea = np.int32(ui.txtNumFea.text())
        except:
            msgBox.setText("Number of features is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        if NumFea < 1:
            msgBox.setText("Number of features must be greater than zero!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        if NumFea > np.shape(XTr)[1]:
            msgBox.setText("Number of features is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        # Label
        if not len(ui.txtITrLabel.currentText()):
            msgBox.setText("Please enter Train Input Label variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        if not len(ui.txtITeLabel.currentText()):
            msgBox.setText("Please enter Test Input Label variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        if not len(ui.txtOTrLabel.text()):
            msgBox.setText("Please enter Train Output Label variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        if not len(ui.txtOTeLabel.text()):
            msgBox.setText("Please enter Test Output Label variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        try:
            OutData[ui.txtOTrLabel.text()] = InData[ui.txtITrLabel.currentText()]
            OutData[ui.txtOTeLabel.text()] = InData[ui.txtITeLabel.currentText()]
        except:
            print("Cannot load labels!")
        # Subject
        if not len(ui.txtITrSubject.currentText()):
            msgBox.setText("Please enter Train Input Subject variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        if not len(ui.txtITeSubject.currentText()):
            msgBox.setText("Please enter Test Input Subject variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        if not len(ui.txtOTrSubject.text()):
            msgBox.setText("Please enter Train Output Subject variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        if not len(ui.txtOTeSubject.text()):
            msgBox.setText("Please enter Test Output Subject variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        try:
            TrSubject = InData[ui.txtITrSubject.currentText()]
            OutData[ui.txtOTrSubject.text()] = TrSubject
            TeSubject = InData[ui.txtITeSubject.currentText()]
            OutData[ui.txtOTeSubject.text()] = TeSubject
        except:
            print("Cannot load Subject IDs")
            return
        # Task
        if ui.cbTask.isChecked():
            if not len(ui.txtITrTask.currentText()):
                msgBox.setText("Please enter Input Train Task variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtITeTask.currentText()):
                msgBox.setText("Please enter Input Test Task variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTrTask.text()):
                msgBox.setText("Please enter Output Train Task variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTeTask.text()):
                msgBox.setText("Please enter Output Test Task variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            try:
                TrTask = InData[ui.txtITrTask.currentText()]
                OutData[ui.txtOTrTask.text()] = TrTask
                TeTask = InData[ui.txtITeTask.currentText()]
                OutData[ui.txtOTeTask.text()] = TeTask
                # Map task names to 1-based integer indices for fold grouping.
                TrTaskIndex = TrTask.copy()
                for tasindx, tas in enumerate(np.unique(TrTask)):
                    TrTaskIndex[TrTask == tas] = tasindx + 1
                TeTaskIndex = TeTask.copy()
                for tasindx, tas in enumerate(np.unique(TeTask)):
                    TeTaskIndex[TeTask == tas] = tasindx + 1
            except:
                print("Cannot load Tasks!")
                return
        # Run
        if ui.cbRun.isChecked():
            if not len(ui.txtITrRun.currentText()):
                msgBox.setText("Please enter Train Input Run variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtITeRun.currentText()):
                msgBox.setText("Please enter Test Input Run variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTrRun.text()):
                msgBox.setText("Please enter Train Output Run variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTeRun.text()):
                msgBox.setText("Please enter Test Output Run variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            try:
                TrRun = InData[ui.txtITrRun.currentText()]
                OutData[ui.txtOTrRun.text()] = TrRun
                TeRun = InData[ui.txtITeRun.currentText()]
                OutData[ui.txtOTeRun.text()] = TeRun
            except:
                print("Cannot load Runs!")
                return
        # Counter
        if ui.cbCounter.isChecked():
            if not len(ui.txtITrCounter.currentText()):
                msgBox.setText("Please enter Train Input Counter variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtITeCounter.currentText()):
                msgBox.setText("Please enter Test Input Counter variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTrCounter.text()):
                msgBox.setText("Please enter Train Output Counter variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTeCounter.text()):
                msgBox.setText("Please enter Test Output Counter variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            try:
                TrCounter = InData[ui.txtITrCounter.currentText()]
                OutData[ui.txtOTrCounter.text()] = TrCounter
                TeCounter = InData[ui.txtITeCounter.currentText()]
                OutData[ui.txtOTeCounter.text()] = TeCounter
            except:
                print("Cannot load Counters!")
                return
        # Matrix Label
        if ui.cbmLabel.isChecked():
            if not len(ui.txtITrmLabel.currentText()):
                msgBox.setText("Please enter Train Input Matrix Label variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtITemLabel.currentText()):
                msgBox.setText("Please enter Test Input Matrix Label variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTrmLabel.text()):
                msgBox.setText("Please enter Train Output Matrix Label variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTemLabel.text()):
                msgBox.setText("Please enter Test Output Matrix Label variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            try:
                OutData[ui.txtOTrmLabel.text()] = InData[ui.txtITrmLabel.currentText()]
                OutData[ui.txtOTemLabel.text()] = InData[ui.txtITemLabel.currentText()]
            except:
                print("Cannot load matrix lables!")
                return
        # Design
        if ui.cbDM.isChecked():
            if not len(ui.txtITrDM.currentText()):
                msgBox.setText("Please enter Train Input Design Matrix variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtITeDM.currentText()):
                msgBox.setText("Please enter Test Input Design Matrix variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTrDM.text()):
                msgBox.setText("Please enter Train Output Design Matrix variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTeDM.text()):
                msgBox.setText("Please enter Test Output Design Matrix variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            try:
                OutData[ui.txtOTrDM.text()] = InData[ui.txtITrDM.currentText()]
                OutData[ui.txtOTeDM.text()] = InData[ui.txtITeDM.currentText()]
            except:
                print("Cannot load design matrices!")
                return
        # Coordinate
        if ui.cbCol.isChecked():
            if not len(ui.txtCol.currentText()):
                msgBox.setText("Please enter Coordinator variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOCol.text()):
                msgBox.setText("Please enter Coordinator variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            try:
                OutData[ui.txtOCol.text()] = InData[ui.txtCol.currentText()]
            except:
                print("Cannot load coordinator!")
                return
        # Condition
        if ui.cbCond.isChecked():
            if not len(ui.txtCond.currentText()):
                msgBox.setText("Please enter Condition variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOCond.text()):
                msgBox.setText("Please enter Condition variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            try:
                OutData[ui.txtOCond.text()] = InData[ui.txtCond.currentText()]
            except:
                print("Cannot load conditions!")
                return
        # FoldID
        if ui.cbFoldID.isChecked():
            if not len(ui.txtFoldID.currentText()):
                msgBox.setText("Please enter FoldID variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOFoldID.text()):
                msgBox.setText("Please enter FoldID variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            try:
                OutData[ui.txtOFoldID.text()] = InData[ui.txtFoldID.currentText()]
            except:
                print("Cannot load Fold ID!")
                return
        # FoldInfo
        if ui.cbFoldInfo.isChecked():
            if not len(ui.txtFoldInfo.currentText()):
                msgBox.setText("Please enter FoldInfo variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOFoldInfo.text()):
                msgBox.setText("Please enter FoldInfo variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            try:
                OutData[ui.txtOFoldInfo.text()] = InData[ui.txtFoldInfo.currentText()]
            except:
                print("Cannot load Fold Info!")
                return
            pass
        # Number of Scan
        if ui.cbNScan.isChecked():
            if not len(ui.txtITrScan.currentText()):
                msgBox.setText("Please enter Number of Scan variable name for Input Train!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtITeScan.currentText()):
                msgBox.setText("Please enter Number of Scan variable name for Input Test!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTrScan.text()):
                msgBox.setText("Please enter Number of Scan variable name for Output Train!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTeScan.text()):
                msgBox.setText("Please enter Number of Scan variable name for Output Test!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            try:
                OutData[ui.txtOTrScan.text()] = InData[ui.txtITrScan.currentText()]
                OutData[ui.txtOTeScan.text()] = InData[ui.txtITeScan.currentText()]
            except:
                print("Cannot load NScan!")
                return
        # Train Analysis Level
        # Build the grouping key (Subject [+Run] [+Task] [+Counter]) used to
        # split the training data into views.
        print("Calculating Analysis Level for Training Set ...")
        TrGroupFold = None
        FoldStr = ""
        if ui.cbFSubject.isChecked():
            if not ui.rbFRun.isChecked():
                TrGroupFold = TrSubject
                FoldStr = "Subject"
            else:
                TrGroupFold = np.concatenate((TrSubject, TrRun))
                FoldStr = "Subject+Run"
        if ui.cbFTask.isChecked():
            TrGroupFold = np.concatenate((TrGroupFold, TrTaskIndex)) if TrGroupFold is not None else TrTaskIndex
            FoldStr = FoldStr + "+Task"
        if ui.cbFCounter.isChecked():
            TrGroupFold = np.concatenate((TrGroupFold, TrCounter)) if TrGroupFold is not None else TrCounter
            FoldStr = FoldStr + "+Counter"
        TrGroupFold = np.transpose(TrGroupFold)
        TrUniqFold = np.array(list(set(tuple(i) for i in TrGroupFold.tolist())))
        TrFoldIDs = np.arange(len(TrUniqFold)) + 1
        TrListFold = list()
        for gfold in TrGroupFold:
            for ufoldindx, ufold in enumerate(TrUniqFold):
                if (ufold == gfold).all():
                    currentID = TrFoldIDs[ufoldindx]
                    break
            TrListFold.append(currentID)
        TrListFold = np.int32(TrListFold)
        TrListFoldUniq = np.unique(TrListFold)
        # Test Analysis Level
        print("Calculating Analysis Level for Testing Set ...")
        TeGroupFold = None
        if ui.cbFSubject.isChecked():
            if not ui.rbFRun.isChecked():
                TeGroupFold = TeSubject
            else:
                TeGroupFold = np.concatenate((TeSubject, TeRun))
        if ui.cbFTask.isChecked():
            TeGroupFold = np.concatenate((TeGroupFold, TeTaskIndex)) if TeGroupFold is not None else TeTaskIndex
        if ui.cbFCounter.isChecked():
            TeGroupFold = np.concatenate((TeGroupFold, TeCounter)) if TeGroupFold is not None else TeCounter
        TeGroupFold = np.transpose(TeGroupFold)
        TeUniqFold = np.array(list(set(tuple(i) for i in TeGroupFold.tolist())))
        TeFoldIDs = np.arange(len(TeUniqFold)) + 1
        TeListFold = list()
        for gfold in TeGroupFold:
            for ufoldindx, ufold in enumerate(TeUniqFold):
                if (ufold == gfold).all():
                    currentID = TeFoldIDs[ufoldindx]
                    break
            TeListFold.append(currentID)
        TeListFold = np.int32(TeListFold)
        TeListFoldUniq = np.unique(TeListFold)
        # Train Partition
        print("Partitioning Training Data ...")
        TrX = list()
        TrShape = None
        if Method == "PCA":
            svdmodel = PCA(n_components=NumFea, copy=False, tol=Tol)
        elif Method == "Kernel PCA":
            svdmodel = KernelPCA(n_components=NumFea, kernel=Kernel, gamma=Gamma, degree=Degree,
                                 coef0=Coef0, alpha=Alpha, tol=Tol, max_iter=MaxIter, n_jobs=NJob, copy_X=False)
        else:
            svdmodel = IncrementalPCA(n_components=NumFea, copy=False, batch_size=Batch)
        for foldindx, fold in enumerate(TrListFoldUniq):
            dat = XTr[np.where(TrListFold == fold)]
            if ui.cbScale.isChecked() and ui.rbScale.isChecked():
                dat = preprocessing.scale(dat)
                print("Data belong to View " + str(foldindx + 1) + " is scaled X~N(0,1).")
            dat = svdmodel.fit_transform(dat)
            TrX.append(dat)
            if TrShape is None:
                TrShape = np.shape(dat)
            else:
                if not (TrShape == np.shape(dat)):
                    print("ERROR: Train, Reshape problem for Fold " + str(foldindx + 1) + ", Shape: " + str(np.shape(dat)))
                    return
            print("Train: View " + str(foldindx + 1) + " is extracted. Shape: " + str(np.shape(dat)))
        print("Training Shape: " + str(np.shape(TrX)))
        # Test Partition
        print("Partitioning Testing Data ...")
        TeX = list()
        TeShape = None
        for foldindx, fold in enumerate(TeListFoldUniq):
            dat = XTe[np.where(TeListFold == fold)]
            if ui.cbScale.isChecked() and ui.rbScale.isChecked():
                dat = preprocessing.scale(dat)
                print("Data belong to View " + str(foldindx + 1) + " is scaled X~N(0,1).")
            # NOTE(review): the reducer is re-fitted per test view (not reusing
            # the training fit) — confirm this is intentional.
            dat = svdmodel.fit_transform(dat)
            TeX.append(dat)
            if TeShape is None:
                TeShape = np.shape(dat)
            else:
                if not (TeShape == np.shape(dat)):
                    print("Test: Reshape problem for Fold " + str(foldindx + 1))
                    return
            print("Test: View " + str(foldindx + 1) + " is extracted.")
        print("Testing Shape: " + str(np.shape(TeX)))
        model = RHA(Dim=NumFea, regularization=Regularization)
        print("Running Hyperalignment on Training Data ...")
        MappedXtr, G = model.train(TrX)
        print("Running Hyperalignment on Testing Data ...")
        MappedXte = model.test(TeX)
        # Train Dot Product
        print("Producting Training Data ...")
        TrHX = None
        TrErr = None
        for foldindx, fold in enumerate(TrListFoldUniq):
            TrErr = TrErr + (G - MappedXtr[foldindx]) if TrErr is not None else G - MappedXtr[foldindx]
            TrHX = np.concatenate((TrHX, MappedXtr[foldindx])) if TrHX is not None else MappedXtr[foldindx]
        OutData[ui.txtOTrData.text()] = TrHX
        # Average the accumulated error over the number of views.
        foldindx = foldindx + 1
        TrErr = TrErr / foldindx
        print("Train: alignment error ", np.linalg.norm(TrErr))
        TrFoldErr.append(np.linalg.norm(TrErr))
        # Train Dot Product
        print("Producting Testing Data ...")
        TeHX = None
        TeErr = None
        for foldindx, fold in enumerate(TeListFoldUniq):
            TeErr = TeErr + (G - MappedXte[foldindx]) if TeErr is not None else G - MappedXte[foldindx]
            TeHX = np.concatenate((TeHX, MappedXte[foldindx])) if TeHX is not None else MappedXte[foldindx]
        OutData[ui.txtOTeData.text()] = TeHX
        foldindx = foldindx + 1
        TeErr = TeErr / foldindx
        print("Test: alignment error ", np.linalg.norm(TeErr))
        TeFoldErr.append(np.linalg.norm(TeErr))
        HAParam = dict()
        HAParam["Method"] = Method
        HAParam["Kernel"] = Kernel
        HAParam["Share"] = G
        HAParam["Level"] = FoldStr
        OutData["FunctionalAlignment"] = HAParam
        OutData["Runtime"] = time.time() - tic
        totalTime += OutData["Runtime"]
        print("Saving ...")
        io.savemat(OutFile, mdict=OutData)
        print("Fold " + str(fold_all) + " is DONE: " + OutFile)
    print("Training -> Alignment Error: mean " + str(np.mean(TrFoldErr)) + " std " + str(np.std(TrFoldErr)))
    print("Testing -> Alignment Error: mean " + str(np.mean(TeFoldErr)) + " std " + str(np.std(TeFoldErr)))
    print("Runtime: ", totalTime)
    print("Kernel/SVD Hyperalignment is done.")
    msgBox.setText("Kernel/SVD Hyperalignment is done.")
    msgBox.setIcon(QMessageBox.Information)
    msgBox.setStandardButtons(QMessageBox.Ok)
    msgBox.exec_()
# Show user which aggregates were created print( f">> Created {len(aggregate_df.columns)} features for; {aggregate_df.columns.tolist()}" ) COMPONENTS = 10 # Convert to sparse matrix sparse_matrix = scipy.sparse.csr_matrix(total_df.values) # Data to be passed to t-SNE tsvd = TruncatedSVD(n_components=1000).fit_transform(sparse_matrix) # V1 List of decomposition methods methods = [{ 'method': KernelPCA(n_components=2, kernel="rbf"), 'data': 'total' }, { 'method': FactorAnalysis(n_components=COMPONENTS), 'data': 'total' }, { 'method': TSNE(n_components=3, init='pca'), 'data': 'tsvd' }, { 'method': TruncatedSVD(n_components=COMPONENTS), 'data': 'sparse' }, { 'method': PCA(n_components=COMPONENTS), 'data': 'total' }, { 'method': FastICA(n_components=COMPONENTS),
fig = plt.figure(figsize=(6, 5)) ax = fig.add_subplot(111, projection='3d') ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=t, cmap=plt.cm.hot) ax.view_init(10, -70) ax.set_title("Swiss roll") ax.set_xlabel("$x_1$", fontsize=18) ax.set_ylabel("$x_2$", fontsize=18) ax.set_zlabel("$x_3$", fontsize=18) ax.set_xlim(axes[0:2]) ax.set_ylim(axes[2:4]) ax.set_zlim(axes[4:6]) plt.show() rbf_pca = KernelPCA(n_components = 2, kernel="rbf", gamma=0.04) X_reduced = rbf_pca.fit_transform(X) lin_pca = KernelPCA(n_components = 2, kernel="linear", fit_inverse_transform=True) rbf_pca = KernelPCA(n_components = 2, kernel="rbf", gamma=0.0433, fit_inverse_transform=True) sig_pca = KernelPCA(n_components = 2, kernel="sigmoid", gamma=0.001, coef0=1, fit_inverse_transform=True) y = t > 6.9 plt.figure(figsize=(11, 4)) for subplot, pca, title in ((131, lin_pca, "Linear kernel"), (132, rbf_pca, "RBF kernel, $\gamma=0.04$"), (133, sig_pca, "Sigmoid kernel, $\gamma=10^{-3}, r=1$")): X_reduced = pca.fit_transform(X) if subplot == 132: X_reduced_rbf = X_reduced plt.subplot(subplot)
y, test_size=0.25, random_state=0) #Feature Scaling from sklearn.preprocessing import StandardScaler #To get more accurate results sc_X = StandardScaler() X_train = sc_X.fit_transform(X_train) X_test = sc_X.transform(X_test) #Applying Kernel PCA from sklearn.decomposition import KernelPCA kpca = KernelPCA( n_components=2, kernel='rbf' ) #We use None here initially instead of 2 since we need to compare all the variances of the independent variables and then choose the two best ones. X_train = kpca.fit_transform( X_train ) #We dont take the dependent variable (y_train in this case) as PCA is unsupervised X_test = kpca.transform(X_test) # Fitting Logistic Regression to the Training Set from sklearn.linear_model import LogisticRegression classifier = LogisticRegression(random_state=0) #To get the same result classifier.fit(X_train, y_train) #Prediciting Test set Results y_pred = classifier.predict(X_test)
fig, axs = plt.subplots(2, 4) for i in range(inverse_data.shape[1] - 1): axs[i // 4, i % 4].scatter(inverse_data[:, i], data[:, (i + 1)], c=labels, cmap='hsv') axs[i // 4, i % 4].set_xlabel(var_names[i]) axs[i // 4, i % 4].set_ylabel(var_names[i + 1]) plt.show() #KernelPCA kernelPCA = KernelPCA(n_components=4, kernel='cosine') kernelPCA_data = kernelPCA.fit(data).transform(data) fig, axs = plt.subplots(1, 1) plt.scatter(kernelPCA_data[:, 0], kernelPCA_data[:, 1], c=labels, cmap='hsv') plt.show() plt.show() #SparsePCA sparsePCA = SparsePCA(n_components=4, alpha=0.0) sparsePCA_data = sparsePCA.fit(data).transform(data) fig, axs = plt.subplots(1, 1)
# Spliting into training and test set from sklearn.model_selection import train_test_split train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.20, random_state=0) # Feature scaling on data from sklearn.preprocessing import StandardScaler sc_x = StandardScaler() train_X = sc_x.fit_transform(train_X) test_X = sc_x.transform(test_X) # Applying kernel PCA from sklearn.decomposition import KernelPCA kpca = KernelPCA(n_components=2, kernel='rbf') train_X = kpca.fit_transform(train_X) test_X = kpca.transform(test_X) # Applying logistic clasification model from sklearn.linear_model import LogisticRegression classifier = LogisticRegression(random_state=0) classifier.fit(train_X, train_y) # Predicting the output of model y_pred = classifier.predict(test_X) # Testing the outcome from sklearn.metrics import confusion_matrix, accuracy_score cm = confusion_matrix(test_y, y_pred) score = accuracy_score(test_y, y_pred) * 100
# Its first line contains information about the simulation info_str = samples_file.readline() print(info_str[:-1]) # Collect all the samples into X X = [] for x in samples_file: X.append(np.array(x[0:-1].split(' ')).astype("float64")) X = np.asanyarray(X) samples_file.close() print("Read", len(X), "samples of dimension", len(X[0])) m = len(X[0]) # start the kernel PCA to find low energy submanifold kpcaRBF = KernelPCA(n_components=3, kernel="rbf", fit_inverse_transform=True, n_jobs=-1) reducedXrbf = kpcaRBF.fit_transform(X) # Let's plot the reduced 3D space fig = plt.figure() ax = fig.add_subplot(111, projection='3d') ax.scatter(reducedXrbf[:, 0], reducedXrbf[:, 1], reducedXrbf[:, 2]) plt.title("3D reduction of the " + str(m) + "D parameter with rbf kernel") plt.savefig("ALLDATA.png") # plt.show() reconstructedX = kpcaRBF.inverse_transform(reducedXrbf) print("Reconstructing error: ", norm(X - reconstructedX)) print("Let's find the surface of minimal energy") # Information for defining the potential
file_path = os.path.join(curr_path, data_path, file_name) img = nib.load(file_path).get_data() img = np.sum( img, axis=3) # to remove the '4th' dimension which is basically intesity # crop the image with given offset img = img[rmin - offset:rmax + offset, cmin - offset:cmax + offset, zmin - offset:zmax + offset] X[t, :] = img.reshape(n, ) print('file ', file_name, ' read successfully') n_features = 30 # number of low dimensional features we want print("Extracting the top %d eigenimages from %d images" % (n_features, n_train)) #pca = PCA(n_components = n_features, svd_solver = 'randomized' , whiten = True).fit(X) kpca = KernelPCA(kernel="linear").fit(X) X_train_pca = kpca.transform(X) #eigenimages = pca.components_ # This is the matrix used to transform to lowdimensional feature space #X_train_pca = pca.transform(X) # Training sets after transformation print("PCA completed successfully ...") # Load csv file into numpy array age_train = np.genfromtxt(os.path.join(curr_path, 'targets.csv'), delimiter=',') #reg = make_pipeline(PolynomialFeatures(degree), RidgeCV()) reg = SVC() reg.fit(X_train_pca, age_train) print("Data fitted with CV Ridge Regression") # Prediction Error
alpha=0.6, c=cmap(idx), edgecolor='black', marker=markers[idx], label=cl) plot_decision_regions(X_train_pca, y_train, classifier=svm) plt.xlabel('LDA_SVM_PC 1') plt.ylabel('LDA_SVM_PC 2') plt.legend(loc='lower left') plt.show() #kPCA scikit_kpca = KernelPCA(n_components=2, kernel='rbf', gamma=0.1) #Gamma values can be updated here X_train_skernpca = scikit_kpca.fit_transform(X_train_std, y_train) X_test_skernpca = scikit_kpca.transform(X_test_std) #Logistic Regression fitted on KPCA transformed data lr = LogisticRegression() lr.fit(X_train_skernpca, y_train) kpca_lr_y_train_pred = lr.predict(X_train_skernpca) kpca_lr_y_pred = lr.predict(X_test_skernpca) print("KPCA Logistic Regression train accuracy score (gamma=0.1): ", metrics.accuracy_score(y_train, kpca_lr_y_train_pred)) print("KPCA Logistic Regression test accuracy score (gamma=0.1): ", metrics.accuracy_score(y_test, kpca_lr_y_pred)) #SVM Regression fitted on KPCA transformed dataset
def dr_cluster(data, method, gamma, params, clusters, stepsize, rows_toload, dropped_class_numbers):
    """Sweep RBF-KernelPCA gamma values, cluster each projection, and log results.

    Runs ``params + 1`` iterations. For ``i`` in ``0 .. params - 1`` the current
    ``gamma`` is evaluated (incremented by ``stepsize`` between runs); the gamma
    with the highest silhouette score is remembered, and on the last swept
    iteration ``gamma`` is reset to that best value so the final iteration
    (``i == params``) re-runs and logs the best configuration.

    Parameters
    ----------
    data : array-like
        Samples to project (assumed (n_samples, n_features) -- TODO confirm).
    method : str
        "Kmeans2D" -> 2 KPCA components; "Kmeans1D" or "Thresholding" -> 1.
        NOTE(review): any other value leaves ``components`` unbound and the
        KernelPCA call below would raise -- confirm callers only pass these.
    gamma : float
        Starting RBF gamma.
    params : int
        Number of gamma values to sweep.
    clusters : int
        Cluster count forwarded to the clustering backends.
    stepsize : float
        Gamma increment between sweep iterations.
    rows_toload : int
        Row count forwarded to the C thresholding binary and hungarian matching.
    dropped_class_numbers
        Passed through to ``hungarian.hungarian``.

    Side effects: writes each KPCA projection to ``KPCA_output_path``, compiles
    and shells out to a C thresholding program (``a.out``), and records results
    via the module-level ``logger``. Returns nothing.
    """
    # Pick the KPCA output dimensionality from the requested method.
    if (method == "Kmeans2D"):
        components = 2
    if (method == "Kmeans1D" or method == "Thresholding"):
        components = 1
    flag = 0  # set to 1 once the C thresholding source has been compiled
    resetflag = 0  # NOTE(review): never read in this function -- dead state?
    logger.writelog(components, "Components")
    logger.result_open(method)
    print(method)
    # Best-so-far trackers across the gamma sweep.
    max_sc = -100.0
    best_purity = 0.0
    best_gamma = 0.0
    serial_num = 0
    try:
        # params sweep iterations + 1 final re-run of the best gamma.
        for i in range(0, params + 1):
            # Project the data with the current gamma and persist it to CSV
            # so the external clustering/thresholding tools can read it.
            transformer = KernelPCA(n_components=components, kernel='rbf', gamma=gamma)
            data_transformed = transformer.fit_transform(data)
            df = pd.DataFrame(data_transformed)
            df.to_csv(KPCA_output_path, index=False, header=None)
            del df
            gc.collect()
            if (method == "Thresholding"):
                # Compile the C thresholding program once, then time its run.
                if (flag == 0):
                    os.system("cc c_thresholding_new.c")
                    flag = 1
                start = timeit.default_timer()
                os.system("./a.out " + str(clusters) + " " + str(rows_toload))
                end = timeit.default_timer()
                thresholding_time = (end - start)
                # Score the clustering and match clusters to ground truth.
                sc = silhouette.silhouette(KPCA_output_path, Thresholding_paths[1])
                groundtruth_distribution, temp_assignment_error_matrix, row_ind, col_ind, class_numbers, purity = hungarian.hungarian(
                    't', Thresholding_paths[0], clusters, rows_toload, dropped_class_numbers)
                logger.writeresult(i + 1, clusters, method, thresholding_time, gamma, sc, purity)
                #print(i+1,thresholding_time,gamma,sc,purity)
                if (i < params):
                    # Track the best gamma by silhouette score.
                    if (sc > max_sc):
                        max_sc = sc
                        best_gamma = gamma
                        best_purity = purity
                        serial_num = i + 1
                    # Last sweep iteration: rewind gamma so the final pass
                    # (i == params) re-runs the best configuration.
                    if (i == (params - 1)):
                        gamma = best_gamma
                        sc = max_sc
                        purity = best_purity
                if (i == params):
                    # Final pass: log the winning configuration and its
                    # hungarian assignment details.
                    print(best_gamma, max_sc, best_purity)
                    logger.writeresult(" ", " ", " ", " ", " ", " ", " ")
                    logger.writeresult(serial_num, clusters, method, thresholding_time, best_gamma, max_sc, best_purity)
                    logger.writeresult(" ", " ", " ", " ", " ", " ", " ")
                    logger.writefinalresult(serial_num, clusters, method, thresholding_time, best_gamma, max_sc, best_purity)
                    write_hungarian_result(best_gamma, clusters, groundtruth_distribution, temp_assignment_error_matrix, row_ind, col_ind, class_numbers, best_purity,
                                           method, params, stepsize, dropped_class_numbers)
            else:
                # K-means path (mirrors the thresholding branch above).
                kmeans_time = kmeans.kmeans(KPCA_output_path, KMeans_paths[1], clusters)
                kmeans.groundtruth_distribution(KMeans_paths[1], KMeans_paths[0], datafiles_names[0], datafiles_names[2], clusters)
                sc = silhouette.silhouette(KPCA_output_path, KMeans_paths[1])
                groundtruth_distribution, temp_assignment_error_matrix, row_ind, col_ind, class_numbers, purity = hungarian.hungarian(
                    'k', KMeans_paths[0], clusters, rows_toload, dropped_class_numbers)
                logger.writeresult(i + 1, clusters, method, kmeans_time, gamma, sc, purity)
                #print(i+1,kmeans_time,gamma,sc,purity)
                if (i < params):
                    if (sc > max_sc):
                        max_sc = sc
                        best_gamma = gamma
                        best_purity = purity
                        serial_num = i + 1
                    if (i == (params - 1)):
                        gamma = best_gamma
                        sc = max_sc
                        purity = best_purity
                if (i == params):
                    print(best_gamma, max_sc, best_purity)
                    logger.writeresult(" ", " ", " ", " ", " ", " ", " ")
                    logger.writeresult(serial_num, clusters, method, kmeans_time, best_gamma, max_sc, best_purity)
                    logger.writeresult(" ", " ", " ", " ", " ", " ", " ")
                    logger.writefinalresult(serial_num, clusters, method, kmeans_time, best_gamma, max_sc, best_purity)
                    write_hungarian_result(best_gamma, clusters, groundtruth_distribution, temp_assignment_error_matrix, row_ind, col_ind, class_numbers, best_purity,
                                           method, params, stepsize, dropped_class_numbers)
            # Advance gamma for the next sweep iteration (not on the last
            # sweep step, where gamma was rewound to best_gamma above).
            if (i < (params - 1)):
                gamma = gamma + stepsize
    except (KeyboardInterrupt, SystemExit, Exception) as ex:
        # Log any interruption/failure with full traceback details.
        ex_type, ex_value, ex_traceback = sys.exc_info()
        trace_back = traceback.extract_tb(ex_traceback)
        logger.writelog(str(ex_type.__name__), "Exception Type")
        logger.writelog(str(ex_value), "Exception Message")
        logger.writelog(str(trace_back), "Traceback")
    finally:
        # Always close the result log, even after an exception.
        logger.result_close()
if mode == MODE_SDIC: vic = sdic.sdic(sdic.SDIC_TYPE_SDIC) vic.fit(x_train) x_train_new = vic.transform(x_train) x_test_new = vic.transform(x_test) elif mode == MODE_SDIC_C: vic = sdic.sdic(sdic.SDIC_TYPE_SDIC_C) vic.fit(x_train) x_train_new = vic.transform(x_train) x_test_new = vic.transform(x_test) elif mode == MODE_DI: x_train_new = np.zeros((x_train.shape[0], img_size, img_size)) x_test_new = np.zeros((x_test.shape[0], img_size, img_size)) from sklearn.decomposition import KernelPCA pca = KernelPCA(n_components=2) X = x_train.reshape(x_train.shape[0], img_size * img_size) Xt = x_test.reshape(x_test.shape[0], img_size * img_size) x = pca.fit_transform(np.transpose(X)) x[:, 0] = x[:, 0] - np.min(x[:, 0]) x[:, 0] = x[:, 0] / np.max(x[:, 0]) * (img_size - 1) x[:, 1] = x[:, 1] - np.min(x[:, 1]) x[:, 1] = x[:, 1] / np.max(x[:, 1]) * (img_size - 1) x = x.round().astype('int') pts_per_coord = {} for i in range(0, x.shape[0]): coord = (x[i, 0], x[i, 1]) x_train_new[:, x[i, 0], x[i, 1]] += X[:, i] if coord not in pts_per_coord:
def btnConvert_click(self):
    """Run PCA-based functional alignment for each requested fold.

    Reads all hyperparameters and variable names from the GUI (`ui`),
    validates them, then for every fold in [FoldFrom, FoldTo]:
    loads the input .mat file, copies the selected metadata variables to
    the output dict, fits PCA / Kernel PCA / Incremental PCA (per the
    selected Method) on the training data, transforms both splits, and
    saves the result with `io.savemat`.

    Returns False on any validation error (after showing a message box);
    otherwise falls through after showing a success dialog.

    Fix: `np.float` was removed in NumPy 1.20 -- on a modern NumPy the
    original Gamma/Coef0/Tolerance conversions raised AttributeError
    (caught by the bare `except`, mis-reported as bad user input). The
    builtin `float` is the exact alias `np.float` used to be.
    """
    msgBox = QMessageBox()
    totalTime = 0
    # Batch size for IncrementalPCA; 0 means "let sklearn decide" (None).
    try:
        Batch = np.int32(ui.txtBatch.text())
    except:
        msgBox.setText("Size of batch is wrong!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    if Batch == 0:
        Batch = None
    # Kernel
    Kernel = ui.cbKernel.currentText()
    # Method
    Method = ui.cbMethod.currentText()
    # Gamma (was np.float -- removed in NumPy >= 1.20)
    try:
        Gamma = float(ui.txtGamma.text())
    except:
        msgBox.setText("Gamma is wrong!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    # Degree
    try:
        Degree = np.int32(ui.txtDegree.text())
    except:
        msgBox.setText("Degree is wrong!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    # Coef0 (was np.float -- removed in NumPy >= 1.20)
    try:
        Coef0 = float(ui.txtCoef0.text())
    except:
        msgBox.setText("Coef0 is wrong!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    # Alpha
    try:
        Alpha = np.int32(ui.txtAlpha.text())
    except:
        msgBox.setText("Alpha is wrong!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    # Tol (was np.float -- removed in NumPy >= 1.20)
    try:
        Tol = float(ui.txtTole.text())
    except:
        msgBox.setText("Tolerance is wrong!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    # MaxIter; non-positive means "no explicit limit" (None).
    try:
        MaxIter = np.int32(ui.txtMaxIter.text())
    except:
        msgBox.setText("Maximum number of iterations is wrong!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    if MaxIter <= 0:
        MaxIter = None
    # Number of parallel jobs (-1 = all cores, otherwise must be > 0).
    try:
        NJob = np.int32(ui.txtJobs.text())
    except:
        msgBox.setText("The number of parallel jobs is wrong!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    if NJob < -1 or NJob == 0:
        msgBox.setText(
            "The number of parallel jobs must be -1 or greater than 0!")
        msgBox.setIcon(QMessageBox.Critical)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
        return False
    # Fold range to process.
    try:
        FoldFrom = np.int32(ui.txtFoldFrom.text())
        FoldTo = np.int32(ui.txtFoldTo.text())
    except:
        print("Please check fold parameters!")
        return
    if FoldTo < FoldFrom:
        print("Please check fold parameters!")
        return
    for fold_all in range(FoldFrom, FoldTo + 1):
        tic = time.time()
        # OutFile ($FOLD$ placeholder is expanded per fold)
        OutFile = ui.txtOutFile.text()
        OutFile = OutFile.replace("$FOLD$", str(fold_all))
        if not len(OutFile):
            msgBox.setText("Please enter out file!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        # InFile
        InFile = ui.txtInFile.text()
        InFile = InFile.replace("$FOLD$", str(fold_all))
        if not len(InFile):
            msgBox.setText("Please enter input file!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        if not os.path.isfile(InFile):
            msgBox.setText("Input file not found!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        InData = io.loadmat(InFile)
        OutData = dict()
        OutData["imgShape"] = InData["imgShape"]
        # Data variable names (train/test, input/output).
        if not len(ui.txtITrData.currentText()):
            msgBox.setText("Please enter Input Train Data variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        if not len(ui.txtITeData.currentText()):
            msgBox.setText("Please enter Input Test Data variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        if not len(ui.txtOTrData.text()):
            msgBox.setText("Please enter Output Train Data variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        if not len(ui.txtOTeData.text()):
            msgBox.setText("Please enter Output Test Data variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        # Load train/test matrices; optionally standardize both.
        try:
            XTr = InData[ui.txtITrData.currentText()]
            XTe = InData[ui.txtITeData.currentText()]
            if ui.cbScale.isChecked():
                XTr = preprocessing.scale(XTr)
                XTe = preprocessing.scale(XTe)
                print("Whole of data is scaled X~N(0,1).")
        except:
            print("Cannot load data")
            return
        # Number of features; 0 triggers automatic selection below.
        try:
            NumFea = np.int32(ui.txtNumFea.text())
        except:
            msgBox.setText("Number of features is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        if NumFea < 0:
            msgBox.setText("Number of features must be greater than zero!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        if NumFea > np.shape(XTr)[1]:
            msgBox.setText("Number of features is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        if NumFea > np.shape(XTe)[1]:
            msgBox.setText("Number of features is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        # Label variables are copied through to the output file.
        if not len(ui.txtITrLabel.currentText()):
            msgBox.setText("Please enter Train Input Label variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        if not len(ui.txtITeLabel.currentText()):
            msgBox.setText("Please enter Test Input Label variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        if not len(ui.txtOTrLabel.text()):
            msgBox.setText(
                "Please enter Train Output Label variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        if not len(ui.txtOTeLabel.text()):
            msgBox.setText("Please enter Test Output Label variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        try:
            OutData[ui.txtOTrLabel.text()] = InData[
                ui.txtITrLabel.currentText()]
            OutData[ui.txtOTeLabel.text()] = InData[
                ui.txtITeLabel.currentText()]
        except:
            # NOTE(review): unlike the other loaders this one does not
            # return on failure -- processing continues without labels.
            print("Cannot load labels!")
        # Subject IDs (required).
        if not len(ui.txtITrSubject.currentText()):
            msgBox.setText(
                "Please enter Train Input Subject variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        if not len(ui.txtITeSubject.currentText()):
            msgBox.setText(
                "Please enter Test Input Subject variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        if not len(ui.txtOTrSubject.text()):
            msgBox.setText(
                "Please enter Train Output Subject variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        if not len(ui.txtOTeSubject.text()):
            msgBox.setText(
                "Please enter Test Output Subject variable name!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False
        try:
            TrSubject = InData[ui.txtITrSubject.currentText()]
            OutData[ui.txtOTrSubject.text()] = TrSubject
            TeSubject = InData[ui.txtITeSubject.currentText()]
            OutData[ui.txtOTeSubject.text()] = TeSubject
        except:
            print("Cannot load Subject IDs")
            return
        # Task (optional; also builds 1-based task index arrays).
        if ui.cbTask.isChecked():
            if not len(ui.txtITrTask.currentText()):
                msgBox.setText(
                    "Please enter Input Train Task variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtITeTask.currentText()):
                msgBox.setText(
                    "Please enter Input Test Task variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTrTask.text()):
                msgBox.setText(
                    "Please enter Output Train Task variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTeTask.text()):
                msgBox.setText(
                    "Please enter Output Test Task variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            try:
                TrTask = InData[ui.txtITrTask.currentText()]
                OutData[ui.txtOTrTask.text()] = TrTask
                TeTask = InData[ui.txtITeTask.currentText()]
                OutData[ui.txtOTeTask.text()] = TeTask
                # Map each distinct task label to a 1-based integer index.
                TrTaskIndex = TrTask.copy()
                for tasindx, tas in enumerate(np.unique(TrTask)):
                    TrTaskIndex[TrTask == tas] = tasindx + 1
                TeTaskIndex = TeTask.copy()
                for tasindx, tas in enumerate(np.unique(TeTask)):
                    TeTaskIndex[TeTask == tas] = tasindx + 1
            except:
                print("Cannot load Tasks!")
                return
        # Run (optional).
        if ui.cbRun.isChecked():
            if not len(ui.txtITrRun.currentText()):
                msgBox.setText(
                    "Please enter Train Input Run variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtITeRun.currentText()):
                msgBox.setText(
                    "Please enter Test Input Run variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTrRun.text()):
                msgBox.setText(
                    "Please enter Train Output Run variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTeRun.text()):
                msgBox.setText(
                    "Please enter Test Output Run variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            try:
                TrRun = InData[ui.txtITrRun.currentText()]
                OutData[ui.txtOTrRun.text()] = TrRun
                TeRun = InData[ui.txtITeRun.currentText()]
                OutData[ui.txtOTeRun.text()] = TeRun
            except:
                print("Cannot load Runs!")
                return
        # Counter (optional).
        if ui.cbCounter.isChecked():
            if not len(ui.txtITrCounter.currentText()):
                msgBox.setText(
                    "Please enter Train Input Counter variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtITeCounter.currentText()):
                msgBox.setText(
                    "Please enter Test Input Counter variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTrCounter.text()):
                msgBox.setText(
                    "Please enter Train Output Counter variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTeCounter.text()):
                msgBox.setText(
                    "Please enter Test Output Counter variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            try:
                TrCounter = InData[ui.txtITrCounter.currentText()]
                OutData[ui.txtOTrCounter.text()] = TrCounter
                TeCounter = InData[ui.txtITeCounter.currentText()]
                OutData[ui.txtOTeCounter.text()] = TeCounter
            except:
                print("Cannot load Counters!")
                return
        # Matrix Label (optional).
        if ui.cbmLabel.isChecked():
            if not len(ui.txtITrmLabel.currentText()):
                msgBox.setText(
                    "Please enter Train Input Matrix Label variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtITemLabel.currentText()):
                msgBox.setText(
                    "Please enter Test Input Matrix Label variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTrmLabel.text()):
                msgBox.setText(
                    "Please enter Train Output Matrix Label variable name!"
                )
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTemLabel.text()):
                msgBox.setText(
                    "Please enter Test Output Matrix Label variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            try:
                OutData[ui.txtOTrmLabel.text()] = InData[
                    ui.txtITrmLabel.currentText()]
                OutData[ui.txtOTemLabel.text()] = InData[
                    ui.txtITemLabel.currentText()]
            except:
                print("Cannot load matrix lables!")
                return
        # Design matrices (optional).
        if ui.cbDM.isChecked():
            if not len(ui.txtITrDM.currentText()):
                msgBox.setText(
                    "Please enter Train Input Design Matrix variable name!"
                )
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtITeDM.currentText()):
                msgBox.setText(
                    "Please enter Test Input Design Matrix variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTrDM.text()):
                msgBox.setText(
                    "Please enter Train Output Design Matrix variable name!"
                )
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTeDM.text()):
                msgBox.setText(
                    "Please enter Test Output Design Matrix variable name!"
                )
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            try:
                OutData[ui.txtOTrDM.text()] = InData[
                    ui.txtITrDM.currentText()]
                OutData[ui.txtOTeDM.text()] = InData[
                    ui.txtITeDM.currentText()]
            except:
                print("Cannot load design matrices!")
                return
        # Coordinate (optional).
        if ui.cbCol.isChecked():
            if not len(ui.txtCol.currentText()):
                msgBox.setText("Please enter Coordinator variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOCol.text()):
                msgBox.setText("Please enter Coordinator variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            try:
                OutData[ui.txtOCol.text()] = InData[
                    ui.txtCol.currentText()]
            except:
                print("Cannot load coordinator!")
                return
        # Condition (optional).
        if ui.cbCond.isChecked():
            if not len(ui.txtCond.currentText()):
                msgBox.setText("Please enter Condition variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOCond.text()):
                msgBox.setText("Please enter Condition variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            try:
                OutData[ui.txtOCond.text()] = InData[
                    ui.txtCond.currentText()]
            except:
                print("Cannot load conditions!")
                return
        # FoldID (optional).
        if ui.cbFoldID.isChecked():
            if not len(ui.txtFoldID.currentText()):
                msgBox.setText("Please enter FoldID variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOFoldID.text()):
                msgBox.setText("Please enter FoldID variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            try:
                OutData[ui.txtOFoldID.text()] = InData[
                    ui.txtFoldID.currentText()]
            except:
                print("Cannot load Fold ID!")
                return
        # FoldInfo (optional).
        if ui.cbFoldInfo.isChecked():
            if not len(ui.txtFoldInfo.currentText()):
                msgBox.setText("Please enter FoldInfo variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOFoldInfo.text()):
                msgBox.setText("Please enter FoldInfo variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            try:
                OutData[ui.txtOFoldInfo.text()] = InData[
                    ui.txtFoldInfo.currentText()]
            except:
                print("Cannot load Fold Info!")
                return
            pass
        # Number of Scan (optional).
        if ui.cbNScan.isChecked():
            if not len(ui.txtITrScan.currentText()):
                msgBox.setText(
                    "Please enter Number of Scan variable name for Input Train!"
                )
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtITeScan.currentText()):
                msgBox.setText(
                    "Please enter Number of Scan variable name for Input Test!"
                )
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTrScan.text()):
                msgBox.setText(
                    "Please enter Number of Scan variable name for Output Train!"
                )
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTeScan.text()):
                msgBox.setText(
                    "Please enter Number of Scan variable name for Output Test!"
                )
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            try:
                OutData[ui.txtOTrScan.text()] = InData[
                    ui.txtITrScan.currentText()]
                OutData[ui.txtOTeScan.text()] = InData[
                    ui.txtITeScan.currentText()]
            except:
                print("Cannot load NScan!")
                return
        # Auto-select the number of features when the user entered 0.
        if NumFea == 0:
            NumFea = np.min(np.shape(XTr))
            print("Number of features are automatically selected as ", NumFea)
        try:
            if Method == "PCA":
                model = PCA(n_components=NumFea, copy=False, tol=Tol)
            elif Method == "Kernel PCA":
                model = KernelPCA(n_components=NumFea,kernel=Kernel,gamma=Gamma,degree=Degree,\
                    coef0=Coef0, alpha=Alpha, tol=Tol, max_iter=MaxIter, n_jobs=NJob,copy_X=False)
            else:
                model = IncrementalPCA(n_components=NumFea, copy=False, batch_size=Batch)
            print("Running PCA Functional Alignment on Training Data ...")
            OutData[ui.txtOTrData.text()] = model.fit_transform(XTr)
            print("Running PCA Functional Alignment on Testing Data ...")
            # NOTE(review): fit_transform here REFITS the model on the test
            # data instead of reusing the train fit (model.transform(XTe)).
            # Kept as-is -- per-set alignment may be intentional; confirm.
            OutData[ui.txtOTeData.text()] = model.fit_transform(XTe)
        except Exception as e:
            print(str(e))
        # Record the alignment parameters and timing, then save the fold.
        HAParam = dict()
        HAParam["Method"] = Method
        HAParam["NumFea"] = NumFea
        HAParam["Kernel"] = Kernel
        OutData["FunctionalAlignment"] = HAParam
        OutData["Runtime"] = time.time() - tic
        totalTime += OutData["Runtime"]
        print("Saving ...")
        io.savemat(OutFile, mdict=OutData)
        print("Fold " + str(fold_all) + " is DONE: " + OutFile)
    print("Runtime: ", totalTime)
    print("PCA Functional Alignment is done.")
    msgBox.setText("PCA Functional Alignment is done.")
    msgBox.setIcon(QMessageBox.Information)
    msgBox.setStandardButtons(QMessageBox.Ok)
    msgBox.exec_()
Let's use that on the half-moon dataset. ''' from sklearn.decomposition import KernelPCA from sklearn.datasets import make_moons import matplotlib.pyplot as plt import numpy as np X, y = make_moons(n_samples=100, random_state=123) #plt.scatter(X[y==0, 0], X[y==0, 1], color = 'red', marker = '^', alpha = 0.5) #plt.scatter(X[y==1, 0], X[y==1, 1], color = 'blue', marker = 'o', alpha = 0.5) #plt.show() scikit_kpca = KernelPCA(n_components=2, kernel="rbf", gamma=15) X_skernpca = scikit_kpca.fit_transform(X) plt.scatter(X_skernpca[y == 0, 0], X_skernpca[y == 0, 1], color="red", marker="^", alpha=0.5) plt.scatter(X_skernpca[y == 1, 0], X_skernpca[y == 1, 1], color="blue", marker="o", alpha=0.5) plt.xlabel("PC1") plt.ylabel("PC2") plt.show()
class PCA_Benchmark():
    """Benchmark IncrementalPCA / KernelPCA on property and search matrices.

    Loads pickled pandas DataFrames, fits the requested PCA variant, pickles
    the fitted model to ``model_name``, and plots the cumulative
    explained-variance curve under ``plots/``.

    Fixes over the previous revision: every ``open()`` handle is now closed
    (the old code leaked the handles passed to ``pickle.load``/``pickle.dump``),
    the duplicated load/limit logic is factored into ``_load``, and an unknown
    ``pca_type`` raises immediately instead of failing later with an
    AttributeError.
    """

    # KernelPCA builds an n_samples x n_samples Gram matrix, so cap its input.
    KERNEL_LIMIT = 3000

    def __init__(self, prop_path=None, srch_path=None, chunksize=1000,
                 batch_size=300, only=None, pca_type='IPCA',
                 kernel_type='linear', whiten=False, model_name=''):
        self.model_name = model_name
        self.kernel_type = kernel_type
        self.type = pca_type
        self.chunksize = chunksize
        self.batch_size = batch_size
        self.only = only
        self.whiten = whiten

        # Only truncate when the quadratic-cost Kernel PCA is requested.
        limit = self.KERNEL_LIMIT if pca_type == 'Kernel' else None
        # `only` selects which matrices to load: 'prop', 'srch', or None for both.
        self.prop_data = self._load(prop_path, limit) if only in (None, 'prop') else None
        self.srch_data = self._load(srch_path, limit) if only in (None, 'srch') else None

        if self.prop_data is not None:
            self.prop_data = self.to_numpy(self.prop_data)
        if self.srch_data is not None:
            self.srch_data = self.to_numpy(self.srch_data)

    def _load(self, path, limit):
        """Unpickle a DataFrame from ``path``; keep only ``limit`` rows if given."""
        with open(path, "rb") as fh:  # with-block closes the handle (old code leaked it)
            data = pd.DataFrame(pickle.load(fh))
        return data.head(limit) if limit is not None else data

    def to_numpy(self, matrix):
        """Return the DataFrame ``matrix`` as a plain ndarray."""
        return matrix.to_numpy()

    def _make_model(self, n_features):
        """Build the PCA estimator configured for this benchmark.

        n_components is n_features - 1, the maximum meaningful count short of
        a full rotation.
        """
        if self.type == 'IPCA':
            return IncrementalPCA(n_components=n_features - 1,
                                  batch_size=self.batch_size,
                                  whiten=self.whiten)
        if self.type == 'Kernel':
            return KernelPCA(n_components=n_features - 1,
                             kernel=self.kernel_type)
        raise ValueError("unknown pca_type: %r" % (self.type,))

    def run_props(self):
        """Fit on the property matrix and pickle the model.

        Returns the per-component explained-variance ratio.
        """
        self.prop_pca = self._make_model(self.prop_data.shape[1])
        print(self.prop_data.shape)
        self.prop_pca = self.prop_pca.fit(self.prop_data)
        kpca_transform = self.prop_pca.transform(self.prop_data)
        # KernelPCA exposes no explained_variance_ratio_, so derive it from
        # the variance of the transformed components (works for IPCA too).
        explained_variance = np.var(kpca_transform, axis=0)
        explained_variance_ratio = explained_variance / np.sum(explained_variance)
        with open(self.model_name, "wb") as fh:  # close the dump handle
            pickle.dump(self.prop_pca, fh)
        return explained_variance_ratio

    def run_srchs(self):
        """Fit on the search matrix and pickle the model.

        Returns the per-component explained-variance ratio.
        """
        self.srch_pca = self._make_model(self.srch_data.shape[1])
        self.srch_pca = self.srch_pca.fit(self.srch_data)
        kpca_transform = self.srch_pca.transform(self.srch_data)
        with open(self.model_name, "wb") as fh:  # close the dump handle
            pickle.dump(self.srch_pca, fh)
        explained_variance = np.var(kpca_transform, axis=0)
        return explained_variance / np.sum(explained_variance)

    def run_all(self, show=True, save=True):
        """Run the configured benchmark(s) and plot cumulative explained variance.

        Returns (prop_ratio, srch_ratio); an entry is None when that matrix
        was excluded by ``only``.
        """
        exp_var_ratio_props = None
        exp_var_ratio_srchs = None
        foo = 'Whitened' if self.whiten else ''
        if self.only in ('prop', None):
            exp_var_ratio_props = self.run_props()
            plt.figure(1)
            plt.plot(np.cumsum(exp_var_ratio_props))
            plt.title(self.type + ' ' + foo + ' Principal Components Cumulative Explained Variance For Properties')
            plt.xlabel('number of components')
            plt.ylabel('cumulative explained variance')
            plt.savefig('plots/' + self.type + '_' + foo + '_cum_explained_var_prop.png')
        if self.only in ('srch', None):
            exp_var_ratio_srchs = self.run_srchs()
            plt.figure(2)
            plt.plot(np.cumsum(exp_var_ratio_srchs))
            plt.title(self.type + ' ' + foo + ' Principal Components Cumulative Explained Variance For Queries')
            plt.xlabel('number of components')
            plt.ylabel('cumulative explained variance')
            plt.savefig('plots/' + self.type + '_' + foo + '_cum_explained_var_srch.png')
        if show:
            plt.show()
        return exp_var_ratio_props, exp_var_ratio_srchs
# Drop one feature of every highly correlated pair (|r| > 0.95), applying the
# same drop to both pdA (train) and pdB (test) so the columns stay aligned.
D = np.array(pdB)
print(C.shape, D.shape)
corr_matrix = pdA.corr().abs()
# Upper triangle with k=1 excludes the diagonal so each pair is seen once.
# FIX: np.bool was deprecated in NumPy 1.20 and removed in 1.24 -- use the
# builtin bool for the mask dtype.
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
to_drop = [column for column in upper.columns if any(upper[column] > 0.95)]
print(len(to_drop))
pdA.drop(labels=to_drop, axis=1, inplace=True)
pdB.drop(labels=to_drop, axis=1, inplace=True)
G = np.array(pdA)
H = np.array(pdB)
print(G.shape, H.shape)  # accidental duplicate print removed

# NOTE(review): fitting the projection on train+test together leaks test-set
# information into the transform; kept as-is to preserve reported results.
pca = KernelPCA(n_components=95)
pca.fit(np.concatenate((G, H)))
GG = pca.transform(G)
HH = pca.transform(H)
print(GG.shape, HH.shape)

# Score the MLP with 20 random 50/50 splits, then fit on all of GG and
# predict the held-out matrix HH.
clf = MLPClassifier(max_iter=2000)
from sklearn.model_selection import ShuffleSplit
ss = ShuffleSplit(n_splits=20, test_size=0.5, random_state=10)
scores = cross_val_score(clf, GG, xlab, cv=ss, n_jobs=-1, verbose=1)
print(np.mean(scores))
print(scores)
clf.fit(GG, xlab)
pred = clf.predict(HH)
print(len(pred))
plt.grid(True) if (0): #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% NONLINEAR METHODS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%# #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% NONLINEAR METHODS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%# #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% NONLINEAR METHODS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%# d = pair.pairwise_distances(Xtrain,Xtrain) aux = np.triu(d) sigma = np.sqrt(np.mean(np.power(aux[aux!=0],2)*0.5)) gamma = 1/(2*sigma**2) if (0): #%% K-PCA # Calculate accumulated variance kpca = KernelPCA(kernel="rbf",gamma=gamma) kpca.fit_transform(Xtrain) eigenvals = kpca.lambdas_[0:220] # Calculate classifiation scores for each component nComponents = np.linspace(1, 500, 100, endpoint=True) kpcaScores = np.zeros((5,np.alen(nComponents))) kpca = KernelPCA(n_components = Ntrain,kernel="rbf",gamma=gamma) kpca.fit(Xtrain) XtrainT = kpca.transform(Xtrain) XtestT = kpca.transform(Xtest) for i in range(len(nComponents)):
# import seaborn as sns import sklearn.decomposition from sklearn.decomposition import PCA dimreductiontype='pca' from sklearn.decomposition import PCA,KernelPCA,FactorAnalysis if(dimreductiontype=='pca'): pca = PCA(n_components = nr ,whiten=True)#min(df.shape)) elif(dimreductiontype=='kpca'): pca = KernelPCA(n_components=min(df.shape)) elif(dimreductiontype=='fa'): pca = FactorAnalysis(n_components=min(df.shape)) Z = pca.fit_transform(X) try: print("pca.n_components ", pca.n_components) print("pca.n_features_ ", pca.n_features_) print("pca.n_samples_ ", pca.n_samples_) print('pca.noise_variance_ ', pca.noise_variance_) except Exception: 1; try: ax,fig=plt.subplots(1,1)
def main():
    """Script entry point: load a dataset, embed it, classify, save results.

    Behavior is driven entirely by the constants in the "settings" section
    below.  Expensive steps (dataset parsing, subset picking, CV splitting)
    are cached as pickles and re-run only when their ``*_again`` flag is True.

    Relies on project helpers defined elsewhere in this file/module:
    read_MNIST_dataset, read_image_dataset, read_BreastCancer_dataset,
    save_variable, save_np_array_to_txt, cross_validation, and the embedding
    classes (LLE, Isomap, MDS, PCA, KernelPCA, LaplacianEigenmap, LDA, SPCA,
    TSNE, ML, Kernel_FLDA) plus the NB classifier.
    """
    # ----- settings:
    dataset = 'MNIST'  # --> 'Facial' or 'MNIST' or 'Breast_cancer'
    embedding_method = 'Isomap'
    n_components = 5
    # When a *_again flag is False, the corresponding pickled cache is reused.
    split_in_cross_validation_again = False
    load_dataset_again = False
    subset_of_MNIST = True
    pick_subset_of_MNIST_again = False
    MNIST_subset_cardinality_training = 10000  # picking from first samples of 60,000 samples
    MNIST_subset_cardinality_testing = 5000  # picking from first samples of 10,000 samples
    # ----- paths:
    if dataset == 'Facial':
        path_dataset = './input/att_database/'
        path_dataset_save = './input/pickle_dataset/Facial/'
    elif dataset == 'MNIST':
        path_dataset = './input/mnist/'
        path_dataset_save = './input/pickle_dataset/MNIST/'
    elif dataset == 'Breast_cancer':
        path_dataset = './input/Breast_cancer_dataset/wdbc_data.txt'
        # NOTE(review): Breast_cancer caches under the MNIST directory --
        # looks like a copy-paste slip; confirm before relying on the cache.
        path_dataset_save = './input/pickle_dataset/MNIST/'
    # ----- Loading dataset:
    print('Reading dataset...')
    if dataset == 'MNIST':
        if load_dataset_again:
            training_data = list(
                read_MNIST_dataset(dataset="training", path=path_dataset))
            testing_data = list(
                read_MNIST_dataset(dataset="testing", path=path_dataset))
            number_of_training_samples = len(training_data)
            dimension_of_data = 28 * 28
            # Flatten each 28x28 image into one row; grown sample-by-sample
            # with vstack (quadratic, but the result is cached afterwards).
            X_train = np.empty((0, dimension_of_data))
            y_train = np.empty((0, 1))
            for sample_index in range(number_of_training_samples):
                if np.mod(sample_index, 1) == 0:  # progress line every sample
                    print('sample ' + str(sample_index) + ' from ' +
                          str(number_of_training_samples) + ' samples...')
                label, pixels = training_data[sample_index]
                pixels_reshaped = np.reshape(pixels, (1, 28 * 28))
                X_train = np.vstack([X_train, pixels_reshaped])
                y_train = np.vstack([y_train, label])
            y_train = y_train.ravel()
            number_of_testing_samples = len(testing_data)
            dimension_of_data = 28 * 28
            X_test = np.empty((0, dimension_of_data))
            y_test = np.empty((0, 1))
            for sample_index in range(number_of_testing_samples):
                if np.mod(sample_index, 1) == 0:
                    print('sample ' + str(sample_index) + ' from ' +
                          str(number_of_testing_samples) + ' samples...')
                label, pixels = testing_data[sample_index]
                pixels_reshaped = np.reshape(pixels, (1, 28 * 28))
                X_test = np.vstack([X_test, pixels_reshaped])
                y_test = np.vstack([y_test, label])
            y_test = y_test.ravel()
            # Cache the parsed arrays so the slow parse is a one-time cost.
            save_variable(X_train, 'X_train', path_to_save=path_dataset_save)
            save_variable(y_train, 'y_train', path_to_save=path_dataset_save)
            save_variable(X_test, 'X_test', path_to_save=path_dataset_save)
            save_variable(y_test, 'y_test', path_to_save=path_dataset_save)
        else:
            # Reuse the arrays pickled by a previous run.
            file = open(path_dataset_save + 'X_train.pckl', 'rb')
            X_train = pickle.load(file)
            file.close()
            file = open(path_dataset_save + 'y_train.pckl', 'rb')
            y_train = pickle.load(file)
            file.close()
            file = open(path_dataset_save + 'X_test.pckl', 'rb')
            X_test = pickle.load(file)
            file.close()
            file = open(path_dataset_save + 'y_test.pckl', 'rb')
            y_test = pickle.load(file)
            file.close()
        if subset_of_MNIST:
            if pick_subset_of_MNIST_again:
                # Keep only the first N train / M test samples (no shuffle).
                X_train_picked = X_train[
                    0:MNIST_subset_cardinality_training, :]
                X_test_picked = X_test[0:MNIST_subset_cardinality_testing, :]
                y_train_picked = y_train[0:MNIST_subset_cardinality_training]
                y_test_picked = y_test[0:MNIST_subset_cardinality_testing]
                save_variable(X_train_picked, 'X_train_picked',
                              path_to_save=path_dataset_save)
                save_variable(X_test_picked, 'X_test_picked',
                              path_to_save=path_dataset_save)
                save_variable(y_train_picked, 'y_train_picked',
                              path_to_save=path_dataset_save)
                save_variable(y_test_picked, 'y_test_picked',
                              path_to_save=path_dataset_save)
            else:
                file = open(path_dataset_save + 'X_train_picked.pckl', 'rb')
                X_train_picked = pickle.load(file)
                file.close()
                file = open(path_dataset_save + 'X_test_picked.pckl', 'rb')
                X_test_picked = pickle.load(file)
                file.close()
                file = open(path_dataset_save + 'y_train_picked.pckl', 'rb')
                y_train_picked = pickle.load(file)
                file.close()
                file = open(path_dataset_save + 'y_test_picked.pckl', 'rb')
                y_test_picked = pickle.load(file)
                file.close()
            # Replace the full arrays with the picked subset for the rest of the run.
            X_train = X_train_picked
            X_test = X_test_picked
            y_train = y_train_picked
            y_test = y_test_picked
        image_shape = (28, 28)
    elif dataset == 'Facial':
        if load_dataset_again:
            X, y, image_shape = read_image_dataset(dataset_path=path_dataset,
                                                   imagesType='.jpg')
            save_variable(variable=X, name_of_variable='X',
                          path_to_save=path_dataset_save)
            save_variable(variable=y, name_of_variable='y',
                          path_to_save=path_dataset_save)
            save_variable(variable=image_shape, name_of_variable='image_shape',
                          path_to_save=path_dataset_save)
        else:
            file = open(path_dataset_save + 'X.pckl', 'rb')
            X = pickle.load(file)
            file.close()
            file = open(path_dataset_save + 'y.pckl', 'rb')
            y = pickle.load(file)
            file.close()
            file = open(path_dataset_save + 'image_shape.pckl', 'rb')
            image_shape = pickle.load(file)
            file.close()
    elif dataset == 'Breast_cancer':
        data = pd.read_csv(
            path_dataset, sep=",", header=None
        )  # read text file using pandas dataFrame: https://stackoverflow.com/questions/21546739/load-data-from-txt-with-pandas
        labels_of_classes = ['M', 'B']
        X, y = read_BreastCancer_dataset(data=data,
                                         labels_of_classes=labels_of_classes)
        X = X.astype(
            np.float64
        )  #---> otherwise MDS has error --> https://stackoverflow.com/questions/16990996/multidimensional-scaling-fitting-in-numpy-pandas-and-sklearn-valueerror
    # --- cross validation:
    path_to_save = './input/split_data/'
    portion_of_test_in_dataset = 0.3
    number_of_folds = 10
    if split_in_cross_validation_again:
        train_indices_in_folds, test_indices_in_folds, \
            X_train_in_folds, X_test_in_folds, y_train_in_folds, y_test_in_folds = \
            cross_validation(X=X, y=y, n_splits=number_of_folds,
                             test_size=portion_of_test_in_dataset)
        save_variable(train_indices_in_folds, 'train_indices_in_folds',
                      path_to_save=path_to_save)
        save_variable(test_indices_in_folds, 'test_indices_in_folds',
                      path_to_save=path_to_save)
        save_variable(X_train_in_folds, 'X_train_in_folds',
                      path_to_save=path_to_save)
        save_variable(X_test_in_folds, 'X_test_in_folds',
                      path_to_save=path_to_save)
        save_variable(y_train_in_folds, 'y_train_in_folds',
                      path_to_save=path_to_save)
        save_variable(y_test_in_folds, 'y_test_in_folds',
                      path_to_save=path_to_save)
        for fold_index in range(number_of_folds):
            # NOTE(review): filename stems differ -- '..._fold' for train vs
            # '..._folds' for test; looks like a typo, confirm before fixing.
            save_np_array_to_txt(np.asarray(
                train_indices_in_folds[fold_index]),
                'train_indices_in_fold' + str(fold_index),
                path_to_save=path_to_save)
            save_np_array_to_txt(np.asarray(
                test_indices_in_folds[fold_index]),
                'test_indices_in_folds' + str(fold_index),
                path_to_save=path_to_save)
    else:
        file = open(path_to_save + 'train_indices_in_folds.pckl', 'rb')
        train_indices_in_folds = pickle.load(file)
        file.close()
        file = open(path_to_save + 'test_indices_in_folds.pckl', 'rb')
        test_indices_in_folds = pickle.load(file)
        file.close()
        file = open(path_to_save + 'X_train_in_folds.pckl', 'rb')
        X_train_in_folds = pickle.load(file)
        file.close()
        file = open(path_to_save + 'X_test_in_folds.pckl', 'rb')
        X_test_in_folds = pickle.load(file)
        file.close()
        file = open(path_to_save + 'y_train_in_folds.pckl', 'rb')
        y_train_in_folds = pickle.load(file)
        file.close()
        file = open(path_to_save + 'y_test_in_folds.pckl', 'rb')
        y_test_in_folds = pickle.load(file)
        file.close()
    # NOTE(review): X_train/X_test exist only on the MNIST path here.
    print(X_train.shape)
    print(X_test.shape)
    # ----- embedding:
    print('Embedding...')
    if dataset == 'MNIST':
        # plot_components(X_projected=X_projected, images=X.reshape((-1, image_shape[0], image_shape[1])), ax=ax, image_scale=0.6, markersize=10, thumb_frac=0.05, cmap='gray_r')
        # ----- embedding:
        # Methods with an out-of-sample transform fit on the train set only;
        # methods without one (MDS, LaplacianEigenmap, TSNE) embed train+test
        # jointly and split the result by row count.
        if embedding_method == 'LLE':
            clf = LLE(n_neighbors=5, n_components=n_components,
                      method='standard')
            clf.fit(X=X_train)
            X_train_projected = clf.transform(X=X_train)
            X_test_projected = clf.transform(X=X_test)
        elif embedding_method == 'Isomap':
            clf = Isomap(n_neighbors=5, n_components=n_components)
            clf.fit(X=X_train)
            X_train_projected = clf.transform(X=X_train)
            X_test_projected = clf.transform(X=X_test)
        elif embedding_method == 'MDS':
            clf = MDS(n_components=n_components)
            X_projected = clf.fit_transform(X=np.vstack([X_train, X_test]))
            X_train_projected = X_projected[:X_train.shape[0], :]
            X_test_projected = X_projected[X_train.shape[0]:, :]
        elif embedding_method == 'PCA':
            clf = PCA(n_components=n_components)
            clf.fit(X=X_train)
            X_train_projected = clf.transform(X=X_train)
            X_test_projected = clf.transform(X=X_test)
        elif embedding_method == 'KernelPCA':
            clf = KernelPCA(n_components=n_components, kernel='rbf')
            clf.fit(X=X_train)
            X_train_projected = clf.transform(X=X_train)
            X_test_projected = clf.transform(X=X_test)
        elif embedding_method == 'LaplacianEigenmap':
            clf = LaplacianEigenmap(n_neighbors=5, n_components=n_components)
            X_projected = clf.fit_transform(X=np.vstack([X_train, X_test]))
            X_train_projected = X_projected[:X_train.shape[0], :]
            X_test_projected = X_projected[X_train.shape[0]:, :]
        elif embedding_method == 'LDA':
            clf = LDA(n_components=n_components)
            clf.fit(X=X_train, y=y_train)
            X_train_projected = clf.transform(X=X_train)
            X_test_projected = clf.transform(X=X_test)
        elif embedding_method == 'SPCA':
            clf = SPCA(n_components=n_components)
            clf.fit(X=X_train, y=y_train)
            X_train_projected = clf.transform(X=X_train)
            X_test_projected = clf.transform(X=X_test)
        elif embedding_method == 'TSNE':
            # t-SNE supports at most 3 output dimensions.
            clf = TSNE(n_components=min(3, n_components))
            # print(type(list(y_train)))
            X_projected = clf.fit_transform(
                X=np.vstack([X_train, X_test]),
                y=np.asarray(list(y_train) + list(y_test)))
            X_train_projected = X_projected[:X_train.shape[0], :]
            X_test_projected = X_projected[X_train.shape[0]:, :]
        elif embedding_method == 'ML':
            clf = ML(n_components=n_components)
            clf.fit(X=X_train, y=y_train)
            X_train_projected = clf.transform(X=X_train)
            X_test_projected = clf.transform(X=X_test)
        elif embedding_method == 'Kernel_FLDA':
            clf = Kernel_FLDA(n_components=n_components, kernel='linear')
            clf.fit(X=X_train, y=y_train)
            X_train_projected = clf.transform(X=X_train)
            X_test_projected = clf.transform(X=X_test)
        elif embedding_method == 'No_embedding':
            # Baseline: classify in the raw pixel space.
            X_train_projected = X_train
            X_test_projected = X_test
        # --- classification:
        print('Classification...')
        # clf = KNN(n_neighbors=1)
        clf = NB()  # `clf` is reused/shadowed: now the classifier, not the embedder
        clf.fit(X=X_train_projected, y=y_train)
        y_pred = clf.predict(X=X_test_projected)
        accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
        error = 1 - accuracy_score(y_true=y_test, y_pred=y_pred)
        # --- saving results:
        save_variable(accuracy, 'accuracy', path_to_save='./output/MNIST/')
        save_np_array_to_txt(np.asarray(accuracy), 'accuracy',
                             path_to_save='./output/MNIST/')
        save_variable(error, 'error', path_to_save='./output/MNIST/')
        save_np_array_to_txt(np.asarray(error), 'error',
                             path_to_save='./output/MNIST/')
        # --- report results:
        print(' ')
        print('Accuracy: ', accuracy * 100)
        print(' ')
        print('Error: ', error * 100)
# NOTE(review): `dataset` is presumably a DataFrame loaded earlier in the file;
# columns 2-3 are the features and column 4 the label -- confirm against the loader.
X = dataset.iloc[:, [2, 3]].values
y = dataset.iloc[:, 4].values

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

# Feature Scaling (fit on the training set only; test set reuses train statistics)
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Applying Kernel PCA (likewise fit on train only, then project the test set)
from sklearn.decomposition import KernelPCA
kpca = KernelPCA(n_components = 2, kernel = 'rbf')
X_train = kpca.fit_transform(X_train)
X_test = kpca.transform(X_test)

# Fitting Logistic Regression to the Training set
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 0)
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
# Scatter the class-1 points (class-0 plot presumably drawn just above this chunk).
catb = plt.scatter(X[y == 1, 0], X[y == 1, 1],
                   color='blue', marker='s', alpha=0.5)
plt.show()
plt.close()

gamma = 5
# The Laplacian kernel is not built into KernelPCA, so precompute its Gram
# matrix and feed it through kernel='precomputed' below.
K_lap = laplacian_kernel(X, gamma=gamma)

# (display name, file-name abbreviation, estimator) triples to compare.
kpcas = []
kpcas.append(
    ('Linear KPCA', 'lin_kpca', KernelPCA(n_components=2, kernel='linear')))
kpcas.append(('RBF KPCA', 'rbf_kpca',
              KernelPCA(n_components=2, kernel='rbf', gamma=gamma)))
kpcas.append(('Laplacian KPCA', 'lap_kpca',
              KernelPCA(n_components=2, kernel='precomputed')))
kpcas.append(('Sigmoid KPCA', 'sig_kpca',
              KernelPCA(n_components=2, kernel='sigmoid', gamma=gamma)))
# NOTE(review): gamma is ignored by the cosine kernel -- harmless but misleading.
kpcas.append(('Cosine KPCA', 'cos_kpca',
              KernelPCA(n_components=2, kernel='cosine', gamma=gamma)))

for kernel, abbreviation, kpca in kpcas:
    if kernel == 'Laplacian KPCA':
        # Precomputed kernel: fit on the Gram matrix instead of the raw data.
        X_kpca = kpca.fit_transform(K_lap)
    else:
        X_kpca = kpca.fit_transform(X)
from sklearn.preprocessing import StandardScaler from sklearn.decomposition import KernelPCA from sklearn.linear_model import LogisticRegression from sklearn.metrics import confusion_matrix, accuracy_score dataset = pd.read_csv('wine.csv') x = dataset.iloc[:, :-1].values y = dataset.iloc[:, -1].values x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=0) stc = StandardScaler() x_train = stc.fit_transform(x_train) x_test = stc.transform(x_test) kernel_pca = KernelPCA(n_components=2, kernel='rbf') x_train = kernel_pca.fit_transform(x_train) x_test = kernel_pca.transform(x_test) classifier = LogisticRegression() classifier.fit(x_train, y_train) y_pred = classifier.predict(x_test) cm = confusion_matrix(y_test, y_pred) print('Confusion matrix of the model is:', cm) acc = accuracy_score(y_test, y_pred) print('Accuracy of the model is:', acc) x_set = x_train y_set = y_train x1, x2 = np.meshgrid(np.arange(min(x_set[:, 0]), max(x_set[:, 0]), step=0.01),