def test_kernel_pca():
    # Smoke-test KernelPCA across eigen solvers and kernels, including a
    # user-supplied callable kernel.
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    def histogram(x, y, **kwargs):
        # Histogram kernel implemented as a callable.
        assert_equal(kwargs, {})  # no kernel_params that we didn't ask for
        return np.minimum(x, y).sum()

    solvers = ("auto", "dense", "arpack")
    kernels = ("linear", "rbf", "poly", histogram)
    for eigen_solver in solvers:
        for kernel in kernels:
            # histogram kernel produces singular matrix inside linalg.solve
            # XXX use a least-squares approximation?
            inv = not callable(kernel)

            # fit_transform and fit().transform() must agree up to sign.
            kpca = KernelPCA(4, kernel=kernel,
                             eigen_solver=eigen_solver,
                             fit_inverse_transform=inv)
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            assert_array_almost_equal(np.abs(X_fit_transformed),
                                      np.abs(X_fit_transformed2))

            # non-regression test: previously, gamma would be 0 by default,
            # forcing all eigenvalues to 0 under the poly kernel
            assert_not_equal(X_fit_transformed.size, 0)

            # new data maps into the same embedding dimension
            X_pred_transformed = kpca.transform(X_pred)
            assert_equal(X_pred_transformed.shape[1],
                         X_fit_transformed.shape[1])

            # inverse transform preserves the input shape
            if inv:
                X_pred2 = kpca.inverse_transform(X_pred_transformed)
                assert_equal(X_pred2.shape, X_pred.shape)
示例#2
0
def Kernel_PCA(HE_MI_train_test, kernel, invTran, degree):
    '''
    Apply Kernel PCA to the four datasets (HE/MI train and test) and keep
    only the first two kernel principal components of every sample.

    NOTE: each dataset is independently fit_transform-ed, exactly as in
    the original code (test sets are NOT projected with the training fit).
    '''
    he_train = HE_MI_train_test[0]
    mi_train = HE_MI_train_test[1]
    he_test = HE_MI_train_test[2]
    mi_test = HE_MI_train_test[3]

    kpca = KernelPCA(kernel=kernel, fit_inverse_transform=invTran, degree=degree)

    projected = [kpca.fit_transform(ds)
                 for ds in (he_train, mi_train, he_test, mi_test)]

    # Keep (component-0, component-1) pairs per sample, per dataset.
    return [[(pt[0], pt[1]) for pt in ds] for ds in projected]
class RegionSplitter_PCA_KMean():
    """Split a region by projecting samples with PCA and clustering the
    projection into two groups with k-means."""

    def __init__(self, data, label):
        # data: 2-D sequence (n_samples, n_features).
        # label: accepted for interface parity with sibling splitters.
        data_dim_num = len(data[0])
        label_dim_num = len(label[0])

        # Keep every dimension: PCA acts as a rotation here.
        self.n_comp = max(1, data_dim_num)
        self.pca = PCA(n_components=self.n_comp)

        data = self.pca.fit_transform(data)

        # k-means clustering in the projected space.
        # (The original transposed `data` twice before fitting — a no-op
        # round trip; fit on the projected rows directly.)
        self.clusterer = KMeans(n_clusters=2, init='k-means++')
        self.clusterer.fit(data)

    def classify(self, data):
        # Returns whether the sample falls into cluster 0.
        if not isinstance(data, tuple):
            # BUG FIX: the original `raise(TypeError, "...")` raised a
            # tuple, which itself fails with "exceptions must derive from
            # BaseException"; raise the intended TypeError instead.
            raise TypeError("data must be a tuple")

        # NOTE(review): transform/predict expect 2-D input; passing a bare
        # tuple relies on old sklearn behavior — confirm with callers.
        data = tuple(self.pca.transform(data)[0])
        group = self.clusterer.predict(data)

        return group == 0
示例#4
0
def test_kernel_pca():
    # Smoke-test KernelPCA across eigen solvers and built-in kernels.
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    for eigen_solver in ("auto", "dense", "arpack"):
        for kernel in ("linear", "rbf", "poly"):
            # transform fit data
            kpca = KernelPCA(4, kernel=kernel, eigen_solver=eigen_solver,
                             fit_inverse_transform=True)
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            assert_array_almost_equal(np.abs(X_fit_transformed),
                                      np.abs(X_fit_transformed2))

            # non-regression test: previously, gamma would be 0 by default,
            # forcing all eigenvalues to 0 under the poly kernel
            # BUG FIX: comparing an ndarray to [] is meaningless; assert a
            # non-empty result via .size, matching the sibling test above.
            assert_not_equal(X_fit_transformed.size, 0)

            # transform new data
            X_pred_transformed = kpca.transform(X_pred)
            assert_equal(X_pred_transformed.shape[1],
                         X_fit_transformed.shape[1])

            # inverse transform
            X_pred2 = kpca.inverse_transform(X_pred_transformed)
            assert_equal(X_pred2.shape, X_pred.shape)
def kPCA_visualization1d(X, y):
    """Project X with a linear KernelPCA, split the first component by
    binary label and plot overlaid histograms of the two classes.

    X : array-like, (n_samples, n_features)
    y : sequence of labels; 1 selects class 1, anything else class 0
    """
    kpca = KernelPCA(kernel="linear", fit_inverse_transform=True, gamma=10,
                     n_components=2)
    X_kpca = kpca.fit_transform(X)
    X_back = kpca.inverse_transform(X_kpca)  # round trip, kept for parity
    pca = PCA(n_components=1)
    X_pca = pca.fit_transform(X)

    # First kernel principal component, split by label.
    class_1 = [row[0] for row, label in zip(X_kpca, y) if label == 1]
    class_0 = [row[0] for row, label in zip(X_kpca, y) if label != 1]

    # BUG FIX: the original used Python 2 `print` statements, which are
    # syntax errors under Python 3.
    print("check")
    print(class_1[:10])

    from matplotlib import pyplot

    pyplot.hist(class_1, 50, alpha=0.5, label='class 1')
    pyplot.hist(class_0, 50, alpha=0.5, label='class 0')

    pyplot.legend(loc='upper right')
    pyplot.show()
    def test_compare_clinical_kernel(self):
        """Predictions via explicit KernelPCA + linear survival SVM must
        match the kernelized survival SVM using the same clinical kernel."""
        features_raw, target, _, _ = load_arff_file(
            WHAS500_FILE, ['fstat', 'lenfol'], '1',
            standardize_numeric=False, to_numeric=False)

        kernel_transform = ClinicalKernelTransform()
        kernel_transform.fit(features_raw)

        features = encode_categorical(standardize(features_raw))

        # Explicit feature map of the clinical kernel.
        mapper = KernelPCA(kernel=kernel_transform.pairwise_kernel)
        features_mapped = mapper.fit_transform(features)

        linear_model = FastSurvivalSVM(optimizer='rbtree', tol=1e-8,
                                       max_iter=1000, random_state=0)
        linear_model.fit(features_mapped, target)

        kernel_model = FastKernelSurvivalSVM(
            optimizer='rbtree', kernel=kernel_transform.pairwise_kernel,
            tol=1e-8, max_iter=1000, random_state=0)
        kernel_model.fit(features, target)

        pred_linear = linear_model.predict(mapper.transform(features))
        pred_kernel = kernel_model.predict(features)

        self.assertEqual(len(pred_linear), len(pred_kernel))

        # Concordance indices must agree between the two formulations.
        c1 = concordance_index_censored(target['fstat'], target['lenfol'],
                                        pred_linear)
        c2 = concordance_index_censored(target['fstat'], target['lenfol'],
                                        pred_kernel)

        self.assertAlmostEqual(c1[0], c2[0])
        self.assertTupleEqual(c1[1:], c2[1:])
示例#7
0
 def MyPCA():
     """Embed the circle dataset with an RBF KernelPCA and return the
     embedding.

     FIX: the original also fitted a plain PCA whose result was never
     used (dead code); removed.
     """
     X, y = circle_data()
     kpca = KernelPCA(kernel='rbf', fit_inverse_transform=True, gamma=10)
     return kpca.fit_transform(X)
    def perform_pca(self):
        """consider principle components as covariates, will be appended to self.X

        num_pcs : int
            Number of principle components to use as covariates

        
        K = self._centerer.fit_transform(K)

        # compute eigenvectors
        if self.eigen_solver == 'auto':
            if K.shape[0] > 200 and n_components < 10:
                eigen_solver = 'arpack'
            else:
                eigen_solver = 'dense'
        else:
            eigen_solver = self.eigen_solver

        if eigen_solver == 'dense':
            self.lambdas_, self.alphas_ = linalg.eigh(
                K, eigvals=(K.shape[0] - n_components, K.shape[0] - 1))
        elif eigen_solver == 'arpack':
            self.lambdas_, self.alphas_ = eigsh(K, n_components,
                                                which="LA",
                                                tol=self.tol,
                                                maxiter=self.max_iter)

        # sort eigenvectors in descending order
        indices = self.lambdas_.argsort()[::-1]
        self.lambdas_ = self.lambdas_[indices]
        self.alphas_ = self.alphas_[:, indices]

        # remove eigenvectors with a zero eigenvalue
        if self.remove_zero_eig or self.n_components is None:
            self.alphas_ = self.alphas_[:, self.lambdas_ > 0]
            self.lambdas_ = self.lambdas_[self.lambdas_ > 0]

        X_transformed = self.alphas_ * np.sqrt(self.lambdas_)

        """
        #TODO: implement numerics code directly, based on above template

        logging.info("performing PCA, keeping %i principle components" % (self.num_pcs))
        tt0 = time.time()
        # FIX: removed the `if False:` dead branch that wrapped an unused
        # sklearn KernelPCA path; only this direct eigh path ever executed.
        import scipy.linalg as la
        # eigh returns eigenvalues ascending; reverse so the leading
        # columns hold the top components.
        [s, u] = la.eigh(self.K)
        s = s[::-1]
        u = u[:, ::-1]
        self.pcs = u[:, 0:self.num_pcs]
        assert self.pcs.shape[1] == self.num_pcs

        self.X = sp.hstack((self.X, self.pcs))

        logging.info("...done. PCA time %.2f s" % (float(time.time() - tt0)))
示例#9
0
文件: test1.py 项目: fferri/wir
def pca(X, gamma1):
    """RBF KernelPCA embedding of X; prints shapes for debugging."""
    model = KernelPCA(kernel='rbf', fit_inverse_transform=False, gamma=gamma1)
    embedding = model.fit_transform(X)
    print('X', X.shape)
    print('alphas', model.alphas_.shape)
    print('lambdas', model.lambdas_.shape)
    #X_back = model.inverse_transform(embedding)
    return embedding
示例#10
0
文件: embed.py 项目: all-umass/graphs
  def isomap(self, num_dims=None, directed=None):
    '''Isomap embedding.

    num_dims : dimension of embedded coordinates, defaults to input dimension
    directed : used for .shortest_path() calculation
    '''
    # Classic MDS on geodesic distances: KernelPCA over the precomputed
    # kernel -0.5 * D**2 built from shortest-path distances.
    geodesic = self.shortest_path(directed=directed)
    gram = -0.5 * geodesic ** 2
    embedder = KernelPCA(n_components=num_dims, kernel='precomputed')
    return embedder.fit_transform(gram)
示例#11
0
	def __init__(self, corpus, n_components=2, kernel=None):
		"""Fit a (kernel) PCA on the standardized stylometric feature
		columns of *corpus*."""
		StyloClassifier.__init__(self, corpus)
		data = self.data_frame[self.cols].values
		self.n_components = n_components
		self.kernel = kernel
		# Plain PCA unless an explicit kernel was requested.
		if kernel:
			self.pca = KernelPCA(kernel=kernel, gamma=10)
		else:
			self.pca = PCA(n_components=self.n_components)
		# Standardize features before projecting.
		self.pca_data = self.pca.fit_transform(StandardScaler().fit_transform(data))
def getProjectionMatrixKPCA(dim=50):
    """ Kernel PCA : see paper for detailed description"""
    # Binary hierarchy matrix: X[i][j] = 1 iff label j lies on the path
    # from label i to the root.
    X = np.zeros((len(labelDict), len(labelDict)))
    for item in labelDict:
        for par in getPathToRoot(item):
            X[labelIndex[item]][labelIndex[par]] = 1
    kpca = KernelPCA(n_components=dim, fit_inverse_transform=True)
    # FIX: fit() returns the estimator itself, not a transform; the
    # original bound it to a misleading `X_kpca` name that was never used.
    kpca.fit(X)
    return kpca, kpca.alphas_
示例#13
0
def main():
    """Embed stored vectors with KernelPCA then t-SNE and plot them."""
    definition = load_definition()
    embedding = np.load(os.path.join(ROOT, definition.embedding))
    uuids = np.load(os.path.join(ROOT, definition.uuids))

    # Two-stage dimensionality reduction: KernelPCA first, then t-SNE.
    reduced = KernelPCA(**definition.pca).fit_transform(embedding)
    reduced = TSNE(**definition.tsne).fit_transform(reduced)

    plot_vectors(reduced, uuids, definition.sources, definition.output)
示例#14
0
def test_kernel_pca_n_components():
    # The transformed output must have exactly n_components columns.
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    for eigen_solver in ("dense", "arpack"):
        for n_comp in [1, 2, 4]:
            model = KernelPCA(n_components=n_comp, eigen_solver=eigen_solver)
            result_shape = model.fit(X_fit).transform(X_pred).shape
            assert_equal(result_shape, (2, n_comp))
示例#15
0
def test_kernel_pca_consistent_transform():
    # X_fit_ needs to retain an unmodified copy of the training data:
    # mutating the caller's array afterwards must not affect transform().
    rng = np.random.RandomState(0)
    train = rng.rand(10, 10)
    model = KernelPCA(random_state=rng).fit(train)
    before = model.transform(train)

    pristine = train.copy()
    train[:, 0] = 666  # clobber the original array in place
    after = model.transform(pristine)
    assert_array_almost_equal(before, after)
示例#16
0
 def fit(self, X, num, method='dijkstra'):
     """Isomap-style embedding: kNN graph -> shortest paths -> KernelPCA."""
     # Construct the k-nearest-neighbour graph.
     graph = KNN(num).fit(X)
     # All-pairs shortest paths over the graph.
     if method == 'dijkstra':
         distances = dijkstra(graph)
     else:
         distances = shortest_path(graph, method=method)
     # Multidimensional-scaling step, done with Kernel PCA.
     return KernelPCA(n_components=num).fit_transform(distances)
def kernelPCA(data, labels, new_dimension):
    """Reduce *data* to *new_dimension* features with KernelPCA.

    Returns (reduced_data, elapsed_seconds).  *labels* is unused but kept
    for signature compatibility with the sibling reducers.
    """
    # BUG FIX: Python 2 `print` statement -> print() call.
    print("start kernel pca...")

    # Densify sparse input first.
    if hasattr(data, "toarray"):
        data = data.toarray()

    start = time.time()
    pca = KernelPCA(fit_inverse_transform=True, gamma=10,
                    n_components=new_dimension, alpha=2)
    reduced = pca.fit_transform(data)
    end = time.time()
    return (reduced, end - start)
示例#18
0
def isomap(X, n_neighbors, metric):
    """
        Based on sklearn,
        Author: Jake Vanderplas  -- <*****@*****.**>
        License: BSD, (C) 2011
    """
    # BUG FIX: the original built the graph from an undefined name `D`;
    # the input data is the parameter `X`.
    kng = kneighbors_graph(X, n_neighbors=n_neighbors, metric=metric)
    dist_matrix_ = graph_shortest_path(kng, method='auto', directed=False)
    kernel_pca_ = KernelPCA(n_components=2, kernel="precomputed",
                            eigen_solver='auto')
    # Classic MDS kernel from squared geodesic distances.
    G = dist_matrix_ ** 2
    G *= -0.5
    return kernel_pca_.fit_transform(G)
示例#19
0
def reduce_kpca(X, kern, retall=False):
    """Reduce dimensionality with Kernel PCA.

    X : data to embed
    kern : kernel name passed to KernelPCA
    retall : when True, also return the fitted estimator

    Returns (X_kpca, X_back[, kpca]) where X_back is the reconstruction
    through the inverse transform.

    (Docstring fixed: the old one advertised a `components` parameter
    that never existed in the signature.)
    """
    kpca = KernelPCA(kernel=kern, fit_inverse_transform=True)
    X_kpca = kpca.fit_transform(X)
    X_back = kpca.inverse_transform(X_kpca)

    if retall:
        return X_kpca, X_back, kpca
    return X_kpca, X_back
def test_kernel_pca_deterministic_output():
    # Repeated fits with the same RNG must give identical embeddings.
    rng = np.random.RandomState(0)
    X = rng.rand(10, 10)

    for solver in ('arpack', 'dense'):
        transformed_X = np.zeros((20, 2))
        for trial in range(20):
            model = KernelPCA(n_components=2, eigen_solver=solver,
                              random_state=rng)
            transformed_X[trial, :] = model.fit_transform(X)[0]
        assert_allclose(
            transformed_X, np.tile(transformed_X[0, :], 20).reshape(20, 2))
示例#21
0
    def RunKPCAScikit(q):
      """Run scikit-learn KernelPCA on self.dataset, timing the run.

      Puts the elapsed time (or -1 on any failure) on queue *q* and
      returns the same value.
      """
      totalTimer = Timer()

      # Load input dataset.
      Log.Info("Loading dataset", self.verbose)
      data = np.genfromtxt(self.dataset, delimiter=',')

      with totalTimer:
        # Get the new dimensionality, if it is necessary.
        # FIX: use raw strings for the regexes — '\d' in a plain string is
        # an invalid escape sequence (DeprecationWarning, later an error).
        dimension = re.search(r'-d (\d+)', options)
        if not dimension:
          d = data.shape[1]
        else:
          d = int(dimension.group(1))
          if (d > data.shape[1]):
            Log.Fatal("New dimensionality (" + str(d) + ") cannot be greater "
              + "than existing dimensionality (" + str(data.shape[1]) + ")!")
            q.put(-1)
            return -1

        # Get the kernel type and make sure it is valid.
        kernel = re.search(r"-k ([^\s]+)", options)
        try:
          if not kernel:
            Log.Fatal("Choose kernel type, valid choices are 'linear'," +
                  " 'hyptan' and 'polynomial'.")
            q.put(-1)
            return -1
          elif kernel.group(1) == "linear":
            model = KernelPCA(n_components=d, kernel="linear")
          elif kernel.group(1) == "hyptan":
            # 'hyptan' maps onto sklearn's sigmoid (tanh) kernel.
            model = KernelPCA(n_components=d, kernel="sigmoid")
          elif kernel.group(1) == "polynomial":
            degree = re.search(r'-D (\d+)', options)
            degree = 1 if not degree else int(degree.group(1))

            model = KernelPCA(n_components=d, kernel="poly", degree=degree)
          else:
            Log.Fatal("Invalid kernel type (" + kernel.group(1) + "); valid " +
                "choices are 'linear', 'hyptan' and 'polynomial'.")
            q.put(-1)
            return -1

          out = model.fit_transform(data)
        except Exception:
          # FIX: the bound exception `e` was never used; any failure maps
          # to the sentinel -1 on the queue.
          q.put(-1)
          return -1

      time = totalTimer.ElapsedTime()
      q.put(time)
      return time
示例#22
0
def generate_kpca_compression(X, n_components=16):
    """
    Compresses the data using sklearn KernelPCA implementation.

    :param X: Data (n_samples, n_features)
    :param n_components: Number of dimensions for PCA to keep

    :return: X_prime (the compressed representation), pca
    """
    model = KernelPCA(n_components=n_components, kernel='rbf',
                      eigen_solver='arpack', fit_inverse_transform=False)
    model.fit(X)
    compressed = model.transform(X)
    return compressed, model
 def reduceDataset(self, nr=3, method='PCA'):
     '''Reduce the dimensionality of self.ModelInputs['Dataset'] with the
     chosen technique and store the result under the method name.

     Methods available:
                         'PCA'
                         'FactorAnalysis'
                         'KPCArbf','KPCApoly'
                         'KPCAcosine','KPCAsigmoid'
                         'IPCA'
                         'FastICADeflation'
                         'FastICAParallel'
     ('All' applies every technique via dimensionalityReduction().)
     '''
     dataset = self.ModelInputs['Dataset']

     if method == 'PCA':
         reduced = sklearnPCA(n_components=nr).fit_transform(dataset)
     elif method == 'FactorAnalysis':
         reduced = FactorAnalysis(n_components=nr).fit_transform(dataset)
     elif method == 'KPCArbf':
         reduced = KernelPCA(nr, kernel='rbf').fit_transform(dataset)
     elif method == 'KPCApoly':
         reduced = KernelPCA(nr, kernel='poly').fit_transform(dataset)
     elif method == 'KPCAcosine':
         reduced = KernelPCA(nr, kernel='cosine').fit_transform(dataset)
     elif method == 'KPCAsigmoid':
         reduced = KernelPCA(nr, kernel='sigmoid').fit_transform(dataset)
     elif method == 'IPCA':
         reduced = IncrementalPCA(nr).fit_transform(dataset)
     elif method == 'FastICAParallel':
         reduced = FastICA(nr, algorithm='parallel').fit_transform(dataset)
     elif method == 'FastICADeflation':
         reduced = FastICA(nr, algorithm='deflation').fit_transform(dataset)
     elif method == 'All':
         self.dimensionalityReduction(nr=nr)
         return self
     else:
         # BUG FIX: an unknown method previously fell through and raised a
         # confusing NameError on `reduced`; fail fast with a clear error.
         raise ValueError("unknown reduction method: %r" % (method,))

     self.ModelInputs.update({method: reduced})
     self.datasetsAvailable.append(method)
     return self
 def dimensionalityReduction(self, nr=5):
     '''Apply every dimensionality-reduction technique available in this
     class to self.ModelInputs['Dataset'] and store each result in
     self.ModelInputs under its technique name.

     Techniques: PCA, FactorAnalysis, KPCA (rbf/poly/cosine/sigmoid),
     IPCA, FastICA (deflation/parallel).
     '''
     dataset = self.ModelInputs['Dataset']
     p_components = sklearnPCA(n_components=nr).fit_transform(dataset)
     factors = FactorAnalysis(n_components=nr).fit_transform(dataset)
     rbf = KernelPCA(nr, kernel='rbf').fit_transform(dataset)
     poly = KernelPCA(nr, kernel='poly').fit_transform(dataset)
     cosine = KernelPCA(nr, kernel='cosine').fit_transform(dataset)
     sigmoid = KernelPCA(nr, kernel='sigmoid').fit_transform(dataset)
     i_components = IncrementalPCA(nr).fit_transform(dataset)
     # BUG FIX: the original computed ficaD from the *parallel* FastICA
     # and ficaP from the *deflation* one, so the stored keys
     # 'FastICADeflation'/'FastICAParallel' held swapped results.
     ficaP = FastICA(nr, algorithm='parallel').fit_transform(dataset)
     ficaD = FastICA(nr, algorithm='deflation').fit_transform(dataset)

     values = [p_components, factors, rbf, poly, cosine, sigmoid,
               i_components, ficaD, ficaP]
     keys = ['PCA', 'FactorAnalysis', 'KPCArbf', 'KPCApoly', 'KPCAcosine',
             'KPCAsigmoid', 'IPCA', 'FastICADeflation', 'FastICAParallel']
     self.ModelInputs.update(dict(zip(keys, values)))
     # (was a side-effect list comprehension; extend() is the idiom)
     self.datasetsAvailable.extend(keys)
     return self
示例#25
0
文件: cluster.py 项目: ataylor-cs/wmf
def project(X, kde=False, kernel=False, gamma=10):
    """Project X to 2-D (RBF KernelPCA when kernel=True, else PCA),
    optionally draw a KDE joint plot, then scatter-plot and return the
    reduced data."""
    if kernel:
        kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=gamma)
        reduced_data = kpca.fit_transform(X)
    else:
        pca = PCA(n_components=2).fit(X)
        # BUG FIX: Python 2 `print` statements -> print() calls.
        print(pca.explained_variance_ratio_)
        print(pca.components_)
        reduced_data = pca.transform(X)
    if kde:
        with sns.axes_style("white"):
            sns.jointplot(reduced_data[:, 0], reduced_data[:, 1], kind="kde");
        plt.show()
    plt.plot(reduced_data[:, 0], reduced_data[:, 1], 'k.', markersize=2)
    return reduced_data
def gogo_kpca(fxpath, mpath):
    """Fit an RBF KernelPCA per subject (5 dogs + 2 humans) on stacked
    ictal/interictal/test features and pickle each fitted model to mpath."""
    kpca_params = {'n_components': 256,
                   'kernel': 'rbf',
                   'gamma': None,
                   'degree': 3,
                   'coef0': 1,
                   'kernel_params': None,
                   'alpha': 1.0,
                   'fit_inverse_transform': False,
                   'eigen_solver': 'auto',
                   'tol': 0,
                   'max_iter': None,
                   'remove_zero_eig': True}

    kpca_fname = '%s/kpca_rbf_{0}_{1}.pkl' % mpath

    for i in range(7):
        # Subjects 0-4 are dogs 1-5; subjects 5-6 are humans 1-2.
        if i < 5:
            nbreed = 1
            sbreed = 'dog'
            nsubject = i + 1
        else:
            nbreed = 2
            sbreed = 'human'
            nsubject = 1 + abs(5 - i)

        # BUG FIX: Python 2 `print` statement -> print() call.
        print('breed%d.subject%d..' % (nbreed, nsubject))

        X_ictal = load_features(fxpath, nbreed, nsubject, 1)
        X_inter = load_features(fxpath, nbreed, nsubject, 2)

        X = vstack((X_inter, X_ictal))
        del X_inter, X_ictal; gc.collect()

        X_test = load_features(fxpath, nbreed, nsubject, 3)

        X = vstack((X, X_test))
        del X_test; gc.collect()

        kpca = KernelPCA(**kpca_params)
        # Fit on a strided subsample to bound memory and time.
        skip_interval = get_skip_interval(X)
        X = kpca_preprocess_features(X)
        kpca.fit(X[::skip_interval])
        with open(kpca_fname.format(sbreed, nsubject), 'wb') as f:
            cPickle.dump(kpca, f)

        del X, kpca; gc.collect()
def test_kernel_pca_sparse():
    # KernelPCA must accept sparse input and behave like the dense path.
    rng = np.random.RandomState(0)
    X_fit = sp.csr_matrix(rng.random_sample((5, 4)))
    X_pred = sp.csr_matrix(rng.random_sample((2, 4)))

    for eigen_solver in ("auto", "arpack"):
        for kernel in ("linear", "rbf", "poly"):
            # fit_transform and fit().transform() must agree up to sign.
            model = KernelPCA(4, kernel=kernel, eigen_solver=eigen_solver,
                              fit_inverse_transform=False)
            first = model.fit_transform(X_fit)
            second = model.fit(X_fit).transform(X_fit)
            assert_array_almost_equal(np.abs(first), np.abs(second))

            # new data keeps the embedding dimension
            projected = model.transform(X_pred)
            assert_equal(projected.shape[1], first.shape[1])
示例#28
0
  def _fit_transform(self, X):
    """Fit a distributed isomap-style embedding of X.

    Builds a kNN graph, computes all-pairs shortest paths across worker
    tiles, then embeds the geodesic Gram matrix with KernelPCA.  Results
    are stored in ``self.dist_matrix_`` and ``self.embedding_``.
    """
    self.nbrs_.fit(X)
    self.training_data_ = self.nbrs_._fit_X 
    # Classic-MDS step expressed as KernelPCA over a precomputed kernel.
    self.kernel_pca_ = KernelPCA(n_components=self.n_components,
                                  kernel="precomputed",
                                  eigen_solver=self.eigen_solver,
                                  tol=self.tol, max_iter=self.max_iter)
    
    kng = kneighbors_graph(self.nbrs_, self.n_neighbors, mode="distance")
    n_points = X.shape[0]
    n_workers = blob_ctx.get().num_workers

    # One tile per worker, or one point per tile when workers outnumber
    # points.  NOTE(review): `/` here looks like Python 2 integer
    # division; under Python 3 this would be a float — confirm runtime.
    if n_points < n_workers:
      tile_hint = (1, )
    else:
      tile_hint = (n_points / n_workers, )

    """
    task_array is used for deciding the idx of starting points and idx of endding points 
    that each tile needs to find the shortest path among.
    """
    task_array = expr.ndarray((n_points,), tile_hint=tile_hint)
    task_array = task_array.force()
    
    #dist matrix is used to hold the result
    dist_matrix = expr.ndarray((n_points, n_points), reduce_fn=lambda a,b:a+b).force()
    results = task_array.foreach_tile(mapper_fn = _shortest_path_mapper,
                                      kw = {'kng' : kng,
                                            'directed' : False,
                                            'dist_matrix' : dist_matrix})
    self.dist_matrix_ = dist_matrix.glom()
    # Convert geodesic distances to the MDS kernel: -0.5 * D**2.
    G = self.dist_matrix_ ** 2
    G *= -0.5
    self.embedding_ = self.kernel_pca_.fit_transform(G)
示例#29
0
def main():
	"""Train an SVM on arc-cosine KernelPCA features of MNIST and report
	accuracy and timing."""
	#set the timer
	start = time.time()

	#load the data
	mnist = fetch_mldata('MNIST original')
	mnist.target = mnist.target.astype(np.int32)

	# Random 30k/40k train/test split.
	seed = np.random.randint(1, 30000)
	rand = np.random.RandomState(seed)
	items = len(mnist.target)
	indices = rand.randint(items, size=70000)
	trindex = indices[0:30000]
	tsindex = indices[30000:]

	#scale down features to the range [0, 1]
	mnist.data = mnist.data/255.0
	mnist.data = mnist.data.astype(np.float32)

	trainX = mnist.data[trindex]
	testX = mnist.data[tsindex]
	trainY = mnist.target[trindex]
	testY = mnist.target[tsindex]

	#extract the features using KPCA on a 1000-sample landmark kernel
	kpca = KernelPCA(kernel='precomputed')
	kpca_train = arc_cosine(trainX[0:1000], trainX[0:1000])
	#Fit the model from data in X
	kpca.fit(kpca_train)

	kernel_train = arc_cosine(trainX, trainX[0:1000])
	kernel_test = arc_cosine(testX, trainX[0:1000])

	trainX_kpca = kpca.transform(kernel_train)
	testX_kpca = kpca.transform(kernel_test)
	# BUG FIX: Python 2 `print` statements -> print() calls.
	print(testX_kpca.shape)

	#fit the svm model and compute accuaracy measure
	clf = svm.SVC(kernel=arc_cosine)
	clf.fit(trainX_kpca, trainY)

	pred = clf.predict(testX_kpca)
	print(accuracy_score(testY, pred))
	print('total : %d, correct : %d, incorrect : %d\n' %(len(pred), np.sum(pred == testY), np.sum(pred != testY)))

	print('Test Time : %f Minutes\n' %((time.time()-start)/60))
    def __init__(self, data, label):
        """Pick the PCA dimension and cut value that minimise weighted
        within-group label variance over randomly sampled candidate cuts."""
        self.cut_dim = 0
        self.cut_val = 0
        num_candidates = 50

        data_dim_num = len(data[0])
        label_dim_num = len(label[0])

        self.n_comp = max(1, data_dim_num)

        # BUG FIX: sklearn's PCA has no `kernel` argument — passing
        # kernel='linear' raises TypeError.  Plain PCA is already linear.
        self.pca = PCA(n_components=self.n_comp)
        # self.ica = ICA(n_components=self.n_comp)

        data = self.pca.fit_transform(data)
        #data = self.ica.fit_transform(data)

        data_zipped = list(zip(*data))

        data_dim_num = len(data[0])
        label_dim_num = len(label[0])

        # For each projected dimension, try random cut values and keep the
        # (dimension, cut) pair with the lowest weighted in-group variance.
        dim_min = float("inf")
        for i in range(data_dim_num):

            for k in range(num_candidates):
                # pick a random value between the observed min and max
                max_val = max(data_zipped[i])
                min_val = min(data_zipped[i])
                cut_val = random.choice(np.linspace(min_val, max_val, num=500))

                groups = [[label[j] for j in range(len(data_zipped[i])) if data_zipped[i][j] <= cut_val],
                          [label[j] for j in range(len(data_zipped[i])) if data_zipped[i][j] > cut_val]]

                # skip degenerate cuts that leave one side empty
                if len(groups[0]) == 0 or len(groups[1]) == 0:
                    continue

                weighted_avg_variance = []
                for group in groups:
                    num_sample = len(group)
                    group = zip(*group)

                    # normalised variance per label dimension
                    variance = []
                    for group_k in group:
                        mean = math.fsum(group_k)/len(group_k)
                        norm = max(math.fsum([x**2 for x in group_k])/len(group_k), 1)
                        variance.append(math.fsum([((x - mean)**2)/norm for x in group_k]))
                    weighted_avg_variance.append(math.fsum(variance)/len(variance)*num_sample)

                in_group_variance = math.fsum(weighted_avg_variance)

                if in_group_variance < dim_min:

                    dim_min = in_group_variance
                    self.cut_dim = i
                    self.cut_val = cut_val
示例#31
0
    PCA_S = pca.explained_variance_ratio_  # Percentage of variance that each component explains (eigenvectors)
    PCA_mean = pca.mean_
    Xtrain_PCA = pca.fit_transform(Xtrain)  # It obtains the features of the components.PCA
    Xtest_PCA = pca.transform(Xtest)
    
# Kernel PCA
if (FE_kPCA == 1):
    from sklearn.decomposition import KernelPCA
    import sklearn.metrics.pairwise as pair
    # Get proper value for the gamma of the gaussian projection:
    # sigma^2 = mean of half the squared pairwise training distances
    # (one copy of each pair via the upper triangle), gamma = 1/(2*sigma^2).
    d = pair.pairwise_distances(Xtrain,Xtrain)
    aux = np.triu(d)
    sigma = np.sqrt(np.mean(np.power(aux[aux!=0],2)*0.5))
    gamma = 1/(2*sigma**2)
    
    kpca = KernelPCA(n_components = n_comp,kernel = "rbf",gamma = gamma)
    kpca.fit(Xtrain)
    
    kPCA_hyperplanes = kpca.alphas_  # Components of the descomposition (hyperplanes) (eigenvesctors) (eigenfaces) 
    kPCA_S = kpca.lambdas_           # Eigenvalues of the centered kernel matrix
    
    # Project train and test sets with the kernel map fitted on Xtrain.
    Xtrain_kPCA = kpca.transform(Xtrain)
    Xtest_kPCA = kpca.transform(Xtest)
    
# ICA Independent Component Analysis
if (FE_ICA == 1):
    from sklearn.decomposition import FastICA
    ica = FastICA(n_components = 15, max_iter = 10000,  tol=0.00001 )
    
    ica.fit(Xtrain)   # Fit the unmixing model on the training set
    ICA_components = ica.components_  # Get components 
示例#32
0
# --- Arcene dataset preparation -------------------------------------------
_file2 = pd.read_csv('arcene_train.labels')
_file1['Class'] = (_file2['1']).astype(int)
# Clean missing values.
# BUG FIX: DataFrame.fillna does not accept a callable — the original
# `fillna(lambda x: x.median())` filled NaN cells with the lambda object
# itself.  Fill with the per-column medians instead.
_file1 = _file1.fillna(_file1.median(numeric_only=True))
train, test = train_test_split(_file1, test_size=0.4)
linear_svm = svm.SVC(
    kernel='linear'
)  # linear SVM
rbf_svm = svm.SVC(
    kernel='rbf'
)  # RBF SVM
kpca = KernelPCA(n_components=55,
                 kernel="rbf",
                 fit_inverse_transform=True,
                 gamma=10)  # KPCA

train = train.values.tolist()
test = test.values.tolist()
# Split rows into features (all but the last two columns) and the label
# (last column).
x = []
y = []
for i in train:
    x.append(i[:-2])
    y.append(i[-1])

###############################################################################################kpca
# Dimensionality-reduction feature generation: each reducer is fit on the
# training features (target column "y" dropped) and applied to `test`.
# NOTE(review): `test` is transformed WITHOUT dropping "y" — this assumes
# the test frame carries no "y" column; confirm against the data loader.
tsvd = TruncatedSVD(n_components=n_comp, random_state=420)
tsvd_results_train = tsvd.fit_transform(train.drop(["y"], axis=1))
tsvd_results_test = tsvd.transform(test)

# PCA
# pca = PCA(n_components=n_comp, random_state=420)
# pca2_results_train = pca.fit_transform(train.drop(["y"], axis=1))
# pca2_results_test = pca.transform(test)

#sparse PCA
spca = SparsePCA(n_components=n_comp, random_state=420)
spca2_results_train = spca.fit_transform(train.drop(["y"], axis=1))
spca2_results_test = spca.transform(test)

#Kernel PCA
kpca = KernelPCA(n_components=n_comp, random_state=420)
kpca2_results_train = kpca.fit_transform(train.drop(["y"], axis=1))
kpca2_results_test = kpca.transform(test)

# ICA
ica = FastICA(n_components=n_comp, random_state=420)
ica2_results_train = ica.fit_transform(train.drop(["y"], axis=1))
ica2_results_test = ica.transform(test)

# GRP (Gaussian random projection)
grp = GaussianRandomProjection(n_components=n_comp, eps=0.1, random_state=420)
grp_results_train = grp.fit_transform(train.drop(["y"], axis=1))
grp_results_test = grp.transform(test)
# SRP
srp = SparseRandomProjection(n_components=n_comp,
def pipe_main(pipe=None):
    '''pipeline construction using sklearn estimators, final step support only
    classifiers currently
    
    .. note::
        data flows through a pipeline consisting of steps as below:
            raw data --> clean --> encoding --> scaling --> feature construction 
            --> feature selection --> resampling --> final estimator
            see scikit-learn preprocess & estimators
    parameter
    ----
    pipe - str 
        - in the format of 'xx_xx' of which 'xx' means steps in pipeline,
          default None
    return
    ----
        1) pipeline instance of chosen steps
        2) if pipe is None, a dict indicating possible choice of 'steps'
    '''
    # -- step registries: each maps a short key to a configured estimator --

    # data-cleaning strategies (Split_cls is a project-local transformer;
    # na1/na2 select the imputation for categorical/numeric columns).
    clean = {
        'clean':
        Split_cls(dtype_filter='not_datetime', na1='null', na2=-999),
        'cleanNA':
        Split_cls(dtype_filter='not_datetime', na1=None, na2=None),
        'cleanMean':
        Split_cls(dtype_filter='not_datetime', na1='most_frequent',
                  na2='mean'),
    }
    #
    # categorical encoders (project-local WOE / one-hot / ordinal)
    encode = {
        'woe': Woe_encoder(max_leaf_nodes=5),
        'oht': Oht_encoder(),
        'ordi': Ordi_encoder(),
    }

    # imbalanced-learn resamplers
    resample = {

        # over_sampling
        'rover':
        RandomOverSampler(),
        'smote':
        SMOTE(),
        'bsmote':
        BorderlineSMOTE(),
        'adasyn':
        ADASYN(),

        # under sampling controlled methods
        'runder':
        RandomUnderSampler(),
        'nearmiss':
        NearMiss(version=3),
        'pcart':
        InstanceHardnessThreshold(),

        # under sampling cleaning methods
        'tlinks':
        TomekLinks(n_jobs=-1),
        'oside':
        OneSidedSelection(n_jobs=-1),
        'cleanNN':
        NeighbourhoodCleaningRule(n_jobs=-1),
        'enn':
        EditedNearestNeighbours(n_jobs=-1),
        'ann':
        AllKNN(n_jobs=-1),
        'cnn':
        CondensedNearestNeighbour(n_jobs=-1),

        # clean outliers (FunctionSampler wraps the project-local
        # outlier_rejection with the chosen detector)
        'inlierForest':
        FunctionSampler(outlier_rejection,
                        kw_args={'method': 'IsolationForest'}),
        'inlierLocal':
        FunctionSampler(outlier_rejection,
                        kw_args={'method': 'LocalOutlierFactor'}),
        'inlierEllip':
        FunctionSampler(outlier_rejection,
                        kw_args={'method': 'EllipticEnvelope'}),
        'inlierOsvm':
        FunctionSampler(outlier_rejection, kw_args={'method': 'OneClassSVM'}),
        # combine
        'smoteenn':
        SMOTEENN(),
        'smotelink':
        SMOTETomek(),
    }

    # feature scalers
    scale = {
        'stdscale': StandardScaler(),
        'maxscale': MinMaxScaler(),
        'rscale': RobustScaler(quantile_range=(10, 90)),
        'qauntile': QuantileTransformer(),  # uniform distribution
        'power': PowerTransformer(),  # Gaussian distribution
        'norm': Normalizer(),  # default L2 norm

        # scale sparse data
        'maxabs': MaxAbsScaler(),
        'stdscalesp': StandardScaler(with_mean=False),
    }
    # feature construction
    feature_c = {
        'pca': PCA(whiten=True),
        'spca': SparsePCA(normalize_components=True, n_jobs=-1),
        'ipca': IncrementalPCA(whiten=True),
        'kpca': KernelPCA(kernel='rbf', n_jobs=-1),
        'poly': PolynomialFeatures(degree=2),
        'rtembedding': RandomTreesEmbedding(n_estimators=10),
        'LDA': LinearDiscriminantAnalysis(),
        'QDA': QuadraticDiscriminantAnalysis(),
    }
    # select from model
    feature_m = {
        'fwoe':
        SelectFromModel(Woe_encoder(max_leaf_nodes=5)),
        'flog':
        SelectFromModel(
            LogisticRegressionCV(penalty='l1',
                                 solver='saga',
                                 scoring='roc_auc')),
        'fsgd':
        SelectFromModel(SGDClassifier(penalty="l1")),
        'fsvm':
        SelectFromModel(LinearSVC('l1', dual=False, C=1e-2)),
        'fxgb':
        SelectFromModel(XGBClassifier(n_jobs=-1)),
        'frf':
        SelectFromModel(ExtraTreesClassifier(n_estimators=100, max_depth=5)),
        'fRFExgb':
        RFE(XGBClassifier(n_jobs=-1), step=0.1, n_features_to_select=20),
        'fRFErf':
        RFE(ExtraTreesClassifier(n_estimators=100, max_depth=5),
            step=0.3,
            n_features_to_select=20),
        'fRFElog':
        RFE(LogisticRegressionCV(penalty='l1',
                                 solver='saga',
                                 scoring='roc_auc'),
            step=0.3,
            n_features_to_select=20)
    }
    # Univariate feature selection
    feature_u = {
        'fchi2':
        GenericUnivariateSelect(chi2, 'percentile', 25),
        'fMutualclf':
        GenericUnivariateSelect(mutual_info_classif, 'percentile', 25),
        'fFclf':
        GenericUnivariateSelect(f_classif, 'percentile', 25),
    }
    # sklearn estimator: register every sklearn classifier that can be
    # instantiated with default arguments (failures are skipped).
    t = all_estimators(type_filter=['classifier'])
    estimator = {}
    for i in t:
        try:
            estimator.update({i[0]: i[1]()})
        except Exception:
            continue

    # overwrite/extend with explicitly configured classifiers
    estimator.update(
        dummy=DummyClassifier(),
        XGBClassifier=XGBClassifier(n_jobs=-1),
        LogisticRegressionCV=LogisticRegressionCV(scoring='roc_auc'),
        EasyEnsembleClassifier=EasyEnsembleClassifier(),
        BalancedRandomForestClassifier=BalancedRandomForestClassifier(),
        RUSBoostClassifier=RUSBoostClassifier(),
        SVC=SVC(C=0.01, gamma='auto'))

    if pipe is None:
        # no pipe spec given: report the available keys per category
        feature_s = {}
        feature_s.update(**feature_m, **feature_u)
        return {
            'clean': clean.keys(),
            'encoding': encode.keys(),
            'resample': resample.keys(),
            'scale': scale.keys(),
            'feature_c': feature_c.keys(),
            'feature_s': feature_s.keys(),
            'classifier': estimator.keys()
        }
    elif isinstance(pipe, str):
        # e.g. 'clean_oht_stdscale_XGBClassifier' -> list of step keys
        l = pipe.split('_')
        all_keys_dict = {}
        all_keys_dict.update(**clean, **encode, **scale, **feature_c,
                             **feature_m, **feature_u, **estimator, **resample)
        steps = []
        for i in l:
            if all_keys_dict.get(i) is not None:
                steps.append((i, all_keys_dict.get(i)))
            else:
                raise KeyError(
                    "'{}' invalid key for sklearn estimators".format(i))
        return Pipeline(steps)

    else:
        raise ValueError("input pipe must be a string in format 'xx[_xx]'")
示例#35
0
print('Accuracy of LDA transform test: %.2f' % accuracy_score(y_test, y_pred))
print('Accuracy of LDA transform train: %.2f' %
      accuracy_score(y_train, y_pred_train))
#svm
svm = SVC(kernel='linear', C=1.0, random_state=1)
svm.fit(X_train_lda, y_train)
# BUG FIX: the "SVM" accuracies were computed from the logistic-regression
# predictions (lr.predict); predict with the freshly fitted SVM instead.
y_pred = svm.predict(X_test_lda)
y_pred_train = svm.predict(X_train_lda)
print('Accuracy of LDA transform SVM test: %.2f' %
      accuracy_score(y_test, y_pred))
print('Accuracy of LDA transform SVM train: %.2f' %
      accuracy_score(y_train, y_pred_train))

#kPCA transform
scikit_kpca = KernelPCA(n_components=2, kernel='rbf', gamma=15)
X_train_kpca = scikit_kpca.fit_transform(X_train_std)
X_test_kpca = scikit_kpca.transform(X_test_std)
#Logistic
lr = lr.fit(X_train_kpca, y_train)
y_pred = lr.predict(X_test_kpca)
y_pred_train = lr.predict(X_train_kpca)
print('Accuracy of kPCA transform test: %.2f' % accuracy_score(y_test, y_pred))
print('Accuracy of kPCA transform train: %.2f' %
      accuracy_score(y_train, y_pred_train))

#svm
svm = SVC(kernel='linear', C=1.0, random_state=1)
svm.fit(X_train_kpca, y_train)
# BUG FIX: same copy-paste error -- evaluate the SVM, not lr.
y_pred = svm.predict(X_test_kpca)
y_pred_train = svm.predict(X_train_kpca)
示例#36
0
from load_dataset import load_data
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV

data_dir = '/home/kangle/dataset/PedBicCarData'
train_data, train_label, test_data, test_label = load_data(data_dir, 2, 2)

# Standardize features with statistics fitted on the training split only.
scaler = StandardScaler()
train_data = scaler.fit_transform(train_data)
test_data = scaler.transform(test_data)

print('\nbegin PCA process.')
#pca = PCA(n_components=1000, svd_solver='randomized', whiten=True).fit(train_data)
pca = KernelPCA(n_components=30,
                kernel='cosine',
                eigen_solver='arpack',
                n_jobs=8,
                fit_inverse_transform=True).fit(train_data)
#pca = SparsePCA(n_components=50,n_jobs=4).fit(train_data)
train_feature = pca.transform(train_data)
print(train_feature.shape)

# Relative reconstruction error of the KPCA pre-image.
train_feature_inverse = pca.inverse_transform(train_feature)
# BUG FIX: the error must be measured against the ORIGINAL data. The old
# code compared the reconstruction (n_samples x n_features) with the
# 30-dimensional transformed features -- a shape mismatch, not an error.
print(
    np.linalg.norm(train_feature_inverse - train_data) /
    np.linalg.norm(train_data))

#plt.plot(pca.explained_variance_ratio_)
#plt.plot(np.cumsum(pca.explained_variance_ratio_))
#plt.show()
示例#37
0
# Preprocess both datasets: scale the listed numeric columns and encode the
# categorical ones (data_preprocessing1, scaler and enc are defined earlier).
X_train1, X_test1 = data_preprocessing1(
    dataset=X_train1,
    test_dataset=X_test1,
    pairs=[[["carat", "depth", "table", "price", "x", "y", "z"], [scaler]],
           [["color", "clarity"], [enc]]])
X_train2, X_test2 = data_preprocessing1(
    dataset=X_train2,
    test_dataset=X_test2,
    pairs=[[[
        "fixed acidity", "volatile acidity", "citric acid", "residual sugar",
        "chlorides", "free sulfur dioxide", "total sulfur dioxide", "density",
        "pH", "sulphates", "alcohol"
    ], [scaler]], [[], [enc]]])

# RBF kernel PCA followed by LDA.
#kpca = KernelPCA(kernel="rbf", n_components=3)
kpca = KernelPCA(kernel="rbf", n_components=5)
#kpca = KernelPCA(kernel="rbf", n_components=6)
# NOTE(review): this fits on X_train/X_test, not the X_train1/X_train2
# prepared above -- confirm those names are defined earlier as intended.
kpca.fit(X_train)
X_kpca = kpca.transform(X_train)
X_kpca_test = kpca.transform(X_test)
lda = LinearDiscriminantAnalysis()
lda.fit(X_kpca, y_train)
X_lda = lda.transform(X_kpca)
X_lda_test = lda.transform(X_kpca_test)

# A second KPCA with more components for dataset 1.
#kpca1 = KernelPCA(kernel="rbf",  n_components=5)
kpca1 = KernelPCA(kernel="rbf", n_components=14)
#kpca1 = KernelPCA(kernel="rbf", n_components=22)
kpca1.fit(X_train1)
X_kpca1 = kpca1.transform(X_train1)
X_kpca_test1 = kpca1.transform(X_test1)
def DecomposedFeatures(train,
                       test,
                       val,
                       total,
                       addtrain,
                       addtest,
                       use_pca=0.0,
                       use_tsvd=0.0,
                       use_ica=0.0,
                       use_fa=0.0,
                       use_grp=0.0,
                       use_srp=0.0,
                       use_KPCA=0.0,
                       kernal="rbf"):
    """Build decomposition-based feature matrices for train/test/val.

    Every technique whose ``use_*`` ratio is non-zero is fitted on
    ``total`` and applied to ``train``, ``test`` and ``val``; the number
    of components is ``int(ratio * train.shape[1]) + 1``.  For GRP a
    negative ratio selects ``n_components='auto'`` with ``eps=abs(ratio)``.

    Parameters
    ----------
    train, test, val : array-like splits to transform.
    total : array-like the estimators are fitted on.
    addtrain, addtest : optional pre-computed feature blocks prepended
        to the outputs (``addtrain`` is reused for the val output).
    use_pca, use_tsvd, use_ica, use_fa, use_grp, use_srp, use_KPCA :
        float ratios enabling each technique (0.0 disables it).
    kernal : kernel name forwarded to KernelPCA (spelling kept for
        backward compatibility).

    Returns
    -------
    (train_df, test_df, val_df) : pandas DataFrames of the concatenated
    decomposition features, with NaNs replaced by 0.
    """
    print("\nStart decomposition process...")
    train_decomposed = []
    test_decomposed = []
    val_decomposed = []

    if addtrain is not None:
        train_decomposed = [addtrain]
        # NOTE(review): val reuses addtrain here (there is no addval
        # parameter) -- confirm this is intentional.
        val_decomposed = [addtrain]
    if addtest is not None:
        test_decomposed = [addtest]

    if use_pca > 0.0:
        print("PCA")
        N_COMP = int(use_pca * train.shape[1]) + 1
        pca = PCA(n_components=N_COMP,
                  whiten=True,
                  svd_solver="full",
                  random_state=42)
        pca.fit(total)
        train_decomposed.append(pca.transform(train))
        test_decomposed.append(pca.transform(test))
        val_decomposed.append(pca.transform(val))

    if use_tsvd > 0.0:
        print("tSVD")
        N_COMP = int(use_tsvd * train.shape[1]) + 1
        tsvd = TruncatedSVD(n_components=N_COMP, random_state=42)
        tsvd.fit(total)
        train_decomposed.append(tsvd.transform(train))
        test_decomposed.append(tsvd.transform(test))
        val_decomposed.append(tsvd.transform(val))

    if use_ica > 0.0:
        print("ICA")
        N_COMP = int(use_ica * train.shape[1]) + 1
        ica = FastICA(n_components=N_COMP, random_state=42)
        ica.fit(total)
        train_decomposed.append(ica.transform(train))
        test_decomposed.append(ica.transform(test))
        val_decomposed.append(ica.transform(val))

    if use_fa > 0.0:
        print("FA")
        N_COMP = int(use_fa * train.shape[1]) + 1
        fa = FactorAnalysis(n_components=N_COMP, random_state=42)
        fa.fit(total)
        train_decomposed.append(fa.transform(train))
        test_decomposed.append(fa.transform(test))
        val_decomposed.append(fa.transform(val))

    if use_grp != 0.0:
        print("GRP")
        if use_grp > 0.0:
            N_COMP = int(use_grp * train.shape[1]) + 1
            eps = 10
        if use_grp < 0.0:
            # negative ratio: let sklearn pick n_components for eps=|ratio|
            N_COMP = "auto"
            eps = abs(use_grp)
        grp = GaussianRandomProjection(n_components=N_COMP,
                                       eps=eps,
                                       random_state=42)
        grp.fit(total)
        train_decomposed.append(grp.transform(train))
        test_decomposed.append(grp.transform(test))
        val_decomposed.append(grp.transform(val))

    if use_srp > 0.0:
        print("SRP")
        N_COMP = int(use_srp * train.shape[1]) + 1
        srp = SparseRandomProjection(n_components=N_COMP,
                                     dense_output=True,
                                     random_state=42)
        srp.fit(total)
        train_decomposed.append(srp.transform(train))
        test_decomposed.append(srp.transform(test))
        # BUG FIX: val was transformed with `pca` (a NameError whenever
        # PCA was disabled, and the wrong features otherwise); use srp.
        val_decomposed.append(srp.transform(val))

    if use_KPCA > 0.0:
        print("KPCA")
        N_COMP = int(use_KPCA * train.shape[1]) + 1
        #N_COMP = None
        kpca = KernelPCA(n_components=N_COMP, kernel=kernal)
        kpca.fit(total)
        train_decomposed.append(kpca.transform(train))
        test_decomposed.append(kpca.transform(test))
        val_decomposed.append(kpca.transform(val))
        gc.collect()  # KPCA's kernel matrix can be large; free it promptly

    print("Append decomposition components together...")

    train_decomposed = np.concatenate(train_decomposed, axis=1)
    test_decomposed = np.concatenate(test_decomposed, axis=1)
    val_decomposed = np.concatenate(val_decomposed, axis=1)

    train_with_only_decomposed_features = pd.DataFrame(train_decomposed)
    test_with_only_decomposed_features = pd.DataFrame(test_decomposed)
    val_with_only_decomposed_features = pd.DataFrame(val_decomposed)

    #for agg_col in ['sum', 'var', 'mean', 'median', 'std', 'weight_count', 'count_non_0', 'num_different', 'max', 'min']:
    #    train_with_only_decomposed_features[col] = train[col]
    #    test_with_only_decomposed_features[col] = test[col]

    # Remove any NA
    train_with_only_decomposed_features = train_with_only_decomposed_features.fillna(
        0)
    test_with_only_decomposed_features = test_with_only_decomposed_features.fillna(
        0)
    val_with_only_decomposed_features = val_with_only_decomposed_features.fillna(
        0)
    return train_with_only_decomposed_features, test_with_only_decomposed_features, val_with_only_decomposed_features
def make():
    """Segment raw EDF EEG recordings into fixed-length windows, extract
    five per-channel features per sub-window, compress them with KernelPCA,
    stack delta / delta-delta derivatives and save each segment as a .npy
    file (recording the file names in EEG_filenames.txt).

    Relies on module-level config: file_name, PATH, save_PATH, len_data,
    sampling_stride, len_window, len_window_stride, num_window,
    final_data_shape, plus the feature helpers (root_mean_square, ...).
    """
    for i in range(len(file_name)):
        name=file_name[i]

        # Load the raw EDF recording as a (channels, samples) array.
        data = mne.io.read_raw_edf(os.path.join(PATH, name))
        new_data = data.get_data()
        num_channel, data_len =new_data.shape

        j=len_data   # running sample cursor
        m=-1         # per-recording segment counter

        with open(save_PATH + 'EEG_filenames/' + 'EEG_filenames.txt', 'a') as f:

            while True:
                # Advance the cursor by stride plus a random jitter k in [5, 15).
                k = np.random.randint(5, 15)
                j += sampling_stride + k +len_data
                if j >= new_data.shape[1]:    # (9760?) ---> total number of EEG segments produced
                    break
                EEG_segmented = new_data[:, j - len_data*2 - sampling_stride - k:j - len_data - sampling_stride - k]
                print('first')
                print(EEG_segmented.shape)  # (31,2200)

                n=len_window
                # 5 features per channel, one column per sub-window
                apply_5f=np.zeros((EEG_segmented.shape[0]*5, int(EEG_segmented.shape[1]/50)-int(1)))
                final_data = np.zeros((final_data_shape[0], final_data_shape[1]))
                print(apply_5f.shape)    # (155,44)
                m += 1




                # Build apply_5f; final shape (155, 43)
                for u in range(num_window):   # range(43)
                    n += len_window_stride
                    window = EEG_segmented[:, n - len_window - len_window_stride:n - len_window_stride]
                    print(window.shape)     # (31,100)
                    for p in range(window.shape[0]):   #window.shape[0] = 31
                        # Five features per channel row: RMS, ZCR, moving
                        # average, kurtosis and spectral entropy.
                        apply_5f[5*p, u] = root_mean_square(window[p,:])
                        apply_5f[5*p + 1, u] = zero_crossing_rate(window[p, :])
                        apply_5f[5*p + 2, u] = moving_window_average(window[p, :])
                        apply_5f[5*p + 3, u] = kurtosis(window[p, :])
                        apply_5f[5*p + 4, u] = spectral_entropy(window[p, :], 1000, method='fft')

                #kpca: reduce the 155 feature rows to 30 components
                apply_5f_new=np.transpose(apply_5f)
                print(apply_5f_new.shape)   # (43, 155)   (N_samples, n_features)
                # NOTE(review): gamma='none' is the string 'none', not None;
                # the linear kernel ignores gamma so this happens to work --
                # confirm and consider passing gamma=None explicitly.
                kpca = KernelPCA(n_components=30, kernel='linear', gamma='none')
                post_kpca = kpca.fit_transform(apply_5f_new)
                post_kpca=np.transpose(post_kpca)
                print(post_kpca.shape)   # (30,43)

                # Interleave each component with its delta and delta-delta.
                for r in range(post_kpca.shape[0]):
                    final_data[3 * r, :] = post_kpca[r, :]
                    final_data[3 * r + 1, :] = delta(post_kpca[r, :])
                    final_data[3 * r + 2, :] = delta(delta(post_kpca[r, :]))
                print('final_data shape')

                print(final_data.shape)



                # Persist the segment and log its file name.
                np.save(save_PATH + 'EEG_datas/' + 'EEG_{0}_{1}.npy'.format(i,m), final_data)
                f.write('EEG_{0}_{1}.npy\n'.format(i,m))
示例#40
0
    def btnConvert_click(self):
        totalTime = 0
        msgBox = QMessageBox()

        # Batch
        try:
            Batch = np.int32(ui.txtBatch.text())
        except:
            msgBox.setText("Size of batch is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        if Batch == 0:
            Batch = None

        # Kernel
        Kernel = ui.cbKernel.currentText()
        # Method
        Method = ui.cbMethod.currentText()

        # Gamma
        try:
            Gamma = np.float(ui.txtGamma.text())
        except:
            msgBox.setText("Gamma is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        # Degree
        try:
            Degree = np.int32(ui.txtDegree.text())
        except:
            msgBox.setText("Degree is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        # Coef0
        try:
            Coef0 = np.float(ui.txtCoef0.text())
        except:
            msgBox.setText("Coef0 is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        # Alpha
        try:
            Alpha = np.int32(ui.txtAlpha.text())
        except:
            msgBox.setText("Alpha is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        # Tol
        try:
            Tol = np.float(ui.txtTole.text())
        except:
            msgBox.setText("Tolerance is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        # MaxIte
        try:
            MaxIter = np.int32(ui.txtMaxIter.text())
        except:
            msgBox.setText("Maximum number of iterations is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        if MaxIter <= 0:
            MaxIter = None

        # Number of Job
        try:
            NJob = np.int32(ui.txtJobs.text())
        except:
            msgBox.setText("The number of parallel jobs is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        if NJob < -1 or NJob == 0:
            msgBox.setText("The number of parallel jobs must be -1 or greater than 0!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False



        TrFoldErr = list()
        TeFoldErr = list()

        try:
            FoldFrom = np.int32(ui.txtFoldFrom.text())
            FoldTo   = np.int32(ui.txtFoldTo.text())
        except:
            print("Please check fold parameters!")
            return

        if FoldTo < FoldFrom:
            print("Please check fold parameters!")
            return

        for fold_all in range(FoldFrom, FoldTo+1):
            tic = time.time()
            # Regularization
            try:
                Regularization = np.float(ui.txtRegularization.text())
            except:
                msgBox.setText("Regularization value is wrong!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False

            # OutFile
            OutFile = ui.txtOutFile.text()
            OutFile = OutFile.replace("$FOLD$", str(fold_all))
            if not len(OutFile):
                msgBox.setText("Please enter out file!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False

            # InFile
            InFile = ui.txtInFile.text()
            InFile = InFile.replace("$FOLD$", str(fold_all))
            if not len(InFile):
                msgBox.setText("Please enter input file!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not os.path.isfile(InFile):
                msgBox.setText("Input file not found!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False

            InData = io.loadmat(InFile)
            OutData = dict()
            OutData["imgShape"] = InData["imgShape"]

            # Data
            if not len(ui.txtITrData.currentText()):
                msgBox.setText("Please enter Input Train Data variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtITeData.currentText()):
                msgBox.setText("Please enter Input Test Data variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTrData.text()):
                msgBox.setText("Please enter Output Train Data variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTeData.text()):
                msgBox.setText("Please enter Output Test Data variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False

            try:
                XTr = InData[ui.txtITrData.currentText()]
                XTe = InData[ui.txtITeData.currentText()]

                if ui.cbScale.isChecked() and not ui.rbScale.isChecked():
                    XTr = preprocessing.scale(XTr)
                    XTe = preprocessing.scale(XTe)
                    print("Whole of data is scaled X~N(0,1).")
            except:
                print("Cannot load data")
                return

            # NComponent
            try:
                NumFea = np.int32(ui.txtNumFea.text())
            except:
                msgBox.setText("Number of features is wrong!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if NumFea < 1:
                msgBox.setText("Number of features must be greater than zero!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if NumFea > np.shape(XTr)[1]:
                msgBox.setText("Number of features is wrong!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False

            # Label
            if not len(ui.txtITrLabel.currentText()):
                    msgBox.setText("Please enter Train Input Label variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
            if not len(ui.txtITeLabel.currentText()):
                    msgBox.setText("Please enter Test Input Label variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
            if not len(ui.txtOTrLabel.text()):
                    msgBox.setText("Please enter Train Output Label variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
            if not len(ui.txtOTeLabel.text()):
                    msgBox.setText("Please enter Test Output Label variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
            try:
                OutData[ui.txtOTrLabel.text()] = InData[ui.txtITrLabel.currentText()]
                OutData[ui.txtOTeLabel.text()] = InData[ui.txtITeLabel.currentText()]
            except:
                print("Cannot load labels!")

            # Subject
            if not len(ui.txtITrSubject.currentText()):
                msgBox.setText("Please enter Train Input Subject variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtITeSubject.currentText()):
                msgBox.setText("Please enter Test Input Subject variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTrSubject.text()):
                msgBox.setText("Please enter Train Output Subject variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTeSubject.text()):
                msgBox.setText("Please enter Test Output Subject variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            try:
                TrSubject = InData[ui.txtITrSubject.currentText()]
                OutData[ui.txtOTrSubject.text()] = TrSubject
                TeSubject = InData[ui.txtITeSubject.currentText()]
                OutData[ui.txtOTeSubject.text()] = TeSubject
            except:
                print("Cannot load Subject IDs")
                return

            # Task
            if ui.cbTask.isChecked():
                # --- Task variables: validate the four UI field names, then copy
                # the Task arrays from InData to OutData and build 1-based integer
                # task indices used later for fold construction.
                # NOTE(review): this sits inside an enclosing `if` (likely
                # `ui.cbTask.isChecked()`) whose header is above this view.
                if not len(ui.txtITrTask.currentText()):
                    msgBox.setText("Please enter Input Train Task variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtITeTask.currentText()):
                    msgBox.setText("Please enter Input Test Task variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTrTask.text()):
                    msgBox.setText("Please enter Output Train Task variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTeTask.text()):
                    msgBox.setText("Please enter Output Test Task variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    # Pass the Task arrays through to the output container.
                    TrTask = InData[ui.txtITrTask.currentText()]
                    OutData[ui.txtOTrTask.text()] = TrTask
                    TeTask = InData[ui.txtITeTask.currentText()]
                    OutData[ui.txtOTeTask.text()] = TeTask
                    # Re-encode each unique task label as a 1-based integer index.
                    TrTaskIndex = TrTask.copy()
                    for tasindx, tas in enumerate(np.unique(TrTask)):
                        TrTaskIndex[TrTask == tas] = tasindx + 1
                    TeTaskIndex = TeTask.copy()
                    for tasindx, tas in enumerate(np.unique(TeTask)):
                        TeTaskIndex[TeTask == tas] = tasindx + 1
                except:
                    print("Cannot load Tasks!")
                    return

            # Run: validate field names, then copy the Run arrays through.
            if ui.cbRun.isChecked():
                if not len(ui.txtITrRun.currentText()):
                    msgBox.setText("Please enter Train Input Run variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtITeRun.currentText()):
                    msgBox.setText("Please enter Test Input Run variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTrRun.text()):
                    msgBox.setText("Please enter Train Output Run variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTeRun.text()):
                    msgBox.setText("Please enter Test Output Run variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    # TrRun/TeRun are also used below when building the
                    # Subject+Run analysis-level fold keys.
                    TrRun = InData[ui.txtITrRun.currentText()]
                    OutData[ui.txtOTrRun.text()] = TrRun
                    TeRun = InData[ui.txtITeRun.currentText()]
                    OutData[ui.txtOTeRun.text()] = TeRun
                except:
                    print("Cannot load Runs!")
                    return

            # Counter: validate field names, then copy the Counter arrays through.
            if ui.cbCounter.isChecked():
                if not len(ui.txtITrCounter.currentText()):
                    msgBox.setText("Please enter Train Input Counter variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtITeCounter.currentText()):
                    msgBox.setText("Please enter Test Input Counter variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTrCounter.text()):
                    msgBox.setText("Please enter Train Output Counter variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTeCounter.text()):
                    msgBox.setText("Please enter Test Output Counter variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    # TrCounter/TeCounter also feed the "+Counter" fold keys below.
                    TrCounter = InData[ui.txtITrCounter.currentText()]
                    OutData[ui.txtOTrCounter.text()] = TrCounter
                    TeCounter = InData[ui.txtITeCounter.currentText()]
                    OutData[ui.txtOTeCounter.text()] = TeCounter
                except:
                    print("Cannot load Counters!")
                    return

            # Matrix Label: validate field names, then pass the matrix-label
            # variables straight through from input to output (no transformation).
            if ui.cbmLabel.isChecked():
                if not len(ui.txtITrmLabel.currentText()):
                    msgBox.setText("Please enter Train Input Matrix Label variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtITemLabel.currentText()):
                    msgBox.setText("Please enter Test Input Matrix Label variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTrmLabel.text()):
                    msgBox.setText("Please enter Train Output Matrix Label variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTemLabel.text()):
                    msgBox.setText("Please enter Test Output Matrix Label variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOTrmLabel.text()] = InData[ui.txtITrmLabel.currentText()]
                    OutData[ui.txtOTemLabel.text()] = InData[ui.txtITemLabel.currentText()]
                except:
                    # NOTE(review): "lables" typo in this runtime message —
                    # left untouched here (documentation-only change).
                    print("Cannot load matrix lables!")
                    return

            # Design: validate field names, then pass the design matrices through.
            if ui.cbDM.isChecked():
                if not len(ui.txtITrDM.currentText()):
                    msgBox.setText("Please enter Train Input Design Matrix variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtITeDM.currentText()):
                    msgBox.setText("Please enter Test Input Design Matrix variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTrDM.text()):
                    msgBox.setText("Please enter Train Output Design Matrix variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTeDM.text()):
                    msgBox.setText("Please enter Test Output Design Matrix variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOTrDM.text()] = InData[ui.txtITrDM.currentText()]
                    OutData[ui.txtOTeDM.text()] = InData[ui.txtITeDM.currentText()]
                except:
                    print("Cannot load design matrices!")
                    return

            # Coordinate: validate field names, then pass the coordinator through.
            if ui.cbCol.isChecked():
                if not len(ui.txtCol.currentText()):
                    msgBox.setText("Please enter Coordinator variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOCol.text()):
                    # NOTE(review): same message as the input-field check above;
                    # an "Output Coordinator" wording may be intended — confirm.
                    msgBox.setText("Please enter Coordinator variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOCol.text()] = InData[ui.txtCol.currentText()]
                except:
                    print("Cannot load coordinator!")
                    return

            # Condition: validate field names, then pass the conditions through.
            if ui.cbCond.isChecked():
                if not len(ui.txtCond.currentText()):
                    msgBox.setText("Please enter Condition variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOCond.text()):
                    msgBox.setText("Please enter Condition variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOCond.text()] = InData[ui.txtCond.currentText()]
                except:
                    print("Cannot load conditions!")
                    return

            # FoldID: validate field names, then pass the fold IDs through.
            if ui.cbFoldID.isChecked():
                if not len(ui.txtFoldID.currentText()):
                    msgBox.setText("Please enter FoldID variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOFoldID.text()):
                    msgBox.setText("Please enter FoldID variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOFoldID.text()] = InData[ui.txtFoldID.currentText()]
                except:
                    print("Cannot load Fold ID!")
                    return

            # FoldInfo: validate field names, then pass the fold info through.
            if ui.cbFoldInfo.isChecked():
                if not len(ui.txtFoldInfo.currentText()):
                    msgBox.setText("Please enter FoldInfo variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOFoldInfo.text()):
                    msgBox.setText("Please enter FoldInfo variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOFoldInfo.text()] = InData[ui.txtFoldInfo.currentText()]
                except:
                    print("Cannot load Fold Info!")
                    return
                # `pass` below is a no-op left over from editing (dead code).
                pass

            # Number of Scan: validate field names, then pass the scan counts
            # through from input to output unchanged.
            if ui.cbNScan.isChecked():
                if not len(ui.txtITrScan.currentText()):
                    msgBox.setText("Please enter Number of Scan variable name for Input Train!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtITeScan.currentText()):
                    msgBox.setText("Please enter Number of Scan variable name for Input Test!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTrScan.text()):
                    msgBox.setText("Please enter Number of Scan variable name for Output Train!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTeScan.text()):
                    msgBox.setText("Please enter Number of Scan variable name for Output Test!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOTrScan.text()] = InData[ui.txtITrScan.currentText()]
                    OutData[ui.txtOTeScan.text()] = InData[ui.txtITeScan.currentText()]
                except:
                    print("Cannot load NScan!")
                    return

            # Train Analysis Level
            # Build one composite grouping key per sample from the checked
            # factors (Subject, optionally +Run, +Task, +Counter), then map each
            # sample to a 1-based integer fold ID. Each distinct fold becomes a
            # "view" for the feature extraction / hyperalignment stages below.
            print("Calculating Analysis Level for Training Set ...")
            TrGroupFold = None
            FoldStr = ""
            if ui.cbFSubject.isChecked():
                if not ui.rbFRun.isChecked():
                    TrGroupFold = TrSubject
                    FoldStr = "Subject"
                else:
                    # NOTE(review): assumes TrSubject/TrRun stack as key rows via
                    # default-axis concatenate — confirm their shapes upstream.
                    TrGroupFold = np.concatenate((TrSubject,TrRun))
                    FoldStr = "Subject+Run"

            if ui.cbFTask.isChecked():
                TrGroupFold = np.concatenate((TrGroupFold,TrTaskIndex)) if TrGroupFold is not None else TrTaskIndex
                FoldStr = FoldStr + "+Task"

            if ui.cbFCounter.isChecked():
                TrGroupFold = np.concatenate((TrGroupFold,TrCounter)) if TrGroupFold is not None else TrCounter
                FoldStr = FoldStr + "+Counter"

            # Transpose so each row is one sample's composite key tuple.
            TrGroupFold = np.transpose(TrGroupFold)

            # Unique composite keys (set of tuples -> array), one per fold.
            TrUniqFold = np.array(list(set(tuple(i) for i in TrGroupFold.tolist())))

            TrFoldIDs = np.arange(len(TrUniqFold)) + 1

            # Assign every sample the ID of its (always-present) matching key.
            TrListFold = list()
            for gfold in TrGroupFold:
                for ufoldindx, ufold in enumerate(TrUniqFold):
                    if (ufold == gfold).all():
                        currentID = TrFoldIDs[ufoldindx]
                        break
                TrListFold.append(currentID)
            TrListFold = np.int32(TrListFold)
            TrListFoldUniq = np.unique(TrListFold)


            # Test Analysis Level
            # Same fold construction for the test set; FoldStr was already built.
            print("Calculating Analysis Level for Testing Set ...")
            TeGroupFold = None
            if ui.cbFSubject.isChecked():
                if not ui.rbFRun.isChecked():
                    TeGroupFold = TeSubject
                else:
                    TeGroupFold = np.concatenate((TeSubject,TeRun))

            if ui.cbFTask.isChecked():
                TeGroupFold = np.concatenate((TeGroupFold,TeTaskIndex)) if TeGroupFold is not None else TeTaskIndex

            if ui.cbFCounter.isChecked():
                TeGroupFold = np.concatenate((TeGroupFold,TeCounter)) if TeGroupFold is not None else TeCounter

            TeGroupFold = np.transpose(TeGroupFold)

            TeUniqFold = np.array(list(set(tuple(i) for i in TeGroupFold.tolist())))

            TeFoldIDs = np.arange(len(TeUniqFold)) + 1

            TeListFold = list()
            for gfold in TeGroupFold:
                for ufoldindx, ufold in enumerate(TeUniqFold):
                    if (ufold == gfold).all():
                        currentID = TeFoldIDs[ufoldindx]
                        break
                TeListFold.append(currentID)
            TeListFold = np.int32(TeListFold)
            TeListFoldUniq = np.unique(TeListFold)

            # Train Partition
            # Per fold ("view"): optionally z-score the samples, reduce them to
            # NumFea components with the selected decomposition model, and
            # require every view to produce the same reduced shape.
            print("Partitioning Training Data ...")
            TrX = list()
            TrShape = None

            # Choose the dimensionality-reduction model from UI parameters.
            if Method == "PCA":
                svdmodel = PCA(n_components=NumFea,copy=False,tol=Tol)
            elif Method == "Kernel PCA":
                svdmodel = KernelPCA(n_components=NumFea,kernel=Kernel,gamma=Gamma,degree=Degree,\
                              coef0=Coef0, alpha=Alpha, tol=Tol, max_iter=MaxIter, n_jobs=NJob,copy_X=False)
            else:
                svdmodel = IncrementalPCA(n_components=NumFea,copy=False,batch_size=Batch)

            for foldindx, fold in enumerate(TrListFoldUniq):
                dat = XTr[np.where(TrListFold == fold)]
                if ui.cbScale.isChecked() and ui.rbScale.isChecked():
                    dat = preprocessing.scale(dat)
                    print("Data belong to View " + str(foldindx + 1) + " is scaled X~N(0,1).")

                # Model is re-fit independently for every view.
                dat = svdmodel.fit_transform(dat)
                TrX.append(dat)
                if TrShape is None:
                    TrShape = np.shape(dat)
                else:
                    if not(TrShape == np.shape(dat)):
                        print("ERROR: Train, Reshape problem for Fold " + str(foldindx + 1) + ", Shape: " + str(np.shape(dat)))
                        return
                print("Train: View " + str(foldindx + 1) + " is extracted. Shape: " + str(np.shape(dat)))

            print("Training Shape: " + str(np.shape(TrX)))

            # Test Partition — same procedure on the test set.
            # NOTE(review): fit_transform (not transform) is used here too, so
            # the model is re-fit on each test view rather than reusing the
            # training fit — confirm this per-view extraction is intended.
            print("Partitioning Testing Data ...")
            TeX = list()
            TeShape = None
            for foldindx, fold in enumerate(TeListFoldUniq):
                dat = XTe[np.where(TeListFold == fold)]
                if ui.cbScale.isChecked() and ui.rbScale.isChecked():
                    dat = preprocessing.scale(dat)
                    print("Data belong to View " + str(foldindx + 1) + " is scaled X~N(0,1).")

                dat = svdmodel.fit_transform(dat)
                TeX.append(dat)
                if TeShape is None:
                    TeShape = np.shape(dat)
                else:
                    if not(TeShape == np.shape(dat)):
                        print("Test: Reshape problem for Fold " + str(foldindx + 1))
                        return
                print("Test: View " + str(foldindx + 1) + " is extracted.")

            print("Testing Shape: " + str(np.shape(TeX)))

            # Regularized hyperalignment across views; G is the shared space.
            model = RHA(Dim=NumFea,regularization=Regularization)

            print("Running Hyperalignment on Training Data ...")
            MappedXtr, G = model.train(TrX)

            print("Running Hyperalignment on Testing Data ...")
            MappedXte =  model.test(TeX)

            # Train Dot Product
            # Accumulate the per-view residual (G - mapped view) and stack the
            # mapped views row-wise into one training matrix.
            print("Producting Training Data ...")
            TrHX = None
            TrErr = None
            for foldindx, fold in enumerate(TrListFoldUniq):
                TrErr = TrErr + (G - MappedXtr[foldindx]) if TrErr is not None else G - MappedXtr[foldindx]
                TrHX = np.concatenate((TrHX, MappedXtr[foldindx])) if TrHX is not None else MappedXtr[foldindx]
            OutData[ui.txtOTrData.text()] = TrHX
            # foldindx + 1 == number of views; used to average the residual.
            foldindx = foldindx + 1
            TrErr = TrErr / foldindx
            print("Train: alignment error ", np.linalg.norm(TrErr))
            TrFoldErr.append(np.linalg.norm(TrErr))

            # Train Dot Product
            # NOTE(review): comment above looks copy-pasted — this section
            # processes the TEST set the same way as the training block.
            print("Producting Testing Data ...")
            TeHX = None
            TeErr = None
            for foldindx, fold in enumerate(TeListFoldUniq):
                TeErr = TeErr + (G - MappedXte[foldindx]) if TeErr is not None else G - MappedXte[foldindx]
                TeHX = np.concatenate((TeHX, MappedXte[foldindx])) if TeHX is not None else MappedXte[foldindx]
            OutData[ui.txtOTeData.text()] = TeHX
            foldindx = foldindx + 1
            TeErr = TeErr / foldindx
            print("Test: alignment error ", np.linalg.norm(TeErr))
            TeFoldErr.append(np.linalg.norm(TeErr))

            # Record the alignment parameters and timing, then save to .mat.
            HAParam = dict()
            HAParam["Method"]= Method
            HAParam["Kernel"]= Kernel
            HAParam["Share"] = G
            HAParam["Level"] = FoldStr
            OutData["FunctionalAlignment"] = HAParam
            OutData["Runtime"] = time.time() - tic
            totalTime += OutData["Runtime"]

            print("Saving ...")
            io.savemat(OutFile, mdict=OutData)
            print("Fold " + str(fold_all) + " is DONE: " + OutFile)

        # Summary over all outer folds, then notify the user via the dialog.
        print("Training -> Alignment Error: mean " + str(np.mean(TrFoldErr)) + " std " + str(np.std(TrFoldErr)))
        print("Testing  -> Alignment Error: mean " + str(np.mean(TeFoldErr)) + " std " + str(np.std(TeFoldErr)))
        print("Runtime: ", totalTime)
        print("Kernel/SVD Hyperalignment is done.")
        msgBox.setText("Kernel/SVD Hyperalignment is done.")
        msgBox.setIcon(QMessageBox.Information)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
# Show user which aggregates were created
# Script fragment: reports engineered features, then sets up a catalogue of
# decomposition methods to run on different representations of the data.
# NOTE(review): aggregate_df / total_df come from earlier (unseen) code.
print(
    f">> Created {len(aggregate_df.columns)} features for; {aggregate_df.columns.tolist()}"
)
COMPONENTS = 10

# Convert to sparse matrix
sparse_matrix = scipy.sparse.csr_matrix(total_df.values)

# Data to be passed to t-SNE
tsvd = TruncatedSVD(n_components=1000).fit_transform(sparse_matrix)

# V1 List of decomposition methods
# Each entry pairs an estimator with the input it should see:
# 'total' = dense total_df, 'tsvd' = the SVD projection, 'sparse' = CSR matrix.
methods = [{
    'method': KernelPCA(n_components=2, kernel="rbf"),
    'data': 'total'
}, {
    'method': FactorAnalysis(n_components=COMPONENTS),
    'data': 'total'
}, {
    'method': TSNE(n_components=3, init='pca'),
    'data': 'tsvd'
}, {
    'method': TruncatedSVD(n_components=COMPONENTS),
    'data': 'sparse'
}, {
    'method': PCA(n_components=COMPONENTS),
    'data': 'total'
}, {
    'method': FastICA(n_components=COMPONENTS),
    # NOTE(review): the list literal is truncated at this point in this view;
    # the closing entries/brackets are not visible here.
示例#42
0
# Script fragment: 3-D scatter of the swiss-roll data, then kernel PCA
# projections with several kernels.
# NOTE(review): X, t and axes are defined earlier (unseen) — presumably from
# a make_swiss_roll call; confirm.
fig = plt.figure(figsize=(6, 5))
ax = fig.add_subplot(111, projection='3d')

ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=t, cmap=plt.cm.hot)
ax.view_init(10, -70)
ax.set_title("Swiss roll")
ax.set_xlabel("$x_1$", fontsize=18)
ax.set_ylabel("$x_2$", fontsize=18)
ax.set_zlabel("$x_3$", fontsize=18)
ax.set_xlim(axes[0:2])
ax.set_ylim(axes[2:4])
ax.set_zlim(axes[4:6])
plt.show()

rbf_pca = KernelPCA(n_components = 2, kernel="rbf", gamma=0.04)
X_reduced = rbf_pca.fit_transform(X)

# Three kernel-PCA variants, all with fit_inverse_transform enabled so a
# pre-image reconstruction is available.
lin_pca = KernelPCA(n_components = 2, kernel="linear", fit_inverse_transform=True)
rbf_pca = KernelPCA(n_components = 2, kernel="rbf", gamma=0.0433, fit_inverse_transform=True)
sig_pca = KernelPCA(n_components = 2, kernel="sigmoid", gamma=0.001, coef0=1, fit_inverse_transform=True)

# Binary labels derived from the roll parameter t.
y = t > 6.9

plt.figure(figsize=(11, 4))
for subplot, pca, title in ((131, lin_pca, "Linear kernel"), (132, rbf_pca, "RBF kernel, $\gamma=0.04$"), (133, sig_pca, "Sigmoid kernel, $\gamma=10^{-3}, r=1$")):
    X_reduced = pca.fit_transform(X)
    if subplot == 132:
        # Keep the RBF projection for later use.
        X_reduced_rbf = X_reduced

    plt.subplot(subplot)
示例#43
0
                                                    y,
                                                    test_size=0.25,
                                                    random_state=0)
# Script fragment: the train_test_split call above starts before this view.
# Pipeline: standardize -> kernel PCA (rbf, 2 components) -> logistic regression.

#Feature Scaling
from sklearn.preprocessing import StandardScaler  #To get more accurate results

sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)

#Applying Kernel PCA
from sklearn.decomposition import KernelPCA

kpca = KernelPCA(
    n_components=2, kernel='rbf'
)  #We use None here initially instead of 2 since we need to compare all the variances of the independent variables and then choose the two best ones.
X_train = kpca.fit_transform(
    X_train
)  #We dont take the dependent variable (y_train in this case) as PCA is unsupervised
X_test = kpca.transform(X_test)

# Fitting Logistic Regression to the Training Set
from sklearn.linear_model import LogisticRegression

classifier = LogisticRegression(random_state=0)  #To get the same result
classifier.fit(X_train, y_train)

#Prediciting Test set Results
y_pred = classifier.predict(X_test)
示例#44
0
# Script fragment: scatter each inverse-transformed feature against the next
# raw feature, then visualize KernelPCA (cosine) and SparsePCA projections.
# NOTE(review): inverse_data, data, labels and var_names are defined earlier
# (unseen) — confirm their shapes/meaning upstream.
fig, axs = plt.subplots(2, 4)

for i in range(inverse_data.shape[1] - 1):
    axs[i // 4, i % 4].scatter(inverse_data[:, i],
                               data[:, (i + 1)],
                               c=labels,
                               cmap='hsv')

    axs[i // 4, i % 4].set_xlabel(var_names[i])
    axs[i // 4, i % 4].set_ylabel(var_names[i + 1])

plt.show()

#KernelPCA
kernelPCA = KernelPCA(n_components=4, kernel='cosine')
kernelPCA_data = kernelPCA.fit(data).transform(data)

fig, axs = plt.subplots(1, 1)

# Only the first two of the four components are plotted.
plt.scatter(kernelPCA_data[:, 0], kernelPCA_data[:, 1], c=labels, cmap='hsv')
plt.show()

plt.show()

#SparsePCA
# alpha=0.0 disables the sparsity penalty (behaves like plain PCA).
sparsePCA = SparsePCA(n_components=4, alpha=0.0)
sparsePCA_data = sparsePCA.fit(data).transform(data)

fig, axs = plt.subplots(1, 1)
示例#45
0
# Script fragment: split -> scale -> kernel PCA -> logistic regression ->
# confusion matrix / accuracy. X and y are defined earlier (unseen).
# Spliting into training and test set
from sklearn.model_selection import train_test_split
train_X, test_X, train_y, test_y = train_test_split(X,
                                                    y,
                                                    test_size=0.20,
                                                    random_state=0)

# Feature scaling on data
from sklearn.preprocessing import StandardScaler
sc_x = StandardScaler()
train_X = sc_x.fit_transform(train_X)
test_X = sc_x.transform(test_X)

# Applying kernel PCA
from sklearn.decomposition import KernelPCA
kpca = KernelPCA(n_components=2, kernel='rbf')
train_X = kpca.fit_transform(train_X)
test_X = kpca.transform(test_X)

# Applying logistic clasification model
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state=0)
classifier.fit(train_X, train_y)

# Predicting the output of model
y_pred = classifier.predict(test_X)

# Testing the outcome
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(test_y, y_pred)
# Accuracy expressed as a percentage.
score = accuracy_score(test_y, y_pred) * 100
    # Script fragment (indented: inside an unseen enclosing block).
    # Reads whitespace-separated samples from samples_file, reduces them to 3-D
    # with RBF kernel PCA, plots and saves the projection, then measures the
    # pre-image reconstruction error.
    # Its first line contains information about the simulation
    info_str = samples_file.readline()
    print(info_str[:-1])

    # Collect all the samples into X
    X = []
    for x in samples_file:
        # Drop the trailing newline, split on spaces, parse as float64.
        X.append(np.array(x[0:-1].split(' ')).astype("float64"))
    X = np.asanyarray(X)
    samples_file.close()
    print("Read", len(X), "samples of dimension", len(X[0]))
    m = len(X[0])
    # start the kernel PCA to find low energy submanifold
    kpcaRBF = KernelPCA(n_components=3,
                        kernel="rbf",
                        fit_inverse_transform=True,
                        n_jobs=-1)
    reducedXrbf = kpcaRBF.fit_transform(X)
    # Let's plot the reduced 3D space
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(reducedXrbf[:, 0], reducedXrbf[:, 1], reducedXrbf[:, 2])
    plt.title("3D reduction of the " + str(m) + "D parameter with rbf kernel")
    plt.savefig("ALLDATA.png")
    #    plt.show()

    # Map the 3-D points back to the original space and report the error.
    reconstructedX = kpcaRBF.inverse_transform(reducedXrbf)
    print("Reconstructing error: ", norm(X - reconstructedX))

    print("Let's find the surface of minimal energy")
    # Information for defining the potential
示例#47
0
    # Script fragment: tail of a loop (header unseen) that loads NIfTI images,
    # collapses the 4th axis, crops, and flattens each into a row of X; then a
    # linear-kernel PCA feature extraction followed by an SVC fit on ages.
    file_path = os.path.join(curr_path, data_path, file_name)
    img = nib.load(file_path).get_data()
    img = np.sum(
        img,
        axis=3)  # to remove the '4th' dimension which is basically intesity
    # crop the image with given offset
    img = img[rmin - offset:rmax + offset, cmin - offset:cmax + offset,
              zmin - offset:zmax + offset]
    X[t, :] = img.reshape(n, )
    print('file ', file_name, ' read successfully')

n_features = 30  # number of low dimensional features we want
print("Extracting the top %d eigenimages from %d images" %
      (n_features, n_train))
#pca = PCA(n_components = n_features, svd_solver = 'randomized' , whiten = True).fit(X)
# NOTE(review): n_features is not passed to KernelPCA below, so all components
# are kept despite the message above — confirm whether that is intended.
kpca = KernelPCA(kernel="linear").fit(X)
X_train_pca = kpca.transform(X)
#eigenimages = pca.components_   # This is the matrix used to transform to lowdimensional feature space
#X_train_pca = pca.transform(X)  # Training sets after transformation
print("PCA completed successfully ...")

# Load csv file into numpy array
age_train = np.genfromtxt(os.path.join(curr_path, 'targets.csv'),
                          delimiter=',')

#reg = make_pipeline(PolynomialFeatures(degree), RidgeCV())
# NOTE(review): despite the name `reg`, this is a classifier (SVC) fit on ages.
reg = SVC()
reg.fit(X_train_pca, age_train)
print("Data fitted with CV Ridge Regression")

# Prediction Error
示例#48
0
                    alpha=0.6,
                    c=cmap(idx),
                    edgecolor='black',
                    marker=markers[idx],
                    label=cl)
# Script fragment: the scatter call above starts before this view (inside
# plot_decision_regions, presumably). Below: plot LDA+SVM decision regions,
# then fit logistic regression on kernel-PCA-transformed data and report
# train/test accuracy.


plot_decision_regions(X_train_pca, y_train, classifier=svm)
plt.xlabel('LDA_SVM_PC 1')
plt.ylabel('LDA_SVM_PC 2')
plt.legend(loc='lower left')
plt.show()

#kPCA

scikit_kpca = KernelPCA(n_components=2, kernel='rbf',
                        gamma=0.1)  #Gamma values can be updated here
# NOTE(review): y_train passed to fit_transform is ignored by KernelPCA
# (unsupervised); harmless but misleading.
X_train_skernpca = scikit_kpca.fit_transform(X_train_std, y_train)
X_test_skernpca = scikit_kpca.transform(X_test_std)

#Logistic Regression fitted on KPCA transformed data

lr = LogisticRegression()
lr.fit(X_train_skernpca, y_train)
kpca_lr_y_train_pred = lr.predict(X_train_skernpca)
kpca_lr_y_pred = lr.predict(X_test_skernpca)
print("KPCA Logistic Regression train accuracy score (gamma=0.1): ",
      metrics.accuracy_score(y_train, kpca_lr_y_train_pred))
print("KPCA Logistic Regression test accuracy score (gamma=0.1): ",
      metrics.accuracy_score(y_test, kpca_lr_y_pred))

#SVM Regression fitted on KPCA transformed dataset
示例#49
0
def dr_cluster(data, method, gamma, params, clusters, stepsize, rows_toload,
               dropped_class_numbers):
    """Sweep kernel-PCA ``gamma`` values, cluster the reduced data, log results.

    For i in 0..params-1 the data is reduced with an RBF KernelPCA at the
    current gamma and clustered with either an external C thresholding
    binary (method "Thresholding") or k-means (methods "Kmeans1D" /
    "Kmeans2D"); the silhouette score and Hungarian purity of each run are
    logged.  After the sweep, gamma is rewound to the best value seen (by
    silhouette score) and one extra iteration (i == params) re-runs it and
    writes the final summary and Hungarian report.

    NOTE(review): the final report at i == params reuses the runtime and
    hungarian output variables from the *last executed* iteration, which is
    the best-gamma re-run only because gamma is reset at i == params - 1 —
    the statement order here is load-bearing; confirm before refactoring.
    """
    # Number of reduced dimensions depends on the clustering method.
    if (method == "Kmeans2D"):
        components = 2
    if (method == "Kmeans1D" or method == "Thresholding"):
        components = 1
        flag = 0  # set to 1 once the thresholding C program has been compiled
        resetflag = 0
    logger.writelog(components, "Components")
    logger.result_open(method)
    print(method)
    # Trackers for the best configuration seen during the sweep.
    max_sc = -100.0
    best_purity = 0.0
    best_gamma = 0.0
    serial_num = 0
    try:
        for i in range(0, params + 1):
            # Reduce to `components` dimensions with an RBF kernel PCA and
            # persist the embedding so the external clustering tools can read it.
            transformer = KernelPCA(n_components=components,
                                    kernel='rbf',
                                    gamma=gamma)
            data_transformed = transformer.fit_transform(data)
            df = pd.DataFrame(data_transformed)
            df.to_csv(KPCA_output_path, index=False, header=None)
            del df
            gc.collect()  # free the DataFrame before the next heavy step
            if (method == "Thresholding"):
                if (flag == 0):
                    # Compile the thresholding program once, on first use.
                    os.system("cc c_thresholding_new.c")
                    flag = 1
                start = timeit.default_timer()
                os.system("./a.out " + str(clusters) + " " + str(rows_toload))
                end = timeit.default_timer()
                thresholding_time = (end - start)
                sc = silhouette.silhouette(KPCA_output_path,
                                           Thresholding_paths[1])
                groundtruth_distribution, temp_assignment_error_matrix, row_ind, col_ind, class_numbers, purity = hungarian.hungarian(
                    't', Thresholding_paths[0], clusters, rows_toload,
                    dropped_class_numbers)
                logger.writeresult(i + 1, clusters, method, thresholding_time,
                                   gamma, sc, purity)
                #print(i+1,thresholding_time,gamma,sc,purity)
                # Track the best gamma during the sweep iterations only.
                if (i < params):
                    if (sc > max_sc):
                        max_sc = sc
                        best_gamma = gamma
                        best_purity = purity
                        serial_num = i + 1
                # Last sweep iteration: rewind gamma so the extra iteration
                # (i == params) re-runs the best configuration.
                if (i == (params - 1)):
                    gamma = best_gamma
                    sc = max_sc
                    purity = best_purity
                if (i == params):
                    # Final iteration: emit the best-run summary and report.
                    print(best_gamma, max_sc, best_purity)
                    logger.writeresult(" ", " ", " ", " ", " ", " ", " ")
                    logger.writeresult(serial_num, clusters, method,
                                       thresholding_time, best_gamma, max_sc,
                                       best_purity)
                    logger.writeresult(" ", " ", " ", " ", " ", " ", " ")
                    logger.writefinalresult(serial_num, clusters, method,
                                            thresholding_time, best_gamma,
                                            max_sc, best_purity)
                    write_hungarian_result(best_gamma, clusters,
                                           groundtruth_distribution,
                                           temp_assignment_error_matrix,
                                           row_ind, col_ind, class_numbers,
                                           best_purity, method, params,
                                           stepsize, dropped_class_numbers)
            else:
                # K-means path: cluster the saved embedding and score it the
                # same way as the thresholding branch above.
                kmeans_time = kmeans.kmeans(KPCA_output_path, KMeans_paths[1],
                                            clusters)
                kmeans.groundtruth_distribution(KMeans_paths[1],
                                                KMeans_paths[0],
                                                datafiles_names[0],
                                                datafiles_names[2], clusters)
                sc = silhouette.silhouette(KPCA_output_path, KMeans_paths[1])
                groundtruth_distribution, temp_assignment_error_matrix, row_ind, col_ind, class_numbers, purity = hungarian.hungarian(
                    'k', KMeans_paths[0], clusters, rows_toload,
                    dropped_class_numbers)
                logger.writeresult(i + 1, clusters, method, kmeans_time, gamma,
                                   sc, purity)
                #print(i+1,kmeans_time,gamma,sc,purity)
                # Same best-tracking / rewind / final-report logic as above.
                if (i < params):
                    if (sc > max_sc):
                        max_sc = sc
                        best_gamma = gamma
                        best_purity = purity
                        serial_num = i + 1
                if (i == (params - 1)):
                    gamma = best_gamma
                    sc = max_sc
                    purity = best_purity
                if (i == params):
                    print(best_gamma, max_sc, best_purity)
                    logger.writeresult(" ", " ", " ", " ", " ", " ", " ")
                    logger.writeresult(serial_num, clusters, method,
                                       kmeans_time, best_gamma, max_sc,
                                       best_purity)
                    logger.writeresult(" ", " ", " ", " ", " ", " ", " ")
                    logger.writefinalresult(serial_num, clusters, method,
                                            kmeans_time, best_gamma, max_sc,
                                            best_purity)
                    write_hungarian_result(best_gamma, clusters,
                                           groundtruth_distribution,
                                           temp_assignment_error_matrix,
                                           row_ind, col_ind, class_numbers,
                                           best_purity, method, params,
                                           stepsize, dropped_class_numbers)
            # Advance gamma for the next sweep step (not past the rewind point).
            if (i < (params - 1)):
                gamma = gamma + stepsize
    except (KeyboardInterrupt, SystemExit, Exception) as ex:
        # Log any failure (including interrupts) before closing the result file.
        ex_type, ex_value, ex_traceback = sys.exc_info()
        trace_back = traceback.extract_tb(ex_traceback)
        logger.writelog(str(ex_type.__name__), "Exception Type")
        logger.writelog(str(ex_value), "Exception Message")
        logger.writelog(str(trace_back), "Traceback")
    finally:
        logger.result_close()
示例#50
0
            if mode == MODE_SDIC:
                vic = sdic.sdic(sdic.SDIC_TYPE_SDIC)
                vic.fit(x_train)
                x_train_new = vic.transform(x_train)
                x_test_new = vic.transform(x_test)
            elif mode == MODE_SDIC_C:
                vic = sdic.sdic(sdic.SDIC_TYPE_SDIC_C)
                vic.fit(x_train)
                x_train_new = vic.transform(x_train)
                x_test_new = vic.transform(x_test)
            elif mode == MODE_DI:
                x_train_new = np.zeros((x_train.shape[0], img_size, img_size))
                x_test_new = np.zeros((x_test.shape[0], img_size, img_size))

                from sklearn.decomposition import KernelPCA
                pca = KernelPCA(n_components=2)
                X = x_train.reshape(x_train.shape[0], img_size * img_size)
                Xt = x_test.reshape(x_test.shape[0], img_size * img_size)

                x = pca.fit_transform(np.transpose(X))
                x[:, 0] = x[:, 0] - np.min(x[:, 0])
                x[:, 0] = x[:, 0] / np.max(x[:, 0]) * (img_size - 1)
                x[:, 1] = x[:, 1] - np.min(x[:, 1])
                x[:, 1] = x[:, 1] / np.max(x[:, 1]) * (img_size - 1)
                x = x.round().astype('int')

                pts_per_coord = {}
                for i in range(0, x.shape[0]):
                    coord = (x[i, 0], x[i, 1])
                    x_train_new[:, x[i, 0], x[i, 1]] += X[:, i]
                    if coord not in pts_per_coord:
示例#51
0
    def btnConvert_click(self):
        """Run PCA / Kernel PCA / Incremental PCA functional alignment per fold.

        Reads all hyperparameters from the UI, then for each fold in
        [FoldFrom, FoldTo]: loads the input .mat file, copies the requested
        metadata variables into the output structure, reduces the train and
        test data with the selected PCA variant, and saves the result.
        Returns False (after showing a message box) on any validation error.
        """
        msgBox = QMessageBox()
        totalTime = 0
        # Batch
        try:
            Batch = np.int32(ui.txtBatch.text())
        except Exception:
            msgBox.setText("Size of batch is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        if Batch == 0:
            Batch = None  # IncrementalPCA picks its own batch size

        # Kernel
        Kernel = ui.cbKernel.currentText()
        # Method
        Method = ui.cbMethod.currentText()

        # Gamma
        try:
            # Fixed: np.float was removed in NumPy 1.24; it was only ever an
            # alias of the builtin float, so this is behaviour-preserving.
            Gamma = float(ui.txtGamma.text())
        except Exception:
            msgBox.setText("Gamma is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        # Degree
        try:
            Degree = np.int32(ui.txtDegree.text())
        except Exception:
            msgBox.setText("Degree is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        # Coef0
        try:
            Coef0 = float(ui.txtCoef0.text())  # was np.float (removed alias)
        except Exception:
            msgBox.setText("Coef0 is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        # Alpha
        try:
            Alpha = np.int32(ui.txtAlpha.text())
        except Exception:
            msgBox.setText("Alpha is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        # Tol
        try:
            Tol = float(ui.txtTole.text())  # was np.float (removed alias)
        except Exception:
            msgBox.setText("Tolerance is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        # MaxIte
        try:
            MaxIter = np.int32(ui.txtMaxIter.text())
        except Exception:
            msgBox.setText("Maximum number of iterations is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        if MaxIter <= 0:
            MaxIter = None  # let the solver run until convergence

        # Number of Job
        try:
            NJob = np.int32(ui.txtJobs.text())
        except Exception:
            msgBox.setText("The number of parallel jobs is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        if NJob < -1 or NJob == 0:
            msgBox.setText(
                "The number of parallel jobs must be -1 or greater than 0!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        # Fold range to process.
        try:
            FoldFrom = np.int32(ui.txtFoldFrom.text())
            FoldTo = np.int32(ui.txtFoldTo.text())
        except Exception:
            print("Please check fold parameters!")
            return

        if FoldTo < FoldFrom:
            print("Please check fold parameters!")
            return

        for fold_all in range(FoldFrom, FoldTo + 1):
            tic = time.time()
            # OutFile: per-fold output path ($FOLD$ placeholder expanded).
            OutFile = ui.txtOutFile.text()
            OutFile = OutFile.replace("$FOLD$", str(fold_all))
            if not len(OutFile):
                msgBox.setText("Please enter out file!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False

            # InFile: per-fold input path ($FOLD$ placeholder expanded).
            InFile = ui.txtInFile.text()
            InFile = InFile.replace("$FOLD$", str(fold_all))
            if not len(InFile):
                msgBox.setText("Please enter input file!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not os.path.isfile(InFile):
                msgBox.setText("Input file not found!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False

            InData = io.loadmat(InFile)
            OutData = dict()
            OutData["imgShape"] = InData["imgShape"]

            # Data
            if not len(ui.txtITrData.currentText()):
                msgBox.setText("Please enter Input Train Data variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtITeData.currentText()):
                msgBox.setText("Please enter Input Test Data variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTrData.text()):
                msgBox.setText("Please enter Output Train Data variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTeData.text()):
                msgBox.setText("Please enter Output Test Data variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False

            try:
                XTr = InData[ui.txtITrData.currentText()]
                XTe = InData[ui.txtITeData.currentText()]

                if ui.cbScale.isChecked():
                    XTr = preprocessing.scale(XTr)
                    XTe = preprocessing.scale(XTe)
                    print("Whole of data is scaled X~N(0,1).")
            except Exception:
                print("Cannot load data")
                return

            try:
                NumFea = np.int32(ui.txtNumFea.text())
            except Exception:
                msgBox.setText("Number of features is wrong!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if NumFea < 0:
                msgBox.setText("Number of features must be greater than zero!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False

            # The requested dimensionality cannot exceed either data matrix.
            if NumFea > np.shape(XTr)[1]:
                msgBox.setText("Number of features is wrong!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False

            if NumFea > np.shape(XTe)[1]:
                msgBox.setText("Number of features is wrong!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False

            # Label
            if not len(ui.txtITrLabel.currentText()):
                msgBox.setText("Please enter Train Input Label variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtITeLabel.currentText()):
                msgBox.setText("Please enter Test Input Label variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTrLabel.text()):
                msgBox.setText(
                    "Please enter Train Output Label variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTeLabel.text()):
                msgBox.setText("Please enter Test Output Label variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            try:
                OutData[ui.txtOTrLabel.text()] = InData[
                    ui.txtITrLabel.currentText()]
                OutData[ui.txtOTeLabel.text()] = InData[
                    ui.txtITeLabel.currentText()]
            except Exception:
                # NOTE(review): unlike the other loaders this one does not
                # return on failure — presumably labels are optional; confirm.
                print("Cannot load labels!")

            # Subject
            if not len(ui.txtITrSubject.currentText()):
                msgBox.setText(
                    "Please enter Train Input Subject variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtITeSubject.currentText()):
                msgBox.setText(
                    "Please enter Test Input Subject variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTrSubject.text()):
                msgBox.setText(
                    "Please enter Train Output Subject variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTeSubject.text()):
                msgBox.setText(
                    "Please enter Test Output Subject variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            try:
                TrSubject = InData[ui.txtITrSubject.currentText()]
                OutData[ui.txtOTrSubject.text()] = TrSubject
                TeSubject = InData[ui.txtITeSubject.currentText()]
                OutData[ui.txtOTeSubject.text()] = TeSubject
            except Exception:
                print("Cannot load Subject IDs")
                return

            # Task
            if ui.cbTask.isChecked():
                if not len(ui.txtITrTask.currentText()):
                    msgBox.setText(
                        "Please enter Input Train Task variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtITeTask.currentText()):
                    msgBox.setText(
                        "Please enter Input Test Task variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTrTask.text()):
                    msgBox.setText(
                        "Please enter Output Train Task variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTeTask.text()):
                    msgBox.setText(
                        "Please enter Output Test Task variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    TrTask = InData[ui.txtITrTask.currentText()]
                    OutData[ui.txtOTrTask.text()] = TrTask
                    TeTask = InData[ui.txtITeTask.currentText()]
                    OutData[ui.txtOTeTask.text()] = TeTask
                    # Build 1-based dense task indices from the unique task ids.
                    TrTaskIndex = TrTask.copy()
                    for tasindx, tas in enumerate(np.unique(TrTask)):
                        TrTaskIndex[TrTask == tas] = tasindx + 1
                    TeTaskIndex = TeTask.copy()
                    for tasindx, tas in enumerate(np.unique(TeTask)):
                        TeTaskIndex[TeTask == tas] = tasindx + 1
                except Exception:
                    print("Cannot load Tasks!")
                    return

            # Run
            if ui.cbRun.isChecked():
                if not len(ui.txtITrRun.currentText()):
                    msgBox.setText(
                        "Please enter Train Input Run variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtITeRun.currentText()):
                    msgBox.setText(
                        "Please enter Test Input Run variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTrRun.text()):
                    msgBox.setText(
                        "Please enter Train Output Run variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTeRun.text()):
                    msgBox.setText(
                        "Please enter Test Output Run variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    TrRun = InData[ui.txtITrRun.currentText()]
                    OutData[ui.txtOTrRun.text()] = TrRun
                    TeRun = InData[ui.txtITeRun.currentText()]
                    OutData[ui.txtOTeRun.text()] = TeRun
                except Exception:
                    print("Cannot load Runs!")
                    return

            # Counter
            if ui.cbCounter.isChecked():
                if not len(ui.txtITrCounter.currentText()):
                    msgBox.setText(
                        "Please enter Train Input Counter variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtITeCounter.currentText()):
                    msgBox.setText(
                        "Please enter Test Input Counter variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTrCounter.text()):
                    msgBox.setText(
                        "Please enter Train Output Counter variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTeCounter.text()):
                    msgBox.setText(
                        "Please enter Test Output Counter variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    TrCounter = InData[ui.txtITrCounter.currentText()]
                    OutData[ui.txtOTrCounter.text()] = TrCounter
                    TeCounter = InData[ui.txtITeCounter.currentText()]
                    OutData[ui.txtOTeCounter.text()] = TeCounter
                except Exception:
                    print("Cannot load Counters!")
                    return

            # Matrix Label
            if ui.cbmLabel.isChecked():
                if not len(ui.txtITrmLabel.currentText()):
                    msgBox.setText(
                        "Please enter Train Input Matrix Label variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtITemLabel.currentText()):
                    msgBox.setText(
                        "Please enter Test Input Matrix Label variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTrmLabel.text()):
                    msgBox.setText(
                        "Please enter Train Output Matrix Label variable name!"
                    )
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTemLabel.text()):
                    msgBox.setText(
                        "Please enter Test Output Matrix Label variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOTrmLabel.text()] = InData[
                        ui.txtITrmLabel.currentText()]
                    OutData[ui.txtOTemLabel.text()] = InData[
                        ui.txtITemLabel.currentText()]
                except Exception:
                    print("Cannot load matrix lables!")
                    return

            # Design
            if ui.cbDM.isChecked():
                if not len(ui.txtITrDM.currentText()):
                    msgBox.setText(
                        "Please enter Train Input Design Matrix variable name!"
                    )
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtITeDM.currentText()):
                    msgBox.setText(
                        "Please enter Test Input Design Matrix variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTrDM.text()):
                    msgBox.setText(
                        "Please enter Train Output Design Matrix variable name!"
                    )
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTeDM.text()):
                    msgBox.setText(
                        "Please enter Test Output Design Matrix variable name!"
                    )
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOTrDM.text()] = InData[
                        ui.txtITrDM.currentText()]
                    OutData[ui.txtOTeDM.text()] = InData[
                        ui.txtITeDM.currentText()]
                except Exception:
                    print("Cannot load design matrices!")
                    return

            # Coordinate
            if ui.cbCol.isChecked():
                if not len(ui.txtCol.currentText()):
                    msgBox.setText("Please enter Coordinator variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOCol.text()):
                    msgBox.setText("Please enter Coordinator variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOCol.text()] = InData[
                        ui.txtCol.currentText()]
                except Exception:
                    print("Cannot load coordinator!")
                    return

            # Condition
            if ui.cbCond.isChecked():
                if not len(ui.txtCond.currentText()):
                    msgBox.setText("Please enter Condition variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOCond.text()):
                    msgBox.setText("Please enter Condition variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOCond.text()] = InData[
                        ui.txtCond.currentText()]
                except Exception:
                    print("Cannot load conditions!")
                    return

            # FoldID
            if ui.cbFoldID.isChecked():
                if not len(ui.txtFoldID.currentText()):
                    msgBox.setText("Please enter FoldID variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOFoldID.text()):
                    msgBox.setText("Please enter FoldID variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOFoldID.text()] = InData[
                        ui.txtFoldID.currentText()]
                except Exception:
                    print("Cannot load Fold ID!")
                    return

            # FoldInfo
            if ui.cbFoldInfo.isChecked():
                if not len(ui.txtFoldInfo.currentText()):
                    msgBox.setText("Please enter FoldInfo variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOFoldInfo.text()):
                    msgBox.setText("Please enter FoldInfo variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOFoldInfo.text()] = InData[
                        ui.txtFoldInfo.currentText()]
                except Exception:
                    print("Cannot load Fold Info!")
                    return

            # Number of Scan
            if ui.cbNScan.isChecked():
                if not len(ui.txtITrScan.currentText()):
                    msgBox.setText(
                        "Please enter Number of Scan variable name for Input Train!"
                    )
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtITeScan.currentText()):
                    msgBox.setText(
                        "Please enter Number of Scan variable name for Input Test!"
                    )
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTrScan.text()):
                    msgBox.setText(
                        "Please enter Number of Scan variable name for Output Train!"
                    )
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTeScan.text()):
                    msgBox.setText(
                        "Please enter Number of Scan variable name for Output Test!"
                    )
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOTrScan.text()] = InData[
                        ui.txtITrScan.currentText()]
                    OutData[ui.txtOTeScan.text()] = InData[
                        ui.txtITeScan.currentText()]
                except Exception:
                    print("Cannot load NScan!")
                    return

            # 0 means "auto": keep as many features as the smaller data axis.
            if NumFea == 0:
                NumFea = np.min(np.shape(XTr))
                print("Number of features are automatically selected as ",
                      NumFea)

            try:
                # Build the selected dimensionality-reduction model.
                if Method == "PCA":
                    model = PCA(n_components=NumFea, copy=False, tol=Tol)
                elif Method == "Kernel PCA":
                    model = KernelPCA(n_components=NumFea,kernel=Kernel,gamma=Gamma,degree=Degree,\
                                  coef0=Coef0, alpha=Alpha, tol=Tol, max_iter=MaxIter, n_jobs=NJob,copy_X=False)
                else:
                    model = IncrementalPCA(n_components=NumFea,
                                           copy=False,
                                           batch_size=Batch)

                print("Running PCA Functional Alignment on Training Data ...")
                OutData[ui.txtOTrData.text()] = model.fit_transform(XTr)
                print("Running PCA Functional Alignment on Testing Data ...")
                # NOTE(review): the test set is re-fitted (fit_transform)
                # rather than projected with the train-fitted model
                # (transform) — presumably intentional for per-set
                # functional alignment; confirm.
                OutData[ui.txtOTeData.text()] = model.fit_transform(XTe)
            except Exception as e:
                print(str(e))

            # Record what was done alongside the data.
            HAParam = dict()
            HAParam["Method"] = Method
            HAParam["NumFea"] = NumFea
            HAParam["Kernel"] = Kernel
            OutData["FunctionalAlignment"] = HAParam
            OutData["Runtime"] = time.time() - tic
            totalTime += OutData["Runtime"]

            print("Saving ...")
            io.savemat(OutFile, mdict=OutData)
            print("Fold " + str(fold_all) + " is DONE: " + OutFile)
        print("Runtime: ", totalTime)
        print("PCA Functional Alignment is done.")
        msgBox.setText("PCA Functional Alignment is done.")
        msgBox.setIcon(QMessageBox.Information)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
Let's use that on the half-moon dataset.
'''

from sklearn.decomposition import KernelPCA
from sklearn.datasets import make_moons
import matplotlib.pyplot as plt
import numpy as np

# Two interleaved half-moons: not linearly separable in the input space.
X, y = make_moons(n_samples=100, random_state=123)

# Project onto the first two kernel principal components (RBF kernel).
scikit_kpca = KernelPCA(n_components=2, kernel="rbf", gamma=15)
X_skernpca = scikit_kpca.fit_transform(X)

# One scatter per class, same colors/markers as before.
for label, color, marker in ((0, "red", "^"), (1, "blue", "o")):
    mask = y == label
    plt.scatter(X_skernpca[mask, 0],
                X_skernpca[mask, 1],
                color=color,
                marker=marker,
                alpha=0.5)
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.show()
示例#53
0
class PCA_Benchmark():
    """Benchmark incremental vs. kernel PCA on property/search feature matrices.

    Parameters (interface unchanged from the original):
        prop_path / srch_path: pickle files holding the feature matrices.
        chunksize: kept for compatibility (only used by commented-out chunking).
        batch_size: batch size handed to IncrementalPCA.
        only: 'prop', 'srch' or None (process both).
        pca_type: 'IPCA' or 'Kernel'.
        kernel_type: kernel name passed to KernelPCA.
        whiten: whiten flag for IncrementalPCA.
        model_name: path the fitted model is pickled to.
    """

    def __init__(self,prop_path=None,srch_path=None,chunksize = 1000,batch_size =300,only=None,pca_type='IPCA',kernel_type='linear',whiten=False,model_name=''):
        self.model_name = model_name
        self.kernel_type = kernel_type
        self.type = pca_type
        self.chunksize = chunksize
        self.batch_size = batch_size
        self.only = only
        self.whiten = whiten
        # Kernel PCA is quadratic in the number of samples; cap the row count.
        kernel_limit = 3000
        if self.only == 'prop':
            # 'with' closes the pickle file handles (the original leaked them).
            with open(prop_path, "rb") as f:
                self.prop_data = pd.DataFrame(pickle.load(f))
            self.srch_data = None
            if pca_type == 'Kernel':
                self.prop_data = self.prop_data.head(kernel_limit)
        elif self.only == 'srch':
            with open(srch_path, "rb") as f:
                self.srch_data = pd.DataFrame(pickle.load(f))
            self.prop_data = None
            if pca_type == 'Kernel':
                self.srch_data = self.srch_data.head(kernel_limit)
        elif self.only is None:  # identity check, not '== None'
            # NOTE(review): unlike the branches above, the data is NOT wrapped
            # in pd.DataFrame here — preserved from the original; the pickles
            # presumably already contain DataFrames (to_numpy() below needs it).
            with open(prop_path, "rb") as f:
                self.prop_data = pickle.load(f)
            with open(srch_path, "rb") as f:
                self.srch_data = pickle.load(f)
            if pca_type == 'Kernel':
                self.prop_data = self.prop_data.head(kernel_limit)
                self.srch_data = self.srch_data.head(kernel_limit)

        # Convert the selected frames to plain numpy arrays. The original's
        # inner ternaries re-tested the exact guard condition and were no-ops.
        if only == 'prop' or only is None:
            self.prop_data = self.to_numpy(self.prop_data)
        if only == 'srch' or only is None:
            self.srch_data = self.to_numpy(self.srch_data)

    def to_numpy(self,matrix):
        """Return the DataFrame's values as a numpy array."""
        return matrix.to_numpy()

    def run_props(self):
        """Fit the configured PCA on the property matrix, pickle the model,
        and return the per-component explained-variance ratio."""
        if self.type == 'IPCA':
            self.prop_pca = IncrementalPCA(n_components=self.prop_data.shape[1]-1, batch_size=self.batch_size, whiten=self.whiten)
        elif self.type == 'Kernel':
            self.prop_pca = KernelPCA(n_components=self.prop_data.shape[1]-1, kernel=self.kernel_type)

        print(self.prop_data.shape)
        self.prop_pca = self.prop_pca.fit(self.prop_data)
        kpca_transform = self.prop_pca.transform(self.prop_data)
        # KernelPCA exposes no explained_variance_ratio_, so derive it from
        # the variance of the transformed components.
        explained_variance = np.var(kpca_transform, axis=0)
        explained_variance_ratio = explained_variance / np.sum(explained_variance)
        with open(self.model_name, "wb") as f:  # close the handle (was leaked)
            pickle.dump(self.prop_pca, f)
        return explained_variance_ratio

    def run_srchs(self):
        """Fit the configured PCA on the search matrix, pickle the model,
        and return the per-component explained-variance ratio."""
        if self.type == 'IPCA':
            self.srch_pca = IncrementalPCA(n_components=self.srch_data.shape[1]-1, batch_size=self.batch_size, whiten=self.whiten)
        elif self.type == 'Kernel':
            self.srch_pca = KernelPCA(n_components=self.srch_data.shape[1]-1, kernel=self.kernel_type)

        self.srch_pca = self.srch_pca.fit(self.srch_data)
        kpca_transform = self.srch_pca.transform(self.srch_data)
        with open(self.model_name, "wb") as f:  # close the handle (was leaked)
            pickle.dump(self.srch_pca, f)
        # Same variance-ratio derivation as run_props.
        explained_variance = np.var(kpca_transform, axis=0)
        explained_variance_ratio = explained_variance / np.sum(explained_variance)
        return explained_variance_ratio

    def run_all(self,show=True,save=True):
        """Run the configured benchmarks and plot cumulative explained variance.

        `save` is kept for interface compatibility (figures are always saved).
        Returns (prop_ratio, srch_ratio); an entry is None when its side was
        skipped by `self.only`.
        """
        exp_var_ratio_props = None
        exp_var_ratio_srchs = None
        if self.only == 'prop' or self.only is None:
            exp_var_ratio_props = self.run_props()
            plt.figure(1)
            plt.plot(np.cumsum(exp_var_ratio_props))
            foo = 'Whitened' if self.whiten else ''
            plt.title(self.type+' ' +foo +  ' Principal Components Cumulative Explained Variance For Properties')
            plt.xlabel('number of components')
            plt.ylabel('cumulative explained variance')
            plt.savefig('plots/'+self.type+'_'+foo+'_cum_explained_var_prop.png')
        if self.only == 'srch' or self.only is None:
            exp_var_ratio_srchs = self.run_srchs()
            plt.figure(2)
            plt.plot(np.cumsum(exp_var_ratio_srchs))
            foo = 'Whitened' if self.whiten else ''
            plt.title(self.type+' ' + foo +  ' Principal Components Cumulative Explained Variance For Queries')
            plt.xlabel('number of components')
            plt.ylabel('cumulative explained variance')
            plt.savefig('plots/'+self.type+'_'+foo+'_cum_explained_var_srch.png')
        if show:
            plt.show()
        return exp_var_ratio_props, exp_var_ratio_srchs
示例#54
0
文件: code.py 项目: Aj-KamaL/ML-AI
# Drop near-duplicate features: any column whose absolute correlation with an
# earlier column exceeds 0.95 (upper triangle keeps one column of each pair).
D = np.array(pdB)
print(C.shape, D.shape)

corr_matrix = pdA.corr().abs()
# np.bool was removed from NumPy (1.24); the builtin bool is the correct dtype.
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
to_drop = [column for column in upper.columns if any(upper[column] > 0.95)]
print(len(to_drop))
pdA.drop(labels=to_drop, axis=1, inplace=True)
pdB.drop(labels=to_drop, axis=1, inplace=True)
G = np.array(pdA)
H = np.array(pdB)
print(G.shape, H.shape)

# Reduce both splits with a KernelPCA fitted on their union.
pca = KernelPCA(n_components=95)
pca.fit(np.concatenate((G, H)))
GG = pca.transform(G)
HH = pca.transform(H)
print(GG.shape, HH.shape)

# Score an MLP with 20 shuffled 50/50 splits, then refit on all of GG.
clf = MLPClassifier(max_iter=2000)
from sklearn.model_selection import ShuffleSplit
ss = ShuffleSplit(n_splits=20, test_size=0.5, random_state=10)
scores = cross_val_score(clf, GG, xlab, cv=ss, n_jobs=-1, verbose=1)
print(np.mean(scores))
print(scores)
clf.fit(GG, xlab)

# NOTE(review): pdA/pdB/C/xlab come from earlier in the original script.
pred = clf.predict(HH)
print(len(pred))
    plt.grid(True)       

if (0):
    #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% NONLINEAR METHODS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%#
    # RBF-width heuristic: sigma from the mean squared pairwise distance over
    # the training set (upper triangle only, zeros excluded to skip the
    # diagonal and the lower half).
    d = pair.pairwise_distances(Xtrain, Xtrain)
    aux = np.triu(d)
    sigma = np.sqrt(np.mean(np.power(aux[aux != 0], 2) * 0.5))
    gamma = 1 / (2 * sigma**2)

if (0):
    #%% K-PCA
    # Calculate accumulated variance
    kpca = KernelPCA(kernel="rbf", gamma=gamma)
    kpca.fit_transform(Xtrain)
    # NOTE(review): `lambdas_` is deprecated in scikit-learn >= 1.0 in favour
    # of `eigenvalues_`; kept as-is for the pinned version — confirm.
    eigenvals = kpca.lambdas_[0:220]

    # Calculate classification scores for each component
    nComponents = np.linspace(1, 500, 100, endpoint=True)
    # np.alen was removed from NumPy; len() is the drop-in replacement.
    kpcaScores = np.zeros((5, len(nComponents)))

    kpca = KernelPCA(n_components=Ntrain, kernel="rbf", gamma=gamma)
    kpca.fit(Xtrain)
    XtrainT = kpca.transform(Xtrain)
    XtestT = kpca.transform(Xtest)
    for i in range(len(nComponents)):   
# import seaborn as sns
import sklearn.decomposition
from sklearn.decomposition import PCA




    dimreductiontype='pca'
    
    from sklearn.decomposition import PCA,KernelPCA,FactorAnalysis
    
    
    if(dimreductiontype=='pca'):
        pca = PCA(n_components = nr ,whiten=True)#min(df.shape))
    elif(dimreductiontype=='kpca'):
        pca = KernelPCA(n_components=min(df.shape))
    elif(dimreductiontype=='fa'):
        pca = FactorAnalysis(n_components=min(df.shape))
        
    Z = pca.fit_transform(X)
    
    try:
        print("pca.n_components ", pca.n_components)
        print("pca.n_features_ ", pca.n_features_)
        print("pca.n_samples_ ", pca.n_samples_)
        print('pca.noise_variance_ ', pca.noise_variance_)
    except Exception:
        1;
    
    try:
        ax,fig=plt.subplots(1,1)
示例#57
0
def main():
    """Load a dataset, embed it with the selected method, classify, and save results.

    Everything is driven by the settings block below. Heavy artifacts
    (parsed datasets, picked subsets, CV splits) are cached as pickles under
    ./input/ so reruns with the *_again flags set to False skip the slow paths.
    Embedding + classification (naive Bayes) currently runs only for MNIST.
    """
    # ----- settings:
    dataset = 'MNIST'  # --> 'Facial' or 'MNIST' or 'Breast_cancer'
    embedding_method = 'Isomap'
    n_components = 5
    split_in_cross_validation_again = False
    load_dataset_again = False
    subset_of_MNIST = True
    pick_subset_of_MNIST_again = False
    MNIST_subset_cardinality_training = 10000  # picking from first samples of 60,000 samples
    MNIST_subset_cardinality_testing = 5000  # picking from first samples of 10,000 samples
    # ----- paths:
    if dataset == 'Facial':
        path_dataset = './input/att_database/'
        path_dataset_save = './input/pickle_dataset/Facial/'
    elif dataset == 'MNIST':
        path_dataset = './input/mnist/'
        path_dataset_save = './input/pickle_dataset/MNIST/'
    elif dataset == 'Breast_cancer':
        path_dataset = './input/Breast_cancer_dataset/wdbc_data.txt'
        # NOTE(review): this save path points at the MNIST folder — looks like
        # a copy-paste slip; confirm before enabling the Breast_cancer path.
        path_dataset_save = './input/pickle_dataset/MNIST/'
    # ----- Loading dataset:
    print('Reading dataset...')
    if dataset == 'MNIST':
        if load_dataset_again:
            training_data = list(
                read_MNIST_dataset(dataset="training", path=path_dataset))
            testing_data = list(
                read_MNIST_dataset(dataset="testing", path=path_dataset))

            # Flatten each 28x28 image into a 784-dim row; vstack-in-a-loop is
            # O(n^2) but kept as-is since the result is cached to pickle.
            number_of_training_samples = len(training_data)
            dimension_of_data = 28 * 28
            X_train = np.empty((0, dimension_of_data))
            y_train = np.empty((0, 1))
            for sample_index in range(number_of_training_samples):
                # np.mod(i, 1) == 0 is always true: prints progress per sample.
                if np.mod(sample_index, 1) == 0:
                    print('sample ' + str(sample_index) + ' from ' +
                          str(number_of_training_samples) + ' samples...')
                label, pixels = training_data[sample_index]
                pixels_reshaped = np.reshape(pixels, (1, 28 * 28))
                X_train = np.vstack([X_train, pixels_reshaped])
                y_train = np.vstack([y_train, label])
            y_train = y_train.ravel()

            number_of_testing_samples = len(testing_data)
            dimension_of_data = 28 * 28
            X_test = np.empty((0, dimension_of_data))
            y_test = np.empty((0, 1))
            for sample_index in range(number_of_testing_samples):
                if np.mod(sample_index, 1) == 0:
                    print('sample ' + str(sample_index) + ' from ' +
                          str(number_of_testing_samples) + ' samples...')
                label, pixels = testing_data[sample_index]
                pixels_reshaped = np.reshape(pixels, (1, 28 * 28))
                X_test = np.vstack([X_test, pixels_reshaped])
                y_test = np.vstack([y_test, label])
            y_test = y_test.ravel()

            # Cache the parsed arrays so the next run can just unpickle them.
            save_variable(X_train, 'X_train', path_to_save=path_dataset_save)
            save_variable(y_train, 'y_train', path_to_save=path_dataset_save)
            save_variable(X_test, 'X_test', path_to_save=path_dataset_save)
            save_variable(y_test, 'y_test', path_to_save=path_dataset_save)
        else:
            file = open(path_dataset_save + 'X_train.pckl', 'rb')
            X_train = pickle.load(file)
            file.close()
            file = open(path_dataset_save + 'y_train.pckl', 'rb')
            y_train = pickle.load(file)
            file.close()
            file = open(path_dataset_save + 'X_test.pckl', 'rb')
            X_test = pickle.load(file)
            file.close()
            file = open(path_dataset_save + 'y_test.pckl', 'rb')
            y_test = pickle.load(file)
            file.close()

        if subset_of_MNIST:
            if pick_subset_of_MNIST_again:
                # Keep only the first N training/testing samples to make the
                # embeddings tractable, and cache the subset as well.
                X_train_picked = X_train[
                    0:MNIST_subset_cardinality_training, :]
                X_test_picked = X_test[0:MNIST_subset_cardinality_testing, :]
                y_train_picked = y_train[0:MNIST_subset_cardinality_training]
                y_test_picked = y_test[0:MNIST_subset_cardinality_testing]
                save_variable(X_train_picked,
                              'X_train_picked',
                              path_to_save=path_dataset_save)
                save_variable(X_test_picked,
                              'X_test_picked',
                              path_to_save=path_dataset_save)
                save_variable(y_train_picked,
                              'y_train_picked',
                              path_to_save=path_dataset_save)
                save_variable(y_test_picked,
                              'y_test_picked',
                              path_to_save=path_dataset_save)
            else:
                file = open(path_dataset_save + 'X_train_picked.pckl', 'rb')
                X_train_picked = pickle.load(file)
                file.close()
                file = open(path_dataset_save + 'X_test_picked.pckl', 'rb')
                X_test_picked = pickle.load(file)
                file.close()
                file = open(path_dataset_save + 'y_train_picked.pckl', 'rb')
                y_train_picked = pickle.load(file)
                file.close()
                file = open(path_dataset_save + 'y_test_picked.pckl', 'rb')
                y_test_picked = pickle.load(file)
                file.close()
            X_train = X_train_picked
            X_test = X_test_picked
            y_train = y_train_picked
            y_test = y_test_picked
        image_shape = (28, 28)
    elif dataset == 'Facial':
        if load_dataset_again:
            X, y, image_shape = read_image_dataset(dataset_path=path_dataset,
                                                   imagesType='.jpg')
            save_variable(variable=X,
                          name_of_variable='X',
                          path_to_save=path_dataset_save)
            save_variable(variable=y,
                          name_of_variable='y',
                          path_to_save=path_dataset_save)
            save_variable(variable=image_shape,
                          name_of_variable='image_shape',
                          path_to_save=path_dataset_save)
        else:
            file = open(path_dataset_save + 'X.pckl', 'rb')
            X = pickle.load(file)
            file.close()
            file = open(path_dataset_save + 'y.pckl', 'rb')
            y = pickle.load(file)
            file.close()
            file = open(path_dataset_save + 'image_shape.pckl', 'rb')
            image_shape = pickle.load(file)
            file.close()
    elif dataset == 'Breast_cancer':
        data = pd.read_csv(
            path_dataset, sep=",", header=None
        )  # read text file using pandas dataFrame: https://stackoverflow.com/questions/21546739/load-data-from-txt-with-pandas
        labels_of_classes = ['M', 'B']
        X, y = read_BreastCancer_dataset(data=data,
                                         labels_of_classes=labels_of_classes)
        X = X.astype(
            np.float64
        )  #---> otherwise MDS has error --> https://stackoverflow.com/questions/16990996/multidimensional-scaling-fitting-in-numpy-pandas-and-sklearn-valueerror
        # --- cross validation:
        path_to_save = './input/split_data/'
        portion_of_test_in_dataset = 0.3
        number_of_folds = 10
        if split_in_cross_validation_again:
            train_indices_in_folds, test_indices_in_folds, \
            X_train_in_folds, X_test_in_folds, y_train_in_folds, y_test_in_folds = \
                cross_validation(X=X, y=y, n_splits=number_of_folds, test_size=portion_of_test_in_dataset)
            save_variable(train_indices_in_folds,
                          'train_indices_in_folds',
                          path_to_save=path_to_save)
            save_variable(test_indices_in_folds,
                          'test_indices_in_folds',
                          path_to_save=path_to_save)
            save_variable(X_train_in_folds,
                          'X_train_in_folds',
                          path_to_save=path_to_save)
            save_variable(X_test_in_folds,
                          'X_test_in_folds',
                          path_to_save=path_to_save)
            save_variable(y_train_in_folds,
                          'y_train_in_folds',
                          path_to_save=path_to_save)
            save_variable(y_test_in_folds,
                          'y_test_in_folds',
                          path_to_save=path_to_save)
            for fold_index in range(number_of_folds):
                save_np_array_to_txt(np.asarray(
                    train_indices_in_folds[fold_index]),
                                     'train_indices_in_fold' + str(fold_index),
                                     path_to_save=path_to_save)
                save_np_array_to_txt(np.asarray(
                    test_indices_in_folds[fold_index]),
                                     'test_indices_in_folds' + str(fold_index),
                                     path_to_save=path_to_save)
        else:
            file = open(path_to_save + 'train_indices_in_folds.pckl', 'rb')
            train_indices_in_folds = pickle.load(file)
            file.close()
            file = open(path_to_save + 'test_indices_in_folds.pckl', 'rb')
            test_indices_in_folds = pickle.load(file)
            file.close()
            file = open(path_to_save + 'X_train_in_folds.pckl', 'rb')
            X_train_in_folds = pickle.load(file)
            file.close()
            file = open(path_to_save + 'X_test_in_folds.pckl', 'rb')
            X_test_in_folds = pickle.load(file)
            file.close()
            file = open(path_to_save + 'y_train_in_folds.pckl', 'rb')
            y_train_in_folds = pickle.load(file)
            file.close()
            file = open(path_to_save + 'y_test_in_folds.pckl', 'rb')
            y_test_in_folds = pickle.load(file)
            file.close()

    # NOTE(review): X_train/X_test are only defined on the MNIST path; the
    # Facial and Breast_cancer branches would raise NameError here — confirm.
    print(X_train.shape)
    print(X_test.shape)

    # ----- embedding:
    print('Embedding...')
    if dataset == 'MNIST':
        # plot_components(X_projected=X_projected, images=X.reshape((-1, image_shape[0], image_shape[1])), ax=ax, image_scale=0.6, markersize=10, thumb_frac=0.05, cmap='gray_r')

        # ----- embedding:
        # Unsupervised methods fit on X_train only; MDS/LaplacianEigenmap/TSNE
        # have no out-of-sample transform, so they fit on train+test stacked
        # and the projection is split back afterwards.
        if embedding_method == 'LLE':
            clf = LLE(n_neighbors=5,
                      n_components=n_components,
                      method='standard')
            clf.fit(X=X_train)
            X_train_projected = clf.transform(X=X_train)
            X_test_projected = clf.transform(X=X_test)
        elif embedding_method == 'Isomap':
            clf = Isomap(n_neighbors=5, n_components=n_components)
            clf.fit(X=X_train)
            X_train_projected = clf.transform(X=X_train)
            X_test_projected = clf.transform(X=X_test)
        elif embedding_method == 'MDS':
            clf = MDS(n_components=n_components)
            X_projected = clf.fit_transform(X=np.vstack([X_train, X_test]))
            X_train_projected = X_projected[:X_train.shape[0], :]
            X_test_projected = X_projected[X_train.shape[0]:, :]
        elif embedding_method == 'PCA':
            clf = PCA(n_components=n_components)
            clf.fit(X=X_train)
            X_train_projected = clf.transform(X=X_train)
            X_test_projected = clf.transform(X=X_test)
        elif embedding_method == 'KernelPCA':
            clf = KernelPCA(n_components=n_components, kernel='rbf')
            clf.fit(X=X_train)
            X_train_projected = clf.transform(X=X_train)
            X_test_projected = clf.transform(X=X_test)
        elif embedding_method == 'LaplacianEigenmap':
            clf = LaplacianEigenmap(n_neighbors=5, n_components=n_components)
            X_projected = clf.fit_transform(X=np.vstack([X_train, X_test]))
            X_train_projected = X_projected[:X_train.shape[0], :]
            X_test_projected = X_projected[X_train.shape[0]:, :]
        elif embedding_method == 'LDA':
            clf = LDA(n_components=n_components)
            clf.fit(X=X_train, y=y_train)
            X_train_projected = clf.transform(X=X_train)
            X_test_projected = clf.transform(X=X_test)
        elif embedding_method == 'SPCA':
            clf = SPCA(n_components=n_components)
            clf.fit(X=X_train, y=y_train)
            X_train_projected = clf.transform(X=X_train)
            X_test_projected = clf.transform(X=X_test)
        elif embedding_method == 'TSNE':
            # t-SNE supports at most 3 output dimensions.
            clf = TSNE(n_components=min(3, n_components))
            # print(type(list(y_train)))
            X_projected = clf.fit_transform(
                X=np.vstack([X_train, X_test]),
                y=np.asarray(list(y_train) + list(y_test)))
            X_train_projected = X_projected[:X_train.shape[0], :]
            X_test_projected = X_projected[X_train.shape[0]:, :]
        elif embedding_method == 'ML':
            clf = ML(n_components=n_components)
            clf.fit(X=X_train, y=y_train)
            X_train_projected = clf.transform(X=X_train)
            X_test_projected = clf.transform(X=X_test)
        elif embedding_method == 'Kernel_FLDA':
            clf = Kernel_FLDA(n_components=n_components, kernel='linear')
            clf.fit(X=X_train, y=y_train)
            X_train_projected = clf.transform(X=X_train)
            X_test_projected = clf.transform(X=X_test)
        elif embedding_method == 'No_embedding':
            X_train_projected = X_train
            X_test_projected = X_test

        # --- classification:
        print('Classification...')
        # clf = KNN(n_neighbors=1)
        clf = NB()
        clf.fit(X=X_train_projected, y=y_train)
        y_pred = clf.predict(X=X_test_projected)
        accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
        error = 1 - accuracy_score(y_true=y_test, y_pred=y_pred)

        # --- saving results:
        save_variable(accuracy, 'accuracy', path_to_save='./output/MNIST/')
        save_np_array_to_txt(np.asarray(accuracy),
                             'accuracy',
                             path_to_save='./output/MNIST/')
        save_variable(error, 'error', path_to_save='./output/MNIST/')
        save_np_array_to_txt(np.asarray(error),
                             'error',
                             path_to_save='./output/MNIST/')
        # --- report results:
        print(' ')
        print('Accuracy: ', accuracy * 100)
        print(' ')
        print('Error: ', error * 100)
# NOTE(review): `dataset` (a pandas DataFrame) is loaded earlier in the
# original script; this fragment assumes it is already in scope.
X = dataset.iloc[:, [2, 3]].values  # features: columns 2 and 3 only
y = dataset.iloc[:, 4].values  # target: column 4

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

# Feature Scaling
# Scaler is fitted on the training split only; the test split reuses the
# fitted parameters (no test-set leakage).
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Applying Kernel PCA
# Reduce to 2 RBF-kernel principal components before the linear classifier.
from sklearn.decomposition import KernelPCA
kpca = KernelPCA(n_components = 2, kernel = 'rbf')
X_train = kpca.fit_transform(X_train)
X_test = kpca.transform(X_test)

# Fitting Logistic Regression to the Training set
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state = 0)
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
示例#59
0
# Scatter class 1 (blue squares), then show and reset the figure.
catb = plt.scatter(X[y == 1, 0], X[y == 1, 1], color='blue', marker='s', alpha=0.5)
plt.show()
plt.close()

gamma = 5

# Precompute the Laplacian Gram matrix once; the 'precomputed' KPCA consumes it.
K_lap = laplacian_kernel(X, gamma=gamma)

# (display name, file-name abbreviation, estimator) per kernel variant.
kpcas = [
    ('Linear KPCA', 'lin_kpca', KernelPCA(n_components=2, kernel='linear')),
    ('RBF KPCA', 'rbf_kpca', KernelPCA(n_components=2, kernel='rbf', gamma=gamma)),
    ('Laplacian KPCA', 'lap_kpca', KernelPCA(n_components=2, kernel='precomputed')),
    ('Sigmoid KPCA', 'sig_kpca', KernelPCA(n_components=2, kernel='sigmoid', gamma=gamma)),
    ('Cosine KPCA', 'cos_kpca', KernelPCA(n_components=2, kernel='cosine', gamma=gamma)),
]

for kernel, abbreviation, kpca in kpcas:
    # The precomputed variant embeds the Gram matrix; the others embed X.
    source = K_lap if kernel == 'Laplacian KPCA' else X
    X_kpca = kpca.fit_transform(source)
示例#60
0
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import KernelPCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score

# Load the wine dataset: every column but the last is a feature, last is the label.
dataset = pd.read_csv('wine.csv')
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# 80/20 train/test split with a fixed seed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=0)

# Standardise with training-set statistics only (no test-set leakage).
stc = StandardScaler()
x_train = stc.fit_transform(x_train)
x_test = stc.transform(x_test)

# Reduce to 2 RBF-kernel principal components, both for the classifier and
# for the 2-D decision-boundary plot that follows.
kernel_pca = KernelPCA(n_components=2, kernel='rbf')
x_train = kernel_pca.fit_transform(x_train)
x_test = kernel_pca.transform(x_test)

classifier = LogisticRegression()
classifier.fit(x_train, y_train)

# Evaluate on the held-out split.
y_pred = classifier.predict(x_test)
cm = confusion_matrix(y_test, y_pred)
print('Confusion matrix of the model is:', cm)
acc = accuracy_score(y_test, y_pred)
print('Accuracy of the model is:', acc)

# Aliases consumed by the meshgrid/decision-surface plotting code below.
x_set = x_train
y_set = y_train
x1, x2 = np.meshgrid(np.arange(min(x_set[:, 0]), max(x_set[:, 0]), step=0.01),