def _init_random_gaussians(self, X):
    """Seed ``self.k`` Gaussian components from the data.

    Sets ``self.priors`` to a uniform vector of length ``self.k`` and appends
    one dict with keys ``"mean"`` and ``"cov"`` per component to
    ``self.parameters``. Each mean is an entry of ``X`` (along its first
    axis) picked with ``np.random.choice``; each covariance is
    ``calculate_covariance_matrix(X)`` evaluated on the full data set.
    """
    sample_count = np.shape(X)[0]
    # Every component starts with the same prior weight.
    self.priors = np.ones(self.k) * (1 / self.k)
    for _ in range(self.k):
        component = {
            "mean": X[np.random.choice(range(sample_count))],
            "cov": calculate_covariance_matrix(X),
        }
        self.parameters.append(component)
示例#2
0
 def _transform(self, X, dim):
     """Project ``X`` onto the ``dim`` eigenvectors with the largest eigenvalues.

     The eigenvectors are those of ``calculate_covariance_matrix(X)``; ``X``
     is multiplied by them as given (no centering is done in this method).
     """
     eig_vals, eig_vecs = np.linalg.eig(calculate_covariance_matrix(X))
     # Column order that ranks the eigenvalues from largest to smallest.
     order = eig_vals.argsort()[::-1]
     ranked_vecs = eig_vecs[:, order]
     components = np.atleast_1d(ranked_vecs[:, :dim])
     # Project the data onto the retained principal components.
     return np.dot(X, components)
    def fit(self, X, y):
        """Compute the projection vector ``self.w`` for two-class data.

        Samples labelled 0 and 1 in ``y`` are split apart, their covariance
        matrices are summed, and ``self.w`` is set to the pseudo-inverse of
        that sum applied to the difference of the class means (class 0 minus
        class 1). Nothing is returned.
        """
        # Split the samples by class label.
        class0, class1 = X[y == 0], X[y == 1]

        # Sum of the per-class covariance matrices.
        cov_class0 = calculate_covariance_matrix(class0)
        cov_class1 = calculate_covariance_matrix(class1)
        pooled_cov = cov_class0 + cov_class1

        # Difference between the per-class feature means.
        delta = np.atleast_1d(class0.mean(0) - class1.mean(0))

        # Direction onto which projecting X best separates the classes:
        # w = pinv(cov0 + cov1) . (mean0 - mean1)
        inverse_cov = np.linalg.pinv(pooled_cov)
        self.w = inverse_cov.dot(delta)
    def fit(self, X, y):
        """Fit a two-class linear discriminant and store its direction in ``self.w``.

        Parameters
        ----------
        X : array
            Samples; indexed with boolean masks built from ``y``.
        y : array
            Labels; entries equal to 0 and 1 select the two classes.

        Notes
        -----
        ``self.w = pinv(SW) . (mean_1 - mean_2)`` where ``SW`` is the sum of
        the two per-class covariance matrices. Nothing is returned.
        """
        # Split the data set into the two classes
        X1 = X[y == 0]
        X2 = X[y == 1]

        # Covariance matrix of each class
        X1_cov_mat = calculate_covariance_matrix(X1)
        X2_cov_mat = calculate_covariance_matrix(X2)

        # Within-class scatter: sum of the per-class covariance matrices
        SW = X1_cov_mat + X2_cov_mat

        # Per-class means and their difference
        X1_mean = X1.mean(0)
        X2_mean = X2.mean(0)
        diff_mean = np.atleast_1d(X1_mean - X2_mean)

        # Best direction w = pinv(SW) . diff_mean. The result must stay a
        # vector: a further dot with diff_mean would reduce it to one number.
        self.w = np.linalg.pinv(SW).dot(diff_mean)
    def transform(self, X, n_components):
        """Project ``X`` onto its leading principal components.

        Parameters
        ----------
        X : array
            Data to project; must provide a ``dot`` method.
        n_components : int
            Number of eigenvectors to keep, largest eigenvalue first.

        Returns
        -------
        The result of ``X.dot(V)``, where the columns of ``V`` are the
        selected eigenvectors of ``calculate_covariance_matrix(X)``.
        """
        covariance_matrix = calculate_covariance_matrix(X)

        # eigenvectors[:, i] corresponds to eigenvalues[i]
        eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix)

        # Column order from the largest to the smallest eigenvalue
        idx = eigenvalues.argsort()[::-1]

        # Eigenvectors are stored column-wise, so reorder the columns with the
        # index array (it cannot be used as a slice bound), then keep the
        # first n_components of them.
        eigenvectors = np.atleast_1d(eigenvectors[:, idx])[:, :n_components]

        # Project the data onto the principal components
        X_transformed = X.dot(eigenvectors)

        return X_transformed
    def init_random_gaussians(self, X):
        """
        Randomly initialise the Gaussian components.

        Stores a uniform prior over the ``self.k`` components in
        ``self.priors`` and appends one parameter dict (keys ``'mean'`` and
        ``'cov'``) per component to ``self.parameters``.

        Parameters
        ----------
        X : np.array
            Data whose first axis is indexed to pick each component mean.
        """
        # Number of samples
        sample_total = np.shape(X)[0]
        # Prior probabilities: identical weight for every component
        self.priors = np.ones(self.k) * (1 / self.k)

        for _ in range(self.k):
            # np.random.choice() draws one random element from the given
            # 1-D range; that entry of X becomes the component mean.
            chosen_row = np.random.choice(range(sample_total))
            component = dict(
                mean=X[chosen_row],
                cov=calculate_covariance_matrix(X),
            )
            self.parameters.append(component)
    def _calculate_scatter_matrix(self, X, y):
        """Return the within-class and between-class scatter matrices.

        Parameters
        ----------
        X : array
            2-D data; the length of its second axis is the feature count.
        y : array
            Class label for each row of ``X``.

        Returns
        -------
        tuple
            ``(SW, SB)``, both of shape ``(n_features, n_features)``.
            ``SW`` sums ``calculate_covariance_matrix`` over the classes;
            ``SB`` sums ``n_c * outer(mean_c - total_mean, mean_c - total_mean)``
            over the classes, with ``n_c`` the number of rows in class ``c``.
        """
        n_features = np.shape(X)[1]
        labels = np.unique(y)
        total_mean = np.mean(X, axis=0)

        # Accumulators must start at zero: np.empty returns uninitialised
        # memory, and the shape has to be passed as a single tuple.
        SW = np.zeros((n_features, n_features))
        SB = np.zeros((n_features, n_features))

        for label in labels:
            class_X = X[y == label]

            # Within-class contribution
            SW += calculate_covariance_matrix(class_X)

            # Between-class contribution: the outer product turns the mean
            # offset vector into an (n_features, n_features) matrix.
            mean_offset = np.mean(class_X, axis=0) - total_mean
            SB += len(class_X) * np.outer(mean_offset, mean_offset)

        return SW, SB
示例#8
0
	def transform(self,X,n_components):
		"""
		Project the dataset onto its leading principal components.

		X must provide a ``dot`` method. n_components is the number of
		eigenvectors to keep, largest eigenvalue first. Returns ``X.dot(V)``,
		where the columns of ``V`` are the selected eigenvectors of
		``calculate_covariance_matrix(X)``.
		"""
		## get eigenvalues and eigenvectors from the covariance matrix
		covariance_matrix = calculate_covariance_matrix(X)
		eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix)

		## column order from the largest to the smallest eigenvalue
		idx = eigenvalues.argsort()[::-1]

		## eigenvectors are stored column-wise: reorder the columns, then
		## keep the first n_components columns (not rows)
		eigenvectors = np.atleast_1d(eigenvectors[:, idx])[:, :n_components]

		## project data onto principal components
		X_transformed = X.dot(eigenvectors)

		return X_transformed