def _init_random_gaussians(self, X):
    """Set uniform priors and register one randomly seeded Gaussian per component.

    For each of the ``self.k`` components, a row of ``X`` is drawn at random
    to serve as the mean, and ``calculate_covariance_matrix(X)`` supplies the
    covariance. The resulting ``{"mean": ..., "cov": ...}`` dict is appended
    to ``self.parameters``.

    Parameters
    ----------
    X : array whose first axis indexes the samples.
    """
    sample_count = np.shape(X)[0]
    # Every component starts with the same prior weight 1 / k.
    self.priors = (1 / self.k) * np.ones(self.k)
    for _ in range(self.k):
        picked_row = np.random.choice(range(sample_count))
        # The covariance is recomputed per component so that each entry
        # owns its own array object.
        component = {
            "mean": X[picked_row],
            "cov": calculate_covariance_matrix(X),
        }
        self.parameters.append(component)
def _transform(self, X, dim):
    """Project ``X`` onto the ``dim`` leading eigenvectors of its covariance.

    The covariance matrix comes from ``calculate_covariance_matrix(X)``; its
    eigenvectors are ranked by decreasing eigenvalue and the first ``dim``
    columns form the projection basis.

    Returns the product ``np.dot(X, basis)``.
    """
    cov = calculate_covariance_matrix(X)
    eig_vals, eig_vecs = np.linalg.eig(cov)

    # Column order that ranks the eigenvalues from largest to smallest.
    ranking = eig_vals.argsort()[::-1]
    ranked_vecs = eig_vecs[:, ranking]
    basis = np.atleast_1d(ranked_vecs[:, :dim])

    # Express the data in the basis of the selected principal components.
    return np.dot(X, basis)
def fit(self, X, y):
    """Learn the projection vector that separates the classes labelled 0 and 1.

    The rows of ``X`` are split by ``y == 0`` and ``y == 1``. The stored
    direction is ``pinv(cov_0 + cov_1) · (mean_0 - mean_1)``, i.e. the
    vector onto which projecting ``X`` best separates the two classes.

    The result is written to ``self.w``; nothing is returned.
    """
    class0, class1 = X[y == 0], X[y == 1]

    # Pooled covariance of the two groups.
    pooled_cov = (
        calculate_covariance_matrix(class0)
        + calculate_covariance_matrix(class1)
    )

    # Difference between the per-feature class means.
    mean_gap = np.atleast_1d(class0.mean(0) - class1.mean(0))

    # The pseudo-inverse keeps this defined even when pooled_cov is singular.
    self.w = np.linalg.pinv(pooled_cov).dot(mean_gap)
def fit(self, X, y):
    """Learn the two-class discriminant direction and store it in ``self.w``.

    The rows of ``X`` are split by ``y == 0`` and ``y == 1``. The direction
    is ``w = pinv(SW) · (mean_0 - mean_1)``, where ``SW`` is the sum of the
    two per-class matrices returned by ``calculate_covariance_matrix``.

    Parameters
    ----------
    X : array of samples, indexed by a boolean mask built from ``y``.
    y : array of labels; only the values 0 and 1 are used.
    """
    # Split the data set into the two classes.
    X1 = X[y == 0]
    X2 = X[y == 1]

    # Per-class covariance matrices; their sum acts as the within-class
    # scatter matrix SW.
    X1_cov_mat = calculate_covariance_matrix(X1)
    X2_cov_mat = calculate_covariance_matrix(X2)
    SW = X1_cov_mat + X2_cov_mat

    # Difference between the class means.
    X1_mean = X1.mean(0)
    X2_mean = X2.mean(0)
    diff_mean = np.atleast_1d(X1_mean - X2_mean)

    # Best direction: w = SW^-1 * diff_mean. Stopping after a single
    # product keeps w a vector; a second dot with diff_mean would collapse
    # it to a scalar.
    self.w = np.linalg.pinv(SW).dot(diff_mean)
def transform(self, X, n_components):
    """Project ``X`` onto its ``n_components`` leading principal components.

    Parameters
    ----------
    X : data matrix; its covariance is computed with
        ``calculate_covariance_matrix(X)``.
    n_components : number of eigenvectors (columns) to keep.

    Returns
    -------
    ``X.dot(eigenvectors)``, where the kept eigenvectors are the ones with
    the largest eigenvalues, ordered from largest to smallest.
    """
    covariance_matrix = calculate_covariance_matrix(X)

    # eigenvectors[:, i] is the eigenvector paired with eigenvalues[i].
    eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix)

    # Rank the eigenvalues from largest to smallest.
    idx = eigenvalues.argsort()[::-1]

    # Reorder the COLUMNS by that ranking, then keep the first n_components.
    # An index array cannot be used as a slice bound, so eigenvectors[:idx]
    # would raise a TypeError.
    eigenvectors = np.atleast_1d(eigenvectors[:, idx])[:, :n_components]

    # Project the data onto the selected principal components.
    X_transformed = X.dot(eigenvectors)
    return X_transformed
def init_random_gaussians(self, X):
    """Randomly initialise the Gaussian components.

    Sets ``self.priors`` to a uniform vector of length ``self.k`` and appends
    one parameter dict per component to ``self.parameters``.

    Parameters
    ----------
    X : np.array
        Data whose first axis indexes the samples.
    """
    # Number of samples.
    total_rows = np.shape(X)[0]

    # Prior probabilities: every component starts equally likely.
    self.priors = (1 / self.k) * np.ones(self.k)

    for _component in range(self.k):
        gaussian = dict()
        # np.random.choice draws one random index from the given 1-D range.
        chosen = np.random.choice(range(total_rows))
        gaussian['mean'] = X[chosen]
        gaussian['cov'] = calculate_covariance_matrix(X)
        self.parameters.append(gaussian)
def _calculate_scatter_matrix(self, X, y):
    """Compute the within-class (SW) and between-class (SB) scatter matrices.

    Parameters
    ----------
    X : 2-D array of samples; ``np.shape(X)[1]`` is the feature count.
    y : array of class labels used to mask the rows of ``X``.

    Returns
    -------
    (SW, SB) : two ``(n_features, n_features)`` arrays.
        ``SW`` is the sum over classes of ``calculate_covariance_matrix`` of
        that class's rows. ``SB`` is the sum over classes of
        ``n_c * outer(mean_c - total_mean, mean_c - total_mean)``.
    """
    n_features = np.shape(X)[1]
    labels = np.unique(y)
    total_mean = np.mean(X, axis=0)

    # Accumulators must start at zero; np.empty returns uninitialised
    # memory, which would leak garbage into the sums.
    SW = np.zeros((n_features, n_features))
    SB = np.zeros((n_features, n_features))

    for label in labels:
        class_X = X[y == label]

        # NOTE(review): SW is the plain sum of the per-class covariance
        # matrices; whether an (n_c - 1) weighting is wanted depends on how
        # calculate_covariance_matrix normalises -- confirm.
        SW += calculate_covariance_matrix(class_X)

        # The between-class term is the outer product of the mean offset
        # with itself, weighted by the class size.
        mean_offset = np.mean(class_X, axis=0) - total_mean
        SB += len(class_X) * np.outer(mean_offset, mean_offset)

    return SW, SB
def transform(self, X, n_components):
    """Project ``X`` onto its ``n_components`` leading principal components.

    Parameters
    ----------
    X : data matrix; its covariance is computed with
        ``calculate_covariance_matrix(X)``.
    n_components : number of eigenvectors (columns) to keep.

    Returns
    -------
    ``X.dot(eigenvectors)``, where the kept eigenvectors are the ones with
    the largest eigenvalues, ordered from largest to smallest.
    """
    # Eigen-decomposition of the covariance matrix; eigenvectors are
    # stored as columns.
    covariance_matrix = calculate_covariance_matrix(X)
    eigenvalues, eigenvectors = np.linalg.eig(covariance_matrix)

    # Rank the eigenvalues from largest to smallest.
    idx = eigenvalues.argsort()[::-1]

    # Reorder the columns by rank and keep the first n_components of them.
    # The slice must act on the column axis; slicing rows would truncate
    # every eigenvector instead of selecting components.
    eigenvectors = np.atleast_1d(eigenvectors[:, idx])[:, :n_components]

    # Project the data onto the selected principal components.
    X_transformed = X.dot(eigenvectors)
    return X_transformed