def predict_sample_generate_proba(self, X):
    """Return the generative (marginal) probability of each sample.

    For every row of ``X`` this sums ``alpha_k * N(x | u_k, sigma_k)``
    over all mixture components stored in ``self.params``.
    """
    # densities: one weighted component density array per mixture component
    densities = [
        utils.gaussian_nd(X, mean, cov) * weight
        for weight, mean, cov in self.params
    ]
    # transpose to (n_sample, n_components), then marginalize over components
    return np.sum(np.asarray(densities).T, axis=1)
def predict_proba(self, X):
    """Return each sample's posterior distribution over the mixture components.

    Rows of the returned (n_sample, n_components) array sum to 1: entry
    ``[i, k]`` is the responsibility of component ``k`` for sample ``i``.
    """
    # weighted component densities, shaped (n_sample, n_components)
    weighted = np.asarray([
        utils.gaussian_nd(X, mean, cov) * weight
        for weight, mean, cov in self.params
    ]).T
    # normalize each row into a probability distribution
    return weighted / weighted.sum(axis=1, keepdims=True)
def fit(self, X):
    """Fit the Gaussian mixture to ``X`` with the EM algorithm.

    Parameters
    ----------
    X : ndarray of shape (n_sample, n_feature)
        Training data.

    Each component is initialized with a uniform weight, the global mean
    plus a random scalar bias, and the global covariance. E and M steps
    then alternate until the improvement of the mean log-likelihood drops
    to ``self.tol`` or less, or ``self.n_iter`` iterations are reached.
    """
    n_sample, _ = X.shape
    # ---- initialization -------------------------------------------------
    u = np.mean(X, axis=0)
    sigma = np.cov(X.T)
    alpha = 1.0 / self.n_components
    max_value = X.max()
    min_value = X.min()
    # Fix: reset params so a repeated fit() call does not accumulate
    # 2x n_components entries from the previous run.
    self.params = []
    for _ in range(0, self.n_components):
        # Every component starts with the same weight and covariance;
        # the mean gets a random scalar bias on top of the global mean.
        self.params.append([alpha, u + np.random.random() * (max_value + min_value) / 2, sigma])
    # E step: weighted component densities, shaped (n_sample, n_components)
    W = np.asarray([utils.gaussian_nd(X, u, sigma) * alpha for alpha, u, sigma in self.params]).T
    # Mean log-likelihood of the data under the current parameters.
    current_log_loss = np.log(W.sum(axis=1)).sum() / n_sample
    W = W / np.sum(W, axis=1, keepdims=True)  # normalize into responsibilities
    # ---- EM iterations --------------------------------------------------
    for _ in range(0, self.n_iter):
        if self.verbose is True:
            utils.plot_contourf(X, lambda x: self.predict_sample_generate_proba(x), lines=5)
            utils.plt.pause(0.1)
            utils.plt.clf()
        # M step: update each component's weight, mean and covariance.
        for k in range(0, self.n_components):
            resp_sum = W[:, k].sum()  # hoisted: used three times below
            self.params[k][0] = resp_sum / n_sample  # update weight
            self.params[k][1] = np.sum(W[:, [k]] * X, axis=0) / resp_sum  # update mean
            # Update covariance. Vectorized replacement for the per-sample
            # Python loop of weighted outer products:
            #   sum_i W[i,k] * (x_i - mu).T (x_i - mu)  ==  (W[:,k] * diff).T @ diff
            diff = X - self.params[k][1]
            self.params[k][2] = (W[:, [k]] * diff).T.dot(diff) / resp_sum
        # E step with the updated parameters.
        W = np.asarray([utils.gaussian_nd(X, u, sigma) * alpha for alpha, u, sigma in self.params]).T
        new_log_loss = np.log(W.sum(axis=1)).sum() / n_sample
        W = W / np.sum(W, axis=1, keepdims=True)
        # Continue only while the log-likelihood still improves beyond tol.
        if new_log_loss - current_log_loss > self.tol:
            current_log_loss = new_log_loss
        else:
            break
    if self.verbose:
        utils.plot_contourf(X, lambda x: self.predict_sample_generate_proba(x), lines=5)
        utils.plt.show()