def get_gamma_ij(self, i, j):
    """Return the share of cluster ``j`` in the summed density of sample ``i``.

    The value is ``g_j / sum_l g_l`` where ``g_l`` is what
    ``utils.multivariate_gaussian`` returns for row ``i`` of ``self.df.values``
    with ``self.means[l]`` and ``self.vars[l]`` (presumably the Gaussian
    density -- confirm against ``utils``). No mixture weights are applied.

    Args:
        i: Row index into ``self.df.values``.
        j: Cluster index in ``range(self.num_clusters)``.

    Returns:
        The ratio described above.
    """
    sample = self.df.values[i]
    # Normalising constant: the same evaluation summed over every cluster.
    total = sum(
        utils.multivariate_gaussian(sample, self.means[c], self.vars[c])
        for c in range(self.num_clusters)
    )
    return utils.multivariate_gaussian(
        sample, self.means[j], self.vars[j]) / total
def __e_step(self):
    """Run the E-step: refresh ``self.gamma`` and ``self.lower_bound_``.

    For every row ``i`` of ``self.X`` and every component ``j`` the product
    ``self.weights_[j] * utils.multivariate_gaussian(...)`` is evaluated;
    each row of products is divided by its own sum and stored in
    ``self.gamma[i]``. ``self.lower_bound_`` is reset to zero and then
    accumulates the log of each row sum.

    Returns:
        ``self``, so calls can be chained.
    """
    n_rows = self.X.shape[0]
    n_comp = self.n_components
    self.lower_bound_ = 0

    for row in range(n_rows):
        joint = np.zeros(n_comp)
        for comp in range(n_comp):
            density = utils.multivariate_gaussian(
                self.X[row], self.means_[comp], self.covariances_[comp])
            joint[comp] = self.weights_[comp] * density

        evidence = joint.sum()
        # Normalise the whole row at once; same values as a per-entry loop.
        self.gamma[row, :] = joint / evidence
        self.lower_bound_ += np.log(evidence)

    return self
def get_L(self):
    """Return the mean, over all rows, of the log of the summed cluster values.

    For each row of ``self.df.values`` the results of
    ``utils.multivariate_gaussian`` for every cluster are added up (without
    any mixture weight), the natural log of that sum is taken, and the logs
    are averaged over ``len(self.df)`` rows.

    Returns:
        The averaged log value.

    Raises:
        ZeroDivisionError: if ``self.df`` is empty.
        ValueError: if the summed value for a row is zero (``math.log(0)``).
    """
    n_rows = len(self.df)
    log_total = 0
    for row in range(n_rows):
        mixture = sum(
            utils.multivariate_gaussian(
                self.df.values[row], self.means[c], self.vars[c])
            for c in range(self.num_clusters)
        )
        log_total += math.log(mixture)
    return log_total / n_rows
def predict(self, X):
    """Return, for each row of ``X``, the index of its most likely component.

    For every row the products
    ``self.weights_[j] * utils.multivariate_gaussian(row, mean_j, cov_j)``
    are computed for all ``self.n_components`` components, normalised by
    their sum, and the component with the largest normalised value wins.

    Args:
        X: Two-dimensional array; ``X.shape[0]`` rows are classified and each
            row is passed as the sample to ``utils.multivariate_gaussian``.

    Returns:
        Integer array of length ``X.shape[0]`` holding the winning component
        index per row.
    """
    N = X.shape[0]
    k = self.n_components
    gamma = np.zeros([N, k])
    for i in range(N):
        p = np.zeros(k)
        for j in range(k):
            # Evaluate on the rows passed in by the caller. The previous
            # code read self.X[i], which ignored the argument entirely.
            p[j] = self.weights_[j] * utils.multivariate_gaussian(
                X[i], self.means_[j], self.covariances_[j])
        gamma[i, :] = p / p.sum()
    return np.argmax(gamma, axis=1)
beta_log = utils.beta_log(data_test, pi, mu, sigma_list, A, n_states)

# Gamma (smoothing distribution)
gamma_log = utils.gamma_log(alpha_log, beta_log)
gamma = np.exp(gamma_log)

# Csi (pair marginals), built in log space:
#   csi_log[t, j, i] = alpha_log[i, t] + beta_log[j, t + 1] + log A[j, i]
#                      + log N(x_{t+1}; mu_j, sigma_j) - den
# where den is the log-sum-exp of the unnormalised terms over all (i, j).
csi_log = np.zeros([time_steps - 1, n_states, n_states])
for t in range(time_steps - 1):
    # NOTE(review): the emission term reads data_train although beta_log
    # above is built from data_test and the plot is titled "Test data" --
    # confirm which dataset is intended here.
    # The emission term depends only on the destination state, so it is
    # evaluated once per state rather than once per (m, l) pair.
    log_emission = np.zeros(n_states)
    for l in range(n_states):
        log_emission[l] = np.log(
            utils.multivariate_gaussian(data_train[t + 1, :], mu[:, l],
                                        sigma_list[l]))

    # aux[m, l]: unnormalised log pair marginal, state m at t, state l at t+1.
    aux = np.zeros([n_states, n_states])
    for m in range(n_states):
        for l in range(n_states):
            aux[m, l] = (alpha_log[m, t] + beta_log[l, t + 1] +
                         np.log(A[l, m]) + log_emission[l])

    # Log-sum-exp with the maximum factored out for numerical stability.
    b = np.max(aux)
    den = b + np.log(np.sum(np.exp(aux - b)))

    # csi_log[t, j, i] equals aux[i, j] - den, so the terms are reused
    # instead of being recomputed in a second pair of loops.
    csi_log[t] = aux.T - den
csi = np.exp(csi_log)

plt.subplot(411)
plt.plot(gamma[0, 0:100], 'c')
# Raw string: "\l" is not a valid escape sequence; the runtime text is unchanged.
plt.title(r"$p(z_t|x_1, \ldots , x_T)$ - HMM (Fake parameters) - Test data")
def _gmm1_isotropic_covariances(sig_hat, dim):
    """Expand each per-cluster variance in ``sig_hat`` into ``variance * I``."""
    return [sig_hat[k, 0] * np.identity(dim) for k in range(sig_hat.shape[0])]


def _gmm1_responsibilities(data, pi_hat, mu_hat, cov_list):
    """E-step: return the (n_samples, n_clusters) normalised posterior weights."""
    n_samples = data.shape[0]
    n_clusters = len(cov_list)
    tau = np.zeros([n_samples, n_clusters])
    for i in range(n_samples):
        aux = np.zeros(n_clusters)
        for l in range(n_clusters):
            aux[l] = pi_hat[l] * utils.multivariate_gaussian(
                np.transpose(data[i, :]), mu_hat[:, l], cov_list[l])
        tau[i, :] = aux / np.sum(aux)
    return tau


def _gmm1_expected_log_likelihood(data, tau, pi_hat, mu_hat, cov_list):
    """Return the tau-weighted complete-data log-likelihood, averaged per sample."""
    n_samples = data.shape[0]
    llik = 0.0
    for i in range(n_samples):
        for k in range(len(cov_list)):
            llik += tau[i, k] * (np.log(
                utils.multivariate_gaussian(np.transpose(
                    data[i, :]), mu_hat[:, k], cov_list[k])) +
                                 np.log(pi_hat[k]))
    return llik / n_samples


def gmm1(train_test,
         save_plots=True,
         n_clusters=4,
         max_it=200,
         show_plots=True,
         print_llk=False):
    """Fit a Gaussian mixture with isotropic covariances by EM and plot it.

    Means and mixing weights are initialised from ``kmeans('train', ...)``;
    every cluster variance starts at 100. EM runs until ``max_it`` iterations
    or until the log-likelihood changes by at most 1e-8.

    NOTE(review): EM is fitted on ``utils.load_dataset(train_test)``, so with
    ``train_test == 'test'`` the parameters are estimated on the test set
    while the printed messages still say "train data" -- confirm intent.
    NOTE(review): the nesting of the print / plot / save sections was
    reconstructed from whitespace-collapsed source -- confirm against the
    original layout.

    Args:
        train_test: Dataset name passed to ``utils.load_dataset``; ``'test'``
            additionally recomputes the responsibilities and log-likelihood.
        save_plots: Save the figure to ``./Figures/gmm1_<train_test>.png``.
        n_clusters: Number of mixture components.
        max_it: Maximum number of EM iterations.
        show_plots: Draw samples, centroids and 90% mass ellipses (uses the
            first two dimensions only).
        print_llk: Print centroids and log-likelihood values.

    Returns:
        None.
    """
    data = utils.load_dataset(train_test)

    # Initialization of mu and pi with kmeans
    mu_hat, pi_hat = kmeans('train',
                            save_plots=False,
                            n_clusters=n_clusters,
                            print_results=False)
    n_samples = data.shape[0]
    dim = data.shape[1]
    mu_hat = np.transpose(mu_hat)  # one column (length dim) per cluster
    sig_hat = 100 * np.ones([n_clusters, 1])
    # Built before the loop so it exists even if no EM iteration runs.
    sig_hat_list = _gmm1_isotropic_covariances(sig_hat, dim)
    tau = np.zeros([n_samples, n_clusters])

    counter = 0
    llik_old = 0
    llik_new = 10
    while counter < max_it and np.abs(llik_new - llik_old) > 1e-8:
        llik_old = llik_new

        # E-step
        tau = _gmm1_responsibilities(data, pi_hat, mu_hat, sig_hat_list)

        # M-step
        for k in range(n_clusters):
            weights = tau[:, k]
            den = np.sum(weights)
            pi_hat[k] = den / n_samples
            mu_hat[:, k] = np.dot(weights, data) / den
            sq_norms = np.sum((data - mu_hat[:, k])**2, axis=1)
            # Isotropic variance estimate: weighted squared distance divided
            # by dim * den (the original hard-coded dim as 2).
            sig_hat[k, 0] = np.dot(weights, sq_norms) / (dim * den)

        # Log likelihood
        sig_hat_list = _gmm1_isotropic_covariances(sig_hat, dim)
        llik_new = _gmm1_expected_log_likelihood(data, tau, pi_hat, mu_hat,
                                                 sig_hat_list)
        counter += 1

    if print_llk:
        print('Centroid for GMM1 on train data')
        for k in range(n_clusters):
            print('C' + str(k + 1), mu_hat[:, k])
        print('Log-likelihood for GMM1 on train data :', llik_new)

    if train_test == 'test':
        data = utils.load_dataset(train_test)
        tau = _gmm1_responsibilities(data, pi_hat, mu_hat, sig_hat_list)

        # Log likelihood
        llik_new = _gmm1_expected_log_likelihood(data, tau, pi_hat, mu_hat,
                                                 sig_hat_list)
        if print_llk:
            print('Log-likelihood for GMM1 on test data:', llik_new)

    if show_plots:
        colors = ['c', 'lightskyblue', 'mediumpurple', 'hotpink']
        Z = np.argmax(tau, 1)
        for m in range(n_clusters):
            # Cycle through the palette so more than four clusters still plot.
            color = colors[m % len(colors)]
            cluster_samples = data[np.where(Z == m)]
            plt.plot(cluster_samples[:, 0],
                     cluster_samples[:, 1],
                     'o',
                     c=color,
                     label='Cluster' + ' ' + str(m))
            # 'k' is the black colour code; the original passed the leftover
            # integer loop variable k here.
            plt.scatter(mu_hat[0, m],
                        mu_hat[1, m],
                        marker='x',
                        s=100,
                        c='k',
                        linewidths=5,
                        zorder=10)
            ellipse_data = utils.plot_ellipse(x_cent=mu_hat[0, m],
                                              y_cent=mu_hat[1, m],
                                              cov=sig_hat_list[m],
                                              mass_level=0.9)
            plt.plot(ellipse_data[0], ellipse_data[1], c=color)
        plt.legend(loc='upper left', scatterpoints=1)
        plt.xlabel('Dimension 1')
        plt.ylabel('Dimension 2')
        plt.title('Gaussian Mixture Model 1 - ' + str(train_test) + ' data')
        if save_plots:
            name = './Figures/gmm1_' + train_test + '.png'
            plt.savefig(name)
        plt.show()
        plt.clf()