# Shared context assumed for every variant below (not part of the original
# snippets): NumPy, and a GaussianMixture container with fields (mu, var, p)
# as defined in the surrounding project's common module.
from typing import Tuple

import numpy as np

from common import GaussianMixture


def mstep(X: np.ndarray, post: np.ndarray) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    _, d = X.shape
    n, K = post.shape

    mu = np.zeros((K, d))
    var = np.zeros(K)
    p = np.zeros(K)

    for j in range(K):
        # Closed-form updates obtained by setting the derivative of the
        # expected log-likelihood to zero
        mu[j] = np.sum(post[:, j].reshape(n, 1) * X, axis=0) / np.sum(post[:, j])
        var[j] = np.sum(post[:, j].reshape(n, 1) * np.square(X - mu[j])) / (d * np.sum(post[:, j]))
        p[j] = np.sum(post[:, j]) / n

    return GaussianMixture(mu, var, p)
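# A minimal smoke test for the M-step above; the numbers are illustrative
# assumptions, not part of the original code. Two 1-D points with hard
# assignments should come back as the component means, with mixing weights of
# one half each. (The zero variances are an artifact of this degenerate
# two-point example; real soft counts give positive variances.)
X_demo = np.array([[0.0], [2.0]])
post_demo = np.array([[1.0, 0.0],
                      [0.0, 1.0]])
gm = mstep(X_demo, post_demo)
assert np.allclose(gm.mu, [[0.0], [2.0]])
assert np.allclose(gm.p, [0.5, 0.5])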
def mstep(X: np.ndarray, post: np.ndarray) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    n, d = X.shape
    _, K = post.shape

    mu = np.zeros((K, d))
    var = np.zeros(K)
    n_hat = post.sum(axis=0)
    p = n_hat / n

    for j in range(K):
        mu[j] = np.sum(post[:, j].reshape(-1, 1) * X, axis=0) / n_hat[j]
        var[j] = np.sum(post[:, j] * np.linalg.norm(X - mu[j], axis=1) ** 2) / (d * n_hat[j])

    return GaussianMixture(mu=mu, var=var, p=p)
def mstep(X: np.ndarray, post: np.ndarray) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    n, d = X.shape
    K = post.shape[1]

    nj = np.sum(post, axis=0)                # soft counts per component; shape (K,)
    pi = nj / n                              # cluster probabilities; shape (K,)
    mu = (post.T @ X) / nj.reshape(-1, 1)    # revised means; shape (K, d)

    # Vectorized squared distances between every point and every mean: (n, K)
    norms = np.linalg.norm(X[:, None] - mu, ord=2, axis=2) ** 2
    var = np.sum(post * norms, axis=0) / (nj * d)   # revised variances; shape (K,)

    return GaussianMixture(mu, var, pi)
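# Shape check for the broadcasting trick above, since it recurs in several of
# the vectorized variants below (the sizes are illustrative assumptions):
# X[:, None] has shape (n, 1, d) and mu has shape (K, d), so their difference
# broadcasts to (n, K, d), and the norm over axis=2 yields the (n, K) matrix
# of squared point-to-mean distances.
X_chk = np.ones((5, 3))
mu_chk = np.zeros((2, 3))
assert (X_chk[:, None] - mu_chk).shape == (5, 2, 3)
assert (np.linalg.norm(X_chk[:, None] - mu_chk, ord=2, axis=2) ** 2).shape == (5, 2)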
def mstep(X: np.ndarray, post: np.ndarray) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    K = post.shape[1]
    n, d = X.shape

    post_sum = np.sum(post, axis=0)
    mu_hat = np.dot(post.T, X) / post_sum.reshape(K, 1)
    p_hat = post_sum / n

    norm = np.linalg.norm(X[:, None] - mu_hat, axis=2) ** 2
    var_hat = np.sum(post * norm, axis=0) / (d * post_sum)

    return GaussianMixture(mu_hat, var_hat, p_hat)
def mstep(X: np.ndarray, post: np.ndarray) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    n, d = X.shape
    K = post.shape[1]

    nj = np.sum(post, axis=0)                       # shape (K,)
    pi = nj / n                                     # cluster probabilities; shape (K,)
    mu = np.matmul(post.T, X) / nj.reshape(-1, 1)   # revised means; shape (K, d)

    # Vectorized version of the squared point-to-mean distances
    norms = np.linalg.norm(X[:, None] - mu, ord=2, axis=2) ** 2
    # Loopy equivalent:
    # norms = np.zeros((n, K), dtype=np.float64)
    # for i in range(n):
    #     dist = X[i, :] - mu
    #     norms[i, :] = np.sum(dist**2, axis=1)

    var = np.sum(post * norms, axis=0) / (nj * d)   # revised variances; shape (K,)

    return GaussianMixture(mu, var, pi)
def mstep(X: np.ndarray, post: np.ndarray) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    n, d = X.shape
    k = post.shape[1]

    n_hat = np.sum(post, axis=0)
    p_hat = n_hat / n

    mu_hat = np.empty((k, d))
    pj_i_xi = post.T @ X
    for j in range(k):
        mu_hat[j] = pj_i_xi[j] / n_hat[j]

    var_hat = np.empty(k)
    for j in range(k):
        pj_i_var = 0.0
        for i in range(n):
            pj_i_var += post[i, j] * np.linalg.norm(X[i] - mu_hat[j]) ** 2
        var_hat[j] = pj_i_var / (n_hat[j] * d)

    mixture = GaussianMixture(p=p_hat, mu=mu_hat, var=var_hat)
    return mixture
def mstep(X: np.ndarray, post: np.ndarray) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    def squared_distance(x, mu):
        """Squared Euclidean distance between each row of x and mu."""
        return np.square(np.linalg.norm(x - mu, 2, 1))

    K = post.shape[1]
    n, d = X.shape

    mu = np.empty((K, d))
    var = np.empty(K)
    p = np.empty(K)

    for k in range(K):
        mu[k] = np.dot(X.T, post[:, k]) / np.sum(post[:, k])
        var[k] = np.dot(post[:, k], squared_distance(X, mu[k])) / (np.sum(post[:, k]) * d)
        p[k] = np.sum(post[:, k])

    return GaussianMixture(mu, var, p / np.sum(p))
def mstep(X: np.ndarray, post: np.ndarray) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    d = X.shape[1]
    n, K = post.shape

    mu = np.zeros((K, d))
    var = np.zeros(K)
    p = post.sum(axis=0) / n

    for j in range(K):
        for i in range(n):
            mu[j, :] += X[i] * post[i, j]
        mu[j, :] /= n * p[j]   # n * p[j] is the soft count n_hat[j]

    for j in range(K):
        sse = 0.0
        for i in range(n):
            diff = X[i] - mu[j, :]
            sse += post[i, j] * np.dot(diff, diff)
        var[j] = sse / (d * n * p[j])

    return GaussianMixture(mu, var, p)
def mstep(X: np.ndarray, post: np.ndarray) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    n, d = X.shape

    n_hat = np.sum(post, axis=0)
    p = n_hat / n
    mu = np.matmul(post.T, X) / n_hat.reshape(-1, 1)

    # Broadcast (K, 1, d) against (n, d) to get per-component differences
    diff = X - mu.reshape(-1, 1, d)
    sse = np.linalg.norm(diff, ord=2, axis=2) ** 2   # shape (K, n)
    var = np.sum(post * sse.T, axis=0) / (n_hat * d)

    mixture = GaussianMixture(mu, var, p)
    return mixture
def mstep(X: np.ndarray, post: np.ndarray) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    n, K = post.shape
    _, d = X.shape

    n_hat = np.sum(post, axis=0)
    p = n_hat / n

    mu = np.zeros((K, d))
    var = np.zeros(K)
    for k in range(K):
        mu[k, :] = post[:, k] @ X / post[:, k].sum()
        var[k] = ((X - mu[k, :]) ** 2).sum(axis=1) @ post[:, k] / (d * post[:, k].sum())

    return GaussianMixture(mu, var, p)
def mstep(X: np.ndarray, post: np.ndarray) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    # Weighted means via explicit broadcasting: replicate X across components
    # and the posteriors across dimensions
    shaped_X = X.reshape((X.shape[0], 1, X.shape[1])).repeat(post.shape[1], axis=1)
    shaped_post = post.reshape((post.shape[0], post.shape[1], 1))

    weighted_points = shaped_X * shaped_post
    full_sum = weighted_points.sum(axis=0)
    weights_sum = shaped_post.sum(axis=0)
    mu = full_sum / weights_sum

    shaped_mu = mu.reshape((1, mu.shape[0], mu.shape[1])).repeat(X.shape[0], axis=0)
    diffs = shaped_X - shaped_mu
    sq_diffs = (diffs * diffs).sum(axis=2, keepdims=True)
    var_not_normalized = (sq_diffs * shaped_post).sum(axis=0)
    var = (var_not_normalized / (X.shape[1] * weights_sum)).reshape(var_not_normalized.shape[0])

    p = post.sum(axis=0) / post.shape[0]
    return GaussianMixture(mu, var, p)
def mstep(X: np.ndarray, post: np.ndarray) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    # Define parameters and dimensions
    n, d = X.shape
    _, K = post.shape
    var = np.zeros(K)
    _post = post.T.copy()

    # Compute mixing proportions and means
    p = np.sum(_post, axis=1) / n
    mu = np.matmul(_post, X) / np.sum(_post, axis=1).reshape(-1, 1)

    for j in range(K):
        for i in range(n):
            var[j] += _post[j, i] * np.linalg.norm(X[i, :] - mu[j, :]) ** 2 / (d * np.sum(_post[j, :]))

    new_mixture = GaussianMixture(mu, var, p)
    return new_mixture
def mstep(X: np.ndarray, post: np.ndarray) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    n, K, d = post.shape[0], post.shape[1], X.shape[1]

    # hat_n_k = sum_i p(k | i)
    hat_n_k = np.sum(post, axis=0).reshape(K, 1)    # K x 1

    # hat_mix_p = hat_n_k / n
    hat_mix_p = (hat_n_k / n).reshape(K)

    # hat_mu_k = (1 / hat_n_k) * sum_i p(k|i) * x_i
    hat_mu_k = post.T.dot(X) / hat_n_k              # K x d

    # hat_var_k = (1 / (hat_n_k * d)) * sum_i p(k|i) * ||x_i - hat_mu_k||^2
    # (np.float64 replaces the deprecated np.float alias)
    xx_mu_2 = np.square(X.reshape(n, 1, d) - hat_mu_k.reshape(1, K, d),
                        dtype=np.float64)           # n x K x d
    xx_mu_2 = np.sum(xx_mu_2, axis=2)               # n x K
    prob_xx_mu_2 = post * xx_mu_2                   # n x K
    hat_var_k = np.sum(prob_xx_mu_2, axis=0) / (d * hat_n_k.reshape(1, K))  # 1 x K
    hat_var_k = hat_var_k.reshape(K)

    mixture = GaussianMixture(hat_mu_k, hat_var_k, hat_mix_p)
    return mixture
def mstep(X: np.ndarray, post: np.ndarray) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    n, d = np.shape(X)
    n, K = np.shape(post)

    n_K = np.sum(post, axis=0)    # shape (K,)
    p_K = n_K / n                 # shape (K,)

    mu_K = np.matmul(post.T, X)   # shape (K, d)
    mu_K = (mu_K.T / n_K).T

    var_K = []
    for i in range(K):
        A = np.linalg.norm(X - mu_K[i], axis=1)   # shape (n,)
        B = np.matmul(A ** 2, post[:, i])
        var_K.append(B / d / n_K[i])
    var_K = np.asarray(var_K)     # convert list to array; shape (K,)

    return GaussianMixture(mu_K, var_K, p_K)
def mstep(X: np.ndarray, post: np.ndarray) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    K = post.shape[1]
    n, d = X.shape

    n_hat = np.zeros(K)
    p_hat = np.zeros(K)
    mu_hat = np.zeros((K, d))
    sigma_hat = np.zeros(K)

    for j in range(K):
        for i in range(n):
            n_hat[j] += post[i, j]
        p_hat[j] = n_hat[j] / n

        sum_px = 0
        for k in range(n):
            sum_px += post[k, j] * X[k]
        mu_hat[j] = sum_px / n_hat[j]

        sum_px = 0
        for k in range(n):
            sum_px += post[k, j] * np.linalg.norm(X[k] - mu_hat[j]) ** 2
        sigma_hat[j] = sum_px / (n_hat[j] * d)

    return GaussianMixture(mu_hat, sigma_hat, p_hat)
def mstep(X: np.ndarray, post: np.ndarray) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    nums = np.sum(post, axis=0)
    probs = nums / X.shape[0]
    mus = (1 / nums)[:, None] * np.dot(X.T, post).T

    var = np.zeros(post.shape[1])
    for j in range(post.shape[1]):
        # np.linalg.norm replaces the unbound `numpy.linalg.norm` reference
        var[j] = np.dot(np.linalg.norm(X - mus[j], axis=1) ** 2, post[:, j]) / (nums[j] * X.shape[1])

    return GaussianMixture(mus, var, probs)
def mstep(X: np.ndarray, post: np.ndarray) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    # https://en.wikipedia.org/wiki/EM_algorithm_and_GMM_model
    n, d = X.shape
    K = post.shape[1]

    n_hat = np.sum(post, axis=0)                   # add up the posteriors by column
    p_hat = n_hat / n                              # weight of each mixture component
    mu_hat = (post.T @ X) / n_hat.reshape(K, 1)    # mean of each cluster

    # Compute the variance
    norm = np.linalg.norm(X[:, None] - mu_hat, ord=2, axis=2) ** 2
    var_hat = np.sum(post * norm, axis=0) / (n_hat * d)

    # Return optimal mean, variance and weight
    return GaussianMixture(mu_hat, var_hat, p_hat)
def mstep(X: np.ndarray, post: np.ndarray, mixture: GaussianMixture,
          min_variance: float = .25) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data, with incomplete entries (set to 0)
        post: (n, K) array holding the soft counts
            for all components for all examples
        mixture: the current gaussian mixture
        min_variance: the minimum variance for each gaussian

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    n, d = X.shape
    mu_rev = mixture.mu.copy()   # copy so the caller's mixture is not mutated
    K = mu_rev.shape[0]

    pi_rev = np.sum(post, axis=0) / n

    delta = X.astype(bool).astype(int)   # indicator of observed entries
    denom = post.T @ delta               # weighted support per (component, feature)
    numer = post.T @ X
    # Only revise means with enough weighted support; otherwise keep the old ones
    update_indices = np.where(denom >= 1)
    mu_rev[update_indices] = numer[update_indices] / denom[update_indices]

    denom_var = np.sum(post * np.sum(delta, axis=1).reshape(-1, 1), axis=0)
    # Squared distances restricted to observed coordinates, via the expansion
    # ||x - mu||^2 = sum(x^2) + sum(mu^2) - 2 * x.mu over observed entries
    norms = np.sum(X**2, axis=1)[:, None] + (delta @ mu_rev.T**2) - 2 * (X @ mu_rev.T)
    var_rev = np.maximum(np.sum(post * norms, axis=0) / denom_var, min_variance)

    return GaussianMixture(mu_rev, var_rev, pi_rev)
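# The `norms` line above expands ||x - mu||^2 over observed coordinates only,
# exploiting that missing entries of X are stored as 0: sum of x^2 plus the
# sum of mu^2 over observed entries, minus twice x.mu. A quick check of that
# identity on hypothetical numbers (the second entry plays the missing one):
X_chk = np.array([[1.0, 0.0, 2.0]])
mu_chk = np.array([[0.5, 9.0, 1.0]])
delta_chk = X_chk.astype(bool).astype(int)
lhs = np.sum(X_chk**2, axis=1)[:, None] + delta_chk @ mu_chk.T**2 - 2 * (X_chk @ mu_chk.T)
assert np.allclose(lhs, (1.0 - 0.5)**2 + (2.0 - 1.0)**2)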
def mstep(X: np.ndarray, post: np.ndarray) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    n, d = X.shape
    _, K = post.shape

    n_hat = post.sum(axis=0)
    p = n_hat / n

    mu = np.zeros((K, d))
    var = np.zeros(K)

    for j in range(K):
        mu[j, :] = post[:, j] @ X / n_hat[j]
        sse = ((mu[j] - X) ** 2).sum(axis=1) @ post[:, j]
        var[j] = sse / (d * n_hat[j])

    return GaussianMixture(mu, var, p)
def mstep(X: np.ndarray, post: np.ndarray) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    n, d = X.shape
    K = post.shape[1]

    mu = np.zeros((K, d))
    var = np.zeros(K)
    nj = np.sum(post, axis=0)
    p = nj / n

    for j in range(K):
        for i in range(n):
            mu[j] += post[i, j] * X[i]
        mu[j] /= nj[j]
        for i in range(n):
            var[j] += post[i, j] * np.linalg.norm(X[i] - mu[j]) ** 2
        var[j] /= nj[j] * d

    return GaussianMixture(mu, var, p)
def mstep(X: np.ndarray, post: np.ndarray) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    # Get model parameters
    n, d = X.shape
    K = post.shape[1]

    # New model values
    nj = np.sum(post, axis=0)
    pi = nj / n
    mu = (post.T @ X) / nj.reshape(-1, 1)
    norms = np.linalg.norm(X[:, None] - mu, ord=2, axis=2) ** 2
    var = np.sum(post * norms, axis=0) / (nj * d)

    return GaussianMixture(mu, var, pi)
def mstep(X: np.ndarray, post: np.ndarray) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    n, d = X.shape
    _, K = post.shape

    n_hat = post.sum(axis=0)
    p = n_hat / n

    mu = np.zeros((K, d))
    var = np.zeros(K)

    for j in range(K):
        # Compute the mean
        mu[j, :] = (X * post[:, j, None]).sum(axis=0) / n_hat[j]
        # Compute the variance
        sse = ((mu[j] - X) ** 2).sum(axis=1) @ post[:, j]
        var[j] = sse / (d * n_hat[j])

    return GaussianMixture(mu, var, p)
def mstep(X: np.ndarray, post: np.ndarray, mixture: GaussianMixture,
          min_variance: float = .25) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data, with incomplete entries (set to 0)
        post: (n, K) array holding the soft counts
            for all components for all examples
        mixture: the current gaussian mixture
        min_variance: the minimum variance for each gaussian

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    n, d = X.shape
    _, K = post.shape

    n_hat = post.sum(axis=0)
    p = n_hat / n

    delta = (X != 0)        # indicator of observed entries
    C = delta.sum(axis=1)   # number of observed entries per example

    mu = mixture.mu.copy()    # copy so the caller's mixture is not mutated
    var = mixture.var.copy()

    for j in range(K):
        support = post[:, j] @ delta
        # Only revise means with enough weighted support; otherwise keep the old ones
        mu[j, :] = np.where(support >= 1,
                            post[:, j] @ (delta * X) / support,
                            mu[j, :])
        sse = (delta * (mu[j] - X) ** 2).sum(axis=1) @ post[:, j]
        var_new = sse / (post[:, j] @ C)
        var[j] = max(var_new, min_variance)

    return GaussianMixture(mu, var, p)
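# Minimal usage sketch for the masked variant above; the data and the initial
# mixture are assumptions for illustration. Zeros in X mark missing entries,
# and the returned variances are floored at min_variance.
X_demo = np.array([[1.0, 2.0],
                   [0.0, 4.0],
                   [3.0, 0.0]])
post_demo = np.full((3, 2), 0.5)
gm0 = GaussianMixture(mu=np.zeros((2, 2)), var=np.ones(2), p=np.full(2, 0.5))
gm1 = mstep(X_demo, post_demo, gm0, min_variance=0.25)
assert np.all(gm1.var >= 0.25)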
def mstep(X: np.ndarray, post: np.ndarray) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    (The original carried the masked-data signature with `mixture` and
    `min_variance` arguments, but the body implements the full-data update
    and never uses them; the signature and docstring are aligned with the
    body here.)

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    n, d = X.shape
    K = post.shape[1]

    p = np.sum(post, axis=0) / n
    mu = np.dot(post.T, X) / np.sum(post, axis=0).reshape(K, 1)

    sse = np.zeros(K)
    for i in range(n):
        for j in range(K):
            sse[j] += post[i, j] * np.linalg.norm(X[i, :] - mu[j, :]) ** 2
    var = sse / (d * np.sum(post, axis=0))

    return GaussianMixture(mu, var, p)
def mstep(X: np.ndarray, post: np.ndarray) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    n, d = X.shape
    _, K = post.shape

    n_k = np.sum(post, axis=0)
    mu_k = np.dot(post.T, X) / n_k.reshape(-1, 1)

    temp = np.zeros((n, K))
    for i in range(n):
        for k in range(K):
            diff = X[i, :] - mu_k[k, :]
            temp[i, k] = np.dot(diff, diff) * post[i, k]
    summation = temp.sum(axis=0)

    var_k = summation / n_k / d
    p_k = n_k / n

    return GaussianMixture(mu_k, var_k, p_k)
def mstep(X: np.ndarray, post: np.ndarray) -> Tuple[GaussianMixture, float]:
    """M-step: Updates the gaussian mixture. Each cluster yields a component
    mean and variance.

    Args:
        X: (n, d) array holding the data
        post: (n, K) array holding the soft counts
            for all components for all examples

    Returns:
        GaussianMixture: the new gaussian mixture
        float: the distortion cost for the current assignment
    """
    n, d = X.shape
    _, K = post.shape

    n_hat = post.sum(axis=0)
    p = n_hat / n

    cost = 0
    mu = np.zeros((K, d))
    var = np.zeros(K)

    for j in range(K):
        mu[j, :] = post[:, j] @ X / n_hat[j]
        sse = ((mu[j] - X) ** 2).sum(axis=1) @ post[:, j]
        cost += sse
        var[j] = sse / (d * n_hat[j])

    return GaussianMixture(mu, var, p), cost
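# A sketch of how the distortion cost returned above is typically consumed,
# with the E-step passed in as a callable because its exact signature is not
# part of this file (an assumption, not the project's actual run() function):
def run_demo(X, mixture, estep, tol=1e-4):
    """Alternate a user-supplied E-step with the M-step above until the
    distortion cost stops improving."""
    prev_cost, cost = None, None
    while prev_cost is None or prev_cost - cost > tol:
        prev_cost = cost
        post = estep(X, mixture)          # user-supplied E-step
        mixture, cost = mstep(X, post)    # M-step above returns (mixture, cost)
    return mixture, post, cost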
def mstep(X: np.ndarray, post: np.ndarray, mixture: GaussianMixture,
          min_variance: float = .25) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data, with incomplete entries (set to 0)
        post: (n, K) array holding the soft counts
            for all components for all examples
        mixture: the current gaussian mixture
        min_variance: the minimum variance for each gaussian

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    n, d = X.shape
    _, K = post.shape

    n_hat = post.sum(axis=0)
    p = n_hat / n

    mu = mixture.mu.copy()   # copy so the caller's mixture is not mutated
    var = np.zeros(K)

    for j in range(K):
        sse, weight = 0, 0
        for l in range(d):
            mask = (X[:, l] != 0)   # observed entries in column l
            n_sum = post[mask, j].sum()
            if n_sum >= 1:
                # Update the mean only where it has enough weighted support
                mu[j, l] = (X[mask, l] @ post[mask, j]) / n_sum
            # Accumulate the variance terms over observed entries
            sse += ((mu[j, l] - X[mask, l]) ** 2) @ post[mask, j]
            weight += n_sum
        var[j] = sse / weight
        if var[j] < min_variance:
            var[j] = min_variance

    return GaussianMixture(mu, var, p)
def mstep(X: np.ndarray, post: np.ndarray, mixture: GaussianMixture,
          min_variance: float = 0.25) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data, with incomplete entries (set to 0)
        post: (n, K) array holding the soft counts
            for all components for all examples
        mixture: the current gaussian mixture
        min_variance: the minimum variance for each gaussian

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    n, d = X.shape
    mu = mixture.mu.copy()   # copy so the caller's mixture is not mutated

    d_s = np.sum(X != 0, axis=1)   # number of observed entries per example
    n_hat = np.sum(post, axis=0)
    p = n_hat / n

    # Revise a mean only where it has enough weighted support
    mask = np.int32(X != 0)
    condition = np.matmul(post.T, mask)
    new_mu = np.divide(np.matmul(post.T, X), condition,
                       out=np.zeros_like(mu), where=condition != 0)
    mu[condition >= 1] = new_mu[condition >= 1]

    # Squared errors restricted to observed entries; shape (K, n)
    diff = np.where(X != 0, X - mu.reshape(-1, 1, d), 0)
    sse = np.linalg.norm(diff, ord=2, axis=2) ** 2
    new_var = np.sum(post.T * sse, axis=1) / np.matmul(post.T, d_s)

    # Set a minimum variance to prevent variances from collapsing to zero when
    # only a small number of points are assigned to a component
    new_var[new_var < min_variance] = min_variance

    mixture = GaussianMixture(mu, new_var, p)
    return mixture
def mstep(X: np.ndarray, post: np.ndarray, mixture: GaussianMixture,
          min_variance: float = .25) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data, with incomplete entries (set to 0)
        post: (n, K) array holding the soft counts
            for all components for all examples
        mixture: the current gaussian mixture
        min_variance: the minimum variance for each gaussian

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    n, d = X.shape
    K = post.shape[1]

    mu = np.zeros((K, d))
    var = np.zeros(K)

    # Means: entry-wise weighted average over observed (non-zero) values,
    # falling back to the old mean when the weighted support is too small
    for k in range(K):
        for l in range(d):
            for u in range(n):
                if X[u][l] != 0:
                    mu[k][l] += post[u][k] * X[u][l]
            sm = 0
            for u in range(n):
                if X[u][l] != 0:
                    sm += post[u][k]
            if sm >= 1:
                mu[k][l] /= sm
            else:
                mu[k][l] = mixture.mu[k][l]

    p = np.sum(post, axis=0) / n

    # Variances over observed entries, floored at min_variance
    for k in range(K):
        v = 0
        for u in range(n):
            mgn = 0
            for idx in range(d):
                if X[u][idx] != 0:
                    mgn += (X[u][idx] - mu[k][idx]) ** 2
            v += mgn * post[u][k]
        ct = 0
        for u in range(n):
            ct += len(np.nonzero(X[u])[0]) * post[u][k]
        var[k] = max(v / ct, min_variance)

    return GaussianMixture(mu, var, p)
def mstep(X: np.ndarray, post: np.ndarray, mixture: GaussianMixture,
          min_variance: float = .25) -> GaussianMixture:
    """M-step: Updates the gaussian mixture by maximizing the log-likelihood
    of the weighted dataset

    Args:
        X: (n, d) array holding the data, with incomplete entries (set to 0)
        post: (n, K) array holding the soft counts
            for all components for all examples
        mixture: the current gaussian mixture
        min_variance: the minimum variance for each gaussian

    Returns:
        GaussianMixture: the new gaussian mixture
    """
    n, d = X.shape
    K, _ = mixture.mu.shape

    indicator = X != 0        # observed-entry mask
    mu = mixture.mu.copy()    # copy so the caller's mixture is not mutated
    for k in range(K):
        for col in range(d):
            support = np.dot(post[:, k], indicator[:, col])
            # Revise the mean only when it has enough weighted support
            # (>= 1, consistent with the other masked variants above);
            # otherwise keep the previous value
            if support >= 1:
                mu[k, col] = np.dot(post[:, k] * indicator[:, col], X[:, col]) / support

    # Variance calculation over observed entries
    normalizer = np.sum(post * np.sum(indicator, axis=1, keepdims=True), axis=0)
    temp = np.zeros((n, K))
    for i in range(n):
        for k in range(K):
            observed = np.where(X[i, :] != 0)
            diff = X[i, :][observed] - mu[k, :][observed]
            temp[i, k] = np.dot(diff, diff) * post[i, k]
    summation = temp.sum(axis=0)
    var = np.maximum(summation / normalizer, min_variance)

    n_k = np.sum(post, axis=0)
    p = n_k / n
    return GaussianMixture(mu, var, p)