import numpy as np
import numpy.linalg as la
import numpy.random as npr

# dmvnorm (multivariate normal density) and rwish (Wishart sampler) are not
# defined in this file; illustrative stand-ins are sketched further down.


def _expectation_gauss2(data, theta, sigma, pi):
    """E-step for a two-component Gaussian mixture.

    Returns a hard assignment (0/1) for every observation together with the
    responsibility of the second component.
    """
    prob_cluster1 = (1 - pi) * dmvnorm(data, mu=theta[0], sigma=sigma[0])
    prob_cluster2 = pi * dmvnorm(data, mu=theta[1], sigma=sigma[1])
    grouping = np.zeros_like(prob_cluster1, dtype=int)
    grouping[prob_cluster1 < prob_cluster2] = 1
    responsibilities = prob_cluster2 / (prob_cluster1 + prob_cluster2)
    return grouping, responsibilities
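# The density helper assumed above (dmvnorm) is not part of this listing. The
# stand-in below is a minimal sketch, assuming it mirrors R's mvtnorm::dmvnorm:
# it evaluates the multivariate normal pdf at each row of `data`.
from scipy.stats import multivariate_normal


def dmvnorm(data, mu, sigma):
    # One density value per row of `data` (scipy squeezes singleton outputs).
    return multivariate_normal.pdf(data, mean=mu, cov=sigma)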
def gaussian_mixture(x, y, theta, sigma, pi):
    """Evaluate the two-component mixture density over the grid spanned by x and y."""
    # Rows index y and columns index x, matching the z[j, i] assignment below
    # (and matplotlib's contour convention).
    z = np.zeros((len(y), len(x)))
    for i, xval in enumerate(x):
        for j, yval in enumerate(y):
            data = np.array([xval, yval]).reshape((1, 2))
            z1 = dmvnorm(data, mu=theta[0], sigma=sigma[0])
            z2 = dmvnorm(data, mu=theta[1], sigma=sigma[1])
            z[j, i] = ((1 - pi) * z1) + (pi * z2)
    return z
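# Illustrative usage of gaussian_mixture: the helper below is not part of the
# original code and its parameter values are made up. It evaluates the
# two-component mixture density on a 50x50 grid, e.g. for a contour plot.
def _demo_density_grid():
    grid_x = np.linspace(-3.0, 3.0, 50)
    grid_y = np.linspace(-3.0, 3.0, 50)
    demo_theta = np.array([[-1.0, -1.0], [1.0, 1.0]])  # component means
    demo_sigma = np.array([np.eye(2), np.eye(2)])      # component covariances
    return gaussian_mixture(grid_x, grid_y, demo_theta, demo_sigma, pi=0.5)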
def __gaussian_mixture(x, y, pi, mu, sigma):
    """Evaluate a k-component mixture density over the grid spanned by x and y."""
    z = np.zeros((len(x), len(y)))
    number_of_clusters = pi.shape[0]
    p = mu.shape[1]
    for i, xval in enumerate(x):
        for j, yval in enumerate(y):
            data = np.array([xval, yval]).reshape((1, p))
            zval = 0.0
            for k in range(number_of_clusters):
                zval += pi[k] * dmvnorm(data, mu=mu[k], sigma=sigma[k])
            z[i, j] = zval
    return z
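# The Wishart sampler assumed below (rwish) is also not part of this listing.
# This stand-in is a minimal sketch, assuming rwish(df, S)[0] yields one draw
# W ~ Wishart(df, S); it uses the Bartlett decomposition.
def rwish(df, scale, size=1):
    p = scale.shape[0]
    chol = la.cholesky(scale)
    draws = []
    for _ in range(size):
        # Lower-triangular Bartlett factor: chi-distributed diagonal entries,
        # standard-normal entries below the diagonal.
        bartlett = np.zeros((p, p))
        for row in range(p):
            bartlett[row, row] = np.sqrt(npr.chisquare(df - row))
            for col in range(row):
                bartlett[row, col] = npr.normal()
        factor = chol.dot(bartlett)
        draws.append(factor.dot(factor.T))
    return draws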
def run(self, data, k, iterations=100):
    """Gibbs sampler for a k-component multivariate Gaussian mixture.

    Returns the sampled chains (gibbs_pi, gibbs_theta, gibbs_sigma) of
    mixing weights, component means, and component covariances.
    """
    data = np.array(data)
    num_observations = data.shape[0]
    p = data.shape[1]

    # Data-driven prior quantities: prior mean xi and a diagonal precision
    # kappa based on the per-dimension data range.
    xi = np.median(data, axis=0)
    dr = np.amax(data, axis=0) - np.amin(data, axis=0)
    kappa = np.zeros((p, p))
    for i in range(p):
        kappa[i, i] = 1.0 / (dr[i] ** 2)
    ikappa = la.inv(kappa)
    h = (100.0 * self.__g / self.__alpha) * kappa

    # Chains for mixing weights, means, covariances, and the Wishart
    # hyperparameter beta.
    gibbs_pi = np.zeros((iterations, k), dtype=np.double)
    gibbs_theta = np.zeros((iterations, k, p), dtype=np.double)
    gibbs_sigma = np.zeros((iterations, k, p, p), dtype=np.double)
    gibbs_beta = np.zeros((iterations, p, p), dtype=np.double)

    # Initialize every component at the overall sample moments.
    for j in range(k):
        gibbs_pi[0, j] = 1.0 / k
        gibbs_theta[0, j] = np.mean(data, axis=0)
        gibbs_sigma[0, j] = np.cov(data.T)

    for i in range(1, iterations):
        # Sample the latent allocations z from their multinomial full
        # conditional (one-hot row per observation).
        a = np.zeros((num_observations, k))
        for m in range(k):
            a[:, m] = gibbs_pi[i - 1, m] * dmvnorm(
                data, mu=gibbs_theta[i - 1, m], sigma=gibbs_sigma[i - 1, m])
        asum = np.sum(a, axis=1)
        z = np.zeros((num_observations, k), dtype=int)
        for j in range(num_observations):
            z[j] = npr.multinomial(1, a[j] / asum[j])

        # Sample the mixing weights and the Wishart hyperparameter beta.
        gibbs_pi[i] = npr.dirichlet(np.sum(z, axis=0) + self.__delta)
        gibbs_beta[i] = rwish(
            2.0 * self.__g + 2.0 * k * self.__alpha,
            la.inv(2.0 * h + 2.0 * self.__sum_isigma(gibbs_sigma[i - 1], k, p)))[0]

        # Split the data by component and record the component sizes.
        y = []
        n = np.zeros(k, dtype=int)
        for m in range(k):
            pos_data = data[z[:, m] == 1]
            y.append(pos_data)
            n[m] = pos_data.shape[0]
        assert len(y) == k

        # Sample each component covariance from its inverse-Wishart full
        # conditional (drawn as the inverse of a Wishart sample).
        for m in range(k):
            y_diff = y[m] - gibbs_theta[i - 1, m]
            y_sum = np.zeros((p, p), dtype=np.double)
            for l in range(n[m]):
                y_sum = y_sum + np.outer(y_diff[l], y_diff[l])
            cov = (2.0 * gibbs_beta[i]) + y_sum
            gibbs_sigma[i, m] = la.inv(
                rwish(2.0 * self.__alpha + n[m], la.inv(cov))[0])

        # Sample each component mean from its Gaussian full conditional.
        for m in range(k):
            cov = la.inv(n[m] * la.inv(gibbs_sigma[i, m]) + kappa)
            mean_x = np.mean(y[m], axis=0)
            mean = cov.dot(
                n[m] * la.inv(gibbs_sigma[i, m]).dot(mean_x) + kappa.dot(xi))
            gibbs_theta[i, m] = npr.multivariate_normal(mean, cov, size=1)

    return gibbs_pi, gibbs_theta, gibbs_sigma
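# Sketch of post-processing for the chains returned by run. This helper is not
# part of the original code; the burn-in length is an arbitrary example. It
# discards early draws and averages the rest into posterior-mean estimates of
# the mixing weights, component means, and component covariances (note that a
# plain average ignores possible label switching between components).
def _summarize_chains(gibbs_pi, gibbs_theta, gibbs_sigma, burn_in=50):
    post_pi = gibbs_pi[burn_in:].mean(axis=0)
    post_theta = gibbs_theta[burn_in:].mean(axis=0)
    post_sigma = gibbs_sigma[burn_in:].mean(axis=0)
    return post_pi, post_theta, post_sigma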