def gmmmemberships(mu,S,priors,x,weights=1,initcovars=None):
    """Compute the membership of each data point in each Gaussian component.

    Parameters
    ----------
    mu : array, shape (k, d)
        Component means, one row per component.
    S : array, shape (d, d, k)
        Component covariances; S[:,:,j] is the covariance of component j.
        NOTE: modified in place -- if the Cholesky factorization of S[:,:,j]
        fails, that slice is overwritten with initcovars[:,:,j].
    priors : array broadcastable against an (n, k) array
        Mixture weight of each component (typically length k).
    x : array, shape (n, d)
        Data points, one row per point.
    weights : scalar or array, optional
        Multiplies the per-point log likelihood before it is summed into e.
    initcovars : array, shape (d, d, k), optional
        Fallback covariances used when a covariance in S cannot be
        factorized.  Defaults to a copy of S, in which case the fallback
        retries the same matrix and the factorization error propagates.

    Returns
    -------
    (gamma, e) : tuple
        gamma is an (n, k) array of memberships; every row is normalized to
        sum to 1, except rows whose total density is exactly 0, which are
        left as all zeros.  e is the negative weighted log likelihood of the
        data under the mixture (infinite if some point has zero density).
    """

    if initcovars is None:
        initcovars = S.copy()

    # number of data points
    n = x.shape[0]
    # dimensionality of data
    d = x.shape[1]
    # number of clusters
    k = mu.shape[0]

    # allocate output
    gamma = num.zeros((n,k))

    # normalization constant
    normal = (2.0*num.pi)**(num.double(d)/2.0)

    # compute the activation for each data point
    for j in range(k):
        diffs = x - mu[j,:]
        try:
            c = decomp.cholesky(S[:,:,j])
        except num.linalg.LinAlgError:
            # covariance is not positive definite: fall back to the initial
            # covariance for this component (overwrites the caller's S)
            if DEBUG:
                print('S[:,:,%d] = '%j + str(S[:,:,j]) + ' is singular')
                print('Reverting to initcovars[:,:,%d] = '%j + str(initcovars[:,:,j]))
            S[:,:,j] = initcovars[:,:,j]
            c = decomp.cholesky(S[:,:,j])
        # with S = c'c, each row of temp is c'^{-1} diff, so the sum of its
        # squares is the squared Mahalanobis distance diff' S^{-1} diff
        temp = num.transpose(num.linalg.solve(num.transpose(c),num.transpose(diffs)))
        # prod(diag(c)) is sqrt(det(S)) because c is triangular
        gamma[:,j] = num.exp(-.5*num.sum(temp**2,axis=1))/(normal*num.prod(num.diag(c)))

    # include prior
    gamma *= priors

    # mixture density of each data point
    s = num.sum(gamma,axis=1)

    # compute negative log likelihood (before s is patched below)
    e = -num.sum(num.log(s)*weights)

    # make sure we don't divide by 0
    s[s==0] = 1
    gamma /= s.reshape(n,1)

    return (gamma,e)
# Debugging cross-check of the distance and Gaussian density computations.
# NOTE(review): this fragment reads nclusts, n, x, c, S and normal, none of
# which are defined in the visible code -- confirm where it is meant to live.
# NOTE(review): the loop nesting below is reconstructed from the statement
# order because the original line structure was lost -- confirm.

# D and E both hold the squared Euclidean distance from every row of x to
# center c[i,:]; D relies on broadcasting, E on an explicit num.tile, so the
# two printed rows should agree.
D = num.zeros((nclusts,n))
E = num.zeros((nclusts,n))
for i in range(nclusts):
    D[i,:] = num.sum( (x - c[i,:])**2, axis=1 )
    E[i,:] = num.sum((x - num.tile(c[i,:],[n,1]))**2,axis=1)
    print "D[%d] = "%i + str(D[i,:])
    print "E[%d] = "%i + str(E[i,:])

# gamma1 and gamma2 hold the same per-cluster Gaussian density computed two
# ways: a closed form that only uses indices 0 and 1 (so it is only valid for
# 2-D data) and the Cholesky-based form.
gamma1 = num.zeros((n,nclusts))
gamma2 = num.zeros((n,nclusts))
for j in range(nclusts):
    print "j = " + str(j)
    print "c.shape = " + str(c.shape)
    diffs = x - c[j,:]
    # zz is the determinant of the 2x2 covariance S[:,:,j]
    zz = S[0,0,j]*S[1,1,j] - S[0,1,j]**2
    # closed-form squared Mahalanobis distance using the 2x2 inverse
    temp1 = (diffs[:,0]**2*S[1,1,j] - 2*diffs[:,0]*diffs[:,1]*S[0,1,j] + diffs[:,1]**2*S[0,0,j]) / zz
    print "temp1 = " + str(temp1)
    # the same distance via a triangular solve with the Cholesky factor
    ch = decomp.cholesky(S[:,:,j])
    temp2 = num.transpose(num.linalg.solve(num.transpose(ch),num.transpose(diffs)))
    temp2 = num.sum(temp2**2,axis=1)
    # sqrt(zz) and prod(diag(ch)) are both sqrt(det(S[:,:,j]))
    gamma1[:,j] = num.exp(-.5*temp1)/(normal*num.sqrt(zz))
    gamma2[:,j] = num.exp(-.5*temp2)/(normal*num.prod(num.diag(ch)))
    print "temp2 = " + str(temp2)
    print "sigma1 = " + str(num.sqrt(zz))
    print "sigma2 = " + str(num.prod(num.diag(ch)))
    print "gamma1 = " + str(gamma1[:,j])
    print "gamma2 = " + str(gamma2[:,j])
def _component_density(x, mean, cov, normal):
    """Evaluate the Gaussian density with the given mean and covariance at each row of x."""
    diffs = x - mean
    c = decomp.cholesky(cov)
    # with cov = c'c, the row-wise sum of squares of temp is the squared
    # Mahalanobis distance, and prod(diag(c)) is sqrt(det(cov))
    temp = num.transpose(num.linalg.solve(num.transpose(c), num.transpose(diffs)))
    return num.exp(-0.5 * num.sum(temp ** 2, axis=1)) / (normal * num.prod(num.diag(c)))


def fixsmallpriors(x, mu, S, priors, initcovars, gamma=None):
    """Re-seed every mixture component whose prior is below MINPRIOR (0.01).

    Each such component is moved onto the data point with the lowest current
    mixture density, its covariance is reset to the matching slice of
    initcovars, its prior is set to MINPRIOR, and all other priors are
    rescaled so that the priors keep their previous total.

    Parameters
    ----------
    x : array, shape (n, d)
        Data points, one row per point.
    mu : array, shape (k, d)
        Component means; modified in place.
    S : array, shape (d, d, k)
        Component covariances; modified in place.
    priors : array, length k
        Component priors; modified in place.
    initcovars : array, shape (d, d, k)
        Covariances used for re-seeded components.
    gamma : array, shape (n, k), optional
        Density of each point under each component (without the priors).
        Computed here when omitted; when supplied it is updated in place
        for every re-seeded component.

    Returns
    -------
    None.  All results are written into mu, S, priors (and gamma).
    """

    MINPRIOR = 0.01

    # the set of components to fix is decided once, before any rescaling
    issmall = priors < MINPRIOR
    if not issmall.any():
        return

    n = x.shape[0]
    d = x.shape[1]
    k = mu.shape[0]

    normal = (2.0 * num.pi) ** (num.double(d) / 2.0)

    if gamma is None:
        # compute the density for each x from each mixture component
        gamma = num.zeros((n, k))
        for i in range(k):
            gamma[:, i] = _component_density(x, mu[i, :], S[:, :, i], normal)

    # loop through all small priors
    smalli, = num.where(issmall)
    for i in smalli:

        print("fixing cluster %d with small prior = %f: " % (i, priors[i]))

        # compute mixture density of each data point
        p = num.sum(gamma * priors, axis=1)

        # choose the point with the smallest probability
        j = p.argmin()

        print("lowest density sample: x[%d] = " % j + str(x[j, :]))

        # create a new cluster
        mu[i, :] = x[j, :]
        S[:, :, i] = initcovars[:, :, i]
        # scale the priors so the others sum to 1 - MINPRIOR (given that the
        # priors summed to 1), then give this component exactly MINPRIOR
        priors *= (1 - MINPRIOR) / (1.0 - priors[i])
        priors[i] = MINPRIOR

        print("reset cluster %d to: " % i)
        print("mu = " + str(mu[i, :]))
        print("S = ")
        print(S[:, :, i])
        print("S.shape: " + str(S[:, :, i].shape))
        print("prior = " + str(priors[i]))

        # update gamma so later iterations see the re-seeded component
        gamma[:, i] = _component_density(x, mu[i, :], S[:, :, i], normal)