def test_initialise():
    """Check the attributes of a KMeans object right after initialise(seed)."""
    data = [[1, 2, 3], [4, 5, 6]]
    mask = [[0, 1, 1], [1, 0, 1]]
    n_clusters, rng_seed = 2, 0

    kmeans = KMeans(data, mask, n_clusters)
    kmeans.initialise(rng_seed)

    # The expected bounds match the per-column min/max of the entries of
    # `data` whose `mask` value is 1.
    expected_mins = [4.0, 2.0, 3.0]
    expected_maxs = [4.0, 2.0, 6.0]
    assert numpy.array_equal(expected_mins, kmeans.mins)
    assert numpy.array_equal(expected_maxs, kmeans.maxs)

    expected_mask_centroids = [[1, 1, 1], [1, 1, 1]]
    assert numpy.array_equal(expected_mask_centroids, kmeans.mask_centroids)

    # No point has been assigned to a cluster yet.
    expected_assignments = [-1, -1]
    assert numpy.array_equal(expected_assignments, kmeans.cluster_assignments)

    # Exact values: the third coordinate depends on the seed passed above.
    expected_centroids = [
        [4.0, 2.0, 4.2617147424925346],
        [4.0, 2.0, 4.2148024123512426],
    ]
    assert numpy.array_equal(expected_centroids, kmeans.centroids)

    expected_distances = [0, 0]
    assert numpy.array_equal(expected_distances, kmeans.distances)
def initialise(self, init_S='random', init_FG='random'):
    """Set the starting values of S, F, G and tau.

    Parameters:
        init_S: 'exp' sets every S[k,l] to 1/lambdaS[k,l]; 'random' then
            overwrites each entry with exponential_draw(lambdaS[k,l]).
        init_FG: 'exp' sets F and G to 1/lambdaF and 1/lambdaG; 'random'
            overwrites each entry with exponential_draw of the matching
            lambda; 'kmeans' replaces F and G with KMeans clustering
            results plus 0.2.

    Raises:
        AssertionError: if init_S or init_FG is not one of the options above.
    """
    assert init_S in ['random', 'exp'], (
        "Unknown initialisation option for S: %s. "
        "Should be 'random' or 'exp'." % init_S)
    # This message used to name "S" although it reports the F/G option.
    assert init_FG in ['random', 'exp', 'kmeans'], (
        "Unknown initialisation option for F and G: %s. "
        "Should be 'random', 'exp', or 'kmeans'." % init_FG)

    self.S = 1. / self.lambdaS
    if init_S == 'random':
        # range (not xrange) iterates identically and also exists on Python 3.
        for k, l in itertools.product(range(0, self.K), range(0, self.L)):
            self.S[k, l] = exponential_draw(self.lambdaS[k, l])

    self.F, self.G = 1. / self.lambdaF, 1. / self.lambdaG
    if init_FG == 'random':
        for i, k in itertools.product(range(0, self.I), range(0, self.K)):
            self.F[i, k] = exponential_draw(self.lambdaF[i, k])
        for j, l in itertools.product(range(0, self.J), range(0, self.L)):
            self.G[j, l] = exponential_draw(self.lambdaG[j, l])
    elif init_FG == 'kmeans':
        # A parenthesised single-string print emits the same text on
        # Python 2 and Python 3.
        print("Initialising F using KMeans.")
        kmeans_F = KMeans(self.R, self.M, self.K)
        kmeans_F.initialise()
        kmeans_F.cluster()
        self.F = kmeans_F.clustering_results + 0.2

        # G is clustered on the transposed data and mask, with L clusters.
        print("Initialising G using KMeans.")
        kmeans_G = KMeans(self.R.T, self.M.T, self.L)
        kmeans_G.initialise()
        kmeans_G.cluster()
        self.G = kmeans_G.clustering_results + 0.2

    # tau starts at the ratio of the two values returned by alpha_s/beta_s.
    self.tau = self.alpha_s() / self.beta_s()
def initialise(self, init_S='random', init_FG='random'):
    """Set the starting values of S, F, G and tau.

    Parameters:
        init_S: 'exp' sets every S[k,l] to 1/lambdaS[k,l]; 'random' then
            overwrites each entry with exponential_draw(lambdaS[k,l]).
        init_FG: 'exp' sets F and G to 1/lambdaF and 1/lambdaG; 'random'
            overwrites each entry with exponential_draw of the matching
            lambda; 'kmeans' replaces F and G with KMeans clustering
            results plus 0.2.

    Raises:
        AssertionError: if init_S or init_FG is not one of the options above.
    """
    s_options = ['random', 'exp']
    fg_options = ['random', 'exp', 'kmeans']
    assert init_S in s_options, (
        "Unknown initialisation option for S: %s. "
        "Should be 'random' or 'exp'." % init_S)
    # This message used to name "S" although it reports the F/G option.
    assert init_FG in fg_options, (
        "Unknown initialisation option for F and G: %s. "
        "Should be 'random', 'exp', or 'kmeans'." % init_FG)

    self.S = 1. / self.lambdaS
    if init_S == 'random':
        # range (not xrange) iterates identically and also exists on Python 3.
        for k, l in itertools.product(range(0, self.K), range(0, self.L)):
            self.S[k, l] = exponential_draw(self.lambdaS[k, l])

    self.F, self.G = 1. / self.lambdaF, 1. / self.lambdaG
    if init_FG == 'random':
        for i, k in itertools.product(range(0, self.I), range(0, self.K)):
            self.F[i, k] = exponential_draw(self.lambdaF[i, k])
        for j, l in itertools.product(range(0, self.J), range(0, self.L)):
            self.G[j, l] = exponential_draw(self.lambdaG[j, l])
    elif init_FG == 'kmeans':
        # A parenthesised single-string print emits the same text on
        # Python 2 and Python 3.
        print("Initialising F using KMeans.")
        kmeans_F = KMeans(self.R, self.M, self.K)
        kmeans_F.initialise()
        kmeans_F.cluster()
        self.F = kmeans_F.clustering_results + 0.2

        # G is clustered on the transposed data and mask, with L clusters.
        print("Initialising G using KMeans.")
        kmeans_G = KMeans(self.R.T, self.M.T, self.L)
        kmeans_G.initialise()
        kmeans_G.cluster()
        self.G = kmeans_G.clustering_results + 0.2

    # tau starts at whatever gamma_mode returns for the current
    # alpha_s()/beta_s() values.
    self.tau = gamma_mode(self.alpha_s(), self.beta_s())
def test_initialise():
    """Verify every attribute that KMeans.initialise(seed) is expected to set."""
    kmeans = KMeans([[1, 2, 3], [4, 5, 6]], [[0, 1, 1], [1, 0, 1]], 2)
    kmeans.initialise(0)

    # (attribute name, expected value) pairs, checked in this order.
    expectations = [
        ('mins', [4.0, 2.0, 3.0]),
        ('maxs', [4.0, 2.0, 6.0]),
        ('mask_centroids', [[1, 1, 1], [1, 1, 1]]),
        ('cluster_assignments', [-1, -1]),
        ('centroids', [[4.0, 2.0, 4.2617147424925346],
                       [4.0, 2.0, 4.2148024123512426]]),
        ('distances', [0, 0]),
    ]
    for attribute, expected in expectations:
        assert numpy.array_equal(expected, getattr(kmeans, attribute))
def initialise(self, init_S='random', init_FG='random', tauFSG=None):
    """Set the starting values of the tau*, mu*, exp* and var* attributes.

    Parameters:
        init_S: 'exp' sets muS to 1/lambdaS; 'random' then overwrites each
            entry with exponential_draw(lambdaS[k,l]).
        init_FG: 'exp' sets muF and muG to 1/lambdaF and 1/lambdaG; 'random'
            overwrites each entry with exponential_draw of the matching
            lambda; 'kmeans' replaces muF and muG with KMeans clustering
            results.
        tauFSG: optional dict that may hold 'tauF', 'tauS' and/or 'tauG';
            any missing key falls back to an all-ones array. None (the
            default) means no overrides.

    Raises:
        AssertionError: if init_S or init_FG is not a recognised option.
    """
    # None replaces the former mutable {} default; both mean "no overrides".
    if tauFSG is None:
        tauFSG = {}

    self.tauF = (tauFSG['tauF'] if 'tauF' in tauFSG
                 else numpy.ones((self.I, self.K)))
    self.tauS = (tauFSG['tauS'] if 'tauS' in tauFSG
                 else numpy.ones((self.K, self.L)))
    self.tauG = (tauFSG['tauG'] if 'tauG' in tauFSG
                 else numpy.ones((self.J, self.L)))

    assert init_S in ['exp', 'random'], (
        "Unrecognised init option for S: %s." % init_S)
    self.muS = 1. / self.lambdaS
    if init_S == 'random':
        # range (not xrange) iterates identically and also exists on Python 3.
        for k, l in itertools.product(range(0, self.K), range(0, self.L)):
            self.muS[k, l] = exponential_draw(self.lambdaS[k, l])

    assert init_FG in ['exp', 'random', 'kmeans'], (
        "Unrecognised init option for F,G: %s." % init_FG)
    self.muF, self.muG = 1. / self.lambdaF, 1. / self.lambdaG
    if init_FG == 'random':
        for i, k in itertools.product(range(0, self.I), range(0, self.K)):
            self.muF[i, k] = exponential_draw(self.lambdaF[i, k])
        for j, l in itertools.product(range(0, self.J), range(0, self.L)):
            self.muG[j, l] = exponential_draw(self.lambdaG[j, l])
    elif init_FG == 'kmeans':
        # A parenthesised single-string print emits the same text on
        # Python 2 and Python 3.
        print("Initialising F using KMeans.")
        kmeans_F = KMeans(self.R, self.M, self.K)
        kmeans_F.initialise()
        kmeans_F.cluster()
        # The clustering results are used as-is (no offset is added).
        self.muF = kmeans_F.clustering_results

        # G is clustered on the transposed data and mask, with L clusters.
        print("Initialising G using KMeans.")
        kmeans_G = KMeans(self.R.T, self.M.T, self.L)
        kmeans_G.initialise()
        kmeans_G.cluster()
        self.muG = kmeans_G.clustering_results

    # Initialise the expectations and variances: start from zeros, then let
    # the update_exp_* methods run once per column of F and G and per entry
    # of S.
    self.expF, self.varF = (numpy.zeros((self.I, self.K)),
                            numpy.zeros((self.I, self.K)))
    self.expS, self.varS = (numpy.zeros((self.K, self.L)),
                            numpy.zeros((self.K, self.L)))
    self.expG, self.varG = (numpy.zeros((self.J, self.L)),
                            numpy.zeros((self.J, self.L)))
    for k in range(0, self.K):
        self.update_exp_F(k)
    for k, l in itertools.product(range(0, self.K), range(0, self.L)):
        self.update_exp_S(k, l)
    for l in range(0, self.L):
        self.update_exp_G(l)

    # Initialise tau using the updates.
    self.update_tau()
    self.update_exp_tau()
def initialise(self, init_S='random', init_FG='random', tauFSG=None):
    """Set the starting values of the tau*, mu*, exp* and var* attributes.

    Parameters:
        init_S: 'exp' sets muS to 1/lambdaS; 'random' then overwrites each
            entry with exponential_draw(lambdaS[k,l]).
        init_FG: 'exp' sets muF and muG to 1/lambdaF and 1/lambdaG; 'random'
            overwrites each entry with exponential_draw of the matching
            lambda; 'kmeans' replaces muF and muG with KMeans clustering
            results.
        tauFSG: optional dict that may hold 'tauF', 'tauS' and/or 'tauG';
            any missing key falls back to an all-ones array. None (the
            default) means no overrides.

    Raises:
        AssertionError: if init_S or init_FG is not a recognised option.
    """
    # A None default avoids sharing one mutable dict between all calls.
    overrides = {} if tauFSG is None else tauFSG

    def override_or_ones(key, shape):
        # Use the caller-supplied array when present, otherwise all ones.
        if key in overrides:
            return overrides[key]
        return numpy.ones(shape)

    self.tauF = override_or_ones('tauF', (self.I, self.K))
    self.tauS = override_or_ones('tauS', (self.K, self.L))
    self.tauG = override_or_ones('tauG', (self.J, self.L))

    assert init_S in ['exp', 'random'], (
        "Unrecognised init option for S: %s." % init_S)
    self.muS = 1. / self.lambdaS
    if init_S == 'random':
        # Nested range loops visit (k, l) in the same row-major order as
        # itertools.product, and range also exists on Python 3.
        for k in range(0, self.K):
            for l in range(0, self.L):
                self.muS[k, l] = exponential_draw(self.lambdaS[k, l])

    assert init_FG in ['exp', 'random', 'kmeans'], (
        "Unrecognised init option for F,G: %s." % init_FG)
    self.muF, self.muG = 1. / self.lambdaF, 1. / self.lambdaG
    if init_FG == 'random':
        for i in range(0, self.I):
            for k in range(0, self.K):
                self.muF[i, k] = exponential_draw(self.lambdaF[i, k])
        for j in range(0, self.J):
            for l in range(0, self.L):
                self.muG[j, l] = exponential_draw(self.lambdaG[j, l])
    elif init_FG == 'kmeans':
        # A parenthesised single-string print emits the same text on
        # Python 2 and Python 3.
        print("Initialising F using KMeans.")
        kmeans_F = KMeans(self.R, self.M, self.K)
        kmeans_F.initialise()
        kmeans_F.cluster()
        # The clustering results are used as-is (no offset is added).
        self.muF = kmeans_F.clustering_results

        # G is clustered on the transposed data and mask, with L clusters.
        print("Initialising G using KMeans.")
        kmeans_G = KMeans(self.R.T, self.M.T, self.L)
        kmeans_G.initialise()
        kmeans_G.cluster()
        self.muG = kmeans_G.clustering_results

    # Initialise the expectations and variances: zero arrays first, then
    # one pass of the update_exp_* methods.
    self.expF = numpy.zeros((self.I, self.K))
    self.varF = numpy.zeros((self.I, self.K))
    self.expS = numpy.zeros((self.K, self.L))
    self.varS = numpy.zeros((self.K, self.L))
    self.expG = numpy.zeros((self.J, self.L))
    self.varG = numpy.zeros((self.J, self.L))
    for k in range(0, self.K):
        self.update_exp_F(k)
    for k in range(0, self.K):
        for l in range(0, self.L):
            self.update_exp_S(k, l)
    for l in range(0, self.L):
        self.update_exp_G(l)

    # Initialise tau using the updates.
    self.update_tau()
    self.update_exp_tau()
def initialise(self, init_S='random', init_FG='random', expo_prior=1.):
    """Create the starting matrices S (K x L), F (I x K) and G (J x L).

    Parameters:
        init_S: 'ones', 'random' (numpy.random.rand) or 'exponential'
            (one exponential_draw(expo_prior) per entry).
        init_FG: the same three options applied to F and G, or 'kmeans',
            which uses KMeans clustering results plus 0.2.
        expo_prior: argument handed to exponential_draw for the
            'exponential' options.

    Raises:
        AssertionError: if init_S or init_FG is not a recognised option.
    """
    allowed_S = ['ones', 'random', 'exponential']
    allowed_FG = ['ones', 'random', 'exponential', 'kmeans']
    assert init_S in allowed_S, "Unrecognised init option for S: %s." % init_S
    assert init_FG in allowed_FG, (
        "Unrecognised init option for F,G: %s." % init_FG)

    # --- S ---
    if init_S == 'ones':
        self.S = numpy.ones((self.K, self.L))
    elif init_S == 'random':
        self.S = numpy.random.rand(self.K, self.L)
    elif init_S == 'exponential':
        self.S = numpy.empty((self.K, self.L))
        # Row-major fill keeps the sequence of draws in (k, l) order.
        for row in range(0, self.K):
            for col in range(0, self.L):
                self.S[row, col] = exponential_draw(expo_prior)

    # --- F and G ---
    if init_FG == 'ones':
        self.F = numpy.ones((self.I, self.K))
        self.G = numpy.ones((self.J, self.L))
    elif init_FG == 'random':
        # F is drawn before G.
        self.F = numpy.random.rand(self.I, self.K)
        self.G = numpy.random.rand(self.J, self.L)
    elif init_FG == 'exponential':
        self.F = numpy.empty((self.I, self.K))
        self.G = numpy.empty((self.J, self.L))
        for row in range(0, self.I):
            for col in range(0, self.K):
                self.F[row, col] = exponential_draw(expo_prior)
        for row in range(0, self.J):
            for col in range(0, self.L):
                self.G[row, col] = exponential_draw(expo_prior)
    elif init_FG == 'kmeans':
        print("Initialising F using KMeans.")
        clusterer_F = KMeans(self.R, self.M, self.K)
        clusterer_F.initialise()
        clusterer_F.cluster()
        self.F = clusterer_F.clustering_results + 0.2

        # G is clustered on the transposed data and mask, with L clusters.
        print("Initialising G using KMeans.")
        clusterer_G = KMeans(self.R.T, self.M.T, self.L)
        clusterer_G.initialise()
        clusterer_G.cluster()
        self.G = clusterer_G.clustering_results + 0.2
def initialise(self, init_S='random', init_FG='random', expo_prior=1.):
    """Create the starting matrices S (K x L), F (I x K) and G (J x L).

    Parameters:
        init_S: 'ones', 'random' (numpy.random.rand) or 'exponential'
            (one exponential_draw(expo_prior) per entry).
        init_FG: the same three options applied to F and G, or 'kmeans',
            which uses KMeans clustering results plus 0.2.
        expo_prior: argument handed to exponential_draw for the
            'exponential' options.

    Raises:
        AssertionError: if init_S or init_FG is not a recognised option.
    """
    assert init_S in ['ones', 'random', 'exponential'], (
        "Unrecognised init option for S: %s." % init_S)
    assert init_FG in ['ones', 'random', 'exponential', 'kmeans'], (
        "Unrecognised init option for F,G: %s." % init_FG)

    def fill_with_draws(matrix):
        # Overwrite every entry, row by row, with a fresh exponential draw.
        n_rows, n_cols = matrix.shape
        for r, c in itertools.product(range(0, n_rows), range(0, n_cols)):
            matrix[r, c] = exponential_draw(expo_prior)

    def kmeans_factor(data, mask, n_clusters):
        # Cluster and shift the results by 0.2.
        clusterer = KMeans(data, mask, n_clusters)
        clusterer.initialise()
        clusterer.cluster()
        return clusterer.clustering_results + 0.2

    if init_S == 'ones':
        self.S = numpy.ones((self.K, self.L))
    elif init_S == 'random':
        self.S = numpy.random.rand(self.K, self.L)
    elif init_S == 'exponential':
        self.S = numpy.empty((self.K, self.L))
        fill_with_draws(self.S)

    if init_FG == 'ones':
        self.F = numpy.ones((self.I, self.K))
        self.G = numpy.ones((self.J, self.L))
    elif init_FG == 'random':
        # F is drawn before G.
        self.F = numpy.random.rand(self.I, self.K)
        self.G = numpy.random.rand(self.J, self.L)
    elif init_FG == 'exponential':
        self.F = numpy.empty((self.I, self.K))
        self.G = numpy.empty((self.J, self.L))
        # All of F is filled before any entry of G.
        fill_with_draws(self.F)
        fill_with_draws(self.G)
    elif init_FG == 'kmeans':
        print("Initialising F using KMeans.")
        self.F = kmeans_factor(self.R, self.M, self.K)
        # G is clustered on the transposed data and mask, with L clusters.
        print("Initialising G using KMeans.")
        self.G = kmeans_factor(self.R.T, self.M.T, self.L)
def cluster(R, M, K):
    """Run KMeans on (R, M) with K clusters and return its clustering results.

    The KMeans object is built from the three arguments, initialised with
    its default arguments, clustered, and then discarded.
    """
    model = KMeans(R, M, K)
    model.initialise()
    model.cluster()
    return model.clustering_results