def test_moments(self): """ Test the moments of categorical nodes. """ # Simple test X = Categorical([0.7, 0.2, 0.1]) u = X._message_to_child() self.assertEqual(len(u), 1) self.assertAllClose(u[0], [0.7, 0.2, 0.1]) # Test plates in p p = np.random.dirichlet([1, 1], size=3) X = Categorical(p) u = X._message_to_child() self.assertAllClose(u[0], p) # Test with Dirichlet prior P = Dirichlet([7, 3]) logp = P._message_to_child()[0] p0 = np.exp(logp[0]) / (np.exp(logp[0]) + np.exp(logp[1])) p1 = np.exp(logp[1]) / (np.exp(logp[0]) + np.exp(logp[1])) X = Categorical(P) u = X._message_to_child() p = np.array([p0, p1]) self.assertAllClose(u[0], p) # Test with broadcasted plates P = Dirichlet([7, 3], plates=(10, )) X = Categorical(P) u = X._message_to_child() self.assertAllClose(u[0] * np.ones(X.get_shape(0)), p * np.ones( (10, 1))) pass
def test_moments(self): """ Test the moments of multinomial nodes. """ # Simple test X = Multinomial(1, [0.7, 0.2, 0.1]) u = X._message_to_child() self.assertEqual(len(u), 1) self.assertAllClose(u[0], [0.7, 0.2, 0.1]) # Test n X = Multinomial(10, [0.7, 0.2, 0.1]) u = X._message_to_child() self.assertAllClose(u[0], [7, 2, 1]) # Test plates in p n = np.random.randint(1, 10) p = np.random.dirichlet([1, 1], size=3) X = Multinomial(n, p) u = X._message_to_child() self.assertAllClose(u[0], p * n) # Test plates in n n = np.random.randint(1, 10, size=(3, )) p = np.random.dirichlet([1, 1, 1, 1]) X = Multinomial(n, p) u = X._message_to_child() self.assertAllClose(u[0], p * n[:, None]) # Test plates in p and n n = np.random.randint(1, 10, size=(4, 1)) p = np.random.dirichlet([1, 1], size=3) X = Multinomial(n, p) u = X._message_to_child() self.assertAllClose(u[0], p * n[..., None]) # Test with Dirichlet prior P = Dirichlet([7, 3]) logp = P._message_to_child()[0] p0 = np.exp(logp[0]) / (np.exp(logp[0]) + np.exp(logp[1])) p1 = np.exp(logp[1]) / (np.exp(logp[0]) + np.exp(logp[1])) X = Multinomial(1, P) u = X._message_to_child() p = np.array([p0, p1]) self.assertAllClose(u[0], p) # Test with broadcasted plates P = Dirichlet([7, 3], plates=(10, )) X = Multinomial(5, P) u = X._message_to_child() self.assertAllClose(u[0] * np.ones(X.get_shape(0)), 5 * p * np.ones( (10, 1))) pass
def test_moments(self): """ Test the moments of Dirichlet nodes. """ p = Dirichlet([2, 3, 4]) u = p._message_to_child() self.assertAllClose(u[0], special.psi([2,3,4]) - special.psi(2+3+4)) pass
def test_constant(self): """ Test the constant moments of Dirichlet nodes. """ p = Dirichlet([1, 1, 1]) p.initialize_from_value([0.5, 0.4, 0.1]) u = p._message_to_child() self.assertAllClose(u[0], np.log([0.5, 0.4, 0.1])) pass
def test_moments(self): """ Test the moments of Dirichlet nodes. """ p = Dirichlet([2, 3, 4]) u = p._message_to_child() self.assertAllClose(u[0], special.psi([2, 3, 4]) - special.psi(2 + 3 + 4)) pass
def _run(self, x, K=25, beta=0.5, alpha=0.00001, hinton_plot=False, end=False): '''Only to be used when doing parameter optimization.''' self.participant_list = x[0] N = len(x[0]) #number of data points (i.e. WCS participants) D = np.shape(x[1])[1] #number of features #K = 20 #number of initial clusters R = Dirichlet(K*[alpha], name='R') Z = Categorical(R, plates=(N,1), name='Z') P = Beta([beta, beta], plates=(D,K), name='P') X = Mixture(Z, Bernoulli, P) Q = VB(Z, R, X, P) P.initialize_from_random() X.observe(x[1]) Q.update(repeat=1000) log_likelihood = Q.L[Q.iter-1] if hinton_plot: bpplt.hinton(Z) bpplt.pyplot.show() bpplt.hinton(R) bpplt.pyplot.show() #Get the weight matrix stored in Z (weights determine which cluster data point belongs to) z = Z._message_to_child()[0] z = z * np.ones(Z.plates+(1,)) z = np.squeeze(z) self.z = z #Get the weights stored in R (proportional to the size of the clusters) r = np.exp(R._message_to_child()[0]) r = r * np.ones(R.plates+(1,)) r = np.squeeze(r) self.r = r #Get the cluster assignment of each data point self.c_assign = np.argmax(self.z, axis=1) return log_likelihood
def test_moments(self): """ Test the moments of categorical nodes. """ # Simple test X = Categorical([0.7,0.2,0.1]) u = X._message_to_child() self.assertEqual(len(u), 1) self.assertAllClose(u[0], [0.7,0.2,0.1]) # Test plates in p p = np.random.dirichlet([1,1], size=3) X = Categorical(p) u = X._message_to_child() self.assertAllClose(u[0], p) # Test with Dirichlet prior P = Dirichlet([7, 3]) logp = P._message_to_child()[0] p0 = np.exp(logp[0]) / (np.exp(logp[0]) + np.exp(logp[1])) p1 = np.exp(logp[1]) / (np.exp(logp[0]) + np.exp(logp[1])) X = Categorical(P) u = X._message_to_child() p = np.array([p0, p1]) self.assertAllClose(u[0], p) # Test with broadcasted plates P = Dirichlet([7, 3], plates=(10,)) X = Categorical(P) u = X._message_to_child() self.assertAllClose(u[0] * np.ones(X.get_shape(0)), p*np.ones((10,1))) pass
def test_moments(self): """ Test the moments of multinomial nodes. """ # Simple test X = Multinomial(1, [0.7,0.2,0.1]) u = X._message_to_child() self.assertEqual(len(u), 1) self.assertAllClose(u[0], [0.7,0.2,0.1]) # Test n X = Multinomial(10, [0.7,0.2,0.1]) u = X._message_to_child() self.assertAllClose(u[0], [7,2,1]) # Test plates in p n = np.random.randint(1, 10) p = np.random.dirichlet([1,1], size=3) X = Multinomial(n, p) u = X._message_to_child() self.assertAllClose(u[0], p*n) # Test plates in n n = np.random.randint(1, 10, size=(3,)) p = np.random.dirichlet([1,1,1,1]) X = Multinomial(n, p) u = X._message_to_child() self.assertAllClose(u[0], p*n[:,None]) # Test plates in p and n n = np.random.randint(1, 10, size=(4,1)) p = np.random.dirichlet([1,1], size=3) X = Multinomial(n, p) u = X._message_to_child() self.assertAllClose(u[0], p*n[...,None]) # Test with Dirichlet prior P = Dirichlet([7, 3]) logp = P._message_to_child()[0] p0 = np.exp(logp[0]) / (np.exp(logp[0]) + np.exp(logp[1])) p1 = np.exp(logp[1]) / (np.exp(logp[0]) + np.exp(logp[1])) X = Multinomial(1, P) u = X._message_to_child() p = np.array([p0, p1]) self.assertAllClose(u[0], p) # Test with broadcasted plates P = Dirichlet([7, 3], plates=(10,)) X = Multinomial(5, P) u = X._message_to_child() self.assertAllClose(u[0] * np.ones(X.get_shape(0)), 5*p*np.ones((10,1))) pass
def run(self, K=25, beta=0.5, alpha=0.00001, foci_thresh=0, num_neigh=4, hinton_plot=False, end=False): '''Performs one run of the BBDP according to the specified parameters.''' print("Transforming WCS participant data into binary vectors...") x = u.transform_data_all(self.langs, norm=False, end=end, foci=True, foci_thresh=foci_thresh, num_neigh=num_neigh) print("Finished transforming participant data") self.participant_list = x[0] N = len(x[0]) #number of data points (i.e. WCS participants) D = np.shape(x[1])[1] #number of features #K = 20 #number of initial clusters R = Dirichlet(K*[alpha], name='R') Z = Categorical(R, plates=(N,1), name='Z') P = Beta([beta, beta], plates=(D,K), name='P') X = Mixture(Z, Bernoulli, P) Q = VB(Z, R, X, P) P.initialize_from_random() X.observe(x[1]) Q.update(repeat=1000) if hinton_plot: bpplt.hinton(Z) bpplt.pyplot.show() bpplt.hinton(R) bpplt.pyplot.show() #Get the weight matrix stored in Z (weights determine which cluster data point belongs to) z = Z._message_to_child()[0] z = z * np.ones(Z.plates+(1,)) z = np.squeeze(z) self.z = z #Get the weights stored in R (proportional to the size of the clusters) r = np.exp(R._message_to_child()[0]) r = r * np.ones(R.plates+(1,)) r = np.squeeze(r) self.r = r #Get the cluster assignment of each data point self.c_assign = np.argmax(self.z, axis=1) #Write cluster results to a file if self.write_to_file: if end: save_path = "cluster_results_end_K={}_B={}_a={}_t={}_nn={}".format(K, beta, alpha, foci_thresh, num_neigh) else: save_path = "cluster_results_K={}_B={}_a={}_t={}_nn={}".format(K, beta, alpha, foci_thresh, num_neigh) while path.exists(save_path+".txt"): #save_path already exists try: old_file_num = int(save_path[save_path.find('(')+1:-1]) new_file_num = old_file_num + 1 save_path = save_path[0:save_path.find('(')] + '(' + str(new_file_num) + ')' except ValueError: save_path = save_path + " (1)" self.save_path = save_path file = open(path.abspath(self.save_path+".txt"), 'w') #Write cluster assignment matrix Z (gives the probability that observation i belongs to cluster j) if 'Z' not in self.in_file: for i in range(len(self.z)): line = "\t".join([str(x) for x in self.z[i]]) + "\n" file.write(line) file.write('---Z\n') self.in_file.append('Z') #Write cluster weights matrix R (proportional to the size of the resulting clusters) if 'R' not in self.in_file: line = "\t".join([str(x) for x in self.r]) + "\n" file.write(line) file.write('---R\n') self.in_file.append('R') #Write deterministic cluster assignments with the corresponding participant key if 'C' not in self.in_file: line1 = "\t".join([str(x) for x in self.participant_list]) + "\n" line2 = "\t".join([str(x) for x in self.c_assign]) + "\n" file.write(line1) file.write(line2) file.write('---C\n') self.in_file.append('C') file.close() return self.c_assign
# coding: utf-8 ## Gaussian mixture model # Do some stuff: # In[2]: from bayespy.nodes import Dirichlet alpha = Dirichlet([1e-3, 1e-3, 1e-3]) print(alpha._message_to_child()) # Nice!