def test_init(self):
    """
    Test the creation of Bernoulli nodes.
    """

    # Some simple initializations
    X = Bernoulli(0.5)
    X = Bernoulli(Beta([2, 3]))

    # Check that plates are correct
    X = Bernoulli(0.7, plates=(4, 3))
    self.assertEqual(X.plates, (4, 3))
    X = Bernoulli(0.7 * np.ones((4, 3)))
    self.assertEqual(X.plates, (4, 3))
    X = Bernoulli(Beta([4, 3], plates=(4, 3)))
    self.assertEqual(X.plates, (4, 3))

    # Invalid probability
    self.assertRaises(ValueError, Bernoulli, -0.5)
    self.assertRaises(ValueError, Bernoulli, 1.5)

    # Inconsistent plates
    self.assertRaises(ValueError, Bernoulli, 0.5 * np.ones(4), plates=(3,))

    # Explicit plates too small
    self.assertRaises(ValueError, Bernoulli, 0.5 * np.ones(4), plates=(1,))

    pass
def _setup_bernoulli_mixture():
    """
    Setup code for the hinton tests.

    This code is from http://www.bayespy.org/examples/bmm.html
    """
    np.random.seed(1)
    p0 = [0.1, 0.9, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9]
    p1 = [0.1, 0.1, 0.1, 0.1, 0.1, 0.9, 0.9, 0.9, 0.9, 0.9]
    p2 = [0.9, 0.9, 0.9, 0.9, 0.9, 0.1, 0.1, 0.1, 0.1, 0.1]
    p = np.array([p0, p1, p2])

    z = random.categorical([1/3, 1/3, 1/3], size=100)
    x = random.bernoulli(p[z])

    N = 100
    D = 10
    K = 10
    R = Dirichlet(K * [1e-5], name='R')
    Z = Categorical(R, plates=(N, 1), name='Z')
    P = Beta([0.5, 0.5], plates=(D, K), name='P')
    X = Mixture(Z, Bernoulli, P)
    Q = VB(Z, R, X, P)
    P.initialize_from_random()
    X.observe(x)
    Q.update(repeat=1000)

    return (R, P, Z)
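A minimal usage sketch of the helper above, not part of the original file: it assumes `bayespy.plot` is available as `bpplt` (as in the tutorial script further down) and simply draws Hinton diagrams of the returned posterior nodes, which is the pattern the hinton tests exercise.

# Hedged usage sketch; assumes: import bayespy.plot as bpplt
R, P, Z = _setup_bernoulli_mixture()
bpplt.hinton(P)    # posterior Bernoulli probabilities of each mixture component
bpplt.pyplot.show()
bpplt.hinton(R)    # posterior mixture weights
bpplt.pyplot.show()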
def test_init(self):
    """
    Test the creation of beta nodes.
    """

    # Some simple initializations
    p = Beta([1.5, 4.2])

    # Check that plates are correct
    p = Beta([2, 3], plates=(4, 3))
    self.assertEqual(p.plates, (4, 3))
    p = Beta(np.ones((4, 3, 2)))
    self.assertEqual(p.plates, (4, 3))

    # Parent not a vector
    self.assertRaises(ValueError, Beta, 4)

    # Parent vector has wrong shape
    self.assertRaises(ValueError, Beta, [4])
    self.assertRaises(ValueError, Beta, [4, 4, 4])

    # Parent vector has invalid values
    self.assertRaises(ValueError, Beta, [-2, 3])

    # Plates inconsistent
    self.assertRaises(ValueError, Beta, np.ones((4, 2)), plates=(3,))

    # Explicit plates too small
    self.assertRaises(ValueError, Beta, np.ones((4, 2)), plates=(1,))

    pass
def test_moments(self):
    """
    Test the moments of Bernoulli nodes.
    """

    # Simple test
    X = Bernoulli(0.7)
    u = X._message_to_child()
    self.assertEqual(len(u), 1)
    self.assertAllClose(u[0], 0.7)

    # Test plates in p
    p = np.random.rand(3)
    X = Bernoulli(p)
    u = X._message_to_child()
    self.assertAllClose(u[0], p)

    # Test with beta prior
    P = Beta([7, 3])
    logp = P._message_to_child()[0]
    p0 = np.exp(logp[0]) / (np.exp(logp[0]) + np.exp(logp[1]))
    X = Bernoulli(P)
    u = X._message_to_child()
    self.assertAllClose(u[0], p0)

    # Test with broadcasted plates
    P = Beta([7, 3], plates=(10,))
    X = Bernoulli(P)
    u = X._message_to_child()
    self.assertAllClose(u[0] * np.ones(X.get_shape(0)),
                        p0 * np.ones(10))

    pass
def test_moments(self):
    """
    Test the moments of binomial nodes.
    """

    # Simple test
    X = Binomial(1, 0.7)
    u = X._message_to_child()
    self.assertEqual(len(u), 1)
    self.assertAllClose(u[0], 0.7)

    # Test n
    X = Binomial(10, 0.7)
    u = X._message_to_child()
    self.assertAllClose(u[0], 10 * 0.7)

    # Test plates in p
    n = np.random.randint(1, 10)
    p = np.random.rand(3)
    X = Binomial(n, p)
    u = X._message_to_child()
    self.assertAllClose(u[0], p * n)

    # Test plates in n
    n = np.random.randint(1, 10, size=(3,))
    p = np.random.rand()
    X = Binomial(n, p)
    u = X._message_to_child()
    self.assertAllClose(u[0], p * n)

    # Test plates in p and n
    n = np.random.randint(1, 10, size=(4, 1))
    p = np.random.rand(3)
    X = Binomial(n, p)
    u = X._message_to_child()
    self.assertAllClose(u[0], p * n)

    # Test with beta prior
    P = Beta([7, 3])
    logp = P._message_to_child()[0]
    p0 = np.exp(logp[0]) / (np.exp(logp[0]) + np.exp(logp[1]))
    X = Binomial(1, P)
    u = X._message_to_child()
    self.assertAllClose(u[0], p0)

    # Test with broadcasted plates
    P = Beta([7, 3], plates=(10,))
    X = Binomial(5, P)
    u = X._message_to_child()
    self.assertAllClose(u[0] * np.ones(X.get_shape(0)),
                        5 * p0 * np.ones(10))

    pass
def test_moments(self):
    """
    Test the moments of beta nodes.
    """

    p = Beta([2, 3])
    u = p._message_to_child()
    self.assertAllClose(u[0], special.psi([2, 3]) - special.psi(2 + 3))

    pass
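The assertion above relies on the standard identity that, for p ~ Beta(a, b), E[ln p] = psi(a) - psi(a + b) and E[ln(1 - p)] = psi(b) - psi(a + b). A small self-contained Monte Carlo check of that identity (not part of the test suite, just an illustrative sketch):

import numpy as np
from scipy import special

# For p ~ Beta(a, b): E[ln p] = psi(a) - psi(a+b), E[ln(1-p)] = psi(b) - psi(a+b)
a, b = 2.0, 3.0
samples = np.random.beta(a, b, size=200000)
print(np.mean(np.log(samples)), special.psi(a) - special.psi(a + b))
print(np.mean(np.log1p(-samples)), special.psi(b) - special.psi(a + b))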
def _run(self, x, K=25, beta=0.5, alpha=0.00001, hinton_plot=False, end=False):
    '''Only to be used when doing parameter optimization.'''

    self.participant_list = x[0]

    N = len(x[0])           # number of data points (i.e. WCS participants)
    D = np.shape(x[1])[1]   # number of features
    # K = 20                # number of initial clusters

    R = Dirichlet(K * [alpha], name='R')
    Z = Categorical(R, plates=(N, 1), name='Z')
    P = Beta([beta, beta], plates=(D, K), name='P')
    X = Mixture(Z, Bernoulli, P)
    Q = VB(Z, R, X, P)
    P.initialize_from_random()
    X.observe(x[1])
    Q.update(repeat=1000)

    log_likelihood = Q.L[Q.iter - 1]

    if hinton_plot:
        bpplt.hinton(Z)
        bpplt.pyplot.show()
        bpplt.hinton(R)
        bpplt.pyplot.show()

    # Get the weight matrix stored in Z (weights determine which cluster each data point belongs to)
    z = Z._message_to_child()[0]
    z = z * np.ones(Z.plates + (1,))
    z = np.squeeze(z)
    self.z = z

    # Get the weights stored in R (proportional to the size of the clusters)
    r = np.exp(R._message_to_child()[0])
    r = r * np.ones(R.plates + (1,))
    r = np.squeeze(r)
    self.r = r

    # Get the cluster assignment of each data point
    self.c_assign = np.argmax(self.z, axis=1)

    return log_likelihood
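Since _run returns the final VB lower bound, a hedged sketch of the kind of parameter optimization its docstring alludes to is shown below; `model`, the grid values, and `x` (the (participant_list, feature_matrix) pair this method expects) are all hypothetical stand-ins, not part of the original code.

# Hypothetical sweep over K and beta; `model` is an instance of the enclosing
# class and `x` the transformed data, both assumed to exist already.
best = None
for K in (10, 25, 50):
    for beta in (0.1, 0.5, 1.0):
        ll = model._run(x, K=K, beta=beta)
        if best is None or ll > best[0]:
            best = (ll, K, beta)
print("Best lower bound {:.2f} at K={}, beta={}".format(*best))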
def test_init(self):
    """
    Test the creation of binomial nodes.
    """

    # Some simple initializations
    X = Binomial(10, 0.5)
    X = Binomial(10, Beta([2, 3]))

    # Check that plates are correct
    X = Binomial(10, 0.7, plates=(4, 3))
    self.assertEqual(X.plates, (4, 3))
    X = Binomial(10, 0.7 * np.ones((4, 3)))
    self.assertEqual(X.plates, (4, 3))
    n = np.ones((4, 3), dtype=int)  # np.int was removed from NumPy; plain int is equivalent
    X = Binomial(n, 0.7)
    self.assertEqual(X.plates, (4, 3))
    X = Binomial(10, Beta([4, 3], plates=(4, 3)))
    self.assertEqual(X.plates, (4, 3))

    # Invalid probability
    self.assertRaises(ValueError, Binomial, 10, -0.5)
    self.assertRaises(ValueError, Binomial, 10, 1.5)

    # Invalid number of trials
    self.assertRaises(ValueError, Binomial, -1, 0.5)
    self.assertRaises(ValueError, Binomial, 8.5, 0.5)

    # Inconsistent plates
    self.assertRaises(ValueError, Binomial, 10, 0.5 * np.ones(4), plates=(3,))

    # Explicit plates too small
    self.assertRaises(ValueError, Binomial, 10, 0.5 * np.ones(4), plates=(1,))

    pass
def test_random(self):
    """
    Test random sampling of beta nodes.
    """

    p = Beta([1e20, 3e20])
    x = p.random()
    self.assertAllClose(x, 0.25)

    p = Beta([[1e20, 3e20],
              [1e20, 1e20]])
    x = p.random()
    self.assertAllClose(x, [0.25, 0.5])

    p = Beta([1e20, 3e20], plates=(3,))
    x = p.random()
    self.assertAllClose(x, [0.25, 0.25, 0.25])

    pass
import numpy as np

from bayespy.utils import random
from bayespy.nodes import Categorical, Dirichlet, Beta, Mixture, Bernoulli
from bayespy.inference import VB
import bayespy.plot as bpplt

np.random.seed(1)

p0 = [0.1, 0.9, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9]
p1 = [0.1, 0.1, 0.1, 0.1, 0.1, 0.9, 0.9, 0.9, 0.9, 0.9]
p2 = [0.9, 0.9, 0.9, 0.9, 0.9, 0.1, 0.1, 0.1, 0.1, 0.1]
p = np.array([p0, p1, p2])

z = random.categorical([1/3, 1/3, 1/3], size=100)
x = random.bernoulli(p[z])

N = 100
D = 10
K = 10

R = Dirichlet(K * [1e-5], name='R')
Z = Categorical(R, plates=(N, 1), name='Z')
P = Beta([0.5, 0.5], plates=(D, K), name='P')
X = Mixture(Z, Bernoulli, P)

Q = VB(Z, R, X, P)
P.initialize_from_random()
X.observe(x)
Q.update(repeat=1000)

bpplt.hinton(P)
bpplt.pyplot.show()
# Imports added so this fragment runs on its own; it originally assumed that
# numpy, bayespy.utils.random, bayespy.nodes and bayespy.inference were
# already imported.
import numpy as np
from bayespy.utils import random
from bayespy.nodes import Categorical, Dirichlet, Beta, Mixture, Bernoulli
from bayespy.inference import VB

p0 = [0.1, 0.1, 0.1, 0.9, 0.9, 0.9, 0.1, 0.1, 0.1, 0.1]
p1 = [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.9, 0.9, 0.9, 0.9]
p2 = [0.9, 0.9, 0.9, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]
p = np.array([p0, p1, p2])

z = random.categorical([1/3, 1/3, 1/3], size=100)
x = random.bernoulli(p[z])

N = 100
D = 10
K = 3

R = Dirichlet(K * [1e-5], name='R')
Z = Categorical(R, plates=(N, 1), name='Z')
P = Beta([0.5, 0.5], plates=(D, K), name='P')
X = Mixture(Z, Bernoulli, P)

Q = VB(Z, R, X, P)
P.initialize_from_random()
X.observe(x)
Q.update(repeat=1000)

#print(" P:")
#print(P.get_moments())
#print(" R:")
#print(R.get_moments())
print(" Z:")
def run(self, K=25, beta=0.5, alpha=0.00001, foci_thresh=0, num_neigh=4, hinton_plot=False, end=False):
    '''Performs one run of the BBDP according to the specified parameters.'''

    print("Transforming WCS participant data into binary vectors...")
    x = u.transform_data_all(self.langs, norm=False, end=end, foci=True,
                             foci_thresh=foci_thresh, num_neigh=num_neigh)
    print("Finished transforming participant data")
    self.participant_list = x[0]

    N = len(x[0])           # number of data points (i.e. WCS participants)
    D = np.shape(x[1])[1]   # number of features
    # K = 20                # number of initial clusters

    R = Dirichlet(K * [alpha], name='R')
    Z = Categorical(R, plates=(N, 1), name='Z')
    P = Beta([beta, beta], plates=(D, K), name='P')
    X = Mixture(Z, Bernoulli, P)
    Q = VB(Z, R, X, P)
    P.initialize_from_random()
    X.observe(x[1])
    Q.update(repeat=1000)

    if hinton_plot:
        bpplt.hinton(Z)
        bpplt.pyplot.show()
        bpplt.hinton(R)
        bpplt.pyplot.show()

    # Get the weight matrix stored in Z (weights determine which cluster each data point belongs to)
    z = Z._message_to_child()[0]
    z = z * np.ones(Z.plates + (1,))
    z = np.squeeze(z)
    self.z = z

    # Get the weights stored in R (proportional to the size of the clusters)
    r = np.exp(R._message_to_child()[0])
    r = r * np.ones(R.plates + (1,))
    r = np.squeeze(r)
    self.r = r

    # Get the cluster assignment of each data point
    self.c_assign = np.argmax(self.z, axis=1)

    # Write cluster results to a file
    if self.write_to_file:
        if end:
            save_path = "cluster_results_end_K={}_B={}_a={}_t={}_nn={}".format(K, beta, alpha, foci_thresh, num_neigh)
        else:
            save_path = "cluster_results_K={}_B={}_a={}_t={}_nn={}".format(K, beta, alpha, foci_thresh, num_neigh)

        while path.exists(save_path + ".txt"):
            # save_path already exists; bump or append a "(n)" suffix
            try:
                old_file_num = int(save_path[save_path.find('(') + 1:-1])
                new_file_num = old_file_num + 1
                save_path = save_path[0:save_path.find('(')] + '(' + str(new_file_num) + ')'
            except ValueError:
                save_path = save_path + " (1)"
        self.save_path = save_path

        file = open(path.abspath(self.save_path + ".txt"), 'w')

        # Write cluster assignment matrix Z (gives the probability that observation i belongs to cluster j)
        if 'Z' not in self.in_file:
            for i in range(len(self.z)):
                line = "\t".join([str(x) for x in self.z[i]]) + "\n"
                file.write(line)
            file.write('---Z\n')
            self.in_file.append('Z')

        # Write cluster weights matrix R (proportional to the size of the resulting clusters)
        if 'R' not in self.in_file:
            line = "\t".join([str(x) for x in self.r]) + "\n"
            file.write(line)
            file.write('---R\n')
            self.in_file.append('R')

        # Write deterministic cluster assignments with the corresponding participant key
        if 'C' not in self.in_file:
            line1 = "\t".join([str(x) for x in self.participant_list]) + "\n"
            line2 = "\t".join([str(x) for x in self.c_assign]) + "\n"
            file.write(line1)
            file.write(line2)
            file.write('---C\n')
            self.in_file.append('C')

        file.close()

    return self.c_assign
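A hedged sketch of how this method might be invoked; the class name `BBDP`, the variable `wcs_langs`, and the constructor signature are hypothetical stand-ins, since the enclosing class definition is not shown in these snippets. Only run() and its parameters come from the code above.

# Hypothetical driver for the run() method above.
model = BBDP(wcs_langs, write_to_file=True)          # hypothetical class and constructor
assignments = model.run(K=25, beta=0.5, alpha=1e-5, hinton_plot=False)
print(len(assignments), "participants assigned to",
      len(set(assignments)), "clusters")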