Exemplo n.º 1
0
    def test_init(self):
        """
        Check the construction of Bernoulli nodes.

        Covers construction from a constant and from a Beta parent, the
        resulting ``plates`` attribute, and the ``ValueError`` raised for
        out-of-range probabilities and for plates that do not fit the parent.
        """

        # Plain construction must simply not raise
        Bernoulli(0.5)
        Bernoulli(Beta([2, 3]))

        # Plates given explicitly, taken from an array parent, or taken from
        # a node parent must all end up as (4, 3)
        factories = (
            lambda: Bernoulli(0.7, plates=(4, 3)),
            lambda: Bernoulli(0.7 * np.ones((4, 3))),
            lambda: Bernoulli(Beta([4, 3], plates=(4, 3))),
        )
        for make_node in factories:
            self.assertEqual(make_node().plates, (4, 3))

        # Probabilities outside [0, 1] are rejected
        for bad_probability in (-0.5, 1.5):
            with self.assertRaises(ValueError):
                Bernoulli(bad_probability)

        # Explicit plates that are inconsistent with, or smaller than, the
        # shape of the parent array are rejected
        for bad_plates in ((3, ), (1, )):
            with self.assertRaises(ValueError):
                Bernoulli(0.5 * np.ones(4), plates=bad_plates)
Exemplo n.º 2
0
def _setup_bernoulli_mixture():
    """
    Build and fit the Bernoulli mixture model used by the hinton tests.

    The code follows http://www.bayespy.org/examples/bmm.html

    Data are drawn from three Bernoulli prototypes (after seeding NumPy's
    global RNG with 1), a mixture with ``K`` components is constructed and
    the VB engine is asked for up to 1000 update rounds.

    Returns the nodes as the tuple ``(R, P, Z)``.
    """
    np.random.seed(1)

    # Problem sizes: samples, features and mixture components
    N = 100
    D = 10
    K = 10

    # Per-feature success probabilities of the three generating clusters
    low, high = 0.1, 0.9
    prototypes = np.array([
        [low, high] * 5,
        [low] * 5 + [high] * 5,
        [high] * 5 + [low] * 5,
    ])

    # Draw a cluster label per sample, then the binary observations
    labels = random.categorical([1 / 3, 1 / 3, 1 / 3], size=N)
    data = random.bernoulli(prototypes[labels])

    # Model: Dirichlet weights -> categorical assignments -> Bernoulli mixture
    R = Dirichlet(K * [1e-5], name='R')
    Z = Categorical(R, plates=(N, 1), name='Z')
    P = Beta([0.5, 0.5], plates=(D, K), name='P')
    X = Mixture(Z, Bernoulli, P)

    engine = VB(Z, R, X, P)
    P.initialize_from_random()
    X.observe(data)
    engine.update(repeat=1000)

    return (R, P, Z)
Exemplo n.º 3
0
    def test_init(self):
        """
        Check the construction of beta nodes.

        Covers plain construction, the resulting ``plates`` attribute, and
        the ``ValueError`` raised for malformed parameter vectors and for
        plates that do not fit the parameter array.
        """

        # Plain construction must simply not raise
        Beta([1.5, 4.2])

        # Plates given explicitly or taken from the leading axes of the
        # parameter array must both end up as (4, 3)
        factories = (
            lambda: Beta([2, 3], plates=(4, 3)),
            lambda: Beta(np.ones((4, 3, 2))),
        )
        for make_node in factories:
            self.assertEqual(make_node().plates, (4, 3))

        # Rejected parents, in order: a scalar, vectors of the wrong length,
        # and a vector containing a negative value
        for bad_parent in (4, [4], [4, 4, 4], [-2, 3]):
            with self.assertRaises(ValueError):
                Beta(bad_parent)

        # Explicit plates that are inconsistent with, or smaller than, the
        # shape of the parameter array are rejected
        for bad_plates in ((3, ), (1, )):
            with self.assertRaises(ValueError):
                Beta(np.ones((4, 2)), plates=bad_plates)
Exemplo n.º 4
0
    def test_moments(self):
        """
        Check the moments that Bernoulli nodes pass to their children.
        """

        # Constant probability: exactly one moment, equal to p
        moments = Bernoulli(0.7)._message_to_child()
        self.assertEqual(len(moments), 1)
        self.assertAllClose(moments[0], 0.7)

        # A vector of probabilities is reproduced element-wise
        probabilities = np.random.rand(3)
        moments = Bernoulli(probabilities)._message_to_child()
        self.assertAllClose(moments[0], probabilities)

        # Beta prior: the expected value is the normalized exponential of the
        # two entries of the prior's first moment
        prior = Beta([7, 3])
        log_p = prior._message_to_child()[0]
        exp_first = np.exp(log_p[0])
        exp_second = np.exp(log_p[1])
        expected = exp_first / (exp_first + exp_second)
        moments = Bernoulli(prior)._message_to_child()
        self.assertAllClose(moments[0], expected)

        # Plates of the prior: compare after expanding to the node's shape
        node = Bernoulli(Beta([7, 3], plates=(10, )))
        moments = node._message_to_child()
        self.assertAllClose(moments[0] * np.ones(node.get_shape(0)),
                            expected * np.ones(10))
Exemplo n.º 5
0
    def test_moments(self):
        """
        Verify the child messages (moments) produced by Bernoulli nodes.
        """

        def first_moment(node):
            # The first entry of the message sent to children
            return node._message_to_child()[0]

        # Scalar probability: the message holds a single moment equal to p
        message = Bernoulli(0.7)._message_to_child()
        self.assertEqual(len(message), 1)
        self.assertAllClose(message[0], 0.7)

        # Probabilities with plates
        p = np.random.rand(3)
        self.assertAllClose(first_moment(Bernoulli(p)), p)

        # Beta prior: expected value computed from the prior's first moment
        beta_prior = Beta([7, 3])
        logp = first_moment(beta_prior)
        numerator = np.exp(logp[0])
        p0 = numerator / (numerator + np.exp(logp[1]))
        self.assertAllClose(first_moment(Bernoulli(beta_prior)), p0)

        # Prior with plates: expand the moment to the full shape first
        bernoulli = Bernoulli(Beta([7, 3], plates=(10,)))
        full_shape = first_moment(bernoulli) * np.ones(bernoulli.get_shape(0))
        self.assertAllClose(full_shape, p0 * np.ones(10))
Exemplo n.º 6
0
    def test_moments(self):
        """
        Check the moments that binomial nodes pass to their children.

        In every case the first moment is compared against ``n * p``.
        """

        # Single trial: exactly one moment, equal to p
        moments = Binomial(1, 0.7)._message_to_child()
        self.assertEqual(len(moments), 1)
        self.assertAllClose(moments[0], 0.7)

        # Several trials scale the moment by n
        moments = Binomial(10, 0.7)._message_to_child()
        self.assertAllClose(moments[0], 10*0.7)

        # Plates in p only, in n only, and in both (broadcast to (4, 3)).
        # Each entry is (size of n, shape of p); n is drawn before p.
        plate_cases = (
            (None, (3,)),
            ((3,), ()),
            ((4, 1), (3,)),
        )
        for n_size, p_shape in plate_cases:
            n = np.random.randint(1, 10, size=n_size)
            p = np.random.rand(*p_shape)
            moments = Binomial(n, p)._message_to_child()
            self.assertAllClose(moments[0], p*n)

        # Beta prior: the success probability is the normalized exponential
        # of the two entries of the prior's first moment
        prior = Beta([7, 3])
        log_p = prior._message_to_child()[0]
        exp_first = np.exp(log_p[0])
        exp_second = np.exp(log_p[1])
        expected_p = exp_first / (exp_first + exp_second)
        moments = Binomial(1, prior)._message_to_child()
        self.assertAllClose(moments[0], expected_p)

        # Plates of the prior: compare after expanding to the node's shape
        node = Binomial(5, Beta([7, 3], plates=(10,)))
        moments = node._message_to_child()
        self.assertAllClose(moments[0] * np.ones(node.get_shape(0)),
                            5*expected_p*np.ones(10))
Exemplo n.º 7
0
    def test_moments(self):
        """
        Check the moments of beta nodes.

        The first moment of ``Beta([a, b])`` is compared against
        ``psi([a, b]) - psi(a + b)``.
        """

        parameters = [2, 3]
        moments = Beta(parameters)._message_to_child()
        expected = special.psi(parameters) - special.psi(sum(parameters))
        self.assertAllClose(moments[0], expected)
Exemplo n.º 8
0
    def test_moments(self):
        """
        Verify the child message (moments) produced by a beta node.

        For parameters ``(a, b) = (2, 3)`` the first moment must equal the
        digamma function of each parameter minus the digamma function of
        their sum.
        """

        a, b = 2, 3
        first_moment = Beta([a, b])._message_to_child()[0]
        self.assertAllClose(first_moment,
                            special.psi([a, b]) - special.psi(a + b))
Exemplo n.º 9
0
    def test_moments(self):
        """
        Verify the child messages (moments) produced by binomial nodes.

        The first moment is expected to equal ``n * p`` in every case.
        """

        def first_moment(node):
            # The first entry of the message sent to children
            return node._message_to_child()[0]

        # One trial: the message holds a single moment equal to p
        message = Binomial(1, 0.7)._message_to_child()
        self.assertEqual(len(message), 1)
        self.assertAllClose(message[0], 0.7)

        # Ten trials
        self.assertAllClose(first_moment(Binomial(10, 0.7)), 10 * 0.7)

        # Plates in p
        trials = np.random.randint(1, 10)
        probs = np.random.rand(3)
        self.assertAllClose(first_moment(Binomial(trials, probs)),
                            probs * trials)

        # Plates in n
        trials = np.random.randint(1, 10, size=(3, ))
        probs = np.random.rand()
        self.assertAllClose(first_moment(Binomial(trials, probs)),
                            probs * trials)

        # Plates in both p and n, broadcast against each other
        trials = np.random.randint(1, 10, size=(4, 1))
        probs = np.random.rand(3)
        self.assertAllClose(first_moment(Binomial(trials, probs)),
                            probs * trials)

        # Beta prior: success probability computed from the prior's moment
        beta_prior = Beta([7, 3])
        logp = first_moment(beta_prior)
        numerator = np.exp(logp[0])
        p0 = numerator / (numerator + np.exp(logp[1]))
        self.assertAllClose(first_moment(Binomial(1, beta_prior)), p0)

        # Prior with plates: expand the moment to the full shape first
        binomial = Binomial(5, Beta([7, 3], plates=(10, )))
        full_shape = first_moment(binomial) * np.ones(binomial.get_shape(0))
        self.assertAllClose(full_shape, 5 * p0 * np.ones(10))
Exemplo n.º 10
0
    def _run(self, x, K=25, beta=0.5, alpha=0.00001, hinton_plot=False, end=False):
        '''Fit the Bernoulli mixture to already-transformed data.

        Only to be used when doing parameter optimization.

        Parameters:
            x: indexable pair; x[0] is the participant list (stored as
                self.participant_list) and x[1] is the observed data, whose
                second dimension is the number of features.
            K: number of initial clusters.
            beta: value used for both parameters of the Beta prior on P.
            alpha: value used for each of the K Dirichlet parameters of R.
            hinton_plot: when true, show hinton diagrams of Z and R.
            end: accepted but not used by this method.

        Side effects: sets self.participant_list, self.z, self.r and
        self.c_assign.

        Returns the entry of Q.L for the last completed iteration.
        '''

        participants, observations = x[0], x[1]
        self.participant_list = participants

        num_points = len(participants)              #number of data points (i.e. WCS participants)
        num_features = np.shape(observations)[1]    #number of features

        #Model: cluster weights R -> assignments Z -> Bernoulli mixture X over P
        R = Dirichlet(K*[alpha], name='R')
        Z = Categorical(R, plates=(num_points, 1), name='Z')
        P = Beta([beta, beta], plates=(num_features, K), name='P')
        X = Mixture(Z, Bernoulli, P)

        engine = VB(Z, R, X, P)
        P.initialize_from_random()
        X.observe(observations)
        engine.update(repeat=1000)

        log_likelihood = engine.L[engine.iter-1]

        if hinton_plot:
            for node in (Z, R):
                bpplt.hinton(node)
                bpplt.pyplot.show()

        #Weight matrix stored in Z (weights determine which cluster a data point belongs to),
        #expanded over the plates of Z and stripped of singleton axes
        assignment_weights = Z._message_to_child()[0] * np.ones(Z.plates+(1,))
        self.z = np.squeeze(assignment_weights)

        #Weights stored in R (proportional to the size of the clusters)
        cluster_weights = np.exp(R._message_to_child()[0]) * np.ones(R.plates+(1,))
        self.r = np.squeeze(cluster_weights)

        #Cluster assignment of each data point: the column with the largest weight
        self.c_assign = np.argmax(self.z, axis=1)

        return log_likelihood
Exemplo n.º 11
0
    def test_init(self):
        """
        Test the creation of binomial nodes.

        Covers construction from constants and from a Beta parent, the
        resulting ``plates`` attribute when plates come from the keyword,
        from ``p``, from ``n`` or from a parent node, and the ``ValueError``
        raised for invalid probabilities, invalid trial counts and plates
        that do not fit the parent array.
        """

        # Some simple initializations (must simply not raise)
        Binomial(10, 0.5)
        Binomial(10, Beta([2, 3]))

        # Check that plates are correct
        X = Binomial(10, 0.7, plates=(4, 3))
        self.assertEqual(X.plates, (4, 3))
        X = Binomial(10, 0.7 * np.ones((4, 3)))
        self.assertEqual(X.plates, (4, 3))
        # Use the builtin ``int``: the ``np.int`` alias was deprecated in
        # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
        n = np.ones((4, 3), dtype=int)
        X = Binomial(n, 0.7)
        self.assertEqual(X.plates, (4, 3))
        X = Binomial(10, Beta([4, 3], plates=(4, 3)))
        self.assertEqual(X.plates, (4, 3))

        # Invalid probability
        self.assertRaises(ValueError, Binomial, 10, -0.5)
        self.assertRaises(ValueError, Binomial, 10, 1.5)

        # Invalid number of trials (negative or non-integer)
        self.assertRaises(ValueError, Binomial, -1, 0.5)
        self.assertRaises(ValueError, Binomial, 8.5, 0.5)

        # Inconsistent plates
        self.assertRaises(ValueError,
                          Binomial,
                          10,
                          0.5 * np.ones(4),
                          plates=(3, ))

        # Explicit plates too small
        self.assertRaises(ValueError,
                          Binomial,
                          10,
                          0.5 * np.ones(4),
                          plates=(1, ))
Exemplo n.º 12
0
    def test_random(self):
        """
        Check random sampling of beta nodes.

        The parameters are of order 1e20 (presumably so that sampling noise
        is negligible) and each draw is compared against ``a / (a + b)``.
        """

        # Single node
        sample = Beta([1e20, 3e20]).random()
        self.assertAllClose(sample, 0.25)

        # Plates taken from the parameter array
        sample = Beta([[1e20, 3e20], [1e20, 1e20]]).random()
        self.assertAllClose(sample, [0.25, 0.5])

        # Plates given explicitly
        sample = Beta([1e20, 3e20], plates=(3, )).random()
        self.assertAllClose(sample, [0.25, 0.25, 0.25])
Exemplo n.º 13
0
def _setup_bernoulli_mixture():
    """
    Create and fit the Bernoulli mixture model for the hinton tests.

    Taken from http://www.bayespy.org/examples/bmm.html

    NumPy's global RNG is seeded with 1, synthetic binary data are drawn
    from three clusters, and a mixture with ``K`` components is fitted by
    requesting up to 1000 VB update rounds.

    Returns the nodes ``(R, P, Z)``.
    """
    np.random.seed(1)

    # Number of samples, number of features, number of mixture components
    N, D, K = 100, 10, 10

    # Rows are the generating clusters, columns the per-feature probabilities
    cluster_probs = np.array([
        [0.1, 0.9, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9],
        [0.1, 0.1, 0.1, 0.1, 0.1, 0.9, 0.9, 0.9, 0.9, 0.9],
        [0.9, 0.9, 0.9, 0.9, 0.9, 0.1, 0.1, 0.1, 0.1, 0.1],
    ])

    # Sample the cluster of every data point, then the observations
    true_clusters = random.categorical([1/3, 1/3, 1/3], size=N)
    observations = random.bernoulli(cluster_probs[true_clusters])

    R = Dirichlet(K*[1e-5], name='R')
    Z = Categorical(R, plates=(N, 1), name='Z')
    P = Beta([0.5, 0.5], plates=(D, K), name='P')
    X = Mixture(Z, Bernoulli, P)

    inference = VB(Z, R, X, P)
    P.initialize_from_random()
    X.observe(observations)
    inference.update(repeat=1000)

    return (R, P, Z)
Exemplo n.º 14
0
    def test_random(self):
        """
        Verify random draws from beta nodes.

        Each case pairs a node factory with the value its draw is compared
        against, which equals ``a / (a + b)`` for the given parameters.
        """

        cases = (
            # Single node
            (lambda: Beta([1e20, 3e20]), 0.25),
            # Plates taken from the parameter array
            (lambda: Beta([[1e20, 3e20], [1e20, 1e20]]), [0.25, 0.5]),
            # Plates given explicitly
            (lambda: Beta([1e20, 3e20], plates=(3,)), [0.25, 0.25, 0.25]),
        )
        for make_node, expected in cases:
            self.assertAllClose(make_node().random(), expected)
Exemplo n.º 15
0
# Bernoulli mixture model demo: draw synthetic binary data from three
# clusters, fit a mixture with K components by variational Bayes, and show
# a hinton diagram of the fitted P node.

# Both bindings of numpy are kept so that the names ``numpy`` and ``np``
# stay available to anything following this script.
import numpy
import numpy as np

import bayespy.plot as bpplt
from bayespy.inference import VB
from bayespy.nodes import Bernoulli, Beta, Categorical, Dirichlet, Mixture
from bayespy.utils import random

numpy.random.seed(1)

# Number of samples, number of features, number of mixture components
N = 100
D = 10
K = 10

# Per-feature success probabilities of the three generating clusters
p0 = [0.1, 0.9, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9]
p1 = [0.1, 0.1, 0.1, 0.1, 0.1, 0.9, 0.9, 0.9, 0.9, 0.9]
p2 = [0.9, 0.9, 0.9, 0.9, 0.9, 0.1, 0.1, 0.1, 0.1, 0.1]
p = np.array([p0, p1, p2])

# Sample a cluster label per data point, then the binary observations
z = random.categorical([1 / 3, 1 / 3, 1 / 3], size=N)
x = random.bernoulli(p[z])

# Model: Dirichlet weights -> categorical assignments -> Bernoulli mixture
R = Dirichlet(K * [1e-5], name='R')
Z = Categorical(R, plates=(N, 1), name='Z')
P = Beta([0.5, 0.5], plates=(D, K), name='P')
X = Mixture(Z, Bernoulli, P)

# Inference
Q = VB(Z, R, X, P)
P.initialize_from_random()
X.observe(x)
Q.update(repeat=1000)

# Visualize the fitted P node
bpplt.hinton(P)
bpplt.pyplot.show()
Exemplo n.º 16
0
# Bernoulli mixture with three components fitted to synthetic binary data.

# Number of samples, number of features, number of mixture components
N = 100
D = 10
K = 3

# Per-feature success probabilities of the three generating clusters
p0 = [0.1, 0.1, 0.1, 0.9, 0.9, 0.9, 0.1, 0.1, 0.1, 0.1]
p1 = [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.9, 0.9, 0.9, 0.9]
p2 = [0.9, 0.9, 0.9, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]
p = np.array([p0, p1, p2])

# Sample a cluster label per data point, then the binary observations
z = random.categorical([1 / 3, 1 / 3, 1 / 3], size=N)
x = random.bernoulli(p[z])

# Model: Dirichlet weights -> categorical assignments -> Bernoulli mixture
R = Dirichlet(K * [1e-5], name='R')
Z = Categorical(R, plates=(N, 1), name='Z')
P = Beta([0.5, 0.5], plates=(D, K), name='P')
X = Mixture(Z, Bernoulli, P)

# Inference
Q = VB(Z, R, X, P)
P.initialize_from_random()
X.observe(x)
Q.update(repeat=1000)

# Printing of P.get_moments() and R.get_moments() is intentionally disabled;
# only the header for Z is emitted here.
print(" Z:")
Exemplo n.º 17
0
    def run(self, K=25, beta=0.5, alpha=0.00001, foci_thresh=0, num_neigh=4, hinton_plot=False, end=False):
        '''Performs one run of the BBDP according to the specified parameters.

        Parameters:
            K: number of initial clusters.
            beta: value used for both parameters of the Beta prior on P.
            alpha: value used for each of the K Dirichlet parameters of R.
            foci_thresh, num_neigh: forwarded to u.transform_data_all.
            hinton_plot: when true, show hinton diagrams of Z and R.
            end: forwarded to u.transform_data_all; also selects the
                "cluster_results_end_" file name prefix.

        Side effects: sets self.participant_list, self.z, self.r and
        self.c_assign. When self.write_to_file is true, also sets
        self.save_path, writes the sections not yet listed in self.in_file
        to a new text file, and appends their tags to self.in_file.

        Returns self.c_assign, the index of the highest-weight cluster for
        each data point.
        '''

        print("Transforming WCS participant data into binary vectors...")
        x = u.transform_data_all(self.langs, norm=False, end=end, foci=True, foci_thresh=foci_thresh, num_neigh=num_neigh)
        print("Finished transforming participant data")
        self.participant_list = x[0]

        N = len(x[0])            #number of data points (i.e. WCS participants)
        D = np.shape(x[1])[1]    #number of features

        R = Dirichlet(K*[alpha],
                      name='R')
        Z = Categorical(R,
                        plates=(N,1),
                        name='Z')

        P = Beta([beta, beta],
                 plates=(D,K),
                 name='P')

        X = Mixture(Z, Bernoulli, P)

        Q = VB(Z, R, X, P)
        P.initialize_from_random()
        X.observe(x[1])
        Q.update(repeat=1000)

        if hinton_plot:
            bpplt.hinton(Z)
            bpplt.pyplot.show()

            bpplt.hinton(R)
            bpplt.pyplot.show()

        #Get the weight matrix stored in Z (weights determine which cluster data point belongs to)
        z = Z._message_to_child()[0]
        z = z * np.ones(Z.plates+(1,))
        z = np.squeeze(z)
        self.z = z

        #Get the weights stored in R (proportional to the size of the clusters)
        r = np.exp(R._message_to_child()[0])
        r = r * np.ones(R.plates+(1,))
        r = np.squeeze(r)
        self.r = r

        #Get the cluster assignment of each data point
        #NOTE(review): argmax over axis 1 assumes self.z stays 2-D after the squeeze - confirm for N == 1 or K == 1
        self.c_assign = np.argmax(self.z, axis=1)

        #Write cluster results to a file
        if self.write_to_file:
            if end:
                save_path = "cluster_results_end_K={}_B={}_a={}_t={}_nn={}".format(K, beta, alpha, foci_thresh, num_neigh)
            else:
                save_path = "cluster_results_K={}_B={}_a={}_t={}_nn={}".format(K, beta, alpha, foci_thresh, num_neigh)

            #Never overwrite an earlier result: append " (1)" on the first clash,
            #then keep incrementing the number inside the parentheses
            while path.exists(save_path+".txt"):
                paren = save_path.find('(')
                try:
                    #Without a "(n)" suffix, find() returns -1 and the slice is not a number
                    old_file_num = int(save_path[paren+1:-1])
                except ValueError:
                    save_path = save_path + " (1)"
                else:
                    save_path = save_path[0:paren] + '(' + str(old_file_num + 1) + ')'

            self.save_path = save_path

            #The with-block closes the file even if one of the writes raises
            with open(path.abspath(self.save_path+".txt"), 'w') as out_file:

                #Write cluster assignment matrix Z (gives the probability that observation i belongs to cluster j)
                if 'Z' not in self.in_file:
                    for row in self.z:
                        out_file.write("\t".join([str(value) for value in row]) + "\n")
                    out_file.write('---Z\n')
                    self.in_file.append('Z')

                #Write cluster weights matrix R (proportional to the size of the resulting clusters)
                if 'R' not in self.in_file:
                    out_file.write("\t".join([str(value) for value in self.r]) + "\n")
                    out_file.write('---R\n')
                    self.in_file.append('R')

                #Write deterministic cluster assignments with the corresponding participant key
                if 'C' not in self.in_file:
                    out_file.write("\t".join([str(value) for value in self.participant_list]) + "\n")
                    out_file.write("\t".join([str(value) for value in self.c_assign]) + "\n")
                    out_file.write('---C\n')
                    self.in_file.append('C')

        return self.c_assign