Exemplo n.º 1
0
def _setup_linear_regression():
    """
    Setup code for the pdf and contour tests.

    This code is from http://www.bayespy.org/examples/regression.html
    """
    np.random.seed(1)
    k = 2  # slope
    c = 5  # bias
    s = 2  # noise standard deviation

    x = np.arange(10)
    y = k * x + c + s * np.random.randn(10)
    X = np.vstack([x, np.ones(len(x))]).T

    B = GaussianARD(0, 1e-6, shape=(2, ))

    F = SumMultiply('i,i', B, X)

    tau = Gamma(1e-3, 1e-3)
    Y = GaussianARD(F, tau)
    Y.observe(y)

    Q = VB(Y, B, tau)
    Q.update(repeat=1000)
    xh = np.linspace(-5, 15, 100)
    Xh = np.vstack([xh, np.ones(len(xh))]).T
    Fh = SumMultiply('i,i', B, Xh)

    return locals()
Exemplo n.º 2
0
def test_gaussian_mixture_plot():
    """
    Test the gaussian_mixture plotting function.

    The code is from http://www.bayespy.org/examples/gmm.html
    """
    np.random.seed(1)
    y0 = np.random.multivariate_normal([0, 0], [[1, 0], [0, 0.02]], size=50)
    y1 = np.random.multivariate_normal([0, 0], [[0.02, 0], [0, 1]], size=50)
    y2 = np.random.multivariate_normal([2, 2], [[1, -0.9], [-0.9, 1]], size=50)
    y3 = np.random.multivariate_normal([-2, -2], [[0.1, 0], [0, 0.1]], size=50)
    y = np.vstack([y0, y1, y2, y3])

    bpplt.pyplot.plot(y[:, 0], y[:, 1], 'rx')

    N = 200
    D = 2
    K = 10

    alpha = Dirichlet(1e-5 * np.ones(K), name='alpha')
    Z = Categorical(alpha, plates=(N, ), name='z')

    mu = Gaussian(np.zeros(D), 1e-5 * np.identity(D), plates=(K, ), name='mu')
    Lambda = Wishart(D, 1e-5 * np.identity(D), plates=(K, ), name='Lambda')

    Y = Mixture(Z, Gaussian, mu, Lambda, name='Y')
    Z.initialize_from_random()

    Q = VB(Y, mu, Lambda, Z, alpha)
    Y.observe(y)
    Q.update(repeat=1000)

    bpplt.gaussian_mixture_2d(Y, scale=2)
Exemplo n.º 3
0
def _setup_linear_regression():
    """
    Setup code for the pdf and contour tests.

    This code is from http://www.bayespy.org/examples/regression.html
    """
    np.random.seed(1)
    k = 2 # slope
    c = 5 # bias
    s = 2 # noise standard deviation

    x = np.arange(10)
    y = k*x + c + s*np.random.randn(10)
    X = np.vstack([x, np.ones(len(x))]).T

    B = GaussianARD(0, 1e-6, shape=(2,))

    F = SumMultiply('i,i', B, X)

    tau = Gamma(1e-3, 1e-3)
    Y = GaussianARD(F, tau)
    Y.observe(y)

    Q = VB(Y, B, tau)
    Q.update(repeat=1000)
    xh = np.linspace(-5, 15, 100)
    Xh = np.vstack([xh, np.ones(len(xh))]).T
    Fh = SumMultiply('i,i', B, Xh)

    return locals()
Exemplo n.º 4
0
def _setup_bernoulli_mixture():
    """
    Setup code for the hinton tests.

    This code is from http://www.bayespy.org/examples/bmm.html
    """
    np.random.seed(1)
    p0 = [0.1, 0.9, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9]
    p1 = [0.1, 0.1, 0.1, 0.1, 0.1, 0.9, 0.9, 0.9, 0.9, 0.9]
    p2 = [0.9, 0.9, 0.9, 0.9, 0.9, 0.1, 0.1, 0.1, 0.1, 0.1]
    p = np.array([p0, p1, p2])

    z = random.categorical([1 / 3, 1 / 3, 1 / 3], size=100)
    x = random.bernoulli(p[z])
    N = 100
    D = 10
    K = 10

    R = Dirichlet(K * [1e-5], name='R')
    Z = Categorical(R, plates=(N, 1), name='Z')

    P = Beta([0.5, 0.5], plates=(D, K), name='P')

    X = Mixture(Z, Bernoulli, P)

    Q = VB(Z, R, X, P)
    P.initialize_from_random()
    X.observe(x)
    Q.update(repeat=1000)

    return (R, P, Z)
Exemplo n.º 5
0
def run():
    k = 2
    c = 5
    s = 2
    x = np.arange(10)
    y = k * x + c + s * np.random.randn(10)

    X=np.vstack([x,np.ones(len(x))]).T
    B = GaussianARD(0, 1e-6, shape=(2,))
    F = SumMultiply('i,i', B, X)
    tau = Gamma(1e-3, 1e-3)

    Y = GaussianARD(F, tau)

    Y.observe(y)
    from bayespy.inference import VB
    Q = VB(Y, B, tau)
    Q.update(repeat=1000)
    xh = np.linspace(-5, 15, 100)
    Xh = np.vstack([xh, np.ones(len(xh))]).T
    Fh = SumMultiply('i,i', B, Xh)


    bpplt.pyplot.figure()
    bpplt.plot(Fh, x=xh, scale=2)
    bpplt.plot(y, x=x, color='r', marker='x', linestyle='None')
    bpplt.plot(k*xh+c, x=xh, color='r');
    bpplt.pyplot.show()
Exemplo n.º 6
0
    def fit(self, X, y):
        self.weights = GaussianARD(0, 1e-6, shape=(X.shape[-1], ))
        y_mean = SumMultiply('i,i', self.weights, X)
        precision = Gamma(1, .1)
        y_obs = GaussianARD(y_mean, precision)
        y_obs.observe(y)

        Q = VB(y_obs, self.weights, precision)
        Q.update(repeat=self.n_iter, tol=self.tolerance, verbose=False)
Exemplo n.º 7
0
    def _run(self, x, K=25, beta=0.5, alpha=0.00001, hinton_plot=False, end=False):
        '''Only to be used when doing parameter optimization.'''

        self.participant_list = x[0]
        
        N = len(x[0])            #number of data points (i.e. WCS participants)
        D = np.shape(x[1])[1]    #number of features
        #K = 20            #number of initial clusters
        
        R = Dirichlet(K*[alpha],
                      name='R')
        Z = Categorical(R,
                        plates=(N,1),
                        name='Z')
        
        P = Beta([beta, beta],
                 plates=(D,K),
                 name='P')
        
        X = Mixture(Z, Bernoulli, P)
        
        Q = VB(Z, R, X, P)
        P.initialize_from_random()
        X.observe(x[1])
        Q.update(repeat=1000)

        log_likelihood = Q.L[Q.iter-1]

        if hinton_plot:
            bpplt.hinton(Z)
            bpplt.pyplot.show()
            
            bpplt.hinton(R)
            bpplt.pyplot.show()

        #Get the weight matrix stored in Z (weights determine which cluster data point belongs to)
        z = Z._message_to_child()[0]
        z = z * np.ones(Z.plates+(1,))
        z = np.squeeze(z)
        self.z = z

        #Get the weights stored in R (proportional to the size of the clusters)
        r = np.exp(R._message_to_child()[0])
        r = r * np.ones(R.plates+(1,))
        r = np.squeeze(r)
        self.r = r

        #Get the cluster assignment of each data point
        self.c_assign = np.argmax(self.z, axis=1)

        return log_likelihood
Exemplo n.º 8
0
def run():

    a = nodes.GammaShape(name='a')
    b = nodes.Gamma(1e-5, 1e-5, name='b')

    tau = nodes.Gamma(a, b, plates=(1000, ), name='tau')
    tau.observe(nodes.Gamma(10, 20, plates=(1000, )).random())

    Q = VB(tau, a, b)

    Q.update(repeat=1000)

    print("True gamma parameters:", 10.0, 20.0)
    print("Estimated parameters from 1000 samples:", a.u[0], b.u[0])
Exemplo n.º 9
0
    def fit(self, X, y):
        self._init_weights()
        # self.cost,
        # self.myopic_voc(action, state),
        # self.vpi_action(action, state),
        # self.vpi(state),
        # self.expected_term_reward(state)

        self.tau = Gamma(self.prior_a, self.prior_b)
        F = SumMultiply('i,i', self.weights, X)
        y_obs = GaussianARD(F, self.tau)
        y_obs.observe(y)

        Q = VB(y_obs, self.weights)
        Q.update(repeat=10, tol=1e-4, verbose=False)
Exemplo n.º 10
0
def test_gaussian_mixture_plot():
    """
    Test the gaussian_mixture plotting function.

    The code is from http://www.bayespy.org/examples/gmm.html
    """
    np.random.seed(1)
    y0 = np.random.multivariate_normal([0, 0], [[1, 0], [0, 0.02]], size=50)
    y1 = np.random.multivariate_normal([0, 0], [[0.02, 0], [0, 1]], size=50)
    y2 = np.random.multivariate_normal([2, 2], [[1, -0.9], [-0.9, 1]], size=50)
    y3 = np.random.multivariate_normal([-2, -2], [[0.1, 0], [0, 0.1]], size=50)
    y = np.vstack([y0, y1, y2, y3])

    bpplt.pyplot.plot(y[:,0], y[:,1], 'rx')

    N = 200
    D = 2
    K = 10

    alpha = Dirichlet(1e-5*np.ones(K),
                      name='alpha')
    Z = Categorical(alpha,
                    plates=(N,),
                    name='z')

    mu = Gaussian(np.zeros(D), 1e-5*np.identity(D),
                  plates=(K,),
                  name='mu')
    Lambda = Wishart(D, 1e-5*np.identity(D),
                     plates=(K,),
                     name='Lambda')

    Y = Mixture(Z, Gaussian, mu, Lambda,
                name='Y')
    Z.initialize_from_random()

    Q = VB(Y, mu, Lambda, Z, alpha)
    Y.observe(y)
    Q.update(repeat=1000)

    bpplt.gaussian_mixture_2d(Y, scale=2)

    # Have to define these limits because on some particular environments these
    # may otherwise differ and thus result in an image comparsion failure
    bpplt.pyplot.xlim([-3, 6])
    bpplt.pyplot.ylim([-3, 5])
Exemplo n.º 11
0
def _setup_bernoulli_mixture():
    """
    Setup code for the hinton tests.

    This code is from http://www.bayespy.org/examples/bmm.html
    """
    np.random.seed(1)
    p0 = [0.1, 0.9, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9]
    p1 = [0.1, 0.1, 0.1, 0.1, 0.1, 0.9, 0.9, 0.9, 0.9, 0.9]
    p2 = [0.9, 0.9, 0.9, 0.9, 0.9, 0.1, 0.1, 0.1, 0.1, 0.1]
    p = np.array([p0, p1, p2])

    z = random.categorical([1/3, 1/3, 1/3], size=100)
    x = random.bernoulli(p[z])
    N = 100
    D = 10
    K = 10

    R = Dirichlet(K*[1e-5],
                  name='R')
    Z = Categorical(R,
                    plates=(N,1),
                    name='Z')

    P = Beta([0.5, 0.5],
             plates=(D,K),
             name='P')

    X = Mixture(Z, Bernoulli, P)

    Q = VB(Z, R, X, P)
    P.initialize_from_random()
    X.observe(x)
    Q.update(repeat=1000)

    return (R,P,Z)
Exemplo n.º 12
0
print("++++++++++++++++++++++++++")

N = 10000
y = np.random.choice(3, size=N, p=[0.3, 0.6, 0.1])


a0 = [0.5, 0.1, 0.1]

mu0 = -1
lambda0 = 5



#MU = bayes.Gaussian(mu=mu0, Lambda=0.9)
#X = bayes.Gaussian(mu=0.2, Lambda=0.4, plates=(N, ))
P = bayes.Dirichlet(a0)
X = bayes.Categorical(P, plates=(N, ))

#P.initialize_from_random()

Q = VB(X, P)

X.observe(y)
Q.update(repeat=1000)


print(X.pdf([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))
print(P.random())
#print(np.sum(y==2))
Exemplo n.º 13
0
mu = GaussianARD(0, 1e-6)
tau = Gamma(1e-6, 1e-6)
y = GaussianARD(mu, tau, plates=(10,))


# In[3]:

y.observe(data)


# Next we want to estimate the posterior distribution.  In principle, we could use different inference engines (e.g., MCMC or EP) but currently only variational Bayesian (VB) engine is implemented.  The engine is initialized by giving all the nodes of the model:

# In[4]:

from bayespy.inference import VB
Q = VB(mu, tau, y)


# The inference algorithm can be run as long as wanted (max. 20 iterations in this case):

# In[5]:

Q.update(repeat=20)


# Now the algorithm converged after four iterations, before the requested 20 iterations. 
# 
# VB approximates the true posterior $p(\mu,\tau|\mathbf{y})$ with a distribution which factorizes with respect to the nodes: $q(\mu)q(\tau)$. The resulting approximate posterior distributions $q(\mu)$ and $q(\tau)$ can be examined, for instance, by plotting the marginal probability density functions:

# In[6]:
Exemplo n.º 14
0
plt.show()

# Define nodes of the factor graph:
#------------------------------------
alpha = Dirichlet(1e-5 * np.ones(K), name='alpha')
Z = Categorical(alpha, plates=(N, ), name='z')

mu = Gaussian(np.zeros(D), 1e-5 * np.identity(D), plates=(K, ), name='mu')

Lambda = Wishart(D, 1e-5 * np.identity(D), plates=(K, ), name='Lambda')

Y = Mixture(Z, Gaussian, mu, Lambda, name='Y')

Z.initialize_from_random()

Q = VB(Y, mu, Lambda, Z, alpha)

Y.observe(y)

Q.update(repeat=1000)

bpplt.gaussian_mixture_2d(Y, alpha=alpha, scale=2)

Q.compute_lowerbound()
Y.random()

from sklearn.mixture import BayesianGaussianMixture

# DD
fin_gmm = BayesianGaussianMixture(
    weight_concentration_prior_type="dirichlet_distribution",
Exemplo n.º 15
0
    def predict(self):
        #print(self.network.graph)
        self.predictions = {
            term: numpy.empty((self.lenValidation, 2), dtype=float)
            for term in self.ontology.ontology
        }

        classifiers = {
            term: lambda:
            loadClf(self.ontology[term]['name'], self.fold, self.clfName
                    )  #self.ontology[term]['clf'][self.fold][self.clfName]
            for term in self.ontology.ontology
        }
        #for term, (clf,X,y,g) in classifiers.items():
        #    print(term, ":", repr(self.clfName), repr(clf.name), self.fold, clf.fold)

        observations = {
            term: clf.decision_function(clf.X_validation) if
            term != self.ontology.root else numpy.array([-1.] *
                                                        len(clf.X_validation))
            for term, clff  #(clf, X, y, g)
            in classifiers.items() for clf in (clff(), )
        }
        #print("observations:")
        #print(observations)
        gt = {term: clf().y_validation for term, clf in classifiers.items()}
        #print("gt:")
        #print(gt)

        for i in range(self.lenValidation):
            observation = {
                term: pred[i]
                for term, pred in observations.items()
            }
            #print("Observation for gene %d" % i)
            #print(observation)
            #print(self.network.forward_backward(observation))
            hidden, observed, extra = self.getCopy()
            #print(hidden)
            #print(observed)
            #print(observation)
            #for term, node in hidden.items():
            #    print(i, term, node.get_moments()[0])
            for k, v in observation.items():
                observed[k].observe((v, ))
            #    print("%s observes %s" % (k, v))
            allv = (*hidden.values(), *observed.values(), *extra)
            #print([(x, [p for p in x.parents if isinstance(p, Stochastic)]) for x in [*hidden.values(), *observed.values()]])
            Q = VB(*allv)
            Q.update(*allv, tol=1e-7, repeat=1000, verbose=True)
            #print("---")

            for term, node in hidden.items():
                #print(i, term, node.get_moments()[0])
                self.predictions[term][i, :] = node.get_moments()[0]

        #print("predictions:")
        #print(self.predictions)
        for term in observations:
            compare = numpy.empty((len(gt[term]), 4), dtype=float)
            compare[:, 0] = gt[term]
            compare[:, 1] = observations[term]
            compare[:, 2] = self.predictions[term][:, 1]
            compare[:, 3] = numpy.round(self.predictions[term][:, 1])
            print(term, self.ontology[term]['name'])
            print(compare)
Exemplo n.º 16
0
    def create_model(self, model_type=None):

        #Create location model for each of the timezone
        location_model = []

        if ('all' == model_type):
            p_conc = nodes.DirichletConcentration(self.N_LOCATIONS)
            p_conc.initialize_from_value(np.ones(self.N_LOCATIONS))
            p_theta = nodes.Dirichlet(p_conc,
                                      plates = (self.N_TIMEZONES,),
                                      name = 'p_theta')
            for time in np.arange(self.N_TIMEZONES):
                model = nodes.Categorical(p_theta[time],
                                        plates=(self.N_OBSERVATIONS[time],1),
                                        name=str(time))

                #observe data
                timezone_observations = self._observed_locations[self._observed_locations['time'] == time]

                if not timezone_observations.empty:
                    data = timezone_observations['location'].as_matrix().reshape((self.N_OBSERVATIONS[time],1))
                    model.observe(data)

                location_model.append(model)


            Q = VB(location_model[0], location_model[1], location_model[2], location_model[3],
                    location_model[4], location_model[5], location_model[6], location_model[7],
                    location_model[8], location_model[9], location_model[10], location_model[11],
                    location_model[12], location_model[13], location_model[14], location_model[15],
                    location_model[16], location_model[17], location_model[18], location_model[19],
                    location_model[20], location_model[21], location_model[22], location_model[23],
                    p_theta, p_conc)

        elif ('cross' == model_type):
            raise 'Not Implemented'
            pass
        elif ('2fold' == model_type):
            p_conc_morning = nodes.DirichletConcentration(self.N_LOCATIONS)
            p_conc_night = nodes.DirichletConcentration(self.N_LOCATIONS)

            p_conc_morning.initialize_from_value(np.ones(self.N_LOCATIONS))
            p_conc_night.initialize_from_value(np.ones(self.N_LOCATIONS))

            morning_time = np.arange(6,19)
            night_time = np.append(np.arange(0,6) , np.arange(19,24))

            p_theta_morning = nodes.Dirichlet(p_conc_morning,
                                      plates = (morning_time.size,),
                                      name = 'p_theta_morning')
            p_theta_night = nodes.Dirichlet(p_conc_night,
                                      plates = (night_time.size,),
                                      name = 'p_theta_night')


            #Combinging morning time
            for count, time in enumerate(morning_time):
                model = nodes.Categorical(p_theta_morning[count],
                                        plates=(self.N_OBSERVATIONS[time],1),
                                        name=str(time))

                #observe data
                timezone_observations = self._observed_locations[self._observed_locations['time'] == time]
                #print(timezone_observations)

                if not timezone_observations.empty:
                    data = timezone_observations['location'].as_matrix().reshape((self.N_OBSERVATIONS[time],1))
                    model.observe(data)

                location_model.append(model)

            #Combinging night time
            for count, time in enumerate(night_time):
                model = nodes.Categorical(p_theta_night[count],
                                        plates=(self.N_OBSERVATIONS[time],1),
                                        name=str(time))

                #observe data
                timezone_observations = self._observed_locations[self._observed_locations['time'] == time]

                if not timezone_observations.empty:
                    data = timezone_observations['location'].as_matrix().reshape((self.N_OBSERVATIONS[time],1))
                    model.observe(data)

                location_model.append(model)

            Q = VB(location_model[0], location_model[1], location_model[2], location_model[3],
                    location_model[4], location_model[5], location_model[6], location_model[7],
                    location_model[8], location_model[9], location_model[10], location_model[11],
                    location_model[12], location_model[13], location_model[14], location_model[15],
                    location_model[16], location_model[17], location_model[18], location_model[19],
                    location_model[20], location_model[21], location_model[22], location_model[23],
                    p_theta_morning, p_theta_night, p_conc_morning, p_conc_night)
        else:
            raise 'no model_type selected'

        print ("models created")

        ####################################################################################
        #Learning parameters
        Q.update(repeat=1000)
        print ('learned params')
        ####################################################################################
        
        if ('all' == model_type):
            return np.array(p_theta.get_parameters()).reshape((self.N_TIMEZONES,self.N_LOCATIONS))
        elif ('2fold' == model_type):
            learned_night = np.array(p_theta_night.get_parameters()).reshape((night_time.size, self.N_LOCATIONS))
            learned_morn = np.array(p_theta_morning.get_parameters()).reshape((morning_time.size, self.N_LOCATIONS))
            return(np.row_stack((learned_night[:6,:], learned_morn, learned_night[6:,:])))
Exemplo n.º 17
0
    def predict(self):
        #print(self.network.graph)
        self.predictions = {term : numpy.empty((self.lenValidation, 2), dtype=float) for term in self.ontology.ontology}

        classifiers = {
                term : lambda: loadClf(self.ontology[term]['name'], self.fold, self.clfName) #self.ontology[term]['clf'][self.fold][self.clfName]
                for term
                in self.ontology.ontology
                }
        #for term, (clf,X,y,g) in classifiers.items():
        #    print(term, ":", repr(self.clfName), repr(clf.name), self.fold, clf.fold)

        observations = {
                term : clf.decision_function(clf.X_validation) if term != self.ontology.root else numpy.array([-1.]*len(clf.X_validation))
                for term, clff #(clf, X, y, g)
                in classifiers.items()
                for clf in (clff(),)
                }
        #print("observations:")
        #print(observations)
        gt = {
                term : clf().y_validation
                for term, clf
                in classifiers.items()}
        #print("gt:")
        #print(gt)

        for i in range(self.lenValidation):
            observation = {term : pred[i] for term, pred in observations.items()}
            #print("Observation for gene %d" % i)
            #print(observation)
            #print(self.network.forward_backward(observation))
            hidden, observed, extra = self.getCopy()
            #print(hidden)
            #print(observed)
            #print(observation)
            #for term, node in hidden.items():
            #    print(i, term, node.get_moments()[0])
            for k,v in observation.items():
                observed[k].observe((v,))
            #    print("%s observes %s" % (k, v))
            allv = (*hidden.values(), *observed.values(), *extra)
            #print([(x, [p for p in x.parents if isinstance(p, Stochastic)]) for x in [*hidden.values(), *observed.values()]])
            Q = VB(*allv)
            Q.update(*allv, tol=1e-7, repeat=1000, verbose=True)
            #print("---")

            for term, node in hidden.items():
                #print(i, term, node.get_moments()[0])
                self.predictions[term][i,:] = node.get_moments()[0]

        #print("predictions:")
        #print(self.predictions)
        for term in observations:
                compare = numpy.empty((len(gt[term]),4), dtype=float)
                compare[:,0] = gt[term]
                compare[:,1] = observations[term]
                compare[:,2] = self.predictions[term][:,1]
                compare[:,3] = numpy.round(self.predictions[term][:,1])
                print(term, self.ontology[term]['name'])
                print(compare)
Exemplo n.º 18
0

A = Categorical([0.5, 0.5])

T = Mixture(A, Categorical, [[0.99, 0.01], [0.8, 0.2]])

S = Categorical([0.5, 0.5])

L = Mixture(S, Categorical, [[0.98, 0.02], [0.75, 0.25]])

B = Mixture(S, Categorical, [[0.97, 0.03], [0.70, 0.30]])

X = Mixture(T, Mixture, L, Categorical, _or([0.96, 0.04], [0.115, 0.885]))

D = Mixture(B, Mixture, X, Categorical, _or([0.115, 0.885], [0.04, 0.96]))

T.observe(TRUE)
S.observe(FALSE)

B.observe(TRUE)

Q = VB(A, T, S, L, B, X, D)
Q.update(repeat=100)

print("P(asia): ", A.get_moments()[0][TRUE])
print("P(tuberculosis): ", T.get_moments()[0][TRUE])
print("P(smoking): ", S.get_moments()[0][TRUE])
print("P(lung): ", L.get_moments()[0][TRUE])
print("P(bronchitis): ", B.get_moments()[0][TRUE])
print("P(xray): ", X.get_moments()[0][TRUE])
print("P(dyspnea): ", D.get_moments()[0][TRUE])
Exemplo n.º 19
0
r = (1 - q) / (K - 1)
P = q * np.identity(K) + r * (np.ones((3, 3)) - np.identity(3))
y = np.zeros((N, 2))
z = np.zeros(N)
state = np.random.choice(K, p=p0)
for n in range(N):
    z[n] = state
    y[n, :] = std * np.random.randn(2) + mu[state]
    state = np.random.choice(K, p=P[state])
from bayespy.nodes import Dirichlet

a0 = Dirichlet(1e-3 * np.ones(K))
A = Dirichlet(1e-3 * np.ones((K, K)))
Z = CategoricalMarkovChain(a0, A, states=N)
Lambda = std**(-2) * np.identity(2)
from bayespy.nodes import Gaussian

Y = Mixture(Z, Gaussian, mu, Lambda)
Y.observe(y)
Q = VB(Y, Z, A, a0)
Q.update(repeat=1000)
bpplt.pyplot.figure()
bpplt.pyplot.axis('equal')
colors = Y.parents[0].get_moments()[0]

bpplt.pyplot.plot(y[:, 0], y[:, 1], 'k-', zorder=-10)
bpplt.pyplot.scatter(y[:, 0], y[:, 1], c=colors, s=40)
bpplt.pyplot.show()
print(Y.parents[0].get_moments())
print(Z.random())
print(Y.parents[0].get_moments()[0])
Exemplo n.º 20
0
from bayespy.nodes import Dirichlet, Categorical
from bayespy.nodes import Gaussian, Wishart
from bayespy.nodes import Mixture
from bayespy.inference import VB

y0 = np.random.multivariate_normal([0, 0], [[2, 0], [0, 0.1]], size=50)
y1 = np.random.multivariate_normal([0, 0], [[0.1, 0], [0, 2]], size=50)
y2 = np.random.multivariate_normal([2, 2], [[2, -1.5], [-1.5, 2]], size=50)
y3 = np.random.multivariate_normal([-2, -2], [[0.5, 0], [0, 0.5]], size=50)
y = np.vstack([y0, y1, y2, y3])

N = 200
D = 2
K = 10

alpha = Dirichlet(1e-5*np.ones(K), name='alpha')
Z = Categorical(alpha, plates=(N,),name='z')

mu = Gaussian(np.zeros(D),1e-5*np.identity(D),plates=(K,),name='mu')
Lambda = Wishart(D,1e-5*np.identity(D),plates=(K,),name='Lambda')

Y = Mixture(Z, Gaussian, mu, Lambda, name='Y')
Z.initialize_from_random()
Q = VB(Y, mu, Lambda, Z, alpha)

Y.observe(y)
Q.update(repeat=1000)

bpplt.gaussian_mixture_2d(Y, alpha=alpha, scale=2)

Exemplo n.º 21
0
    def get_community_assignments_by(self,
                                     method=None,
                                     temp_dfile_file="gibbsldapp.dfile",
                                     params={}):

        if method == "HMM":
            """
            model = hmm.MultinomialHMM(n_components=3)
            model.startprob_ = np.array([0.6, 0.3, 0.1])
            model.transmat_ = np.array([[0.7, 0.2, 0.1],
                                             [0.3, 0.5, 0.2],
                                             [0.3, 0.3, 0.4]])
            model.emissionprob_ = np.array([[0.4, 0.2, 0.1, 0.3],
                                        [0.3, 0.4, 0.1, 0.2],
                                        [0.1, 0.3, 0.5, 0.1]])

            X, Z = model.sample(1000)

            print(np.asarray(X).T)
            print(Z)
            """
            """
            remodel = hmm.MultinomialHMM(n_components=3, n_iter=100)
            remodel.fit(X)
            Z2 = remodel.predict(X)
            print(Z2)
            """
            """
            seqs = []
            lens = []
            for walk in self._walks:
                s = [[int(w)-1] for w in walk]
                seqs.extend(s)
                lens.append(len(s))

            model = hmm.MultinomialHMM(n_components=params['number_of_topics'], tol=0.001, n_iter=5000)
            model.fit(seqs, lens)

            posteriors = model.predict_proba(np.asarray([[i] for i in range(self.g.number_of_nodes())]))
            comms = np.argmax(posteriors, 1)

            node2comm = {}
            for id in range(len(comms)):
                node2comm[str(id+1)] = comms[id]

            return node2comm
            """
            seqs = []
            lens = []
            for walk in self._walks:
                s = [int(w) - 1 for w in walk]
                seqs.append(s)
                lens.append(len(s))

            pipi = np.asarray([0.5, 0.5], dtype=np.float)
            AA = np.asarray([[0.2, 0.8], [0.5, 0.5]], dtype=np.float)
            OO = np.asarray([[0.9, 0.05, 0.05], [0.05, 0.05, 0.9]],
                            dtype=np.float)

            seqs = []
            for i in range(31):
                seq = []

                s = np.random.choice(range(2), p=pipi)
                o = np.random.choice(range(3), p=OO[s, :])
                seq.append(o)
                for _ in range(59):
                    s = np.random.choice(range(2), p=AA[s, :])
                    o = np.random.choice(range(3), p=OO[s, :])
                    seq.append(o)

                seqs.append(seq)

            seqs = np.vstack(seqs)

            #print(seqs)

            from bayespy.nodes import Categorical, Mixture
            from bayespy.nodes import CategoricalMarkovChain
            from bayespy.nodes import Dirichlet
            from bayespy.inference import VB
            K = params['number_of_topics']  # the number of hidden states
            N = self.g.number_of_nodes()  # the number of observations

            #p0 = np.ones(K) / K

            D = 31  #len(lens)
            states = 60

            a0 = Dirichlet(1e+1 * np.ones(K), plates=())
            A = Dirichlet(1e+1 * np.ones(K), plates=(2, ), name='A')
            P = Dirichlet(1e+1 * np.ones((K, N)))
            Z = CategoricalMarkovChain(a0, A, states=states, plates=(D, ))
            Y = Mixture(Z, Categorical, P)

            Y.observe(seqs)

            #a0.random()
            #A.random()
            #P.random()

            Ainit = np.random.random((2, 2))
            Ainit = np.divide(Ainit.T, np.sum(Ainit, 1)).T

            #A.initialize_from_value(Ainit)
            #print(Ainit)
            Q = VB(Y, Z, P, A, a0)

            Q.update(repeat=1000, plot=False, verbose=True)

            #print(Z.random())
            print(Q['A'])

            return {}

        if method == "LDA":

            # Run GibbsLDA++

            lda_exe_path = c._GIBBSLDA_PATH

            if not os.path.exists(lda_exe_path):
                raise ValueError("Invalid path of GibbsLDA++!")

            temp_lda_folder = "./temp"
            if not os.path.exists(temp_lda_folder):
                os.makedirs(temp_lda_folder)

            temp_dfile_path = os.path.join(temp_lda_folder, temp_dfile_file)

            if not os.path.exists(temp_dfile_path):
                # Save the walks into the dfile
                n = len(self._walks)
                with open(temp_dfile_path, 'w') as f:
                    f.write("{}\n".format(n))
                    for walk in self._walks:
                        f.write("{}\n".format(" ".join(str(w) for w in walk)))

            initial_time = time.time()

            cmd = "{} -est ".format(lda_exe_path)
            cmd += "-alpha {} ".format(params['lda_alpha'])
            cmd += "-beta {} ".format(params['lda_beta'])
            cmd += "-ntopics {} ".format(params['number_of_topics'])
            cmd += "-niters {} ".format(params['lda_number_of_iters'])
            cmd += "-savestep {} ".format(params['lda_number_of_iters'] + 1)
            cmd += "-dfile {} ".format(temp_dfile_path)
            os.system(cmd)

            print(
                "-> The LDA algorithm run in {:.2f} secs".format(time.time() -
                                                                 initial_time))

            # Read wordmap file
            id2node = {}
            temp_wordmap_path = os.path.join(temp_lda_folder, "wordmap.txt")
            with open(temp_wordmap_path, 'r') as f:
                f.readline()  # skip the first line
                for line in f.readlines():
                    tokens = line.strip().split()
                    id2node[int(tokens[1])] = tokens[0]

            # Read phi file
            phi = np.zeros(shape=(params['number_of_topics'], len(id2node)),
                           dtype=np.float)
            temp_phi_path = os.path.join(temp_lda_folder, "model-final.phi")
            with open(temp_phi_path, 'r') as f:
                for topicId, line in enumerate(f.readlines()):
                    phi[topicId, :] = [
                        float(value) for value in line.strip().split()
                    ]

            max_topics = np.argmax(phi, axis=0)

            node2comm = {}
            for nodeId in id2node:
                node2comm[id2node[nodeId]] = max_topics[int(nodeId)]

            return node2comm
Exemplo n.º 22
0
alpha = Gamma(1e-5, 1e-5, plates=(D, ), name='alpha')
A = GaussianARD(0, alpha, shape=(D, ), plates=(D, ), name='A')
X = GaussianMarkovChain(np.zeros(D),
                        1e-3 * np.identity(D),
                        A,
                        np.ones(D),
                        n=N,
                        name='X')
gamma = Gamma(1e-5, 1e-5, plates=(D, ), name='gamma')
C = GaussianARD(0, gamma, shape=(D, ), plates=(M, 1), name='C')
F = Dot(C, X, name='F')
C.initialize_from_random()
tau = Gamma(1e-5, 1e-5, name='tau')
Y = GaussianARD(F, tau, name='Y')
from bayespy.inference import VB
Q = VB(X, C, gamma, A, alpha, tau, Y)
w = 0.3
a = np.array([[np.cos(w), -np.sin(w), 0, 0], [np.sin(w),
                                              np.cos(w), 0, 0], [0, 0, 1, 0],
              [0, 0, 0, 0]])
c = np.random.randn(M, 4)
x = np.empty((N, 4))
f = np.empty((M, N))
y = np.empty((M, N))
x[0] = 10 * np.random.randn(4)
f[:, 0] = np.dot(c, x[0])
y[:, 0] = f[:, 0] + 3 * np.random.randn(M)
for n in range(N - 1):
    x[n + 1] = np.dot(a, x[n]) + [1, 1, 10, 10] * np.random.randn(4)
    f[:, n + 1] = np.dot(c, x[n + 1])
    y[:, n + 1] = f[:, n + 1] + 3 * np.random.randn(M)
Exemplo n.º 23
0
from bayespy.inference import VB
import copy
import numpy as np
import bayespy.plot as bpplt

hidden2 = Categorical((0.7, 0.3))
hidden1 = Mixture(hidden2, Categorical, ((0.6, 0.4), (0.1, 0.9)))

observed1 = Mixture(hidden1, Gaussian, ([-1.0], [0.9]), ([[1.3]], [[0.8]]))
observed2 = Mixture(hidden2, Gaussian, ([-0.9], [1.1]), ([[1.2]], [[0.7]]))

observed_1, observed_2, hidden_1, hidden_2 = copy.deepcopy(
    (observed1, observed2, hidden1, hidden2))
observed_1.observe((-1.2, ))
observed_2.observe((1.2, ))
Q = VB(hidden_1, hidden_2, observed_1, observed_2, tol=1e-10)
Q.update(repeat=100)
print(hidden_1.get_moments())
print(hidden_2.get_moments())

observed_1, observed_2, hidden_1, hidden_2 = copy.deepcopy(
    (observed1, observed2, hidden1, hidden2))
observed_1.observe((-0.2, ))
observed_2.observe((1.2, ))
Q = VB(hidden_1, hidden_2, observed_1, observed_2)
Q.update(repeat=100)
print(hidden_1.get_moments())
print(hidden_2.get_moments())

observed_1, observed_2, hidden_1, hidden_2 = copy.deepcopy(
    (observed1, observed2, hidden1, hidden2))
Exemplo n.º 24
0
                name='X')
alpha = Gamma(1e-3, 1e-3,
              plates=(D,),
              name='alpha')
C = GaussianARD(0, alpha,
                shape=(D,),
                plates=(10,1),
                name='C')
F = Dot(C, X)
tau = Gamma(1e-3, 1e-3, name='tau')
Y = GaussianARD(F, tau)
c = np.random.randn(10, 2)
x = np.random.randn(2, 100)
data = np.dot(c, x) + 0.1*np.random.randn(10, 100)
Y.observe(data)
Y.observe(data, mask=[[True], [False], [False], [True], [True],
                      [False], [True], [True], [True], [False]])
from bayespy.inference import VB
Q = VB(Y, C, X, alpha, tau)
X.initialize_from_parameters(np.random.randn(1, 100, D), 10)
from bayespy.inference.vmp import transformations
rotX = transformations.RotateGaussianARD(X)
rotC = transformations.RotateGaussianARD(C, alpha)
R = transformations.RotationOptimizer(rotC, rotX, D)
Q = VB(Y, C, X, alpha, tau)
Q.callback = R.rotate
Q.update(repeat=1000, tol=1e-6)
Q.update(repeat=50, tol=np.nan)

import bayespy.plot as bpplt
bpplt.pdf(Q['tau'], np.linspace(60, 140, num=100))
Exemplo n.º 25
0
p2 = [0.9, 0.9, 0.9, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]

p = np.array([p0, p1, p2])
z = random.categorical([1/3, 1/3, 1/3], size=100)
x = random.bernoulli(p[z])

N = 100
D = 10
K = 3

R = Dirichlet(K*[1e-5],name='R')
Z = Categorical(R,plates=(N,1),name='Z')
P = Beta([0.5, 0.5],plates=(D,K),name='P')
X = Mixture(Z, Bernoulli, P)

Q = VB(Z, R, X, P)
P.initialize_from_random()
X.observe(x)

Q.update(repeat=1000)

#print(" P:")
#print( P.get_moments() )

#print(" R:")
#print( R.get_moments() )

print(" Z:")
print( Z.get_moments() )

print(" X:")
Exemplo n.º 26
0
lung = Mixture(smoking, Categorical, [[0.98, 0.02], [0.25, 0.75]])

bronchitis = Mixture(smoking, Categorical, [[0.97, 0.03], [0.08, 0.92]])

xray = Mixture(tuberculosis, Mixture, lung, Categorical,
               _or([0.96, 0.04], [0.115, 0.885]))

dyspnea = Mixture(
    bronchitis, Mixture, tuberculosis, Mixture, lung, Categorical,
    [_or([0.6, 0.4], [0.18, 0.82]),
     _or([0.11, 0.89], [0.04, 0.96])])

# Mark observations
tuberculosis.observe(TRUE)
smoking.observe(FALSE)
bronchitis.observe(
    TRUE)  # not a "chance" observation as in the original example

# Run inference
Q = VB(dyspnea, xray, bronchitis, lung, smoking, tuberculosis, asia)
Q.update(repeat=100)

# Show results
print("P(asia):", asia.get_moments()[0][TRUE])
print("P(tuberculosis):", tuberculosis.get_moments()[0][TRUE])
print("P(smoking):", smoking.get_moments()[0][TRUE])
print("P(lung):", lung.get_moments()[0][TRUE])
print("P(bronchitis):", bronchitis.get_moments()[0][TRUE])
print("P(xray):", xray.get_moments()[0][TRUE])
print("P(dyspnea):", dyspnea.get_moments()[0][TRUE])
Exemplo n.º 27
0
Arquivo: utils.py Projeto: SongFGH/TNE
def get_node_distr_over_comm(g, walks, method=None, params={}):

    if method == "HMM_param":

        seqs = []
        lens = []
        for walk in walks:
            s = [[int(w)] for w in walk]
            seqs.extend(s)
            lens.append(len(s))

        model = hmm.MultinomialHMM(n_components=params['number_of_topics'],
                                   tol=0.001,
                                   n_iter=5000)
        model.fit(seqs, lens)

        #posteriors = model.predict_proba(np.asarray([[i] for i in range(self.g.number_of_nodes())]))
        #comms = np.argmax(posteriors, 1)

        likelihood = model.emissionprob_
        """
        comms = np.argmax(likelihood, 0)

        node2comm = {}
        for id in range(len(comms)):
            node2comm[str(id)] = comms[id]

        return node2comm
        """

    elif method == "Nonparam_HMM":

        seqs = []
        lens = []
        for walk in walks:
            s = [int(w) for w in walk]
            seqs.append(s)
            lens.append(len(s))

        seqs = np.vstack(seqs)

        K = params['number_of_topics']  # the number of hidden states
        O = g.number_of_nodes()  # the size of observation set
        L = len(seqs[0])  # the length of each sequence
        N = len(seqs)  # the number of sequences

        p0 = params['prior_p0']  # a vector of size K
        t0 = params['prior_t0']  # a vector of size K
        e0 = params['prior_e0']  # a vector of size K

        p = bayes.Dirichlet(p0 * np.ones(K), name='p')

        T = bayes.Dirichlet(t0 * np.ones(K), plates=(K, ), name='T')

        E = bayes.Dirichlet(e0 * np.ones(O), plates=(K, ), name='E')

        Z = bayes.CategoricalMarkovChain(p,
                                         T,
                                         states=L,
                                         name='Z',
                                         plates=(N, ))

        # Emission/observation distribution
        X = bayes.Mixture(Z, bayes.Categorical, E, name='X')

        p.initialize_from_random()
        T.initialize_from_random()
        E.initialize_from_random()

        Q = VB(X, Z, p, T, E)

        Q['X'].observe(seqs)
        Q.update(repeat=1000)

        likelihood = Q['E'].random()
        """
        comms = np.argmax(likelihood, 0)

        node2comm = {}
        for id in range(len(comms)):
            node2comm[str(id)] = comms[id]

        return node2comm
        """

        return likelihood

    elif method == "LDA":

        # Run GibbsLDA++
        if not os.path.exists(GIBBSLDA_PATH):
            raise ValueError("Invalid path of GibbsLDA++!")

        temp_lda_folder = os.path.join(TEMP_FOLDER, "lda_temp")
        if not os.path.exists(temp_lda_folder):
            os.makedirs(temp_lda_folder)

        temp_dfile_path = os.path.join(temp_lda_folder, "gibblda_temp.dfile")
        # Save the walks into the dfile
        n = len(walks)
        with open(temp_dfile_path, 'w') as f:
            f.write("{}\n".format(n))
            for walk in walks:
                f.write("{}\n".format(" ".join(str(w) for w in walk)))

        initial_time = time.time()
        cmd = "{} -est ".format(GIBBSLDA_PATH)
        cmd += "-alpha {} ".format(params['lda_alpha'])
        cmd += "-beta {} ".format(params['lda_beta'])
        cmd += "-ntopics {} ".format(params['number_of_topics'])
        cmd += "-niters {} ".format(params['lda_number_of_iters'])
        cmd += "-savestep {} ".format(params['lda_number_of_iters'] + 1)
        cmd += "-dfile {} ".format(temp_dfile_path)
        os.system(cmd)

        print("-> The LDA algorithm run in {:.2f} secs".format(time.time() -
                                                               initial_time))

        # Read wordmap file
        id2node = {}
        temp_wordmap_path = os.path.join(temp_lda_folder, "wordmap.txt")
        with open(temp_wordmap_path, 'r') as f:
            f.readline()  # skip the first line
            for line in f.readlines():
                tokens = line.strip().split()
                id2node[int(tokens[1])] = tokens[0]

        # Read phi file
        num_of_nodes = len(id2node)
        phi = np.zeros(shape=(params['number_of_topics'], num_of_nodes),
                       dtype=np.float)
        temp_phi_path = os.path.join(temp_lda_folder, "model-final.phi")
        with open(temp_phi_path, 'r') as f:
            for comm, line in enumerate(f.readlines()):
                for id, value in enumerate(line.strip().split()):
                    phi[comm, int(id2node[id])] = value

        # Read the tassign file, generate topic corpus
        temp_tassing_path = os.path.join(temp_lda_folder,
                                         "model-final.tassign")
        comm_corpus = []
        with smart_open(temp_tassing_path, 'r') as f:
            for line in f:
                tokens = line.strip().split()
                comm_corpus.append([token.split(':')[1] for token in tokens])
        """
        max_topics = np.argmax(phi, axis=0)

        node2comm = {}
        for nodeId in id2node:
            node2comm[id2node[nodeId]] = max_topics[int(nodeId)]

        return node2comm
        """

        return phi, comm_corpus
    else:
        raise ValueError("Wrong parameter name!")
Exemplo n.º 28
0
import numpy as np

np.random.seed(1)
data = np.random.normal(5, 10, size=(10, ))
from bayespy.nodes import GaussianARD, Gamma

mu = GaussianARD(0, 1e-6)
tau = Gamma(1e-6, 1e-6)
y = GaussianARD(mu, tau, plates=(10, ))
y.observe(data)
from bayespy.inference import VB

Q = VB(mu, tau, y)
Q.update(repeat=20)
import bayespy.plot as bpplt

bpplt.pyplot.subplot(2, 1, 1)
bpplt.pdf(mu, np.linspace(-10, 20, num=100), color='k', name=r'\mu')
bpplt.pyplot.subplot(2, 1, 2)
bpplt.pdf(tau, np.linspace(1e-6, 0.08, num=100), color='k', name=r'\tau')
bpplt.pyplot.tight_layout()
bpplt.pyplot.show()
Exemplo n.º 29
0
def model(n_documents,
          n_topics,
          n_vocabulary,
          corpus,
          word_documents,
          plates_multiplier=1):
    '''
    Construct Latent Dirichlet Allocation model.
    
    Parameters
    ----------
    
    documents : int
        The number of documents

    topics : int
        The number of topics

    vocabulary : int
        The number of words in the vocabulary

    corpus : integer array
        The vocabulary index of each word in the corpus

    word_documents : integer array
        The document index of each word in the corpus
    '''

    # Topic distributions for each document
    p_topic = nodes.Dirichlet(np.ones(n_topics),
                              plates=(n_documents, ),
                              name='p_topic')

    # Word distributions for each topic
    p_word = nodes.Dirichlet(np.ones(n_vocabulary),
                             plates=(n_topics, ),
                             name='p_word')

    # Use a simple wrapper node so that the value of this can be changed if one
    # uses stocahstic variational inference
    word_documents = Constant(CategoricalMoments(n_documents),
                              word_documents,
                              name='word_documents')

    # Choose a topic for each word in the corpus
    topics = nodes.Categorical(nodes.Gate(word_documents, p_topic),
                               plates=(len(corpus), ),
                               plates_multiplier=(plates_multiplier, ),
                               name='topics')

    # Choose each word in the corpus from the vocabulary
    words = nodes.Categorical(nodes.Gate(topics, p_word), name='words')

    # Observe the corpus
    words.observe(corpus)

    # Break symmetry by random initialization
    p_topic.initialize_from_random()
    p_word.initialize_from_random()

    return VB(words, topics, p_word, p_topic, word_documents)
Exemplo n.º 30
0
# -----Performing inference------
# 1: Observe some nodes
c = np.random.randn(10, 2)
x = np.random.randn(2, 100)
data = np.dot(c, x) + 0.1 * np.random.randn(10, 100)
# data:10×100

Y.observe(data)
#( Missing values)
Y.observe(data,
          mask=[[True], [False], [False], [True], [True], [False], [True],
                [True], [True], [False]])

# 2: Choosing the inference method
from bayespy.inference import VB
Q = VB(Y, C, X, alpha, tau)

# 3: Initializing the posterior approximation
X.initialize_from_parameters(np.random.randn(1, 100, D), 10)

# 4: Running the inference algorithm
# Q.update()
# Q.update(C, X)
# Q.update(C, X, C, tau)
# Q.update(repeat=10)
# Q.update(repeat=1000)
Q.update(repeat=10000, tol=1e-5)
# C.update()

#( 5 : Parameter expansion 収束が遅い時)
# from bayespy.inference.vmp import transformations
Exemplo n.º 31
0
    def run(self, K=25, beta=0.5, alpha=0.00001, foci_thresh=0, num_neigh=4, hinton_plot=False, end=False):
        '''Performs one run of the BBDP according to the specified parameters.'''

        print("Transforming WCS participant data into binary vectors...")
        x = u.transform_data_all(self.langs, norm=False, end=end, foci=True, foci_thresh=foci_thresh, num_neigh=num_neigh)
        print("Finished transforming participant data") 
        self.participant_list = x[0]
        
        N = len(x[0])            #number of data points (i.e. WCS participants)
        D = np.shape(x[1])[1]    #number of features
        #K = 20            #number of initial clusters
        
        R = Dirichlet(K*[alpha],
                      name='R')
        Z = Categorical(R,
                        plates=(N,1),
                        name='Z')
        
        P = Beta([beta, beta],
                 plates=(D,K),
                 name='P')
        
        X = Mixture(Z, Bernoulli, P)
        
        Q = VB(Z, R, X, P)
        P.initialize_from_random()
        X.observe(x[1])
        Q.update(repeat=1000)

        if hinton_plot:
            bpplt.hinton(Z)
            bpplt.pyplot.show()
            
            bpplt.hinton(R)
            bpplt.pyplot.show()

        #Get the weight matrix stored in Z (weights determine which cluster data point belongs to)
        z = Z._message_to_child()[0]
        z = z * np.ones(Z.plates+(1,))
        z = np.squeeze(z)
        self.z = z

        #Get the weights stored in R (proportional to the size of the clusters)
        r = np.exp(R._message_to_child()[0])
        r = r * np.ones(R.plates+(1,))
        r = np.squeeze(r)
        self.r = r

        #Get the cluster assignment of each data point
        self.c_assign = np.argmax(self.z, axis=1)

        #Write cluster results to a file
        if self.write_to_file:
            if end:
                save_path = "cluster_results_end_K={}_B={}_a={}_t={}_nn={}".format(K, beta, alpha, foci_thresh, num_neigh)
            else:
                save_path = "cluster_results_K={}_B={}_a={}_t={}_nn={}".format(K, beta, alpha, foci_thresh, num_neigh)
            while path.exists(save_path+".txt"):
                #save_path already exists
                try:
                    old_file_num = int(save_path[save_path.find('(')+1:-1])
                    new_file_num = old_file_num + 1
                    save_path = save_path[0:save_path.find('(')] + '(' + str(new_file_num) + ')'
                except ValueError:
                    save_path = save_path + " (1)"

            self.save_path = save_path       
            file = open(path.abspath(self.save_path+".txt"), 'w')
            
            #Write cluster assignment matrix Z (gives the probability that observation i belongs to cluster j)
            if 'Z' not in self.in_file:
                for i in range(len(self.z)):
                    line = "\t".join([str(x) for x in self.z[i]]) + "\n"
                    file.write(line)
                file.write('---Z\n')
                self.in_file.append('Z')

            #Write cluster weights matrix R (proportional to the size of the resulting clusters)
            if 'R' not in self.in_file:
                line = "\t".join([str(x) for x in self.r]) + "\n"
                file.write(line)
                file.write('---R\n')
                self.in_file.append('R')

            #Write deterministic cluster assignments with the corresponding participant key
            if 'C' not in self.in_file:
                line1 = "\t".join([str(x) for x in self.participant_list]) + "\n"
                line2 = "\t".join([str(x) for x in self.c_assign]) + "\n"              
                file.write(line1)
                file.write(line2)
                file.write('---C\n')
                self.in_file.append('C')
            
            file.close()

        return self.c_assign
Exemplo n.º 32
0
import numpy
numpy.random.seed(1)
p0 = [0.1, 0.9, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9, 0.1, 0.9]
p1 = [0.1, 0.1, 0.1, 0.1, 0.1, 0.9, 0.9, 0.9, 0.9, 0.9]
p2 = [0.9, 0.9, 0.9, 0.9, 0.9, 0.1, 0.1, 0.1, 0.1, 0.1]
import numpy as np
p = np.array([p0, p1, p2])
from bayespy.utils import random
z = random.categorical([1 / 3, 1 / 3, 1 / 3], size=100)
x = random.bernoulli(p[z])
N = 100
D = 10
K = 10
from bayespy.nodes import Categorical, Dirichlet
R = Dirichlet(K * [1e-5], name='R')
Z = Categorical(R, plates=(N, 1), name='Z')
from bayespy.nodes import Beta
P = Beta([0.5, 0.5], plates=(D, K), name='P')
from bayespy.nodes import Mixture, Bernoulli
X = Mixture(Z, Bernoulli, P)
from bayespy.inference import VB
Q = VB(Z, R, X, P)
P.initialize_from_random()
X.observe(x)
Q.update(repeat=1000)
import bayespy.plot as bpplt
bpplt.hinton(P)
bpplt.pyplot.show()
Exemplo n.º 33
0
Zi = Categorical(alpha, plates=(N, ), name='zi')

from bayespy.nodes import Gaussian, Wishart

mui = Gaussian(np.zeros(D), 1e-5 * np.identity(D), plates=(K, ), name='mui')
Lambdai = Wishart(D, 1e-5 * np.identity(D), plates=(K, ), name='Lambdai')

from bayespy.nodes import Mixture

Y = Mixture(Zi, Gaussian, mui, Lambdai, name='Y')

Zi.initialize_from_random()

from bayespy.inference import VB

Q = VB(Y, mui, Lambdai, Zi, alpha)

Y.observe(np.reshape(C_mat, (-1, 2)))

Q.update(repeat=10)
#%%
K = 5  #hyperparameter
neta = 1e-6 * np.ones(K)  #hyperparameter
print(neta.shape)
print(neta)
PI = bayespy.nodes.Dirichlet(neta, name='PI')
#%%
Z = bayespy.nodes.Categorical(PI, plates=(m, n, K), name='Z')

mean_vec = np.zeros(d)  # to be initialized accorinding to image
precission_mat = 1e-5 * np.identity(
Exemplo n.º 34
0
    y = y.reshape(y.shape[0], )

    X = x2.reshape(x2.shape[0], 1)

    from bayespy.nodes import GaussianARD
    B = GaussianARD(0, 1e-6, shape=(X.shape[1], ))
    from bayespy.nodes import SumMultiply
    F = SumMultiply('i,i', B, X)

    from bayespy.nodes import Gamma
    tau = Gamma(1e-3, 1e-3)
    Y = GaussianARD(F, tau)
    Y.observe(y)
    from bayespy.inference import VB
    Q = VB(Y, B, tau)
    #Q.update(repeat=100990)
    distribution = []
    result = []
    distribution = F.get_moments()
    for min_val, max_val in zip(distribution[0], distribution[1]):
        #mean = []
        mean = (min_val + max_val) / 2
        result.append(mean)
        #result = mean
        #x3 = []
        #x3 = pd.DataFrame({result:buffer_data})
        #x1 = x1.append(x3)
    x1[buffer_data] = result

print(x1)
Exemplo n.º 35
0
        def fit(self, X, y):
            """Fit Multivariate Gaussian model per class using Variational Inference.
	        Parameters
	        ----------
	        X : {array-like}, shape = [n_samples,n_features]
	            Training data
	        y : array-like, shape = [n_samples]
	            Target values
	        Returns
	        -------
	        self : returns an instance of self.
	        """
            n_samples, n_features = X.shape

            classes_ = np.unique(y)
            n_classes_ = len(classes_)

            n_estimators = n_features / n_classes_

            def remove_outliers(X, y):

                classes = np.unique(y)

                n_classes = len(classes)

                n_estimators = int(X.shape[1] / n_classes)

                Xt = X.reshape((X.shape[0], n_estimators, n_classes))

                yt = np.repeat(y, n_estimators).reshape((len(y), n_estimators))

                rate = (yt == classes.take(np.argmax(Xt, axis=2))).sum(1)

                return np.where(rate > 0.0)[0]

            self.models_ = []
            for i, Y in enumerate(classes_):
                features = np.arange(n_estimators,
                                     dtype=int) * (n_classes_) + i
                L = X[y == Y, :]

                N, D = L.shape

                Lambda = nodes.Wishart(D, np.identity(D))
                mu = nodes.Gaussian(np.zeros(D), np.identity(D))

                x = nodes.Gaussian(mu, Lambda, plates=(N, ))
                x.observe(L)

                Q = VB(x, mu, Lambda)
                Q.update(repeat=2000, tol=0, verbose=False)

                cov = np.linalg.inv(Lambda.u[0])
                m = mu.u[0]

                self.models_.append([m, cov, float(L.shape[0]) / n_samples])

            if (self.weight_class):
                self.w = X.shape[0] / (n_classes_ *
                                       np.bincount(np.asarray(y, dtype=int)))
            else:
                self.w = np.ones(n_classes_)

            self.n_classes_ = n_classes_
            self.classes_ = classes_
            self.n_estimators = n_estimators

            return self
Exemplo n.º 36
0
import numpy
numpy.random.seed(1)
M = 20
N = 100
import numpy as np
x = np.random.randn(N, 2)
w = np.random.randn(M, 2)
f = np.einsum('ik,jk->ij', w, x)
y = f + 0.1 * np.random.randn(M, N)
D = 10
from bayespy.nodes import GaussianARD, Gamma, SumMultiply
X = GaussianARD(0, 1, plates=(1, N), shape=(D, ))
alpha = Gamma(1e-5, 1e-5, plates=(D, ))
C = GaussianARD(0, alpha, plates=(M, 1), shape=(D, ))
F = SumMultiply('d,d->', X, C)
tau = Gamma(1e-5, 1e-5)
Y = GaussianARD(F, tau)
Y.observe(y)
from bayespy.inference import VB
Q = VB(Y, X, C, alpha, tau)
C.initialize_from_random()
from bayespy.inference.vmp.transformations import RotateGaussianARD
rot_X = RotateGaussianARD(X)
rot_C = RotateGaussianARD(C, alpha)
from bayespy.inference.vmp.transformations import RotationOptimizer
R = RotationOptimizer(rot_X, rot_C, D)
Q.set_callback(R.rotate)
Q.update(repeat=1000)
import bayespy.plot as bpplt
bpplt.plot(F)
bpplt.plot(f, color='r', marker='x', linestyle='None')
Exemplo n.º 37
0
import numpy
numpy.random.seed(1)
from bayespy.nodes import CategoricalMarkovChain
a0 = [0.6, 0.4] # p(rainy)=0.6, p(sunny)=0.4
A = [[0.7, 0.3], # p(rainy->rainy)=0.7, p(rainy->sunny)=0.3
     [0.4, 0.6]] # p(sunny->rainy)=0.4, p(sunny->sunny)=0.6
N = 100
Z = CategoricalMarkovChain(a0, A, states=N)
from bayespy.nodes import Categorical, Mixture
P = [[0.1, 0.4, 0.5],
     [0.6, 0.3, 0.1]]
Y = Mixture(Z, Categorical, P)
weather = Z.random()
activity = Mixture(weather, Categorical, P).random()
Y.observe(activity)
from bayespy.inference import VB
Q = VB(Y, Z)
Q.update()
import bayespy.plot as bpplt
bpplt.plot(Z)
bpplt.plot(1-weather, color='r', marker='x')
bpplt.pyplot.show()