def simulate_data(nCells=5 * 10**4, nPersons=40, seed=123456,
                  ratio_P=(1., 1., 0.8, 0.1)):
    """
    Simulate a hierarchical Gaussian-mixture data set for ``nPersons`` persons.

    Four latent classes in three dimensions are hard-coded below (weights
    ``P``, latent means ``Thetas``, between-person mean covariances
    ``Z_Sigma`` and base covariances ``Sigmas``).  Class ``i`` is switched on
    for the first ``ceil(nPersons * ratio_P[i])`` persons.  For every person,
    each active class receives its own mean (latent mean plus a
    multivariate-normal perturbation) and its own covariance drawn with
    ``wishart.invwishartrand``; the active weights are jittered by a uniform
    factor in [0.9, 1.1) and renormalised before the data are simulated.

    Parameters
    ----------
    nCells : int
        Number of observations requested from ``mix_obj.simulate_data`` for
        each person.
    nPersons : int
        Number of persons (independent mixtures) to simulate.
    seed : int or None
        Seed passed to ``npr.seed``; ``None`` leaves the RNG state untouched.
    ratio_P : sequence of 4 floats
        Fraction of persons for which each class is active.

    Returns
    -------
    Y : list
        One entry per person: whatever ``mix_obj.simulate_data(nCells)``
        returns.
    act_Class : ndarray, shape (nPersons, 4)
        1.0 where a class is active for a person, 0.0 otherwise.
    mus : ndarray
        Transposed array of the per-person class means, i.e. shape
        (3, 4, nPersons) when ``nPersons > 0``; inactive classes are NaN.
    Thetas, Sigmas, P : list
        The hard-coded latent means, base covariances and weights.
    """
    if seed is not None:
        npr.seed(seed)

    nClass = 4
    dim = 3
    P = [0.49, 0.3, 0.2, 0.01]
    Thetas = [
        np.array([0., 0, 0]),
        np.array([0, -2, 1]),
        np.array([1., 2, 0]),
        np.array([-2, 2, 1.5]),
    ]
    Z_Sigma = [
        np.array([[1.27, 0.25, 0], [0.25, 0.27, -0.001], [0., -0.001, 0.001]]),
        np.array([[0.06, 0.04, -0.03], [0.04, 0.05, 0], [-0.03, 0., 0.09]]),
        np.array([[0.44, 0.08, 0.08], [0.08, 0.16, 0], [0.08, 0., 0.16]]),
        0.01 * np.eye(dim),
    ]
    Sigmas = [
        0.1 * np.eye(dim),
        0.1 * spl.toeplitz([2., 0.5, 0]),
        0.1 * spl.toeplitz([2., -0.5, 1]),
        0.1 * spl.toeplitz([1., .3, .3]),
    ]

    act_Class = np.zeros((nPersons, nClass))
    for i in range(nClass):
        # np.ceil returns a float; slice bounds must be real integers.
        n_active = int(np.ceil(nPersons * ratio_P[i]))
        act_Class[:n_active, i] = 1.

    Y = []
    nu = 100
    mus = []
    for i in range(nPersons):
        active = np.where(act_Class[i, :] == 1)[0]
        mix_obj = GMM.mixture(K=int(act_Class[i, :].sum()))

        # Keep one slot per class (NaN when inactive) so that the returned
        # means line up across persons.
        theta_temp = []
        sigma_temp = []
        for j in range(nClass):
            if act_Class[i, j] == 1:
                theta_temp.append(
                    Thetas[j] +
                    npr.multivariate_normal(np.zeros(dim), Z_Sigma[j]))
                sigma_temp.append(
                    wishart.invwishartrand(nu, (nu - dim - 1) * Sigmas[j]))
            else:
                theta_temp.append(np.full(dim, np.nan))
                sigma_temp.append(np.full((dim, dim), np.nan))

        # The mixture object itself only receives the active components.
        mix_obj.mu = [theta_temp[j] for j in active]
        mus.append(theta_temp)
        mix_obj.sigma = [sigma_temp[j] for j in active]

        # Jitter each active weight by a factor in [0.9, 1.1), renormalise.
        p_ = np.array([(0.2 * np.random.rand() + 0.9) * P[j] for j in active])
        p_ /= np.sum(p_)
        mix_obj.p = p_
        mix_obj.d = dim
        Y.append(mix_obj.simulate_data(nCells))

    mus = np.array(mus)
    return Y, act_Class, mus.T, Thetas, Sigmas, P
def main_python(K=5):
    """
    Time 100 calls to ``sample()`` on a ``GMM.mixture`` with ``K`` components.

    The data are read from '../data/flowcym.dat' (first row skipped, columns
    1-6 kept) and the elapsed wall-clock time is printed.
    """
    n_sweeps = 100
    raw = np.loadtxt('../data/flowcym.dat',
                     skiprows=1,
                     usecols=(1, 2, 3, 4, 5, 6))
    sampler = GMM.mixture(np.ascontiguousarray(raw), K)

    started = time.time()
    for _ in range(n_sweeps):
        sampler.sample()
    finished = time.time()

    print("mixture took %.4f sec" % (finished - started))
def main_python(K=5):
    """
    Benchmark the mixture sampler on the flow-cytometry file.

    Loads columns 1-6 of '../data/flowcym.dat' (skipping the first row),
    builds ``GMM.mixture(data, K)``, calls ``sample()`` 100 times and prints
    how long the loop took.
    """
    iterations = 100
    wanted_columns = (1, 2, 3, 4, 5, 6)
    table = np.loadtxt('../data/flowcym.dat',
                       skiprows=1,
                       usecols=wanted_columns)
    model = GMM.mixture(np.ascontiguousarray(table), K)

    tic = time.time()
    for _sweep in range(iterations):
        model.sample()
    toc = time.time()

    print("mixture took %.4f sec" % (toc - tic))
 def test_sampling(self):
     """Smoke test: the sigma and x sampling routines run without raising."""
     mix = self.mix
     data, first_mu = mix.data, mix.mu[0]
     sigma_prior = mix.prior[0]["sigma"]

     # Module-level sampler, fed with the first component's mean and prior.
     GMM.sample_sigma(data, first_mu, sigma_prior["Q"], sigma_prior["nu"])

     # Instance-level samplers.
     mix.sample_x()
     mix.sample_sigma()
 def test_error(self):
     """
     ``GMM.mixture`` must reject a 1x1 data set with a ``ValueError``.

     Uses ``assertRaisesRegex``: the old ``assertRaisesRegexp`` spelling is a
     deprecated alias that no longer exists in Python 3.12+.
     """
     # The misspellings are intentional: the pattern presumably mirrors the
     # library's own error message verbatim -- do not "fix" it here.
     expected = "the number of observations must be larger then the dimenstion"
     with self.assertRaisesRegex(ValueError, expected):
         GMM.mixture(np.ones((1, 1)), 2)
 def setUp(self):
     """
     Build the fixtures shared by the tests.

     Creates ``self.n`` two-dimensional points drawn uniformly from [1, 2)
     and two ``GMM.mixture`` objects on them; the second one has
     ``high_memory`` switched off.  ``self.n`` is not set here and must be
     provided elsewhere on the test case.
     """
     self.K = 2
     samples = np.random.rand(self.n, 2) + 1
     self.data = samples

     self.mix = GMM.mixture(samples, self.K)

     low_memory_mix = GMM.mixture(samples, self.K)
     self.mix2 = low_memory_mix
     low_memory_mix.high_memory = False
def simulate_data_(thetas,
                   sigma_theta,
                   sigmas,
                   weights,
                   nu=100,
                   ratio_act=None,
                   n_cells=5 * 10**4,
                   n_persons=40,
                   seed=None,
                   silent=True):
    """
    Simulate per-person Gaussian-mixture data from user-supplied parameters.

    Cluster ``k`` is switched on for the first
    ``ceil(n_persons * ratio_act[k])`` persons.  For every person, each
    active cluster gets its own mean (``thetas[k]`` plus a draw from
    ``util.rmvn`` with covariance ``sigma_theta[k]``) and its own covariance
    drawn with ``wishart.invwishartrand(nu, (nu - dim - 1) * sigmas[k])``.
    The active weights are jittered by a uniform factor in [0.9, 1.1) and
    renormalised before ``simulate_data2`` is called.

    Parameters
    ----------
    thetas : list of ndarray
        Latent cluster means; the dimension is taken from
        ``thetas[0].shape[0]``.
    sigma_theta : list
        Between-person covariance of each cluster mean.
    sigmas : list
        Latent cluster covariances.
    weights : sequence of float
        Base mixture weights; its length defines the number of clusters.
    nu : number
        First argument passed to ``wishart.invwishartrand``.
    ratio_act : sequence of float or None
        Fraction of persons for which each cluster is active; ``None``
        activates every cluster for every person.
    n_cells : int
        Number of observations requested per person.
    n_persons : int
        Number of persons to simulate.
    seed : int or None
        Seed passed to ``npr.seed``; ``None`` leaves the RNG state untouched.
    silent : bool
        When false, print a progress line for every person.

    Returns
    -------
    Y : list
        Per person, the first value returned by ``simulate_data2``.
    act_class : ndarray, shape (n_persons, K)
        1.0 where a cluster is active for a person, 0.0 otherwise.
    mus : ndarray
        Transpose of the stacked per-person cluster means (inactive clusters
        are filled with NaN).
    x : list
        Per person, the second value returned by ``simulate_data2``
        (presumably the component labels -- confirm against GMM).
    """
    # Seed only when a seed is supplied.  The previous guard was inverted
    # (``if seed is None``), which silently ignored every explicit seed.
    if seed is not None:
        npr.seed(seed)

    K = len(weights)
    dim = thetas[0].shape[0]
    if ratio_act is None:
        ratio_act = np.ones(K)

    act_class = np.zeros((n_persons, K))
    for k in range(K):
        # np.ceil returns a float; slice bounds must be real integers.
        n_active = int(np.ceil(n_persons * ratio_act[k]))
        act_class[:n_active, k] = 1.

    Y = []
    x = []
    mus = []

    for i in range(n_persons):

        if not silent:
            print("setting up person_{i}: ".format(i=i), end='')
            sys.stdout.flush()

        active = np.where(act_class[i, :] == 1)[0]
        mix_obj = GMM.mixture(K=int(act_class[i, :].sum()))

        # Keep one slot per cluster (NaN when inactive) so that the returned
        # means line up across persons.
        theta_temp = []
        sigma_temp = []
        for j in range(K):
            if act_class[i, j] == 1:
                theta_temp.append(
                    thetas[j] +
                    util.rmvn(np.zeros((dim, 1)), sigma_theta[j]))
                sigma_temp.append(
                    wishart.invwishartrand(nu, (nu - dim - 1) * sigmas[j]))
            else:
                theta_temp.append(np.full(dim, np.nan))
                sigma_temp.append(np.full((dim, dim), np.nan))

        # The mixture object itself only receives the active components.
        mix_obj.mu = [theta_temp[j] for j in active]
        mus.append(theta_temp)
        mix_obj.sigma = [sigma_temp[j] for j in active]

        # Jitter each active weight by a factor in [0.9, 1.1), renormalise.
        p_ = np.array([(0.2 * np.random.rand() + 0.9) * weights[j]
                       for j in active])
        p_ /= np.sum(p_)
        mix_obj.p = p_
        mix_obj.d = dim

        Y_, x_ = mix_obj.simulate_data2(n_cells)
        Y.append(Y_)
        x.append(x_)

        if not silent:
            print("done")
            sys.stdout.flush()

    mus = np.array(mus)

    return Y, act_class, mus.T, x
        mix.sample()
        mus[i, :data.shape[1]] = mix.mu[0]
        mus[i, data.shape[1]:] = mix.mu[1]
    t1 = time.time()
    if 1:
        for k in range(mix.K):
            plt.plot(mix.data[mix.x == k, 0], mix.data[mix.x == k, 1], 'o')

        plt.figure()
        for k in range(mix.K):
            plt.plot(mus[:, (2 * k):(2 * (k + 1))])

        plt.show()

    print("mixture took %.4f sec" % (t1 - t0))
    mix2 = GMM.mixture(data, K)
    mus = np.zeros((sim, 4))
    t0 = time.time()
    for i in range(sim):
        mix2.sample()
    t1 = time.time()
    print("Python mixture took %.4f sec" % (t1 - t0))
    if 0:
        import pstats, cProfile

        import pyximport
        pyximport.install()

        import bayesianmixture.distributions.rng_cython as rng_cython

        #cProfile.runctx("rng_cython.sample_mu_rep(np.sum(mix.data[mix.x == 0 ,:],1),mix.sigma[0],mix.prior[0]['mu']['theta'].reshape(mix.d),mix.prior[0]['mu']['sigma'],npr.rand(mix.d),10000)", globals(), locals(), "Profile.prof")
     mix.sample()
     mus[i,:data.shape[1]] = mix.mu[0]
     mus[i,data.shape[1]:] = mix.mu[1]
 t1 = time.time()
 if 1:
     for k in range(mix.K):
         plt.plot(mix.data[mix.x==k,0],mix.data[mix.x==k,1],'o')
     
     plt.figure()
     for k in range(mix.K):
         plt.plot(mus[:,(2*k):(2*(k+1))])
         
     plt.show()
 
 print("mixture took %.4f sec"%(t1-t0))
 mix2 = GMM.mixture(data,K)
 mus = np.zeros((sim,4))
 t0 = time.time()
 for i in range(sim):
     mix2.sample()
 t1 = time.time()
 print("Python mixture took %.4f sec"%(t1-t0))
 if 0:
     import pstats, cProfile
 
     import pyximport
     pyximport.install()
     
     import bayesianmixture.distributions.rng_cython as rng_cython
 
     #cProfile.runctx("rng_cython.sample_mu_rep(np.sum(mix.data[mix.x == 0 ,:],1),mix.sigma[0],mix.prior[0]['mu']['theta'].reshape(mix.d),mix.prior[0]['mu']['sigma'],npr.rand(mix.d),10000)", globals(), locals(), "Profile.prof")
# Example no. 10
# 0
def simulate_data_(thetas, sigma_theta, sigmas, weights, nu=100,
                   ratio_act=None, n_cells=5 * 10**4, n_persons=40,
                   seed=None, silent=True):
    """
    Simulate per-person Gaussian-mixture data from user-supplied parameters.

    Cluster ``k`` is switched on for the first
    ``ceil(n_persons * ratio_act[k])`` persons.  For every person, each
    active cluster gets its own mean (``thetas[k]`` plus a draw from
    ``util.rmvn`` with covariance ``sigma_theta[k]``) and its own covariance
    drawn with ``wishart.invwishartrand(nu, (nu - dim - 1) * sigmas[k])``.
    The active weights are jittered by a uniform factor in [0.9, 1.1) and
    renormalised before ``simulate_data2`` is called.

    Parameters
    ----------
    thetas : list of ndarray
        Latent cluster means; the dimension is taken from
        ``thetas[0].shape[0]``.
    sigma_theta : list
        Between-person covariance of each cluster mean.
    sigmas : list
        Latent cluster covariances.
    weights : sequence of float
        Base mixture weights; its length defines the number of clusters.
    nu : number
        First argument passed to ``wishart.invwishartrand``.
    ratio_act : sequence of float or None
        Fraction of persons for which each cluster is active; ``None``
        activates every cluster for every person.
    n_cells : int
        Number of observations requested per person.
    n_persons : int
        Number of persons to simulate.
    seed : int or None
        Seed passed to ``npr.seed``; ``None`` leaves the RNG state untouched.
    silent : bool
        When false, print a progress line for every person.

    Returns
    -------
    Y : list
        Per person, the first value returned by ``simulate_data2``.
    act_class : ndarray, shape (n_persons, K)
        1.0 where a cluster is active for a person, 0.0 otherwise.
    mus : ndarray
        Transpose of the stacked per-person cluster means (inactive clusters
        are filled with NaN).
    x : list
        Per person, the second value returned by ``simulate_data2``
        (presumably the component labels -- confirm against GMM).
    """
    # Seed only when a seed is supplied.  The previous guard was inverted
    # (``if seed is None``), which silently ignored every explicit seed.
    if seed is not None:
        npr.seed(seed)

    K = len(weights)
    dim = thetas[0].shape[0]
    if ratio_act is None:
        ratio_act = np.ones(K)

    act_class = np.zeros((n_persons, K))
    for k in range(K):
        # np.ceil returns a float; slice bounds must be real integers.
        n_active = int(np.ceil(n_persons * ratio_act[k]))
        act_class[:n_active, k] = 1.

    Y = []
    x = []
    mus = []

    for i in range(n_persons):

        if not silent:
            print("setting up person_{i}: ".format(i=i), end='')
            sys.stdout.flush()

        active = np.where(act_class[i, :] == 1)[0]
        mix_obj = GMM.mixture(K=int(act_class[i, :].sum()))

        # Keep one slot per cluster (NaN when inactive) so that the returned
        # means line up across persons.
        theta_temp = []
        sigma_temp = []
        for j in range(K):
            if act_class[i, j] == 1:
                theta_temp.append(
                    thetas[j] +
                    util.rmvn(np.zeros((dim, 1)), sigma_theta[j]))
                sigma_temp.append(
                    wishart.invwishartrand(nu, (nu - dim - 1) * sigmas[j]))
            else:
                theta_temp.append(np.full(dim, np.nan))
                sigma_temp.append(np.full((dim, dim), np.nan))

        # The mixture object itself only receives the active components.
        mix_obj.mu = [theta_temp[j] for j in active]
        mus.append(theta_temp)
        mix_obj.sigma = [sigma_temp[j] for j in active]

        # Jitter each active weight by a factor in [0.9, 1.1), renormalise.
        p_ = np.array([(0.2 * np.random.rand() + 0.9) * weights[j]
                       for j in active])
        p_ /= np.sum(p_)
        mix_obj.p = p_
        mix_obj.d = dim

        Y_, x_ = mix_obj.simulate_data2(n_cells)
        Y.append(Y_)
        x.append(x_)

        if not silent:
            print("done")
            sys.stdout.flush()

    mus = np.array(mus)

    return Y, act_class, mus.T, x
def simulate_data(nCells=5 * 10**4,
                  nPersons=40,
                  seed=123456,
                  ratio_P=(1., 1., 0.8, 0.1)):
    """
    Simulate a hierarchical Gaussian-mixture data set for ``nPersons`` persons.

    Four latent classes in three dimensions are hard-coded below (weights
    ``P``, latent means ``Thetas``, between-person mean covariances
    ``Z_Sigma`` and base covariances ``Sigmas``).  Class ``i`` is switched on
    for the first ``ceil(nPersons * ratio_P[i])`` persons.  For every person,
    each active class receives its own mean (latent mean plus a
    multivariate-normal perturbation) and its own covariance drawn with
    ``wishart.invwishartrand``; the active weights are jittered by a uniform
    factor in [0.9, 1.1) and renormalised before the data are simulated.

    Parameters
    ----------
    nCells : int
        Number of observations requested from ``mix_obj.simulate_data`` for
        each person.
    nPersons : int
        Number of persons (independent mixtures) to simulate.
    seed : int or None
        Seed passed to ``npr.seed``; ``None`` leaves the RNG state untouched.
    ratio_P : sequence of 4 floats
        Fraction of persons for which each class is active.

    Returns
    -------
    Y : list
        One entry per person: whatever ``mix_obj.simulate_data(nCells)``
        returns.
    act_Class : ndarray, shape (nPersons, 4)
        1.0 where a class is active for a person, 0.0 otherwise.
    mus : ndarray
        Transposed array of the per-person class means, i.e. shape
        (3, 4, nPersons) when ``nPersons > 0``; inactive classes are NaN.
    Thetas, Sigmas, P : list
        The hard-coded latent means, base covariances and weights.
    """
    if seed is not None:
        npr.seed(seed)

    nClass = 4
    dim = 3
    P = [0.49, 0.3, 0.2, 0.01]
    Thetas = [
        np.array([0., 0, 0]),
        np.array([0, -2, 1]),
        np.array([1., 2, 0]),
        np.array([-2, 2, 1.5]),
    ]
    Z_Sigma = [
        np.array([[1.27, 0.25, 0], [0.25, 0.27, -0.001], [0., -0.001, 0.001]]),
        np.array([[0.06, 0.04, -0.03], [0.04, 0.05, 0], [-0.03, 0., 0.09]]),
        np.array([[0.44, 0.08, 0.08], [0.08, 0.16, 0], [0.08, 0., 0.16]]),
        0.01 * np.eye(dim),
    ]
    Sigmas = [
        0.1 * np.eye(dim),
        0.1 * spl.toeplitz([2., 0.5, 0]),
        0.1 * spl.toeplitz([2., -0.5, 1]),
        0.1 * spl.toeplitz([1., .3, .3]),
    ]

    act_Class = np.zeros((nPersons, nClass))
    for i in range(nClass):
        # np.ceil returns a float; slice bounds must be real integers.
        n_active = int(np.ceil(nPersons * ratio_P[i]))
        act_Class[:n_active, i] = 1.

    Y = []
    nu = 100
    mus = []
    for i in range(nPersons):
        active = np.where(act_Class[i, :] == 1)[0]
        mix_obj = GMM.mixture(K=int(act_Class[i, :].sum()))

        # Keep one slot per class (NaN when inactive) so that the returned
        # means line up across persons.
        theta_temp = []
        sigma_temp = []
        for j in range(nClass):
            if act_Class[i, j] == 1:
                theta_temp.append(
                    Thetas[j] +
                    npr.multivariate_normal(np.zeros(dim), Z_Sigma[j]))
                sigma_temp.append(
                    wishart.invwishartrand(nu, (nu - dim - 1) * Sigmas[j]))
            else:
                theta_temp.append(np.full(dim, np.nan))
                sigma_temp.append(np.full((dim, dim), np.nan))

        # The mixture object itself only receives the active components.
        mix_obj.mu = [theta_temp[j] for j in active]
        mus.append(theta_temp)
        mix_obj.sigma = [sigma_temp[j] for j in active]

        # Jitter each active weight by a factor in [0.9, 1.1), renormalise.
        p_ = np.array([(0.2 * np.random.rand() + 0.9) * P[j] for j in active])
        p_ /= np.sum(p_)
        mix_obj.p = p_
        mix_obj.d = dim
        Y.append(mix_obj.simulate_data(nCells))

    mus = np.array(mus)
    return Y, act_Class, mus.T, Thetas, Sigmas, P