Exemplo n.º 1
0
 def test_negative_binomial(self):
     """
     Test the NB log-likelihood (nb_ll) and NB clustering (nb_cluster)
     on data simulated from known P and R parameter matrices.
     """
     P = np.array([[0.5, 0.4, 0.8],
                   [0.5, 0.3, 0.7],
                   [0.5, 0.3, 0.9]])
     R = np.array([[1., 8., 10.],
                   [2., 8., 24.],
                   [3., 6., 30.]])
     data, labels = simulation.generate_nb_data(P, R, 100)
     data = data.astype(float)

     # Log-likelihood: expected shape, and no NaN/inf entries.
     ll = nb_ll(data, P, R)
     self.assertEqual(ll.shape, (100, 3))
     self.assertFalse(np.isnan(ll).any())
     self.assertFalse(np.isinf(ll).any())
     # TODO: test derivative

     # NB clustering. The fitted parameters are often not close to the
     # true ones, so only their shapes and NaN-freeness are checked.
     a, p, r = nb_cluster(data, 3)
     self.assertEqual(p.shape, P.shape)
     self.assertEqual(r.shape, R.shape)
     self.assertFalse(np.isnan(p).any())
     self.assertFalse(np.isnan(r).any())
     # assert that all the points aren't being put into
     # the same cluster.
     self.assertFalse((a == a[0]).all())
     # assertGreater reports the actual purity value when it fails.
     self.assertGreater(purity(labels, a), 0.8)
Exemplo n.º 2
0
 def test_cluster(self):
     """
     Run Poisson clustering with 2 clusters on self.data and check the
     output dimensions, that the centers are NaN-free, and that purity
     against self.labs exceeds 0.8.
     """
     data = self.data
     assignments, centers = uncurl.poisson_cluster(data, 2)
     # One assignment per column of data; one center row per row of data.
     self.assertEqual(assignments.shape[0], data.shape[1])
     self.assertEqual(centers.shape[0], data.shape[0])
     # just checking that the values are valid
     self.assertFalse(np.isnan(centers).any())
     # assertGreater reports the actual purity value when it fails.
     self.assertGreater(purity(assignments, self.labs), 0.8)
Exemplo n.º 3
0
 def test_simulation(self):
     """
     Basically this is to test that the Poisson EM can correctly separate
     clusters in simulated data (passed in as a sparse CSC matrix).
     """
     centers = np.array([[1, 10, 20], [1, 11, 1], [50, 1, 100]])
     centers = centers.astype(float)
     data, labs = generate_poisson_data(centers, 500)
     data = sparse.csc_matrix(data.astype(float))
     assignments, c_centers = uncurl.poisson_cluster(data, 3)
     # Pairwise distances between true and estimated centers. Nothing is
     # asserted on them; the loop only exercises uncurl.poisson_dist.
     distances = np.zeros((3, 3))
     for i in range(3):
         for j in range(3):
             distances[i, j] = uncurl.poisson_dist(centers[:, i],
                                                   c_centers[:, j])
     # Compute purity once and reuse it for both the log and the check.
     score = purity(assignments, labs)
     print(assignments)
     print(labs)
     print(score)
     # assertGreater reports the actual purity value when it fails.
     self.assertGreater(score, 0.65)
Exemplo n.º 4
0
 def test_zip_simulation(self):
     """
     ZIP clustering on poisson-simulated data
     """
     # The 0.1 entries make this a float array already, so no cast is
     # needed before simulating.
     centers = np.array([[0.1, 10, 20], [0.1, 11, 0.1], [50, 0.1, 100]])
     data, labs = generate_poisson_data(centers, 500)
     data = data.astype(float)
     # Only the cluster assignments are checked; the other two returned
     # values are discarded.
     assignments, _, _ = uncurl.zip_cluster(data, 3)
     # assertGreater reports the actual purity value when it fails.
     self.assertGreater(purity(assignments, labs), 0.8)
Exemplo n.º 5
0
 def test_zip_simulation_2(self):
     """
     ZIP clustering on ZIP-simulated data
     """
     # Parameters are drawn at random and unseeded; they are printed so a
     # failing run can be inspected.
     centers = np.random.randint(10, 1000, (3, 3))
     L = np.random.random((3, 3))
     print(centers)
     print(L)
     centers = centers.astype(float)
     data, labs = generate_zip_data(centers, L, 1000)
     data = data.astype(float)
     print(data)
     assignments, c_centers, c_zeros = uncurl.zip_cluster(data, 3)
     # Pairwise distances between true and estimated centers. Nothing is
     # asserted on them; the loop only exercises uncurl.poisson_dist.
     distances = np.zeros((3, 3))
     for i in range(3):
         for j in range(3):
             distances[i, j] = uncurl.poisson_dist(centers[:, i],
                                                   c_centers[:, j])
     print(c_centers)
     print(c_zeros)
     # Compute purity once and reuse it for both the log and the check.
     score = purity(assignments, labs)
     print(score)
     # assertGreater reports the actual purity value when it fails.
     self.assertGreater(score, 0.6)
Exemplo n.º 6
0
 def test_random_1(self):
     """
     Test NB state estimation with random parameters

     The estimator is initialized from the true means plus small uniform
     noise and given the true R; the argmax of the estimated weights must
     agree with the argmax of the true weights with purity above 0.7.
     """
     M, W, R = simulation.generate_nb_states(2, 200, 20)
     data = simulation.generate_nb_state_data(M, W, R)
     # Perturb the true means by uniform noise in [-0.05, 0.05).
     M_noised = M + 0.1*(np.random.random(M.shape) - 0.5)
     M_, W_, R_, ll = nb_state_estimation.nb_estimate_state(
         data, 2, init_means=M_noised, R=R, disp=False)
     # Compare the argmax along axis 0 of the true and estimated weights.
     c1 = W.argmax(0)
     c2 = W_.argmax(0)
     p = purity(c2, c1)
     print(p)
     print(data)
     print(M)
     print(M_)
     # assertGreater reports the actual purity value when it fails.
     self.assertGreater(p, 0.7)
Exemplo n.º 7
0
if __name__ == '__main__':
    dat = loadmat('data/SCDE_test.mat')
    data = dat['dat'].toarray()
    centers, assignments = uncurl.kmeans_pp(data, 2)
    lls = uncurl.poisson_ll(data, centers)
    # Poisson clustering
    assignments_poisson, centers = uncurl.poisson_cluster(data,
                                                          2,
                                                          init=centers)
    # NB clustering
    assignments_nb, P, R = uncurl.nb_cluster(data, 2)
    # ZIP clustering
    assignments_zip, M, L = uncurl.zip_cluster(data, 2)
    true_labs = dat['Lab'][0]
    print 'poisson purity:', purity(assignments_poisson, true_labs)
    print 'NB purity:', purity(assignments_nb, true_labs)
    print 'ZIP purity:', purity(assignments_zip, true_labs)
    # State estimation
    means, weights, ll = uncurl.poisson_estimate_state(data, 2, disp=False)
    w_classes = weights.argmax(0)
    print 'W argmax purity:', purity(w_classes, true_labs)
    # dimensionality reduction
    X = uncurl.dim_reduce(means, weights, 2)
    proj = np.dot(X, weights)
    # plotting dimensionality reduction
    plt.cla()
    # weight plot
    plt.title('Dimensionality reduction plot - assigned weight labels')
    plt.scatter(proj[0, :], proj[1, :], s=100, cmap='seismic', c=weights[0, :])
    plt.xlabel('dim 1')