def test_kmeans_constraint_weights_bigger(self):
     """Fit the 'weights' strategy on stacked blobs and inspect the outputs.

     Two blob samples are stacked: ``n_samples`` points generated with
     ``center_box=(-10.0, 0.0)`` and half as many generated with
     ``center_box=(0.0, 10.0)``. A four-cluster model with ``history=True``
     is fitted, then the test checks the shape of the predictions, the
     rank of ``cluster_centers_iter_`` and what ``cluster_edges()`` returns.
     """
     n_samples = 100
     # Everything the two make_blobs calls have in common.
     blob_options = dict(n_features=2, centers=2, cluster_std=1.0,
                         shuffle=True, random_state=2)
     # Only the first item returned by make_blobs is used.
     points_low = make_blobs(n_samples=n_samples,
                             center_box=(-10.0, 0.0), **blob_options)[0]
     points_high = make_blobs(n_samples=n_samples // 2,
                              center_box=(0.0, 10.0), **blob_options)[0]
     X = numpy.vstack([points_low, points_high])

     model = ConstraintKMeans(n_clusters=4, strategy='weights', history=True)
     model.fit(X)

     # Expect one prediction per row of X.
     labels = model.predict(X)
     self.assertEqual(labels.shape, (X.shape[0], ))

     # Expect the per-iteration centers to be a 3-dimensional array.
     centers_history = model.cluster_centers_iter_
     self.assertEqual(len(centers_history.shape), 3)

     # Expect a set of 5 items; only the first one is type-checked.
     edges = model.cluster_edges()
     self.assertIsInstance(edges, set)
     self.assertEqual(len(edges), 5)
     self.assertIsInstance(list(edges)[0], tuple)
    def test_kmeans_constraint_weights(self):
        """Fit the 'weights' strategy twice on four points while logging.

        Both fits send their log lines to the same ``BufferedPrint``
        through ``fLOG``; they differ only by ``verbose`` (10, then 5).
        The assertions on centers, labels, predictions, ``transform`` and
        ``score`` all run on the second model, and the buffer is finally
        checked to contain ``"CKMeans"``.
        """
        mat = numpy.array([[0, 0], [0.2, 0.2], [-0.1, -0.1], [1, 1]])
        # Settings common to both models.
        shared = dict(n_clusters=2, kmeans0=False, random_state=1,
                      strategy='weights')

        # First fit, with verbose=10.
        first = ConstraintKMeans(verbose=10, **shared)
        buf = BufferedPrint()
        first.fit(mat, fLOG=buf.fprint)

        # Second fit, with verbose=5, logging into the same buffer.
        km = ConstraintKMeans(verbose=5, **shared)
        km.fit(mat, fLOG=buf.fprint)

        # Fitted attributes of the second model.
        self.assertEqual(km.cluster_centers_.shape, (2, 2))
        self.assertLesser(km.inertia_, 4.55)
        expected_centers = numpy.array([[0.6, 0.6], [-0.05, -0.05]])
        self.assertEqual(km.cluster_centers_, expected_centers)
        expected_labels = numpy.array([1, 0, 1, 0])
        self.assertEqual(km.labels_, expected_labels)

        # The predictions are expected to differ from labels_ on row 1.
        predicted = km.predict(mat)
        self.assertEqual(predicted, numpy.array([1, 1, 1, 0]))

        # Shapes returned by transform and score.
        distances = km.transform(mat)
        self.assertEqual(distances.shape, (4, 2))
        scores = km.score(mat)
        self.assertEqual(scores.shape, (4, ))

        # Something mentioning "CKMeans" must have been logged.
        self.assertIn("CKMeans", str(buf))
 def test_kmeans_constraint_blobs20(self):
     """Fit the 'gain' strategy with balanced predictions on 30 blob points.

     Stacks 20 points generated with ``center_box=(-10.0, 0.0)`` and 10
     points generated with ``center_box=(0.0, 10.0)``, fits four clusters
     with ``history=True``, then compares ``labels_`` with the predictions
     made on the training data.
     """
     # Everything the two make_blobs calls have in common.
     blob_options = dict(n_features=2, centers=2, cluster_std=1.0,
                         shuffle=True, random_state=0)
     # Only the first item returned by make_blobs is used.
     points_low = make_blobs(n_samples=20, center_box=(-10.0, 0.0),
                             **blob_options)[0]
     points_high = make_blobs(n_samples=10, center_box=(0.0, 10.0),
                              **blob_options)[0]
     X = numpy.vstack([points_low, points_high])

     model = ConstraintKMeans(
         n_clusters=4, verbose=0, kmeans0=False, random_state=2,
         strategy='gain', balanced_predictions=True, history=True)
     model.fit(X)

     # Sum of the absolute differences between the fitted labels and the
     # predicted ones, checked with assertLesser against 6.
     predicted = model.predict(X)
     gap = numpy.abs(model.labels_ - predicted).sum()
     self.assertLesser(gap, 6)

     # Expect the per-iteration centers to be a 3-dimensional array.
     centers_history = model.cluster_centers_iter_
     self.assertEqual(len(centers_history.shape), 3)
 def test_kmeans_constraint_gain(self):
     """Fit the 'gain' strategy on four points and check the fitted model.

     Checks the shape and values of the centers, the inertia, the fitted
     labels and the predictions made on the training points.
     """
     points = numpy.array([[0, 0], [0.2, 0.2], [-0.1, -0.1], [1, 1]])
     model = ConstraintKMeans(
         n_clusters=2, verbose=0, kmeans0=False,
         random_state=1, strategy='gain')
     model.fit(points)

     # Fitted attributes.
     self.assertEqual(model.cluster_centers_.shape, (2, 2))
     self.assertEqualFloat(model.inertia_, 0.455)
     expected_centers = numpy.array([[0.6, 0.6], [-0.05, -0.05]])
     self.assertEqual(model.cluster_centers_, expected_centers)
     expected_labels = numpy.array([1, 0, 1, 0])
     self.assertEqual(model.labels_, expected_labels)

     # The predictions are expected to differ from labels_ on row 1.
     predicted = model.predict(points)
     self.assertEqual(predicted, numpy.array([1, 1, 1, 0]))
 def test_kmeans_constraint_gain3(self):
     """Fit three balanced clusters on six points with the 'gain' strategy.

     Checks the shape of the centers, that three given pairs of points
     share a label, and that predicting on the training points gives
     back ``labels_``.
     """
     points = numpy.array([[0, 0], [0.2, 0.2], [-0.1, -0.1],
                          [1, 1], [1.1, 0.9], [-1.1, 1.]])
     # NOTE(review): the comment previously attached here said to choose
     # random_state=2 to obtain the labels [1 1 0 2 2 0], a configuration
     # that can only be modified with a permutation of 3 elements. The
     # code passes random_state=1 and the assertions below pair the
     # points differently -- confirm which one is intended.
     model = ConstraintKMeans(
         n_clusters=3, verbose=0, kmeans0=False, random_state=1,
         strategy='gain', balanced_predictions=True)
     model.fit(points)
     self.assertEqual(model.cluster_centers_.shape, (3, 2))

     # Each pair of row indices must end up in the same cluster.
     fitted = model.labels_
     for left, right in ((1, 2), (0, 5), (3, 4)):
         self.assertEqual(fitted[left], fitted[right])

     # Predictions on the training points must equal the fitted labels.
     predicted = model.predict(points)
     self.assertEqualArray(predicted, fitted)
 def test_kmeans_constraint_sparse(self):
     """Fit the 'distance' strategy on a CSR matrix of four points.

     The expected labels, centers and predictions are written for both
     possible numberings of the two clusters; the label given to the
     first point selects which set is checked.
     """
     dense = numpy.array([[0, 0], [0.2, 0.2], [-0.1, -0.1], [1, 1]])
     mat = scipy.sparse.csr_matrix(dense)
     km = ConstraintKMeans(n_clusters=2, verbose=0, strategy='distance')
     km.fit(mat)
     self.assertEqual(km.cluster_centers_.shape, (2, 2))
     self.assertEqualFloat(km.inertia_, 0.455)

     # Pick the expected values matching the numbering the fit produced.
     if km.labels_[0] == 0:
         expected_labels = numpy.array([0, 1, 0, 1])
         expected_centers = numpy.array([[-0.05, -0.05], [0.6, 0.6]])
     else:
         expected_labels = numpy.array([1, 0, 1, 0])
         expected_centers = numpy.array([[0.6, 0.6], [-0.05, -0.05]])
     self.assertEqual(km.labels_, expected_labels)
     self.assertEqual(km.cluster_centers_, expected_centers)

     # Same selection for the predictions, made after predict has run.
     pred = km.predict(mat)
     expected_pred = (numpy.array([0, 0, 0, 1]) if km.labels_[0] == 0
                      else numpy.array([1, 1, 1, 0]))
     self.assertEqual(pred, expected_pred)
# Two models with balanced predictions that differ only by their strategy.
km1 = ConstraintKMeans(
    n_clusters=4, strategy='gain', balanced_predictions=True)
km1.fit(X)

km2 = ConstraintKMeans(
    n_clusters=4, strategy='distance', balanced_predictions=True)
km2.fit(X)

##########################
# This algorithm tries to exchange points
# between clusters.

# Cluster predicted for every point by each model, then the number of
# points per cluster.
cl1, cl2 = km1.predict(X), km2.predict(X)
hist1, hist2 = Counter(cl1), Counter(cl2)

# One panel per model, left for km1 and right for km2. The cluster indices
# drawn on both panels range up to the largest label predicted by km1.
fig, ax = plt.subplots(1, 2, figsize=(10, 4))
for i in range(max(cl1) + 1):
    for panel, assigned in zip(ax, (cl1, cl2)):
        in_cluster = assigned == i
        panel.plot(X[in_cluster, 0], X[in_cluster, 1],
                   colors[i] + '.', label='cl%d' % i)