示例#1
0
    def test_update_centers(self):
        """
        Tests that update_centers reproduces the reference cluster centers.

        Points are first assigned using the reference centers returned by
        generate_cluster_samples(). The model's centers are then zeroed and
        recomputed from those assignments, and the result is compared with
        the reference centers.
        """
        X, _, centers = generate_cluster_samples()
        n_features = X.shape[1]
        k = centers.shape[0]

        kmeans = KMeans(k, N_ITER)

        # Set cluster centers so that assignment is deterministic
        kmeans.cluster_centers = centers
        assignments, distances = kmeans.assign_points(X)
        assignments = kmeans.reinitialize_empty_clusters(
            X, assignments, distances)

        # Zero the centers so the comparison below cannot pass simply
        # because the reference values were still stored on the model
        kmeans.cluster_centers = np.zeros((k, n_features))
        kmeans.update_centers(X, assignments)

        # 2-norm of the (flattened) difference between the estimated and
        # the reference centers, divided by the number of clusters
        error = np.linalg.norm(kmeans.cluster_centers - centers) / k
        self.assertLess(error, EPS)
示例#2
0
    def test_assign_points(self):
        """
        Tests assign_points with preset cluster centers.

        Checks the shape of the returned assignment and distance arrays,
        that every assignment is a valid cluster index, and that each
        block of samples is assigned to the expected cluster.
        """
        samples, _, true_centers = generate_cluster_samples()
        n_clusters = true_centers.shape[0]
        n_points = samples.shape[0]

        model = KMeans(n_clusters, N_ITER)

        # Use the reference centers so the assignment is deterministic
        model.cluster_centers = true_centers
        labels, dists = model.assign_points(samples)

        # Both outputs must be 1-D with one entry per sample
        for result in (labels, dists):
            self.assertEqual(result.ndim, 1)
            self.assertEqual(result.shape[0], n_points)

        # Every label must be a valid cluster index (0 <= idx < n_clusters)
        within_range = np.logical_and(labels >= 0, labels < n_clusters)
        self.assertTrue(np.all(within_range))

        # Each block of samples must be assigned to its expected cluster
        expected_blocks = (
            (slice(None, 25), 0),
            (slice(25, 50), 1),
            (slice(50, 75), 2),
            (slice(75, None), 3),
        )
        for block, cluster in expected_blocks:
            self.assertTrue(np.all(labels[block] == cluster))
示例#3
0
    def test_reinitialize_empty_clusters(self):
        """
        Tests reassignment of points to empty clusters.

        Cluster 3 is emptied by moving its points to cluster 2, then
        reinitialize_empty_clusters is expected to return assignments in
        which every cluster index in range(k) appears at least once.
        """
        X, _, centers = generate_cluster_samples()
        k = centers.shape[0]

        kmeans = KMeans(k, N_ITER)

        # Set cluster centers so that assignment is deterministic
        kmeans.cluster_centers = centers
        assignments, distances = kmeans.assign_points(X)

        # Reassign all points in cluster 3 to cluster 2 to create an empty
        # cluster. Selecting by label (rather than by sample position)
        # guarantees cluster 3 is empty whatever order the samples are in.
        assignments[assignments == 3] = 2

        # reinitialize empty clusters by reassigning points
        assignments = kmeans.reinitialize_empty_clusters(
            X, assignments, distances)

        # ensure that each cluster has an assigned point
        # and that only valid cluster indices are used
        self.assertSetEqual(set(assignments), set(range(k)))