def test_update_centers(self):
    """
    Check that ``update_centers`` reproduces the reference centers.

    The model is seeded with the reference centers from
    ``generate_cluster_samples`` so the point assignment is deterministic.
    The stored centers are then zeroed, ``update_centers`` is invoked with
    those assignments, and the result must match the reference centers to
    within ``EPS``.
    """
    X, _, centers = generate_cluster_samples()
    n_features = X.shape[1]
    k = centers.shape[0]

    kmeans = KMeans(k, N_ITER)

    # Seed the model with the reference centers so that the assignment
    # below does not depend on any random initialization.
    kmeans.cluster_centers = centers
    assignments, distances = kmeans.assign_points(X)
    assignments = kmeans.reinitialize_empty_clusters(
        X, assignments, distances)

    # Wipe the centers so a passing test can only come from
    # update_centers writing new values, not from the seeded ones.
    kmeans.cluster_centers = np.zeros((k, n_features))
    kmeans.update_centers(X, assignments)

    # Frobenius norm of the (estimated - reference) center matrix,
    # scaled by the number of clusters.
    error = np.linalg.norm(kmeans.cluster_centers - centers) / k
    self.assertLess(error, EPS)
def test_assign_points(self):
    """
    Check the output of ``assign_points`` on the generated sample set.

    With the model seeded with the reference centers, the returned
    assignment and distance arrays must be one-dimensional with one entry
    per sample, every assignment must be a valid cluster index, and the
    samples must land in the clusters hard-coded below.
    """
    X, y, centers = generate_cluster_samples()
    n_samples, k = X.shape[0], centers.shape[0]

    kmeans = KMeans(k, N_ITER)
    # Use the reference centers so the outcome is deterministic.
    kmeans.cluster_centers = centers

    assignments, distances = kmeans.assign_points(X)

    # Both results are flat arrays holding one value per sample
    # (assignments are checked first, then distances).
    for result in (assignments, distances):
        self.assertEqual(result.ndim, 1)
        self.assertEqual(result.shape[0], n_samples)

    # Every assignment is a valid cluster index: 0 <= idx < k.
    in_range = (assignments < k) & (assignments >= 0)
    self.assertTrue(np.all(in_range))

    # Expected layout of the fixture: rows 0-24 belong to cluster 0,
    # 25-49 to cluster 1, 50-74 to cluster 2, and 75 onward to cluster 3.
    expected_rows = (
        slice(None, 25),
        slice(25, 50),
        slice(50, 75),
        slice(75, None),
    )
    for cluster_idx, rows in enumerate(expected_rows):
        self.assertTrue(np.all(assignments[rows] == cluster_idx))
def test_reinitialize_empty_clusters(self):
    """
    Check that ``reinitialize_empty_clusters`` leaves no cluster empty.

    The points from row 75 onward are forced into cluster 2, after which
    ``reinitialize_empty_clusters`` is called. The set of cluster indices
    in the returned assignments must equal ``{0, ..., k - 1}``.
    """
    X, _, centers = generate_cluster_samples()
    k = centers.shape[0]

    kmeans = KMeans(k, N_ITER)

    # Seed the model with the reference centers so that the assignment
    # below does not depend on any random initialization.
    kmeans.cluster_centers = centers
    assignments, distances = kmeans.assign_points(X)

    # Move the trailing samples into cluster 2 to create an empty cluster.
    # NOTE(review): assumes rows 75 onward are exactly the members of
    # cluster 3 in the generated fixture -- confirm against
    # generate_cluster_samples.
    assignments[75:] = 2

    # Reinitialize empty clusters by reassigning points.
    assignments = kmeans.reinitialize_empty_clusters(
        X, assignments, distances)

    # Set equality checks both directions at once: every cluster has at
    # least one point, and no index outside range(k) is used.
    self.assertSetEqual(set(assignments), set(range(k)))