def test_random_cluster_centroid():
    """Check random_cluster_centroid() against a value pinned by random.seed(0).

    The per-feature bounds are overridden by hand: the first two features have
    identical min and max, so only the third feature (bounds 3.0 to 6.0) can
    vary in the expected centroid.
    """
    data = [[1, 2, 3], [4, 5, 6]]
    mask = [[0, 1, 1], [1, 0, 1]]
    n_clusters = 2
    expected_centroid = [4.0, 2.0, 4.2617147424925346]

    model = KMeans(data, mask, n_clusters)
    model.mins = [4.0, 2.0, 3.0]
    model.maxs = [4.0, 2.0, 6.0]

    # Seed immediately before the call so the drawn values are reproducible.
    random.seed(0)
    centroid = model.random_cluster_centroid()

    assert numpy.array_equal(expected_centroid, centroid)
def test_random_cluster_centroid():
    """Verify the centroid returned by random_cluster_centroid() for seed 0.

    mins and maxs are set manually after construction. Features 0 and 1 have
    min == max, so the expected centroid pins them to 4.0 and 2.0; feature 2
    is expected to land inside its 3.0 to 6.0 bounds.
    """
    observations = [[1, 2, 3], [4, 5, 6]]
    observed_mask = [[0, 1, 1], [1, 0, 1]]
    cluster_count = 2

    clusterer = KMeans(observations, observed_mask, cluster_count)
    clusterer.mins = [4.0, 2.0, 3.0]
    clusterer.maxs = [4.0, 2.0, 6.0]

    wanted = [4.0, 2.0, 4.2617147424925346]

    # Fix the random state right before drawing the centroid.
    random.seed(0)
    drawn = clusterer.random_cluster_centroid()

    assert numpy.array_equal(wanted, drawn)
def test_update():
    """Exercise KMeans.update() in three scenarios.

    1. Every cluster has at least one point assigned.
    2. One cluster is empty and the model was built with 'random' as its
       fourth positional argument (presumably resolve_empty - confirm
       against the KMeans signature).
    3. One cluster is empty and resolve_empty='singleton'.

    The per-cluster index lists have different lengths, so the assignment
    arrays are built with dtype=object. Without it, NumPy >= 1.24 raises
    ValueError for ragged nested sequences (older versions silently produced
    the same 1-D object array of lists).
    """
    # Normal case
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2
    kmeans = KMeans(X, M, K)
    # points 0,1 to cluster 0, point 2 to cluster 1
    kmeans.data_point_assignments = numpy.array([[0, 1], [2]], dtype=object)
    kmeans.centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mask_centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]

    # Expected: cluster 0 has no observed entry in the last feature, so both
    # its centroid value and its mask there are 0.
    new_centroids = [[1.0, 3.5, 0], [7.0, 8.0, 9.0]]
    new_mask_centroids = [[1, 1, 0], [1, 1, 1]]
    kmeans.update()
    assert numpy.array_equal(new_centroids, kmeans.centroids)
    assert numpy.array_equal(new_mask_centroids, kmeans.mask_centroids)

    # Case when one cluster has no points assigned to it - we then randomly
    # re-initialise that cluster
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2
    kmeans = KMeans(X, M, K, 'random')
    # points 0,1,2 to cluster 0, none to cluster 1
    kmeans.data_point_assignments = numpy.array([[0, 1, 2], []], dtype=object)
    kmeans.centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mask_centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mins = [1.0, 2.0, 9.0]
    kmeans.maxs = [7.0, 8.0, 9.0]

    new_centroids = [[4.0, 5.0, 9.0], [6.066531109150288, 6.547726417641815, 9.0]]
    new_mask_centroids = [[1, 1, 1], [1, 1, 1]]
    # Seed right before update() so the re-initialised centroid is reproducible.
    random.seed(0)
    kmeans.update()
    assert numpy.array_equal(new_centroids, kmeans.centroids)
    assert numpy.array_equal(new_mask_centroids, kmeans.mask_centroids)

    # Case when we use the 'singleton' option for empty clusters - reassign
    # point furthest away to the cluster.
    # Points 0 and 1 go to cluster 0, 2 to cluster 1 and none to cluster 2.
    # Point 2 is furthest away, so gets reassigned to cluster 2 - making
    # cluster 1 empty.
    # Then point 1 is furthest away and gets reassigned to cluster 1
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 3
    kmeans = KMeans(X, M, K, resolve_empty='singleton')
    # points 0,1 to cluster 0, 2 to cluster 1, none to cluster 2
    kmeans.data_point_assignments = numpy.array([[0, 1], [2], []], dtype=object)
    kmeans.cluster_assignments = [0, 0, 1]
    kmeans.centroids = [[1.0, 2.0, 3.0], [15.0, 16.0, 17.0], [500.0, 500.0, 500.0]]
    kmeans.mask_centroids = [[1, 1, 0], [1, 1, 1], [1, 1, 1]]
    # Distance of each point to the centroid of its currently assigned cluster.
    kmeans.distances = numpy.array([
        kmeans.compute_MSE(kmeans.X[0], kmeans.centroids[0], M[0], kmeans.mask_centroids[0]),
        kmeans.compute_MSE(kmeans.X[1], kmeans.centroids[0], M[1], kmeans.mask_centroids[0]),
        kmeans.compute_MSE(kmeans.X[2], kmeans.centroids[1], M[2], kmeans.mask_centroids[1]),
    ])
    kmeans.mins = [1.0, 2.0, 9.0]
    kmeans.maxs = [7.0, 8.0, 9.0]

    new_centroids = [[1.0, 2.0, 0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]
    new_data_point_assignments = [[0], [1], [2]]
    new_distances = [0, 0, 0]
    kmeans.update()
    assert new_data_point_assignments == list(kmeans.data_point_assignments)
    assert numpy.array_equal(new_distances, kmeans.distances)
    assert numpy.array_equal(new_centroids, kmeans.centroids)
def test_update():
    """Check KMeans.update() for populated and empty clusters.

    Three scenarios run in sequence, each on a fresh KMeans instance:

    * all clusters populated;
    * an empty cluster with 'random' passed as the fourth positional
      argument (presumably resolve_empty - confirm against KMeans);
    * an empty cluster with resolve_empty='singleton'.

    The cluster-to-points lists are ragged, so they are wrapped with
    dtype=object. NumPy >= 1.24 refuses to build an array from ragged nested
    sequences otherwise; with dtype=object the result is the same 1-D object
    array of lists that older NumPy versions produced implicitly.
    """
    # Normal case
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2
    kmeans = KMeans(X, M, K)
    kmeans.data_point_assignments = numpy.array(
        [[0, 1], [2]], dtype=object)  # points 0,1 to cluster 0, point 2 to cluster 1
    kmeans.centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mask_centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]

    # Neither point in cluster 0 has the last feature observed, hence the
    # expected 0 in both the centroid and its mask.
    new_centroids = [[1.0, 3.5, 0], [7.0, 8.0, 9.0]]
    new_mask_centroids = [[1, 1, 0], [1, 1, 1]]
    kmeans.update()
    assert numpy.array_equal(new_centroids, kmeans.centroids)
    assert numpy.array_equal(new_mask_centroids, kmeans.mask_centroids)

    # Case when one cluster has no points assigned to it - we then randomly
    # re-initialise that cluster
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 2
    kmeans = KMeans(X, M, K, 'random')
    kmeans.data_point_assignments = numpy.array(
        [[0, 1, 2], []], dtype=object)  # points 0,1,2 to cluster 0, none to cluster 1
    kmeans.centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mask_centroids = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    kmeans.mins = [1.0, 2.0, 9.0]
    kmeans.maxs = [7.0, 8.0, 9.0]

    new_centroids = [[4.0, 5.0, 9.0],
                     [6.066531109150288, 6.547726417641815, 9.0]]
    new_mask_centroids = [[1, 1, 1], [1, 1, 1]]
    # Fix the random state so the re-initialised centroid matches the
    # hard-coded expectation.
    random.seed(0)
    kmeans.update()
    assert numpy.array_equal(new_centroids, kmeans.centroids)
    assert numpy.array_equal(new_mask_centroids, kmeans.mask_centroids)

    # Case when we use the 'singleton' option for empty clusters - reassign
    # point furthest away to the cluster.
    # Points 0 and 1 go to cluster 0, 2 to cluster 1 and none to cluster 2.
    # Point 2 is furthest away, so gets reassigned to cluster 2 - making
    # cluster 1 empty.
    # Then point 1 is furthest away and gets reassigned to cluster 1
    X = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    M = numpy.array([[1, 1, 0], [0, 1, 0], [1, 1, 1]])
    K = 3
    kmeans = KMeans(X, M, K, resolve_empty='singleton')
    kmeans.data_point_assignments = numpy.array(
        [[0, 1], [2], []],
        dtype=object)  # points 0,1 to cluster 0, 2 to cluster 1, none to cluster 2
    kmeans.cluster_assignments = [0, 0, 1]
    kmeans.centroids = [[1.0, 2.0, 3.0], [15.0, 16.0, 17.0],
                        [500.0, 500.0, 500.0]]
    kmeans.mask_centroids = [[1, 1, 0], [1, 1, 1], [1, 1, 1]]
    # One distance per data point, measured to its currently assigned centroid.
    kmeans.distances = numpy.array([
        kmeans.compute_MSE(kmeans.X[0], kmeans.centroids[0], M[0],
                           kmeans.mask_centroids[0]),
        kmeans.compute_MSE(kmeans.X[1], kmeans.centroids[0], M[1],
                           kmeans.mask_centroids[0]),
        kmeans.compute_MSE(kmeans.X[2], kmeans.centroids[1], M[2],
                           kmeans.mask_centroids[1]),
    ])
    kmeans.mins = [1.0, 2.0, 9.0]
    kmeans.maxs = [7.0, 8.0, 9.0]

    new_centroids = [[1.0, 2.0, 0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]
    new_data_point_assignments = [[0], [1], [2]]
    new_distances = [0, 0, 0]
    kmeans.update()
    assert new_data_point_assignments == list(kmeans.data_point_assignments)
    assert numpy.array_equal(new_distances, kmeans.distances)
    assert numpy.array_equal(new_centroids, kmeans.centroids)