def test_transform():
    km = KMeans(n_clusters=n_clusters)
    km.fit(X)
    X_new = km.transform(km.cluster_centers_)

    for c in range(n_clusters):
        # each center is at distance 0 from itself
        assert_equal(X_new[c, c], 0)
        for c2 in range(n_clusters):
            if c != c2:
                # and at a strictly positive distance from every other center
                assert_greater(X_new[c, c2], 0)
def test_n_init():
    """Check that increasing the number of init increases the quality"""
    n_runs = 5
    n_init_range = [1, 5, 10]

    inertia = np.zeros((len(n_init_range), n_runs))
    for i, n_init in enumerate(n_init_range):
        for j in range(n_runs):
            km = KMeans(n_clusters=n_clusters, n_init=n_init,
                        random_state=j).fit(X)
            inertia[i, j] = km.inertia_

    inertia = inertia.mean(axis=1)
    failure_msg = ("Inertia %r should be decreasing"
                   " when n_init is increasing.") % list(inertia)
    for i in range(len(n_init_range) - 1):
        assert_true(inertia[i] >= inertia[i + 1], failure_msg)
def test_predict():
    km = KMeans(n_clusters=n_clusters, random_state=42)
    km.fit(X)

    # sanity check: predict centroid labels
    pred = km.predict(km.cluster_centers_)
    assert_array_equal(pred, np.arange(n_clusters))

    # sanity check: re-predict labeling for training set samples
    pred = km.predict(X)
    assert_array_equal(pred, km.labels_)

    # re-predict labels for training set using fit_predict
    pred = km.fit_predict(X)
    assert_array_equal(pred, km.labels_)
def test_k_means_random_init():
    km = KMeans(init="random", n_clusters=n_clusters, random_state=42)
    km.fit(X)
    _check_fitted_model(km)
def test_score():
    km1 = KMeans(n_clusters=n_clusters, n_init=1, max_iter=1, random_state=42)
    s1 = km1.fit(X).score(X)
    km2 = KMeans(n_clusters=n_clusters, max_iter=10, random_state=42)
    s2 = km2.fit(X).score(X)
    # more iterations (and more inits) should yield a better, i.e. higher, score
    assert_greater(s2, s1)
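
# Note: the tests above rely on module-level fixtures -- the sample data `X`,
# the number of clusters `n_clusters`, and the `_check_fitted_model` helper --
# defined earlier in this file. A minimal sketch of what that setup could look
# like (illustrative values only, not the exact ones used in this module):
#
#     import numpy as np
#     from sklearn.cluster import KMeans
#     from sklearn.datasets import make_blobs
#
#     n_clusters = 3
#     X, _ = make_blobs(n_samples=100, centers=n_clusters, random_state=42)
#
#     def _check_fitted_model(km):
#         # one center per cluster, a label for every training sample,
#         # and a positive within-cluster sum of squares
#         assert km.cluster_centers_.shape == (n_clusters, X.shape[1])
#         assert len(np.unique(km.labels_)) == n_clusters
#         assert km.inertia_ > 0.0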