def test_seuclidean():
    """Check that KMedoids with metric="seuclidean" emits no warnings.

    ``fit``, ``predict`` and ``transform`` are each called on the same
    four-sample, single-feature array, and the test fails if any warning
    is recorded while they run.
    """
    import warnings

    X_small = np.array([0, 0, 0, 1]).reshape((4, 1))
    # ``pytest.warns(None)`` was deprecated in pytest 7 and is rejected by
    # pytest 8, so warnings are recorded with the standard library instead.
    with warnings.catch_warnings(record=True) as record:
        # Make sure an inherited filter cannot hide a warning from us.
        warnings.simplefilter("always")
        km = KMedoids(2, metric="seuclidean", method="pam")
        km.fit(X_small)
        km.predict(X_small)
        km.transform(X_small)
    # Include the messages so a failure says which warning was raised.
    assert len(record) == 0, [str(w.message) for w in record]
def test_kmedoid_results(method, init, dtype):
    """Check clustering accuracy and dtype preservation on ``X_cc``.

    The expected labelling is 50 zeros followed by 50 ones. Cluster ids
    may come out swapped, so either the accuracy or its complement has to
    exceed 0.8. The cluster centers and the transformed data must keep
    the input ``dtype``.
    """
    truth = np.hstack([np.zeros(50), np.ones(50)])
    km = KMedoids(n_clusters=2, init=init, method=method, random_state=rng)
    km.fit(X_cc.astype(dtype))

    # The data are not perfectly separable, so the accuracy is not 1;
    # it is around 0.85.
    accuracy = np.mean(km.labels_ == truth)
    assert (accuracy > 0.8) or (1 - accuracy > 0.8)

    centers_type = np.dtype(km.cluster_centers_.dtype).type
    assert dtype is centers_type
    transformed_type = np.dtype(km.transform(X_cc.astype(dtype)).dtype).type
    assert dtype is transformed_type
def test_kmedoids_fit_predict_transform():
    """Check that the combined fit_* methods agree with their split forms.

    ``fit_predict`` must match both ``labels_`` and a later ``predict``;
    ``fit_transform`` must match a later ``transform`` and have one column
    per cluster.
    """
    estimator = KMedoids(random_state=np.random.RandomState(seed))

    fitted_labels = estimator.fit_predict(X)
    assert len(fitted_labels) == 100
    assert_array_equal(fitted_labels, estimator.labels_)
    assert_array_equal(fitted_labels, estimator.predict(X))

    fitted_distances = estimator.fit_transform(X)
    assert_array_equal(fitted_distances.shape, (100, estimator.n_clusters))
    assert_array_equal(fitted_distances, estimator.transform(X))
def test_kmedoids_fit_naive():
    """Fit three clusters on three points: every point is its own medoid."""
    n_clusters = 3
    points = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
    Xnaive = np.asarray(points)

    model = KMedoids(n_clusters=n_clusters, metric="euclidean")
    model.fit(Xnaive)

    assert_array_equal(model.cluster_centers_, points)
    assert_array_equal(model.labels_, [0, 1, 2])
    assert model.inertia_ == 0.0

    # Each point is at distance zero from its own medoid and at a strictly
    # positive distance from the other two.
    distances = model.transform(Xnaive)
    for row in range(n_clusters):
        for col in range(n_clusters):
            if row == col:
                assert distances[row, col] == 0
            else:
                assert distances[row, col] > 0
def test_precomputed():
    """Exercise KMedoids with metric="precomputed" on distance matrices."""
    random_state = np.random.RandomState(seed)

    train_points = [[1.0, 0.0], [1.1, 0.0], [0.0, 1.0], [0.0, 1.1]]
    train_dist = euclidean_distances(train_points)
    query_points = [[1.1, 0.0], [0.0, 0.9]]
    # Rows are the query points, columns are the training points.
    query_dist = euclidean_distances(query_points, train_points)

    model = KMedoids(
        metric="precomputed", n_clusters=2, random_state=random_state
    )
    model.fit(train_dist)

    assert_allclose(model.inertia_, 0.2)
    assert_array_equal(model.medoid_indices_, [2, 0])
    assert_array_equal(model.labels_, [1, 1, 0, 0])
    # With precomputed distances there are no feature-space centers.
    assert model.cluster_centers_ is None

    # Training points 0-1 and 2-3 form the two groups, so ``index // 2``
    # names the group that a medoid belongs to.
    first_medoid, second_medoid = tuple(model.medoid_indices_)
    expected_labels = [first_medoid // 2, second_medoid // 2]
    assert_array_equal(model.predict(query_dist), expected_labels)

    # transform() selects the distance columns of the medoids.
    assert_array_equal(
        model.transform(query_dist), query_dist[:, model.medoid_indices_]
    )