def test_works_with_simplified_dunn(self, _, n_clusters):
    """Check DunnSearch, called without inter/intra options, on generated data.

    A two-cluster ``KMeans`` (``init="kdtree"``) is wrapped in a
    ``DunnSearch`` capped at 10 clusters. The fitted search must report
    exactly ``n_clusters`` and its labels must reach an adjusted Rand score
    above 0.75 against the reference labels.

    ``_`` is unused here; presumably a case label supplied by a
    parameterizing decorator -- confirm against the decorator.
    """
    min_ari = 0.75
    # NOTE(review): data() is assumed to return (samples, reference labels).
    features, truth = data(n_clusters)
    base = KMeans(n_clusters=2, init="kdtree")
    searcher = DunnSearch(base, max_clusters=10)
    fitted = searcher.fit(features)
    ari = adjusted_rand_score(truth, fitted.labels_)
    assert n_clusters == fitted.n_clusters_
    assert ari > min_ari
def test_works_with_simplified_dunn(self, _, n_clusters):
    """Check DunnSearch, called without inter/intra options, on generated data.

    A two-cluster ``KMeans`` (``init='kdtree'``) is wrapped in a
    ``DunnSearch`` capped at 10 clusters. The fitted search must report
    exactly ``n_clusters`` and its labels must reach an adjusted Rand score
    above 0.75 against the reference labels.

    ``_`` is unused here; presumably a case label supplied by a
    parameterizing decorator -- confirm against the decorator.
    """
    min_ari = 0.75
    # NOTE(review): data() is assumed to return (samples, reference labels).
    features, truth = data(n_clusters)
    base = KMeans(n_clusters=2, init='kdtree')
    searcher = DunnSearch(base, max_clusters=10)
    fitted = searcher.fit(features)
    ari = adjusted_rand_score(truth, fitted.labels_)
    self.assertEqual(n_clusters, fitted.n_clusters_)
    self.assertGreater(ari, min_ari)
def test_works_with_sampled_gap(self, _, n_clusters):
    """Check GAPSearch on generated data, tolerating an off-by-one count.

    A two-cluster ``KMeans`` is wrapped in a ``GAPSearch`` capped at 10
    clusters. The reported cluster count may differ from ``n_clusters`` by
    at most one, and the labels must reach an adjusted Rand score above
    0.75 against the reference labels.

    ``_`` is unused here; presumably a case label supplied by a
    parameterizing decorator -- confirm against the decorator.
    """
    min_ari = 0.75
    # NOTE(review): data() is assumed to return (samples, reference labels).
    features, truth = data(n_clusters)
    base = KMeans(n_clusters=2)
    searcher = GAPSearch(base, max_clusters=10)
    fitted = searcher.fit(features)
    ari = adjusted_rand_score(truth, fitted.labels_)
    # allow for misidentification of 1 cluster
    self.assertLessEqual(n_clusters, fitted.n_clusters_ + 1)
    self.assertGreaterEqual(n_clusters, fitted.n_clusters_ - 1)
    self.assertGreater(ari, min_ari)
def test_works_with_sampled_gap(self, _, n_clusters):
    """Check GAPSearch on generated data, tolerating an off-by-one count.

    A two-cluster ``KMeans`` is wrapped in a ``GAPSearch`` capped at 10
    clusters. The reported cluster count may differ from ``n_clusters`` by
    at most one, and the labels must reach an adjusted Rand score above
    0.75 against the reference labels.

    ``_`` is unused here; presumably a case label supplied by a
    parameterizing decorator -- confirm against the decorator.
    """
    min_ari = 0.75
    # NOTE(review): data() is assumed to return (samples, reference labels).
    features, truth = data(n_clusters)
    base = KMeans(n_clusters=2)
    searcher = GAPSearch(base, max_clusters=10)
    fitted = searcher.fit(features)
    ari = adjusted_rand_score(truth, fitted.labels_)
    # allow for misidentification of 1 cluster
    assert n_clusters <= fitted.n_clusters_ + 1
    assert n_clusters >= fitted.n_clusters_ - 1
    assert ari > min_ari
def test_works_with_unfit_removal(self):
    """Check DunnSearch with ``drop_unfit=True`` on three-cluster data.

    The fitted search must report exactly three clusters, reach an adjusted
    Rand score above 0.75 against the reference labels, and expose
    ``estimators_`` as ``None``.
    """
    n_clusters = 3
    min_ari = 0.75
    # NOTE(review): data() is assumed to return (samples, reference labels).
    features, truth = data(n_clusters)
    base = KMeans(n_clusters=2)
    searcher = DunnSearch(base, max_clusters=10, drop_unfit=True)
    fitted = searcher.fit(features)
    ari = adjusted_rand_score(truth, fitted.labels_)
    assert n_clusters == fitted.n_clusters_
    assert ari > min_ari
    assert fitted.estimators_ is None
def test_works_with_unfit_removal(self):
    """Check DunnSearch with ``drop_unfit=True`` on three-cluster data.

    The fitted search must report exactly three clusters, reach an adjusted
    Rand score above 0.75 against the reference labels, and expose
    ``estimators_`` as ``None``.
    """
    n_clusters = 3
    min_ari = 0.75
    # NOTE(review): data() is assumed to return (samples, reference labels).
    features, truth = data(n_clusters)
    base = KMeans(n_clusters=2)
    searcher = DunnSearch(base, max_clusters=10, drop_unfit=True)
    fitted = searcher.fit(features)
    ari = adjusted_rand_score(truth, fitted.labels_)
    self.assertEqual(n_clusters, fitted.n_clusters_)
    self.assertGreater(ari, min_ari)
    self.assertIsNone(fitted.estimators_)
def test_works_with_unfit_removal(self):
    """Check GAPSearch with ``drop_unfit=True`` on three-cluster data.

    The reported cluster count may differ from three by at most one, the
    labels must reach an adjusted Rand score above 0.75 against the
    reference labels, and ``estimators_`` must be ``None`` after fitting.
    """
    n_clusters = 3
    min_ari = 0.75
    # NOTE(review): data() is assumed to return (samples, reference labels).
    features, truth = data(n_clusters)
    base = KMeans(n_clusters=2)
    searcher = GAPSearch(base, max_clusters=10, drop_unfit=True)
    fitted = searcher.fit(features)
    ari = adjusted_rand_score(truth, fitted.labels_)
    # allow for misidentification of 1 cluster
    self.assertLessEqual(n_clusters, fitted.n_clusters_ + 1)
    self.assertGreaterEqual(n_clusters, fitted.n_clusters_ - 1)
    self.assertGreater(ari, min_ari)
    self.assertIsNone(fitted.estimators_)
def test_works_with_unfit_removal(self):
    """Check GAPSearch with ``drop_unfit=True`` on three-cluster data.

    The reported cluster count may differ from three by at most one, the
    labels must reach an adjusted Rand score above 0.75 against the
    reference labels, and ``estimators_`` must be ``None`` after fitting.
    """
    n_clusters = 3
    min_ari = 0.75
    # NOTE(review): data() is assumed to return (samples, reference labels).
    features, truth = data(n_clusters)
    base = KMeans(n_clusters=2)
    searcher = GAPSearch(base, max_clusters=10, drop_unfit=True)
    fitted = searcher.fit(features)
    ari = adjusted_rand_score(truth, fitted.labels_)
    # allow for misidentification of 1 cluster
    assert n_clusters <= fitted.n_clusters_ + 1
    assert n_clusters >= fitted.n_clusters_ - 1
    assert ari > min_ari
    assert fitted.estimators_ is None
def test_works_with_full_exact_dunn(self, _, n_clusters):
    """Check DunnSearch with ``inter="closest"`` and ``intra="furthest"``.

    A two-cluster ``KMeans`` (``init="kdtree"``) is wrapped in a
    ``DunnSearch`` capped at 15 clusters. The fitted search must report
    exactly ``n_clusters`` and its labels must reach an adjusted Rand score
    above 0.75 against the reference labels.

    ``_`` is unused here; presumably a case label supplied by a
    parameterizing decorator -- confirm against the decorator.
    """
    min_ari = 0.75
    # NOTE(review): data() is assumed to return (samples, reference labels).
    features, truth = data(n_clusters)
    base = KMeans(n_clusters=2, init="kdtree")
    searcher = DunnSearch(base, max_clusters=15, inter="closest", intra="furthest")
    fitted = searcher.fit(features)
    ari = adjusted_rand_score(truth, fitted.labels_)
    assert n_clusters == fitted.n_clusters_
    assert ari > min_ari
def test_works_with_full_exact_dunn(self, _, n_clusters):
    """Check DunnSearch with ``inter='closest'`` and ``intra='furthest'``.

    A two-cluster ``KMeans`` (``init='kdtree'``) is wrapped in a
    ``DunnSearch`` capped at 15 clusters. The fitted search must report
    exactly ``n_clusters`` and its labels must reach an adjusted Rand score
    above 0.75 against the reference labels.

    ``_`` is unused here; presumably a case label supplied by a
    parameterizing decorator -- confirm against the decorator.
    """
    min_ari = 0.75
    # NOTE(review): data() is assumed to return (samples, reference labels).
    features, truth = data(n_clusters)
    base = KMeans(n_clusters=2, init='kdtree')
    searcher = DunnSearch(base, max_clusters=15, inter='closest', intra='furthest')
    fitted = searcher.fit(features)
    ari = adjusted_rand_score(truth, fitted.labels_)
    self.assertEqual(n_clusters, fitted.n_clusters_)
    self.assertGreater(ari, min_ari)