def test_timings(self):
    """
    Check timing capture and the twinx axes of the k-elbow visualizer
    """
    model = KMeans(random_state=0)
    oz = KElbowVisualizer(model, k=5, timings=True)
    oz.fit(X)

    # One strictly positive timing is expected for each of the 4 fits
    assert len(oz.k_timers_) == 4
    assert all(t > 0 for t in oz.k_timers_)

    # The timings should have been plotted on a second (twinx) axes
    assert hasattr(oz, "axes")
    assert len(oz.axes) == 2

    # Drop the timings axes and pin k_timers_ / k_values_ to fixed values
    # so that the image similarity check compares against known data
    oz.axes[1].remove()
    oz.k_timers_ = [
        0.01084589958190918,
        0.011144161224365234,
        0.017028093338012695,
        0.010634183883666992,
    ]
    oz.k_values_ = [2, 3, 4, 5]

    # Redraw by hand; draw is normally called from within fit
    oz.draw()
    oz.poof()
    self.assert_images_similar(oz)
def test_timings(self):
    """
    Check timing capture and the twinx axes of the k-elbow visualizer
    """
    estimator = KMeans(random_state=0)
    viz = KElbowVisualizer(estimator, k=5, timings=True)
    viz.fit(X)

    # A timer must have been recorded for each of the 4 fits, all positive
    self.assertEqual(len(viz.k_timers_), 4)
    self.assertTrue(all(t > 0 for t in viz.k_timers_))

    # The timings should have been plotted on a second (twinx) axes
    self.assertTrue(hasattr(viz, "axes"))
    self.assertEqual(len(viz.axes), 2)

    # Remove the timings axes, then replace k_timers_ and k_values_ with
    # fixed values for the image similarity comparison
    viz.axes[1].remove()
    viz.k_timers_ = [
        0.01084589958190918,
        0.011144161224365234,
        0.017028093338012695,
        0.010634183883666992,
    ]
    viz.k_values_ = [2, 3, 4, 5]

    # draw is normally triggered by fit, so call it again explicitly
    viz.draw()
    viz.poof()
    self.assert_images_similar(viz)
def test_integrated_mini_batch_kmeans_elbow(self):
    """
    Run the k-elbow visualizer end-to-end with mini-batch kmeans on blobs
    """
    # NOTE #182: cannot use occupancy dataset because of memory usage

    # Seeded synthetic blobs stand in for a real dataset
    blobs, _ = make_blobs(
        n_samples=1000,
        n_features=12,
        centers=6,
        shuffle=True,
        random_state=42,
    )

    try:
        figure = plt.figure()
        axes = figure.add_subplot()

        estimator = MiniBatchKMeans(random_state=42)
        oz = KElbowVisualizer(estimator, k=4, ax=axes)
        oz.fit(blobs)
        oz.poof()

        self.assert_images_similar(oz)
    except Exception as e:
        # Any error raised above is reported as a test failure
        self.fail("error during k-elbow: {}".format(e))
def test_topic_modeling_k_means(self):
    """
    Run the k-elbow visualizer on TF-IDF vectors of the hobbies corpus
    """
    hobbies = self.load_corpus("hobbies")

    # Vectorize the raw documents before clustering
    vectorizer = TfidfVectorizer()
    tfidf_docs = vectorizer.fit_transform(hobbies.data)

    # NOTE(review): KMeans() is not seeded here, so the rendered image may
    # vary between runs -- confirm the tolerance of the image comparison
    oz = KElbowVisualizer(KMeans(), k=(4, 8))
    oz.fit(tfidf_docs)
    oz.poof()

    self.assert_images_similar(oz)
def test_silhouette_metric(self):
    """
    Check the scores and the plot produced with the silhouette metric
    """
    estimator = KMeans(random_state=0)
    oz = KElbowVisualizer(
        estimator, k=5, metric="silhouette", timings=False
    )
    oz.fit(X)

    # One score is expected per fitted k
    self.assertEqual(len(oz.k_scores_), 4)

    oz.poof()
    self.assert_images_similar(oz)

    # Reference silhouette scores for the seeded estimator
    expected_scores = np.array([0.691636, 0.456646, 0.255174, 0.239842])
    assert_array_almost_equal(oz.k_scores_, expected_scores)
def test_distortion_metric(self):
    """
    Check the scores and the plot produced with the distortion metric
    """
    estimator = KMeans(random_state=0)
    oz = KElbowVisualizer(
        estimator, k=5, metric="distortion", timings=False
    )
    oz.fit(X)

    # One score is expected per fitted k
    self.assertEqual(len(oz.k_scores_), 4)

    oz.poof()
    self.assert_images_similar(oz)

    # Reference distortion scores for the seeded estimator
    expected_scores = np.array([7.677785, 8.364319, 8.893634, 8.013021])
    assert_array_almost_equal(oz.k_scores_, expected_scores)
def test_silhouette_metric(self):
    """
    Verify k-elbow scores and rendering when metric is "silhouette"
    """
    viz = KElbowVisualizer(
        KMeans(random_state=0),
        k=5,
        metric="silhouette",
        timings=False,
    )
    viz.fit(X)

    # The visualizer should hold exactly four scores after fitting
    assert len(viz.k_scores_) == 4

    viz.poof()
    self.assert_images_similar(viz)

    # Compare against the reference silhouette scores
    reference = np.array([0.691636, 0.456646, 0.255174, 0.239842])
    assert_array_almost_equal(viz.k_scores_, reference)
def test_distortion_metric(self):
    """
    Verify k-elbow scores and rendering when metric is "distortion"
    """
    viz = KElbowVisualizer(
        KMeans(random_state=0),
        k=5,
        metric="distortion",
        timings=False,
    )
    viz.fit(X)

    # The visualizer should hold exactly four scores after fitting
    assert len(viz.k_scores_) == 4

    viz.poof()
    self.assert_images_similar(viz)

    # Compare against the reference distortion scores
    reference = np.array([7.677785, 8.364319, 8.893634, 8.013021])
    assert_array_almost_equal(viz.k_scores_, reference)
def test_integrated_mini_batch_kmeans_elbow(self):
    """
    Test no exceptions for mini-batch kmeans k-elbow visualizer

    See #182: cannot use occupancy dataset because of memory usage
    """
    # Generate a blobs data set. Both the data and the estimator are seeded
    # so that every run exercises the same inputs and any failure can be
    # reproduced; the label vector is not needed and is discarded.
    X, _ = make_blobs(
        n_samples=1000,
        n_features=12,
        centers=6,
        shuffle=True,
        random_state=42,
    )

    try:
        visualizer = KElbowVisualizer(MiniBatchKMeans(random_state=42), k=4)
        visualizer.fit(X)
        visualizer.poof()
    except Exception as e:
        # Any exception raised while fitting or drawing fails the test
        self.fail("error during k-elbow: {}".format(e))
def test_calinski_harabaz_metric(self):
    """
    Check the scores and the plot produced with the calinski-harabaz metric
    """
    estimator = KMeans(random_state=0)
    oz = KElbowVisualizer(
        estimator, k=5, metric="calinski_harabaz", timings=False
    )
    oz.fit(X)

    # One score is expected per fitted k
    self.assertEqual(len(oz.k_scores_), 4)

    oz.poof()
    self.assert_images_similar(oz)

    # Reference calinski-harabaz scores for the seeded estimator
    expected_scores = np.array([
        81.662726256035683,
        50.992378259195554,
        40.952179227847012,
        35.939494,
    ])
    assert_array_almost_equal(oz.k_scores_, expected_scores)
def test_calinski_harabaz_metric(self):
    """
    Verify k-elbow scores and rendering when metric is "calinski_harabaz"
    """
    viz = KElbowVisualizer(
        KMeans(random_state=0),
        k=5,
        metric="calinski_harabaz",
        timings=False,
    )
    viz.fit(X)

    # The visualizer should hold exactly four scores after fitting
    assert len(viz.k_scores_) == 4

    viz.poof()
    self.assert_images_similar(viz)

    # Compare against the reference calinski-harabaz scores
    reference = np.array([
        81.662726256035683,
        50.992378259195554,
        40.952179227847012,
        35.939494,
    ])
    assert_array_almost_equal(viz.k_scores_, reference)
def test_integrated_kmeans_elbow(self):
    """
    Run the k-elbow visualizer end-to-end with kmeans on a blobs dataset
    """
    # NOTE #182: cannot use occupancy dataset because of memory usage

    # Seeded synthetic blobs stand in for a real dataset
    blobs, _ = make_blobs(
        n_samples=1000,
        n_features=12,
        centers=6,
        shuffle=True,
        random_state=42,
    )

    try:
        _, axes = plt.subplots()

        estimator = KMeans(random_state=42)
        oz = KElbowVisualizer(estimator, k=4, ax=axes)
        oz.fit(blobs)
        oz.poof()

        self.assert_images_similar(oz)
    except Exception as e:
        # Any error raised above is reported as a test failure
        pytest.fail("error during k-elbow: {}".format(e))