def test_kmeans(self):
    """Compare clustering scores on iris against stored reference values.

    KMeans learners with 2, 3 and 5 clusters are evaluated through
    ``ClusteringEvaluation`` with ``k=3``; the Silhouette and adjusted
    mutual information scores must agree with the references to two
    decimals.
    """
    iris = Orange.data.Table('iris')
    learners = [KMeans(n_clusters=2),
                KMeans(n_clusters=3),
                KMeans(n_clusters=5)]
    results = ClusteringEvaluation(iris, learners=learners, k=3)

    silhouette_ref = [0.68081362, 0.55259194, 0.48851755]
    np.testing.assert_almost_equal(
        Silhouette(results), silhouette_ref, decimal=2)

    ami_ref = [0.51936073, 0.74837231, 0.59178896]
    np.testing.assert_almost_equal(
        AdjustedMutualInfoScore(results), ami_ref, decimal=2)
def test_deprecated_silhouette(self):
    """Passing ``compute_silhouette_score`` emits one DeprecationWarning.

    The check is made for both ``True`` and ``False``.
    """
    for flag in (True, False):
        with warnings.catch_warnings(record=True) as caught:
            # Force every warning to be recorded; otherwise the filters
            # active at call time may drop DeprecationWarning before it
            # reaches the recording list.
            warnings.simplefilter("always")
            KMeans(compute_silhouette_score=flag)
        # unittest assertions (not bare ``assert``) so the checks are not
        # stripped under ``python -O`` and failures report the values.
        self.assertEqual(len(caught), 1)
        self.assertTrue(
            issubclass(caught[-1].category, DeprecationWarning))
def test_kmeans(self):
    """The first 20 iris rows must all be assigned to a single cluster.

    A two-cluster KMeans is applied to ``self.iris`` and the object it
    returns is called on the first 20 rows of ``X``.
    """
    kmeans = KMeans(n_clusters=2)
    c = kmeans(self.iris)
    p = c(self.iris.X[:20])
    # First 20 iris belong to one cluster. A unittest assertion replaces
    # the bare ``assert`` so the check still runs under ``python -O`` and
    # reports the offending value on failure.
    self.assertEqual(len(set(p.ravel())), 1)
def test_kmeans_parameters(self):
    """Smoke test: KMeans with non-default parameters runs on iris."""
    params = dict(n_clusters=10, max_iter=10, random_state=42,
                  tol=0.001, init='random')
    learner = KMeans(**params)
    # No assertion: the test only checks that the call does not raise.
    learner(self.iris)
def test_silhouette_sparse(self):
    """Test if silhouette gets calculated for sparse data"""
    learner = KMeans(compute_silhouette_score=True)
    # Work on a copy so the shared fixture keeps its original X.
    sparse_table = self.iris.copy()
    sparse_table.X = csc_matrix(sparse_table.X)
    result = learner(sparse_table)
    score_is_nan = np.isnan(result.silhouette)
    self.assertFalse(score_is_nan)
def test_kmeans_parameters(self):
    """Smoke test: KMeans with non-default parameters runs on iris.

    ``compute_silhouette_score=True`` is passed along with the other
    settings.
    """
    params = dict(n_clusters=10, max_iter=10, random_state=42,
                  tol=0.001, init='random',
                  compute_silhouette_score=True)
    learner = KMeans(**params)
    # No assertion: the test only checks that the call does not raise.
    learner(self.iris)
def test_kmeans(self):
    """The first 20 iris rows must all be assigned to a single cluster.

    A two-cluster KMeans is applied to the iris table and the object it
    returns is called on the first 20 rows of ``X``.
    """
    table = Orange.data.Table('iris')
    kmeans = KMeans(n_clusters=2)
    c = kmeans(table)
    p = c(table.X[:20])
    # First 20 iris belong to one cluster. A unittest assertion replaces
    # the bare ``assert`` so the check still runs under ``python -O`` and
    # reports the offending value on failure.
    self.assertEqual(len(set(p.ravel())), 1)
def test_kmeans(self):
    """Check clustering scores and model storage of ClusteringEvaluation.

    Scores for KMeans with 2, 3 and 5 clusters are compared with stored
    references (two decimals). Models must be ``None`` by default and a
    (3, 1) array of ``KMeansModel`` when ``store_models=True`` is used
    with a single learner and ``k=3``.
    """
    iris = Orange.data.Table('iris')

    evaluate = ClusteringEvaluation(k=3)
    learners = [KMeans(n_clusters=2),
                KMeans(n_clusters=3),
                KMeans(n_clusters=5)]
    results = evaluate(iris, learners=learners)

    silhouette_ref = [0.68081362, 0.55259194, 0.48851755]
    np.testing.assert_almost_equal(
        Silhouette(results), silhouette_ref, decimal=2)

    ami_ref = [0.65383807, 0.75511917, 0.68721092]
    np.testing.assert_almost_equal(
        AdjustedMutualInfoScore(results), ami_ref, decimal=2)

    # Models are not kept unless explicitly requested.
    self.assertIsNone(results.models)

    evaluate_storing = ClusteringEvaluation(k=3, store_models=True)
    stored = evaluate_storing(iris, learners=[KMeans(n_clusters=2)])
    self.assertEqual(stored.models.shape, (3, 1))
    only_kmeans_models = all(
        isinstance(model, KMeansModel)
        for model in stored.models.flatten())
    self.assertTrue(only_kmeans_models)
def test_kmeans_parameters(self):
    """Smoke test: KMeans with non-default parameters runs on iris."""
    iris = Orange.data.Table('iris')
    params = dict(n_clusters=10, max_iter=10, random_state=42,
                  tol=0.001, init='random')
    learner = KMeans(**params)
    # No assertion: the test only checks that the call does not raise.
    learner(iris)
def test_kmeans_parameters(self):
    """KMeans with non-default parameters yields one entry per iris row.

    The result of calling the learner on ``self.iris`` must be exactly
    an ``np.ndarray`` whose length equals the table length.
    """
    params = dict(n_clusters=10, max_iter=10, random_state=42,
                  tol=0.001, init='random')
    learner = KMeans(**params)
    clusters = learner(self.iris)
    self.assertEqual(np.ndarray, type(clusters))
    self.assertEqual(len(self.iris), len(clusters))
def test_predict_table(self):
    """Smoke test: the KMeans result can be called on a table slice."""
    learner = KMeans()
    fitted = learner(self.iris)
    first_rows = self.iris[:20]
    # No assertion: the test only checks that the call does not raise.
    fitted(first_rows)
def test_predict_numpy(self):
    """Smoke test: the KMeans result can be called on rows of ``X``."""
    iris = Orange.data.Table('iris')
    learner = KMeans()
    fitted = learner(iris)
    # Every 20th row of the iris X matrix.
    sampled_rows = iris.X[::20]
    # No assertion: the test only checks that the call does not raise.
    fitted(sampled_rows)
def test_predict_table(self):
    """Smoke test: the KMeans result can be called on a table slice."""
    iris = Orange.data.Table('iris')
    learner = KMeans()
    fitted = learner(iris)
    first_rows = iris[:20]
    # No assertion: the test only checks that the call does not raise.
    fitted(first_rows)
def test_predict_single_instance(self):
    """Smoke test: the KMeans result can be called on one table row."""
    iris = Orange.data.Table('iris')
    learner = KMeans()
    fitted = learner(iris)
    first_instance = iris[0]
    # No assertion: the test only checks that the call does not raise.
    fitted(first_instance)
class TestKMeans(unittest.TestCase):
    """Tests for the KMeans learner and the model obtained from it."""

    def setUp(self):
        # Two-cluster learner and the iris table used by the tests below.
        self.kmeans = KMeans(n_clusters=2)
        self.iris = Orange.data.Table('iris')

    def test_kmeans(self):
        """Calling the learner on a table gives an ndarray, one entry per row."""
        c = self.kmeans(self.iris)
        self.assertEqual(np.ndarray, type(c))
        self.assertEqual(len(self.iris), len(c))
        # First 20 iris belong to one cluster
        self.assertEqual(1, len(set(c[:20].ravel())))

    def test_kmeans_parameters(self):
        """A learner built with non-default parameters works the same way."""
        kmeans = KMeans(n_clusters=10, max_iter=10, random_state=42,
                        tol=0.001, init='random')
        c = kmeans(self.iris)
        self.assertEqual(np.ndarray, type(c))
        self.assertEqual(len(self.iris), len(c))

    def test_predict_table(self):
        """Calling the learner on a table gives an ndarray of table length."""
        c = self.kmeans(self.iris)
        self.assertEqual(np.ndarray, type(c))
        self.assertEqual(len(self.iris), len(c))

    def test_predict_numpy(self):
        """``fit`` on a numpy matrix returns a KMeansModel with labels."""
        c = self.kmeans.fit(self.iris.X)
        self.assertEqual(KMeansModel, type(c))
        self.assertEqual(np.ndarray, type(c.labels))
        self.assertEqual(len(self.iris), len(c.labels))

    def test_predict_sparse_csc(self):
        """The learner works when the table's X is a CSC sparse matrix."""
        self.iris.X = csc_matrix(self.iris.X[::20])
        c = self.kmeans(self.iris)
        self.assertEqual(np.ndarray, type(c))
        self.assertEqual(len(self.iris), len(c))

    # NOTE: the method name contains a typo ("spares"); it is kept so that
    # existing test selectors keep working.
    def test_predict_spares_csr(self):
        """The learner works when the table's X is a CSR sparse matrix."""
        self.iris.X = csr_matrix(self.iris.X[::20])
        c = self.kmeans(self.iris)
        self.assertEqual(np.ndarray, type(c))
        self.assertEqual(len(self.iris), len(c))

    def test_model(self):
        """``get_model`` returns a KMeansModel that reproduces its labels."""
        c = self.kmeans.get_model(self.iris)
        self.assertEqual(KMeansModel, type(c))
        self.assertEqual(len(self.iris), len(c.labels))

        c1 = c(self.iris)
        # prediction of the model must be same since data are same
        np.testing.assert_array_almost_equal(c.labels, c1)

    def test_model_np(self):
        """
        Test with numpy array as an input in model.
        """
        c = self.kmeans.get_model(self.iris)
        c1 = c(self.iris.X)
        # prediction of the model must be same since data are same
        np.testing.assert_array_almost_equal(c.labels, c1)

    def test_model_sparse_csc(self):
        """
        Test with sparse array as an input in model.
        """
        c = self.kmeans.get_model(self.iris)
        c1 = c(csc_matrix(self.iris.X))
        # prediction of the model must be same since data are same
        np.testing.assert_array_almost_equal(c.labels, c1)

    def test_model_sparse_csr(self):
        """
        Test with sparse array as an input in model.
        """
        c = self.kmeans.get_model(self.iris)
        c1 = c(csr_matrix(self.iris.X))
        # prediction of the model must be same since data are same
        np.testing.assert_array_almost_equal(c.labels, c1)

    def test_model_instance(self):
        """
        Test with instance as an input in model.
        """
        c = self.kmeans.get_model(self.iris)
        c1 = c(self.iris[0])
        # prediction of the model must be same since data are same
        self.assertEqual(c1, c.labels[0])

    def test_model_list(self):
        """
        Test with list as an input in model.
        """
        c = self.kmeans.get_model(self.iris)
        c1 = c(self.iris.X.tolist())
        # prediction of the model must be same since data are same
        np.testing.assert_array_almost_equal(c.labels, c1)

        # example with a list of only one data item
        c1 = c(self.iris.X.tolist()[0])
        # prediction of the model must be same since data are same
        np.testing.assert_array_almost_equal(c.labels[0], c1)

    def test_model_bad_datatype(self):
        """
        Check model with data-type that is not supported.
        """
        c = self.kmeans.get_model(self.iris)
        self.assertRaises(TypeError, c, 10)

    def test_model_data_table_domain(self):
        """
        Check model with tables whose domain differs from the iris domain.
        """
        # ok domain: the iris attributes plus one extra continuous column
        data = Table(
            Domain(
                list(self.iris.domain.attributes)
                + [ContinuousVariable("a")]),
            np.concatenate(
                (self.iris.X, np.ones((len(self.iris), 1))), axis=1))
        c = self.kmeans.get_model(self.iris)
        res = c(data)
        np.testing.assert_array_almost_equal(c.labels, res)

        # totally different domain - should fail
        self.assertRaises(DomainTransformationError, c, Table("housing"))

    def test_deprecated_silhouette(self):
        """Passing ``compute_silhouette_score`` emits one DeprecationWarning."""
        for flag in (True, False):
            with warnings.catch_warnings(record=True) as caught:
                # Force every warning to be recorded; otherwise the
                # filters active at call time may drop DeprecationWarning
                # before it reaches the recording list.
                warnings.simplefilter("always")
                KMeans(compute_silhouette_score=flag)
            # unittest assertions (not bare ``assert``) so the checks are
            # not stripped under ``python -O``.
            self.assertEqual(len(caught), 1)
            self.assertTrue(
                issubclass(caught[-1].category, DeprecationWarning))
def setUp(self):
    """Create the fixtures: the iris table and a two-cluster KMeans."""
    self.iris = Orange.data.Table('iris')
    self.kmeans = KMeans(n_clusters=2)
def test_predict_sparse(self):
    """Smoke test: the KMeans result can be called on a CSC matrix."""
    learner = KMeans()
    fitted = learner(self.iris)
    # Every 20th row of the iris X matrix, converted to CSC format.
    sparse_rows = csc_matrix(self.iris.X[::20])
    # No assertion: the test only checks that the call does not raise.
    fitted(sparse_rows)
def test_predict_numpy(self):
    """Smoke test: the KMeans result can be called on rows of ``X``."""
    learner = KMeans()
    fitted = learner(self.iris)
    # Every 20th row of the iris X matrix.
    sampled_rows = self.iris.X[::20]
    # No assertion: the test only checks that the call does not raise.
    fitted(sampled_rows)
def test_predict_single_instance(self):
    """Smoke test: the KMeans result can be called on one table row."""
    learner = KMeans()
    fitted = learner(self.iris)
    first_instance = self.iris[0]
    # No assertion: the test only checks that the call does not raise.
    fitted(first_instance)