Пример #1
0
 def test_kmeans(self):
     """Score k-means with 2, 3 and 5 clusters on iris.

     Silhouette and adjusted mutual information scores are compared with
     reference values to two decimal places.
     """
     iris = Orange.data.Table('iris')
     learners = [KMeans(n_clusters=n) for n in (2, 3, 5)]
     results = ClusteringEvaluation(iris, learners=learners, k=3)

     silhouette_ref = [0.68081362, 0.55259194, 0.48851755]
     np.testing.assert_almost_equal(
         Silhouette(results), silhouette_ref, decimal=2)

     ami_ref = [0.51936073, 0.74837231, 0.59178896]
     np.testing.assert_almost_equal(
         AdjustedMutualInfoScore(results), ami_ref, decimal=2)
Пример #2
0
    def test_deprecated_silhouette(self):
        """KMeans warns that ``compute_silhouette_score`` is deprecated.

        Exactly one DeprecationWarning is expected whether the argument is
        passed as True or as False.
        """
        for flag in (True, False):
            with warnings.catch_warnings(record=True) as caught:
                # Recording alone does not guarantee delivery: the filters
                # active on entry may suppress DeprecationWarning, so force
                # this category to always be reported inside the context.
                warnings.simplefilter("always", DeprecationWarning)
                KMeans(compute_silhouette_score=flag)

            assert len(caught) == 1
            assert issubclass(caught[-1].category, DeprecationWarning)
Пример #3
0
 def test_kmeans(self):
     """The first 20 iris rows must all receive the same cluster label."""
     clustering = KMeans(n_clusters=2)(self.iris)
     head = self.iris.X[:20]
     assigned = clustering(head)
     # One distinct value means all 20 rows share a single cluster
     distinct = set(assigned.ravel())
     assert len(distinct) == 1
Пример #4
0
 def test_kmeans_parameters(self):
     """Smoke test: KMeans built with custom parameters runs on iris."""
     options = dict(n_clusters=10,
                    max_iter=10,
                    random_state=42,
                    tol=0.001,
                    init='random')
     # No assertions: the call only has to complete without raising
     KMeans(**options)(self.iris)
Пример #5
0
 def test_silhouette_sparse(self):
     """Silhouette must not be NaN when X is a sparse CSC matrix."""
     learner = KMeans(compute_silhouette_score=True)
     # Work on a copy so the shared iris table keeps its original X
     data = self.iris.copy()
     data.X = csc_matrix(data.X)
     result = learner(data)
     score = result.silhouette
     self.assertFalse(np.isnan(score))
Пример #6
0
 def test_kmeans_parameters(self):
     """Smoke test: KMeans with custom parameters and silhouette enabled."""
     options = dict(n_clusters=10,
                    max_iter=10,
                    random_state=42,
                    tol=0.001,
                    init='random',
                    compute_silhouette_score=True)
     # No assertions: the call only has to complete without raising
     KMeans(**options)(self.iris)
Пример #7
0
 def test_kmeans(self):
     """The first 20 iris rows must all receive the same cluster label."""
     iris = Orange.data.Table('iris')
     clustering = KMeans(n_clusters=2)(iris)
     head = iris.X[:20]
     assigned = clustering(head)
     # One distinct value means all 20 rows share a single cluster
     distinct = set(assigned.ravel())
     assert len(distinct) == 1
Пример #8
0
    def test_kmeans(self):
        """Check clustering scores on iris and the ``store_models`` switch.

        Scores for 2, 3 and 5 clusters are compared with reference values
        to two decimal places. Models must be absent by default and stored
        as a (3, 1) array of KMeansModel when ``store_models=True``.
        """
        iris = Orange.data.Table('iris')

        evaluate = ClusteringEvaluation(k=3)
        learners = [KMeans(n_clusters=n) for n in (2, 3, 5)]
        results = evaluate(iris, learners=learners)

        silhouette_ref = [0.68081362, 0.55259194, 0.48851755]
        np.testing.assert_almost_equal(
            Silhouette(results), silhouette_ref, decimal=2)
        ami_ref = [0.65383807, 0.75511917, 0.68721092]
        np.testing.assert_almost_equal(
            AdjustedMutualInfoScore(results), ami_ref, decimal=2)
        self.assertIsNone(results.models)

        evaluate_storing = ClusteringEvaluation(k=3, store_models=True)
        stored = evaluate_storing(iris, learners=[KMeans(n_clusters=2)])
        self.assertEqual(stored.models.shape, (3, 1))
        only_kmeans_models = all(
            isinstance(m, KMeansModel) for m in stored.models.flatten())
        self.assertTrue(only_kmeans_models)
Пример #9
0
 def test_kmeans_parameters(self):
     """Smoke test: KMeans built with custom parameters runs on iris."""
     iris = Orange.data.Table('iris')
     options = dict(n_clusters=10,
                    max_iter=10,
                    random_state=42,
                    tol=0.001,
                    init='random')
     # No assertions: the call only has to complete without raising
     KMeans(**options)(iris)
Пример #10
0
 def test_kmeans_parameters(self):
     """KMeans with custom parameters returns an ndarray as long as iris."""
     options = dict(n_clusters=10,
                    max_iter=10,
                    random_state=42,
                    tol=0.001,
                    init='random')
     result = KMeans(**options)(self.iris)
     self.assertEqual(np.ndarray, type(result))
     self.assertEqual(len(self.iris), len(result))
Пример #11
0
 def test_predict_table(self):
     """Smoke test: apply the learner's result to the first 20 iris rows."""
     clustering = KMeans()(self.iris)
     first_rows = self.iris[:20]
     # No assertions: the call only has to complete without raising
     clustering(first_rows)
Пример #12
0
 def test_predict_numpy(self):
     """Smoke test: apply the learner's result to every 20th row of X."""
     iris = Orange.data.Table('iris')
     clustering = KMeans()(iris)
     sample = iris.X[::20]
     # No assertions: the call only has to complete without raising
     clustering(sample)
Пример #13
0
 def test_predict_table(self):
     """Smoke test: apply the learner's result to the first 20 iris rows."""
     iris = Orange.data.Table('iris')
     clustering = KMeans()(iris)
     first_rows = iris[:20]
     # No assertions: the call only has to complete without raising
     clustering(first_rows)
Пример #14
0
 def test_predict_single_instance(self):
     """Smoke test: apply the learner's result to the first iris row."""
     iris = Orange.data.Table('iris')
     clustering = KMeans()(iris)
     first_row = iris[0]
     # No assertions: the call only has to complete without raising
     clustering(first_row)
Пример #15
0
class TestKMeans(unittest.TestCase):
    """Tests for the KMeans learner and the KMeansModel it produces."""

    def setUp(self):
        """Create a two-cluster learner and load a fresh iris table."""
        self.kmeans = KMeans(n_clusters=2)
        self.iris = Orange.data.Table('iris')

    def test_kmeans(self):
        """Calling the learner on a table yields one entry per row."""
        c = self.kmeans(self.iris)
        self.assertEqual(np.ndarray, type(c))
        self.assertEqual(len(self.iris), len(c))
        # First 20 iris belong to one cluster
        self.assertEqual(1, len(set(c[:20].ravel())))

    def test_kmeans_parameters(self):
        """A learner built with custom parameters still returns an ndarray."""
        kmeans = KMeans(n_clusters=10,
                        max_iter=10,
                        random_state=42,
                        tol=0.001,
                        init='random')
        c = kmeans(self.iris)
        self.assertEqual(np.ndarray, type(c))
        self.assertEqual(len(self.iris), len(c))

    def test_predict_table(self):
        """Calling the learner on a table returns an ndarray of equal length."""
        c = self.kmeans(self.iris)
        self.assertEqual(np.ndarray, type(c))
        self.assertEqual(len(self.iris), len(c))

    def test_predict_numpy(self):
        """``fit`` on the raw X array returns a KMeansModel with labels."""
        c = self.kmeans.fit(self.iris.X)
        self.assertEqual(KMeansModel, type(c))
        self.assertEqual(np.ndarray, type(c.labels))
        self.assertEqual(len(self.iris), len(c.labels))

    def test_predict_sparse_csc(self):
        """The learner accepts a table whose X is a CSC sparse matrix."""
        # X is replaced by a sparse matrix built from every 20th row
        self.iris.X = csc_matrix(self.iris.X[::20])
        c = self.kmeans(self.iris)
        self.assertEqual(np.ndarray, type(c))
        self.assertEqual(len(self.iris), len(c))

    # NOTE: "spares" is a misspelling of "sparse"; the name is kept so that
    # existing test IDs remain valid.
    def test_predict_spares_csr(self):
        """The learner accepts a table whose X is a CSR sparse matrix."""
        # X is replaced by a sparse matrix built from every 20th row
        self.iris.X = csr_matrix(self.iris.X[::20])
        c = self.kmeans(self.iris)
        self.assertEqual(np.ndarray, type(c))
        self.assertEqual(len(self.iris), len(c))

    def test_model(self):
        """``get_model`` returns a KMeansModel that reproduces its labels."""
        c = self.kmeans.get_model(self.iris)
        self.assertEqual(KMeansModel, type(c))
        self.assertEqual(len(self.iris), len(c.labels))

        c1 = c(self.iris)
        # prediction of the model must be same since data are same
        np.testing.assert_array_almost_equal(c.labels, c1)

    def test_model_np(self):
        """
        Test with numpy array as an input in model.
        """
        c = self.kmeans.get_model(self.iris)
        c1 = c(self.iris.X)
        # prediction of the model must be same since data are same
        np.testing.assert_array_almost_equal(c.labels, c1)

    def test_model_sparse_csc(self):
        """
        Test with CSC sparse array as an input in model.
        """
        c = self.kmeans.get_model(self.iris)
        c1 = c(csc_matrix(self.iris.X))
        # prediction of the model must be same since data are same
        np.testing.assert_array_almost_equal(c.labels, c1)

    def test_model_sparse_csr(self):
        """
        Test with CSR sparse array as an input in model.
        """
        c = self.kmeans.get_model(self.iris)
        c1 = c(csr_matrix(self.iris.X))
        # prediction of the model must be same since data are same
        np.testing.assert_array_almost_equal(c.labels, c1)

    def test_model_instance(self):
        """
        Test with instance as an input in model.
        """
        c = self.kmeans.get_model(self.iris)
        c1 = c(self.iris[0])
        # prediction of the model must be same since data are same
        self.assertEqual(c1, c.labels[0])

    def test_model_list(self):
        """
        Test with list as an input in model.
        """
        c = self.kmeans.get_model(self.iris)
        c1 = c(self.iris.X.tolist())
        # prediction of the model must be same since data are same
        np.testing.assert_array_almost_equal(c.labels, c1)

        # example with a list of only one data item
        c1 = c(self.iris.X.tolist()[0])
        # prediction of the model must be same since data are same
        np.testing.assert_array_almost_equal(c.labels[0], c1)

    def test_model_bad_datatype(self):
        """
        Check model with data-type that is not supported.
        """
        c = self.kmeans.get_model(self.iris)
        self.assertRaises(TypeError, c, 10)

    def test_model_data_table_domain(self):
        """
        Check model on tables whose domain differs from the training domain.

        A table with the iris attributes plus one extra column must give
        the same labels; the unrelated "housing" table must raise
        DomainTransformationError.
        """
        # ok domain
        data = Table(
            Domain(
                list(self.iris.domain.attributes) + [ContinuousVariable("a")]),
            np.concatenate((self.iris.X, np.ones((len(self.iris), 1))),
                           axis=1))
        c = self.kmeans.get_model(self.iris)
        res = c(data)
        np.testing.assert_array_almost_equal(c.labels, res)

        # totally different domain - should fail
        self.assertRaises(DomainTransformationError, c, Table("housing"))

    def test_deprecated_silhouette(self):
        """
        Passing ``compute_silhouette_score`` emits one DeprecationWarning.
        """
        for flag in (True, False):
            with self.subTest(compute_silhouette_score=flag):
                with warnings.catch_warnings(record=True) as w:
                    # Recording alone does not guarantee delivery: the
                    # filters active on entry may suppress
                    # DeprecationWarning, so force it to be reported here.
                    warnings.simplefilter("always", DeprecationWarning)
                    KMeans(compute_silhouette_score=flag)

                self.assertEqual(1, len(w))
                self.assertTrue(
                    issubclass(w[-1].category, DeprecationWarning))
Пример #16
0
 def setUp(self):
     """Prepare the fixtures: the iris table and a two-cluster learner."""
     self.iris = Orange.data.Table('iris')
     self.kmeans = KMeans(n_clusters=2)
Пример #17
0
 def test_predict_sparse(self):
     """Smoke test: apply the learner's result to a sparse CSC sample."""
     clustering = KMeans()(self.iris)
     # Every 20th row of X, converted to a CSC matrix
     sample = csc_matrix(self.iris.X[::20])
     clustering(sample)
Пример #18
0
 def test_predict_numpy(self):
     """Smoke test: apply the learner's result to every 20th row of X."""
     clustering = KMeans()(self.iris)
     sample = self.iris.X[::20]
     # No assertions: the call only has to complete without raising
     clustering(sample)
Пример #19
0
 def test_predict_single_instance(self):
     """Smoke test: apply the learner's result to the first iris row."""
     clustering = KMeans()(self.iris)
     first_row = self.iris[0]
     # No assertions: the call only has to complete without raising
     clustering(first_row)