Пример #1
0
    def test_fit_iris_unsplit(self):
        """Fit KMedoids on the iris data set and validate the resulting medoids.

        Two estimators are fitted on the same data: one without an explicit
        ``init`` argument but with a fixed ``random_state``, and one with
        ``init="kmedoids++"``.  In both cases ``cluster_centers_`` must be a
        DNDarray with one row per cluster and one column per feature.  For the
        second estimator every center must additionally coincide with at least
        one row of the data.
        """
        n_clusters = 3
        # NOTE(review): the test name says "unsplit", yet the data is loaded
        # with split=0 -- confirm which of the two is intended.
        axis_split = 0
        iris = ht.load("heat/datasets/iris.csv", sep=";", split=axis_split)
        ht.random.seed(1)

        # estimator without an explicit init argument, fixed random state
        model = ht.cluster.KMedoids(n_clusters=n_clusters, random_state=1)
        model.fit(iris)
        self.assertIsInstance(model.cluster_centers_, ht.DNDarray)
        self.assertEqual(
            model.cluster_centers_.shape, (n_clusters, iris.shape[1])
        )

        # estimator initialised with "kmedoids++"
        model = ht.cluster.KMedoids(n_clusters=n_clusters, init="kmedoids++")
        model.fit(iris)
        self.assertIsInstance(model.cluster_centers_, ht.DNDarray)
        self.assertEqual(
            model.cluster_centers_.shape, (n_clusters, iris.shape[1])
        )

        # a center counts as a data point when the summed absolute difference
        # to at least one row of the data is exactly zero
        for row in range(model.cluster_centers_.shape[0]):
            abs_diff = ht.abs(model.cluster_centers_[row, :] - iris)
            row_distance = ht.sum(abs_diff, axis=1)
            self.assertTrue(ht.any(row_distance == 0))
Пример #2
0
    def test_any(self):
        """Check ``any`` for several dtypes, axes and a split input.

        Every scenario verifies that the result is a DNDarray with the
        expected shape, that its dtype is ``ht.bool`` and that it compares
        equal (via ``ht.equal``) to a hand-written ``uint8`` reference.
        """

        def verify(result, expected, shape):
            # assertions shared by all scenarios below
            self.assertIsInstance(result, ht.DNDarray)
            self.assertEqual(result.shape, shape)
            self.assertEqual(result.dtype, ht.bool)
            self.assertTrue(ht.equal(result, expected))

        # float values, minor axis (method form of any)
        floats = ht.float32(
            [[2.7, 0, 0], [0, 0, 0], [0, 0.3, 0]], device=ht_device
        )
        reduced = floats.any(axis=1)
        reference = ht.uint8([1, 0, 1], device=ht_device)
        verify(reduced, reference, (3, ))

        # integer values, major axis; the result is written into a
        # pre-allocated tensor passed via ``out``
        target = ht.zeros((2, ), device=ht_device)
        ints = ht.int32([[0, 0], [0, 0], [0, 1]], device=ht_device)
        ht.any(ints, axis=0, out=target)
        reference = ht.uint8([0, 1], device=ht_device)
        verify(target, reference, (2, ))

        # float values, no axis: reduction over the whole tensor
        doubles = ht.float64([[0, 0, 0], [0, 0, 0]], device=ht_device)
        reference = ht.zeros(1, dtype=ht.uint8, device=ht_device)
        reduced = ht.any(doubles)
        verify(reduced, reference, (1, ))

        # split tensor, reduced along the split axis
        distributed = ht.arange(10, split=0, device=ht_device)
        reduced = ht.any(distributed, axis=0)
        reference = ht.uint8([1], device=ht_device)
        verify(reduced, reference, (1, ))
Пример #3
0
    def test_spherical_clusters(self):
        """Fit KMedoids on data from ``create_spherical_dataset`` and check the medoids.

        Four configurations are run one after another, each with a fresh
        ``KMedoids(n_clusters=4, init="kmedoids++")`` estimator.  For every
        configuration the test asserts that ``cluster_centers_`` is a DNDarray
        of shape ``(4, 3)`` and that each center coincides with at least one
        row of the data (summed absolute difference of exactly zero).
        """
        seed = 1
        n_clusters = 4
        world_size = ht.MPI_WORLD.size

        # One row per scenario:
        # (samples per cluster, radius, offset, data dtype, comparison dtype)
        # A comparison dtype of None means the centers are compared against
        # the data as generated.
        scenarios = [
            # baseline
            (20 * world_size, 1.0, 4.0, ht.float32, None),
            # more samples
            (100 * world_size, 1.0, 4.0, ht.float32, None),
            # different datatype; the data is cast to float32 before the
            # comparison (presumably because the centers come back as
            # float32 -- TODO confirm)
            (20 * world_size, 1.0, 4.0, ht.float64, ht.float32),
            # on ints (different radius, offset and datatype)
            (20 * world_size, 10.0, 40.0, ht.int32, None),
        ]

        for n, radius, offset, dtype, compare_dtype in scenarios:
            data = self.create_spherical_dataset(num_samples_cluster=n,
                                                 radius=radius,
                                                 offset=offset,
                                                 dtype=dtype,
                                                 random_state=seed)
            kmedoid = ht.cluster.KMedoids(n_clusters=n_clusters,
                                          init="kmedoids++")
            kmedoid.fit(data)

            self.assertIsInstance(kmedoid.cluster_centers_, ht.DNDarray)
            self.assertEqual(kmedoid.cluster_centers_.shape, (4, 3))

            # the cast does not depend on the center index, so do it once
            if compare_dtype is None:
                reference = data
            else:
                reference = data.astype(compare_dtype)

            # check whether each result is actually a datapoint
            for i in range(kmedoid.cluster_centers_.shape[0]):
                distance = ht.sum(
                    ht.abs(kmedoid.cluster_centers_[i, :] - reference),
                    axis=1)
                self.assertTrue(
                    ht.any(distance == 0),
                    msg="center {} is not a data point (n={}, radius={}, "
                        "offset={}, dtype={})".format(
                            i, n, radius, offset, dtype))