Пример #1
0
    def label_to_one_hot(a):
        """
        Turn an array of labels into a 0/1 indicator matrix.

        Every row of the result corresponds to one entry of ``a`` and holds a
        1 in the column whose index equals that entry and 0 everywhere else.
        The number of columns is the largest value in ``a`` plus one.

        Parameters
        ----------
        a : array of labels
            Presumably a 1-D HeAT array of non-negative integer labels --
            TODO confirm against the caller.

        Returns
        -------
        The indicator matrix produced by ``ht.where``.
        """
        n_classes = ht.max(a).item() + 1
        # Add a trailing axis so each label is compared against a whole row.
        column = a.expand_dims(1)

        classes = ht.arange(0, n_classes)
        # One copy of the class indices per label, stacked row-wise.
        grid = ht.stack([classes] * column.shape[0], axis=0)

        return ht.where(grid == column, 1, 0)
Пример #2
0
    def one_hot_encoding(x: DNDarray) -> DNDarray:
        """
        One-hot-encode the passed vector or single-column matrix.

        The result has one row per entry of ``x`` and ``max(x) + 1`` columns.
        It is created with the same ``split``, ``device`` and ``comm`` as
        ``x``; the ones are then written into the process-local part only.

        Parameters
        ----------
        x : DNDarray
            The data to be encoded. Its values are used as column indices.

        Returns
        -------
        DNDarray
            Array of zeros with a 1 at ``[i, x[i]]`` for every local row ``i``.

        Notes
        -----
        NOTE(review): for a single-column ``(n, 1)`` input the row index and
        ``x.larray`` have different shapes -- confirm the indexing below
        selects one column per row in that case.
        """
        n_rows = x.shape[0]
        n_columns = ht.max(x).item() + 1

        encoded = ht.zeros(
            (n_rows, n_columns), split=x.split, device=x.device, comm=x.comm
        )

        # Index pairs (local row, label) mark the positions that become 1.
        local_rows = range(encoded.lshape[0])
        encoded.lloc[local_rows, x.larray] = 1

        return encoded
Пример #3
0
    def test_max(self):
        """Exercise ``ht.max`` globally, per axis, on split tensors and with bad axes."""
        values = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]

        tensor = ht.array(values)
        reference = torch.tensor(values)

        # global maximum: a single-element, unsplit int64 result
        overall = ht.max(tensor)

        self.assertIsInstance(overall, ht.tensor)
        self.assertEqual(overall.shape, (1, ))
        self.assertEqual(overall.lshape, (1, ))
        self.assertEqual(overall.split, None)
        self.assertEqual(overall.dtype, ht.int64)
        self.assertEqual(overall._tensor__array.dtype, torch.int64)
        self.assertEqual(overall, 12)

        # reduction over axis 0; the reduced axis is expected to stay with length one
        per_column = ht.max(tensor, axis=0)
        expected_per_column = reference.max(dim=0, keepdim=True)[0]

        self.assertIsInstance(per_column, ht.tensor)
        self.assertEqual(per_column.shape, (1, 3))
        self.assertEqual(per_column.lshape, (1, 3))
        self.assertEqual(per_column.split, None)
        self.assertEqual(per_column.dtype, ht.int64)
        self.assertEqual(per_column._tensor__array.dtype, torch.int64)
        self.assertTrue(
            (per_column._tensor__array == expected_per_column).all())

        # reduction over axis 1
        per_row = ht.max(tensor, axis=1)
        expected_per_row = reference.max(dim=1, keepdim=True)[0]

        self.assertIsInstance(per_row, ht.tensor)
        self.assertEqual(per_row.shape, (4, 1))
        self.assertEqual(per_row.lshape, (4, 1))
        self.assertEqual(per_row.split, None)
        self.assertEqual(per_row.dtype, ht.int64)
        self.assertEqual(per_row._tensor__array.dtype, torch.int64)
        self.assertTrue((per_row._tensor__array == expected_per_row).all())

        # float 3d tensor split along axis 1, reduced over that same axis
        volume = ht.random.randn(3, 3, 3, split=1)
        volume_max = ht.max(volume, axis=1)

        self.assertIsInstance(volume_max, ht.tensor)
        self.assertEqual(volume_max.shape, (3, 1, 3))
        self.assertEqual(volume_max.lshape, (3, 1, 3))
        self.assertEqual(volume_max.dtype, ht.float32)
        self.assertEqual(volume_max._tensor__array.dtype, torch.float32)
        self.assertEqual(volume_max.split, None)

        # float 5d tensor split along axis 0, reduced over axis 1
        five_d = ht.random.randn(1, 2, 3, 4, 5, split=0)
        five_d_max = ht.max(five_d, axis=1)

        self.assertIsInstance(five_d_max, ht.tensor)
        self.assertEqual(five_d_max.shape, (1, 1, 3, 4, 5))
        self.assertLessEqual(five_d_max.lshape[1], 2)
        self.assertEqual(five_d_max.dtype, ht.float32)
        self.assertEqual(five_d_max._tensor__array.dtype, torch.float32)
        self.assertEqual(five_d_max.split, 0)

        # invalid axis arguments must be rejected
        with self.assertRaises(NotImplementedError):
            tensor.max(axis=(0, 1))
        with self.assertRaises(TypeError):
            tensor.max(axis=1.1)
        with self.assertRaises(TypeError):
            tensor.max(axis='y')
        with self.assertRaises(ValueError):
            ht.max(tensor, axis=-4)
Пример #4
0
    def logsumexp(self,
                  a,
                  axis=None,
                  b=None,
                  keepdim=False,
                  return_sign=False):
        """
        Compute the log of the sum of exponentials of input elements.

        Adapted to HeAT from scikit-learn. The maximum along ``axis`` is
        subtracted before exponentiating and added back after taking the
        logarithm, i.e. the result is ``max + log(sum(exp(a - max)))``.

        Parameters
        ----------
        a : ht.tensor
            Input array.
        axis : None or int or tuple of ints, optional
            Axis or axes over which the sum is taken. It is forwarded to
            ``ht.max``, ``ht.sum`` and ``ht.squeeze``. By default `axis` is
            None, and all elements are summed.
        b : ht.tensor, optional
            Scaling factor for exp(`a`). Weighted sums are not implemented
            yet: any value other than None raises ``NotImplementedError``.
        keepdim : bool, optional
            If this is set to True, the axes which are reduced are left in the
            result as dimensions with size one. With this option, the result
            will broadcast correctly against the original array.
        return_sign : bool, optional
            Sign information is not implemented yet: a truthy value raises
            ``NotImplementedError``. Default is False.

        Returns
        -------
        res : ht.tensor
            The result, ``log(sum(exp(a)))``, calculated in a numerically
            more stable way.

        Raises
        ------
        NotImplementedError
            If `b` is not None or `return_sign` is truthy.
        """
        # Reject unsupported options before any reduction is started, so an
        # invalid call does not first pay for max/exp/sum over the data.
        if b is not None:
            raise NotImplementedError("Not implemented for weighted logsumexp")
        if return_sign:
            raise NotImplementedError("Not implemented for return_sign")

        # keepdim=True so that a_max broadcasts against `a` in the subtraction.
        a_max = ht.max(a, axis=axis, keepdim=True)

        # TODO: sanitize a_max / implement isfinite(): sanitation module, cf. #468
        #       (non-finite entries of a_max should be replaced by 0).
        # TODO: once `b` is supported, scale here: tmp = b * exp(a - a_max).
        tmp = ht.exp(a - a_max)

        s = ht.sum(tmp, axis=axis, keepdim=keepdim)
        # TODO: once `return_sign` is supported, split `s` into sign and
        #       magnitude here before taking the logarithm.
        out = ht.log(s)

        # Bring a_max to the same shape as `out` before adding it back.
        if not keepdim:
            a_max = ht.squeeze(a_max, axis=axis)
        out += a_max

        return out
Пример #5
0
    def test_max(self):
        """Exercise ``ht.max`` globally, per axis, on split arrays and with bad axes."""
        values = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]

        array = ht.array(values)
        reference = torch.tensor(values, device=self.device.torch_device)

        # global maximum: a single-element, unsplit int64 result
        overall = ht.max(array)

        self.assertIsInstance(overall, ht.DNDarray)
        self.assertEqual(overall.shape, (1, ))
        self.assertEqual(overall.lshape, (1, ))
        self.assertEqual(overall.split, None)
        self.assertEqual(overall.dtype, ht.int64)
        self.assertEqual(overall._DNDarray__array.dtype, torch.int64)
        self.assertEqual(overall, 12)

        # reduction over axis 0; the reduced axis is dropped
        per_column = ht.max(array, axis=0)
        expected_per_column = reference.max(dim=0, keepdim=True)[0]

        self.assertIsInstance(per_column, ht.DNDarray)
        self.assertEqual(per_column.shape, (3, ))
        self.assertEqual(per_column.lshape, (3, ))
        self.assertEqual(per_column.split, None)
        self.assertEqual(per_column.dtype, ht.int64)
        self.assertEqual(per_column._DNDarray__array.dtype, torch.int64)
        self.assertTrue(
            (per_column._DNDarray__array == expected_per_column).all())

        # reduction over axis 1 with keepdim=True; the reduced axis stays with length one
        per_row = ht.max(array, axis=1, keepdim=True)
        expected_per_row = reference.max(dim=1, keepdim=True)[0]

        self.assertIsInstance(per_row, ht.DNDarray)
        self.assertEqual(per_row.shape, (4, 1))
        self.assertEqual(per_row.lshape, (4, 1))
        self.assertEqual(per_row.split, None)
        self.assertEqual(per_row.dtype, ht.int64)
        self.assertEqual(per_row._DNDarray__array.dtype, torch.int64)
        self.assertTrue((per_row._DNDarray__array == expected_per_row).all())

        # float 3d array split along axis 1, reduced over that same axis
        n_procs = ht.MPI_WORLD.size
        volume = ht.random.randn(3, 3 * n_procs, 3, split=1)
        volume_max = ht.max(volume, axis=1)

        self.assertIsInstance(volume_max, ht.DNDarray)
        self.assertEqual(volume_max.shape, (3, 3))
        self.assertEqual(volume_max.lshape, (3, 3))
        self.assertEqual(volume_max.dtype, ht.float32)
        self.assertEqual(volume_max._DNDarray__array.dtype, torch.float32)
        self.assertEqual(volume_max.split, None)

        # float 3d array split along axis 0, reduced over a tuple of axes;
        # the order of the axes in the tuple must not matter
        volume = ht.random.randn(3 * n_procs, 3, 3, split=0)
        volume_max = ht.max(volume, axis=(1, 2))
        volume_max_swapped = ht.max(volume, axis=(2, 1))

        self.assertIsInstance(volume_max, ht.DNDarray)
        self.assertEqual(volume_max.shape, (3 * n_procs, ))
        self.assertEqual(volume_max.dtype, ht.float32)
        self.assertEqual(volume_max._DNDarray__array.dtype, torch.float32)
        self.assertEqual(volume_max.split, 0)
        self.assertTrue((volume_max == volume_max_swapped).all())

        # float 5d array split along axis 0, reduced over axis 1
        five_d = ht.random.randn(1 * n_procs, 2, 3, 4, 5, split=0)
        five_d_max = ht.max(five_d, axis=1)

        self.assertIsInstance(five_d_max, ht.DNDarray)
        self.assertEqual(five_d_max.shape, (1 * n_procs, 3, 4, 5))
        self.assertLessEqual(five_d_max.lshape[1], 3)
        self.assertEqual(five_d_max.dtype, ht.float32)
        self.assertEqual(five_d_max._DNDarray__array.dtype, torch.float32)
        self.assertEqual(five_d_max.split, 0)

        # fewer elements than processes: the global max must still come out right
        if n_procs > 1:
            short = ht.arange(n_procs - 1, split=0)
            short_max = ht.max(short)
            expected_short_max = torch.tensor(
                [n_procs - 2],
                dtype=short.dtype.torch_type(),
                device=self.device.torch_device)
            self.assertTrue(
                torch.equal(short_max._DNDarray__array, expected_short_max))

        # invalid axis arguments must be rejected
        with self.assertRaises(TypeError):
            array.max(axis=1.1)
        with self.assertRaises(TypeError):
            array.max(axis="y")
        with self.assertRaises(ValueError):
            ht.max(array, axis=-4)