Exemplo n.º 1
0
    def rmse(self, gt: DNDarray, yest: DNDarray) -> float:
        """
        Root mean square error (RMSE)

        Parameters
        ----------
        gt : DNDarray
            Input model data, Shape = (1,)
        yest : DNDarray
            Thresholded model data, Shape = (1,)

        Returns
        -------
        float
            ``sqrt(mean((gt - yest) ** 2))`` unwrapped with ``.larray.item()``,
            i.e. a plain Python scalar rather than a DNDarray.
        """
        # The reduction yields a single-element array; ``.larray.item()``
        # extracts its value, so the caller receives a number, not an array.
        return ht.sqrt(ht.mean((gt - yest) ** 2)).larray.item()
Exemplo n.º 2
0
    def rmse(self, gt, yest):
        """
        Compute the root mean square error (RMSE) between two tensors.

        Parameters
        ----------
        gt : HeAT tensor, shape (1,)
            Input model data
        yest : HeAT tensor, shape (1,)
            Thresholded model data

        Returns
        -------
        The value obtained by calling ``.larray.item()`` on
        ``sqrt(mean((gt - yest) ** 2))``.
        """
        squared_error = (gt - yest) ** 2
        root_of_mean = ht.sqrt(ht.mean(squared_error))
        return root_of_mean.larray.item()
Exemplo n.º 3
0
        def test_lasso(self):
            """
            Exercise ``ht.regression.lasso.Lasso`` on the diabetes data set.

            Checks the attributes of a freshly constructed estimator, the
            attributes and shapes after ``fit``, the type and shape of the
            ``predict`` result, and that ``fit`` raises ``ValueError`` when
            handed three-dimensional inputs.
            """
            # ToDo: add additional tests
            # features ("x") and targets ("y") are two data sets of the same
            # HDF5 file, resolved against the current working directory
            data_file = os.path.join(os.getcwd(), "heat/datasets/data/diabetes.h5")
            X = ht.load_hdf5(data_file, dataset="x", device=ht_device, split=0)
            y = ht.load_hdf5(data_file, dataset="y", device=ht_device, split=0)

            # normalize dataset: divide every column by its root mean square
            column_rms = ht.sqrt(ht.mean(X ** 2, axis=0))
            X = X / column_rms
            n_rows, n_cols = X.shape

            # HeAT lasso instance; lam is not passed, so it keeps its preset value
            max_iter = 100
            estimator = ht.regression.lasso.Lasso(max_iter=max_iter, tol=None)

            # state of the estimator before fitting
            self.assertEqual(estimator.lam, 0.1)
            self.assertIsNone(estimator.theta)
            self.assertIsNone(estimator.n_iter)
            self.assertEqual(estimator.max_iter, max_iter)
            self.assertEqual(estimator.coef_, None)
            self.assertEqual(estimator.intercept_, None)

            estimator.fit(X, y)

            # state of the estimator after fitting; tol=None is expected to
            # make the solver use all iterations
            self.assertEqual(estimator.lam, 0.1)
            self.assertIsInstance(estimator.theta, ht.DNDarray)
            self.assertEqual(estimator.n_iter, max_iter)
            self.assertEqual(estimator.max_iter, max_iter)
            self.assertEqual(estimator.coef_.shape, (n_cols - 1, 1))
            self.assertEqual(estimator.intercept_.shape, (1,))

            prediction = estimator.predict(X)

            # the prediction is a column with one row per sample
            self.assertIsInstance(prediction, ht.DNDarray)
            self.assertEqual(prediction.shape, (n_rows, 1))

            # inputs with three dimensions must be rejected
            with self.assertRaises(ValueError):
                estimator.fit(X, ht.zeros((3, 3, 3)))
            with self.assertRaises(ValueError):
                estimator.fit(ht.zeros((3, 3, 3)), ht.zeros((3, 3)))
Exemplo n.º 4
0
        def test_lasso(self):
            """
            Run the same lasso checks against three estimator variants.

            The diabetes data is re-read from the HDF5 file for each variant and
            then fed, in turn, to ``HeatLasso`` (HeAT arrays), ``PytorchLasso``
            (torch tensors) and ``NumpyLasso`` (NumPy arrays). For each variant
            the estimator attributes are checked before and after ``fit`` and
            the type and shape of the ``predict`` result are verified.
            """
            # ToDo: add additional tests
            data_file = os.path.join(os.getcwd(), "heat/datasets/data/diabetes.h5")

            def load_raw():
                # fresh, not yet normalized copies of features and targets
                features = ht.load_hdf5(data_file, dataset="x")
                targets = ht.load_hdf5(data_file, dataset="y")
                return features, targets

            def check_before_fit(model):
                # attributes of a freshly constructed estimator
                self.assertEqual(model.lam, 0.1)
                self.assertTrue(model.theta is None)
                self.assertTrue(model.n_iter is None)
                self.assertEqual(model.max_iter, 100)
                self.assertEqual(model.coef_, None)
                self.assertEqual(model.intercept_, None)

            def check_after_fit(model, theta_type, n_cols):
                # attributes and parameter shapes of a fitted estimator
                self.assertEqual(model.lam, 0.1)
                self.assertIsInstance(model.theta, theta_type)
                self.assertEqual(model.n_iter, 100)
                self.assertEqual(model.max_iter, 100)
                self.assertEqual(model.coef_.shape, (n_cols - 1, 1))
                self.assertEqual(model.intercept_.shape, (1,))

            # --- HeAT ---------------------------------------------------------
            X, y = load_raw()

            # normalize dataset
            X = X / ht.sqrt(ht.mean(X ** 2, axis=0))
            m, n = X.shape

            estimator = ht.core.regression.lasso.HeatLasso(max_iter=100, tol=None)
            check_before_fit(estimator)
            estimator.fit(X, y)
            check_after_fit(estimator, ht.DNDarray, n)

            yest = estimator.predict(X)
            self.assertIsInstance(yest, ht.DNDarray)
            self.assertEqual(yest.shape, (m,))

            # --- PyTorch ------------------------------------------------------
            X, y = load_raw()

            # Now the same stuff again in PyTorch
            X = torch.tensor(X._DNDarray__array)
            y = torch.tensor(y._DNDarray__array)

            # normalize dataset
            X = X / torch.sqrt(torch.mean(X ** 2, 0))
            m, n = X.shape

            estimator = ht.core.regression.lasso.PytorchLasso(max_iter=100, tol=None)
            check_before_fit(estimator)
            estimator.fit(X, y)
            check_after_fit(estimator, torch.Tensor, n)

            yest = estimator.predict(X)
            self.assertIsInstance(yest, torch.Tensor)
            self.assertEqual(yest.shape, (m,))

            # --- NumPy --------------------------------------------------------
            X, y = load_raw()

            # Now the same stuff again in NumPy
            X = X._DNDarray__array.numpy()
            y = y._DNDarray__array.numpy()

            # normalize dataset
            X = X / np.sqrt(np.mean(X ** 2, axis=0, keepdims=True))
            m, n = X.shape

            estimator = ht.core.regression.lasso.NumpyLasso(max_iter=100, tol=None)
            check_before_fit(estimator)
            estimator.fit(X, y)
            check_after_fit(estimator, np.ndarray, n)

            yest = estimator.predict(X)
            self.assertIsInstance(yest, np.ndarray)
            self.assertEqual(yest.shape, (m,))
Exemplo n.º 5
0
import heat as ht
import heat.ml.regression.lasso as lasso
import numpy as np
from matplotlib import pyplot as plt
from sklearn import datasets

import plotfkt

# read scikit diabetes data set
diabetes = datasets.load_diabetes()

# load diabetes dataset from hdf5 file (features "x" and targets "y", both with split=0)
X = ht.load_hdf5("../../heat/datasets/data/diabetes.h5", dataset="x", split=0)
y = ht.load_hdf5("../../heat/datasets/data/diabetes.h5", dataset="y", split=0)

# normalize dataset: divide each column by its root mean square
# TODO: this goes into the lasso fit routine as soon as issue #106 is solved
X = X / ht.sqrt(ht.mean(X ** 2, axis=0))

# HeAT lasso instance
estimator = lasso.HeatLasso(max_iter=100)

# List of lasso model parameters, one flattened theta per lambda value
theta_list = []

# Range of lambda values: 10 logarithmically spaced points from 0.1 to 1000
lamda = np.logspace(0, 4, 10) / 10

# compute the lasso path: refit the same estimator for every lambda
for lam in lamda:
    estimator.lam = lam
    estimator.fit(X, y)
    theta_list.append(estimator.theta.numpy().flatten())
Exemplo n.º 6
0
    def __update_mean_variance(n_past, mu, var, X, sample_weight=None):
        """
        Adapted to HeAT from scikit-learn.

        Online update of Gaussian mean and variance.
        Starting from a sample count, a mean and a variance, fold in a new set
        of points X (optionally weighted) and return the updated mean and
        variance. Each dimension (column) of X is treated as independent, so
        the result is a variance, not a covariance.
        Scalar as well as vector means and variances are accepted, the latter
        updating several independent Gaussians at once.
        See Chan, Golub, and LeVeque 1983 [1]

        Parameters
        ----------
        n_past : int
            Number of samples represented in old mean and variance. If sample
            weights were given, this should contain the sum of sample
            weights represented in old mean and variance.
        mu : ht.tensor of shape (number of Gaussians,)
            Means for Gaussians in original set.
        var : ht.tensor of shape (number of Gaussians,)
            Variances for Gaussians in original set.
        X : ht.tensor
            New data points. ``X.shape[0]`` is used as the number of new
            samples and all statistics are taken along axis 0 (presumably the
            shape is (n_samples, number of Gaussians) -- confirm with callers).
        sample_weight : ht.tensor of shape (n_samples,), optional (default=None)
            Weights applied to individual samples (1. for unweighted).

        Returns
        -------
        total_mu : ht.tensor of shape (number of Gaussians,)
            Updated mean for each Gaussian over the combined set.
        total_var : ht.tensor of shape (number of Gaussians,)
            Updated variance for each Gaussian over the combined set.
            If X holds no samples, ``mu`` and ``var`` are returned unchanged;
            if ``n_past`` is 0, the statistics of X alone are returned.

        References
        ----------
        [1] Chan, Tony F., Golub, Gene H., and Leveque, Randall J., "Algorithms for Computing the Sample Variance: Analysis
        and Recommendations", The American Statistician, 37:3, pp. 242-247, 1983
        """
        # an empty batch leaves the running statistics untouched
        if X.shape[0] == 0:
            return mu, var

        # statistics of the new batch alone (weighted if weights are given)
        # TODO:Issue #351 allow weighted average across multiple axes
        if sample_weight is None:
            n_batch = X.shape[0]
            batch_var = ht.var(X, axis=0)
            batch_mu = ht.mean(X, axis=0)
        else:
            n_batch = float(sample_weight.sum())
            batch_mu = ht.average(X, axis=0, weights=sample_weight)
            batch_var = ht.average((X - batch_mu) ** 2, axis=0, weights=sample_weight)

        # no history yet: the batch statistics are the total statistics
        if n_past == 0:
            return batch_mu, batch_var

        n_total = float(n_past + n_batch)

        # means are merged weighted by their (weighted) observation counts
        merged_mu = (n_batch * batch_mu + n_past * mu) / n_total

        # variances are merged through their sums of squared differences,
        # plus a correction term for the distance between the two means
        past_ssd = n_past * var
        batch_ssd = n_batch * batch_var
        mean_shift = (n_batch * n_past / n_total) * (mu - batch_mu) ** 2
        merged_ssd = past_ssd + batch_ssd + mean_shift

        return merged_mu, merged_ssd / n_total
Exemplo n.º 7
0
    def test_var(self):
        """
        Check ``var`` for argument validation, a small known case, arrays of
        ones in one to three dimensions under every split setting (single axes
        and axis pairs), and the iris data set.
        """
        # every test dimension has twice as many entries as there are processes
        axis_len = ht.MPI_WORLD.size * 2

        # test raises
        x = ht.zeros((2, 3, 4))
        with self.assertRaises(ValueError):
            x.var(axis=10)
        with self.assertRaises(ValueError):
            x.var(axis=[4])
        with self.assertRaises(ValueError):
            x.var(axis=[-4])
        with self.assertRaises(TypeError):
            ht.var(x, axis="01")
        with self.assertRaises(ValueError):
            ht.var(x, axis=(0, "10"))
        with self.assertRaises(ValueError):
            ht.var(x, axis=(0, 0))
        with self.assertRaises(NotImplementedError):
            ht.var(x, ddof=2)
        with self.assertRaises(ValueError):
            ht.var(x, ddof=-2)
        # NOTE(review): this exercises ht.mean, not ht.var -- possibly copied
        # over from test_mean; confirm whether ht.var was intended
        with self.assertRaises(ValueError):
            ht.mean(x, axis=torch.Tensor([0, 0]))

        a = ht.arange(1, 5)
        self.assertEqual(a.var(ddof=1), 1.666666666666666)

        # ones: grow the shape one dimension at a time
        dimensions = []
        for d in (axis_len, axis_len, axis_len):
            dimensions.append(d)
            # every existing axis as split axis, plus the unsplit case
            split_options = [*range(len(dimensions)), None]
            for split in split_options:
                z = ht.ones(dimensions, split=split)
                res = z.var(ddof=0)
                total_dims_list = list(z.shape)
                self.assertTrue((res == 0).all())

                # loop over the different single dimensions for var
                for it in range(len(z.shape)):
                    res = z.var(axis=it)
                    self.assertTrue(ht.allclose(res, 0))

                    # the reduced axis disappears from the global shape
                    expected_shape = tuple(
                        extent for q, extent in enumerate(total_dims_list) if q != it
                    )
                    self.assertEqual(res.gshape, expected_shape)

                    # reducing the split axis removes the split; reducing an
                    # axis in front of it moves the split one position down
                    if z.split is None or it == split:
                        expected_split = None
                    elif it > z.split:
                        expected_split = z.split
                    else:
                        expected_split = z.split - 1
                    self.assertEqual(res.split, expected_split)

                    if split == it:
                        res = z.var(axis=it)
                        self.assertTrue(ht.allclose(res, 0))

                # loop over the different combinations of dimensions for var
                for axis_pair in combinations(range(len(z.shape)), 2):
                    lp_split = list(axis_pair)
                    res = z.var(axis=lp_split)
                    self.assertTrue((res == 0).all())

                    target_dims = [
                        extent
                        for q, extent in enumerate(total_dims_list)
                        if q not in lp_split
                    ]
                    if not target_dims:
                        target_dims = (1,)
                    if res.gshape:
                        self.assertEqual(res.gshape, tuple(target_dims))
                    if res.split is not None:
                        if any(split >= axis for axis in lp_split):
                            self.assertEqual(res.split, len(target_dims) - 1)
                        else:
                            self.assertEqual(res.split, z.split)

        # values for the iris dataset var measured by libreoffice calc
        for sp in [None, 0, 1]:
            iris = ht.load("heat/datasets/data/iris.csv", sep=";", split=sp)
            iris_var = ht.var(iris, bessel=True)
            self.assertTrue(ht.allclose(iris_var, 3.90318519755147))
Exemplo n.º 8
0
    def test_mean(self):
        """
        Check ``mean`` for argument validation, a small known case, arrays of
        ones in one to three dimensions under every split setting (single axes
        and axis pairs), and the iris data set.
        """
        axis_len = 5

        x = ht.zeros((2, 3, 4))
        with self.assertRaises(ValueError):
            x.mean(axis=10)
        with self.assertRaises(ValueError):
            x.mean(axis=[4])
        with self.assertRaises(ValueError):
            x.mean(axis=[-4])
        with self.assertRaises(TypeError):
            ht.mean(x, axis="01")
        with self.assertRaises(ValueError):
            ht.mean(x, axis=(0, "10"))
        with self.assertRaises(ValueError):
            ht.mean(x, axis=(0, 0))
        with self.assertRaises(ValueError):
            ht.mean(x, axis=torch.Tensor([0, 0]))

        a = ht.arange(1, 5)
        self.assertEqual(a.mean(), 2.5)

        # ones: grow the shape one dimension at a time
        dimensions = []
        for d in (axis_len, axis_len, axis_len):
            dimensions.append(d)
            # every existing axis as split axis, plus the unsplit case
            split_options = [*range(len(dimensions)), None]
            for split in split_options:
                z = ht.ones(dimensions, split=split)
                res = z.mean()
                total_dims_list = list(z.shape)
                self.assertTrue((res == 1).all())

                # loop over the different single dimensions for mean
                for it in range(len(z.shape)):
                    res = z.mean(axis=it)
                    self.assertTrue((res == 1).all())

                    # the reduced axis disappears from the global shape
                    expected_shape = tuple(
                        extent for q, extent in enumerate(total_dims_list) if q != it
                    )
                    self.assertEqual(res.gshape, expected_shape)

                    # reducing the split axis removes the split; reducing an
                    # axis in front of it moves the split one position down
                    if z.split is None or it == split:
                        expected_split = None
                    elif it > z.split:
                        expected_split = z.split
                    else:
                        expected_split = z.split - 1
                    self.assertEqual(res.split, expected_split)

                # loop over the different combinations of dimensions for mean
                for axis_pair in combinations(range(len(z.shape)), 2):
                    lp_split = list(axis_pair)
                    res = z.mean(axis=lp_split)
                    self.assertTrue((res == 1).all())

                    target_dims = [
                        extent
                        for q, extent in enumerate(total_dims_list)
                        if q not in lp_split
                    ]
                    if not target_dims:
                        target_dims = (1,)
                    if res.gshape:
                        self.assertEqual(res.gshape, tuple(target_dims))
                    if res.split is not None:
                        if any(split >= axis for axis in lp_split):
                            self.assertEqual(res.split, len(target_dims) - 1)
                        else:
                            self.assertEqual(res.split, z.split)

        # values for the iris dataset mean measured by libreoffice calc
        ax0 = ht.array(
            [5.84333333333333, 3.054, 3.75866666666667, 1.19866666666667]
        )
        for sp in [None, 0, 1]:
            iris = ht.load("heat/datasets/data/iris.csv", sep=";", split=sp)
            overall_mean = ht.mean(iris)
            self.assertTrue(ht.allclose(overall_mean, 3.46366666666667))
            column_means = ht.mean(iris, axis=0)
            self.assertTrue(ht.allclose(column_means, ax0))