Example 1
    def test_hypercube_to_hypersphere_surface_2D_full_single_point(self):
        hc = np.array([0.2, 0.9])
        hs = hypercube_to_hypersphere_surface(hc, half_hypersphere=False)

        # check dimensionality and norms
        self.assertEqual(hs.ndim, 1)
        self.assertEqual(hs.shape, (3, ))
        assert_almost_equal(np.linalg.norm(hs), 1)
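
The test above only checks that a point from the unit square lands on the unit sphere in R^3. The mapping itself is not part of the snippet; below is a minimal sketch of one plausible angle-based construction (hypothetical, not necessarily the library's implementation) that satisfies the same shape and norm assertions. Note that this naive parametrization is not area-uniform, which is exactly the property the later tests probe.

    import numpy as np

    def angles_to_sphere_3d(u, v):
        # Hypothetical mapping: interpret the two cube coordinates as a
        # normalized polar angle and a normalized azimuthal angle.
        theta = np.pi * u       # polar angle in [0, pi]
        phi = 2 * np.pi * v     # azimuthal angle in [0, 2*pi)
        return np.array([np.sin(theta) * np.cos(phi),
                         np.sin(theta) * np.sin(phi),
                         np.cos(theta)])

    p = angles_to_sphere_3d(0.2, 0.9)
    print(p.shape, np.linalg.norm(p))  # (3,) 1.0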
Example 2
    def test_hypercube_to_hypersphere_surface_2D_full(self):
        n_points_per_dim = 1000
        n_points = n_points_per_dim**2
        grid = np.linspace(0, 1, n_points_per_dim)
        x, y = np.meshgrid(grid, grid)
        hc = np.array([x.flatten(), y.flatten()]).T
        hs = hypercube_to_hypersphere_surface(hc, half_hypersphere=False)

        # check dimensionality and norms
        self.assertEqual(hs.ndim, 2)
        self.assertEqual(hs.shape, (n_points, 2 + 1))
        assert_almost_equal(np.linalg.norm(hs, axis=1), 1)

        # make sure all orthants contain approximately the same number of data points
        tolerance_fraction = 0.01
        for orthant_signs in itertools.product([-1, 1], repeat=2 + 1):
            in_orthant = np.all(hs * orthant_signs > 0, axis=1).sum()
            lower = n_points / 2**(2 + 1) * (1 - tolerance_fraction)
            upper = n_points / 2**(2 + 1) * (1 + tolerance_fraction)
            msg = f'Expected a value between {lower:.0f} and {upper:.0f}, but was {in_orthant}'
            self.assertTrue(lower <= in_orthant <= upper, msg=msg)
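
The orthant-count check above reappears almost verbatim in the 6-D test below. A hypothetical helper that factors it out (the function name and return type are illustrative, not part of the library):

    import itertools
    import numpy as np

    def orthant_counts(points):
        # Count how many rows of `points` fall strictly inside each orthant of R^d;
        # rows with a zero coordinate are not assigned to any orthant.
        d = points.shape[1]
        return {signs: int(np.all(points * np.array(signs) > 0, axis=1).sum())
                for signs in itertools.product([-1, 1], repeat=d)}

    # usage: for a uniform distribution on the sphere, every count should be
    # close to len(points) / 2**d, e.g. counts = orthant_counts(hs)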
Example 3
    def test_hypercube_to_hypersphere_surface_1D_half(self):
        n_points = 11
        hc = np.linspace(0, 1, n_points).reshape(-1, 1)
        hs = hypercube_to_hypersphere_surface(hc, half_hypersphere=True)

        # check dimensionality and norms
        self.assertEqual(hs.ndim, 2)
        self.assertEqual(hs.shape, (n_points, 2))
        assert_almost_equal(np.linalg.norm(hs, axis=1), 1)

        # check uniformity
        expected_cos = np.dot(hs[0], hs[1])
        for i in range(1, n_points):
            cos = np.dot(hs[i - 1], hs[i])
            assert_almost_equal(cos, expected_cos)

        cos = np.dot(hs[0], hs[-2])
        assert_almost_equal(cos, -expected_cos)

        cos = np.dot(hs[0], hs[-1])
        assert_almost_equal(cos, -1.0)
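
The three assertions above are exactly what an even angular spacing over the half circle produces: consecutive points always enclose the angle pi/(n_points - 1), and the first and last points are antipodal. A quick reference check under that assumption (the library's actual parametrization may differ):

    import numpy as np

    n_points = 11
    angles = np.pi * np.linspace(0, 1, n_points)            # evenly spaced over the half circle
    ref = np.column_stack([np.cos(angles), np.sin(angles)])

    print(np.dot(ref[0], ref[1]), np.cos(np.pi / (n_points - 1)))    # identical
    print(np.dot(ref[0], ref[-2]), -np.cos(np.pi / (n_points - 1)))  # identical
    print(np.dot(ref[0], ref[-1]))                                   # -1.0: endpoints are antipodal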
Example 4
    def test_hypercube_to_hypersphere_surface_6D_full(self):
        n_points = 1_000_000
        np.random.seed(666)
        hc = np.random.uniform(0, 1, (n_points, 6))
        hs = hypercube_to_hypersphere_surface(hc, half_hypersphere=False)
        # hs = np.random.normal(0, 1, hs.shape)

        # check dimensionality and norms
        self.assertEqual(hs.ndim, 2)
        self.assertEqual(hs.shape, (n_points, 6 + 1))
        assert_almost_equal(np.linalg.norm(hs, axis=1), 1)

        # make sure all orthants contain approximately the same number of data points
        tolerance_fraction = 0.03
        for orthant_signs in itertools.product([-1, 1], repeat=6 + 1):
            in_orthant = np.all(hs * orthant_signs > 0, axis=1).sum()
            lower = n_points / 2**(6 + 1) * (1 - tolerance_fraction)
            upper = n_points / 2**(6 + 1) * (1 + tolerance_fraction)
            msg = f'Expected a value between {lower:.0f} and {upper:.0f}, but was {in_orthant}'
            self.assertTrue(lower <= in_orthant <= upper, msg=msg)
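
The commented-out line in this test hints at the classic baseline for uniform sampling on a hypersphere: draw i.i.d. standard normal rows and normalize each one, which yields uniform points by the rotational invariance of the Gaussian. A minimal sketch of that reference method, which should pass the same orthant-count check:

    import numpy as np

    rng = np.random.default_rng(666)
    n_points, dim = 1_000_000, 6 + 1
    g = rng.standard_normal((n_points, dim))
    baseline = g / np.linalg.norm(g, axis=1, keepdims=True)  # rows lie uniformly on the unit sphere in R^7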
Example 5
    def compute(self, hyperplane_normal):
        self.function_evaluations += 1

        if self.search_space_is_unit_hypercube:
            hyperplane_normal = hypercube_to_hypersphere_surface(
                hyperplane_normal, half_hypersphere=True)

        # catch some special cases and normalize to unit length
        hyperplane_normal = np.nan_to_num(hyperplane_normal)
        if np.all(hyperplane_normal == 0):
            hyperplane_normal[0] = 1

        hyperplane_normal /= np.linalg.norm(hyperplane_normal)

        dense = isinstance(self.X, np.ndarray)
        if not dense and isinstance(self.X, csr_matrix):
            self.X = csc_matrix(self.X)

        # compute distance of all points to the hyperplane: https://mathinsight.org/distance_point_plane
        projections = self.X @ hyperplane_normal  # up to an additive constant which doesn't matter to distance ordering
        sort_indices = np.argsort(projections)
        # we can only split between *different* data points, so look for gaps in the sorted projections
        split_indices = 1 + np.where(
            np.abs(np.diff(projections[sort_indices])) > self.split_precision)[0]
        if len(split_indices) == 0:
            # no split possible along this dimension
            return -self.log_p_data_no_split

        y_sorted = self.y[sort_indices]

        # compute data likelihoods of all possible splits along this projection and find split with highest data likelihood
        n_dim = self.X.shape[1]
        log_p_data_split = self.compute_log_p_data_split(
            y_sorted, self.prior, n_dim, split_indices)
        i_max = log_p_data_split.argmax()
        if log_p_data_split[i_max] >= self.best_log_p_data_split:
            best_split_index = split_indices[i_max]
            p1 = self.X[sort_indices[best_split_index - 1]]
            p2 = self.X[sort_indices[best_split_index]]
            if not dense:
                p1 = p1.toarray()[0]
                p2 = p2.toarray()[0]

            # place the hyperplane origin midway between the two points being split
            hyperplane_origin = 0.5 * (p1 + p2)
            projections_with_origin = projections - np.dot(hyperplane_normal, hyperplane_origin)
            cumulative_distances = np.sum(np.abs(projections_with_origin))

            if log_p_data_split[i_max] > self.best_log_p_data_split:
                is_log_p_better_or_same_but_with_better_distance = True
            else:
                # accept new split with same log(p) only if it increases the cumulative distance of all points to the hyperplane
                is_log_p_better_or_same_but_with_better_distance = cumulative_distances > self.best_cumulative_distances

            if is_log_p_better_or_same_but_with_better_distance:
                self.best_log_p_data_split = log_p_data_split[i_max]
                self.best_cumulative_distances = cumulative_distances
                self.best_hyperplane_normal = hyperplane_normal
                self.best_hyperplane_origin = hyperplane_origin

        return -log_p_data_split[i_max]
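
Since compute returns the negative log marginal likelihood of the best split along the candidate normal (and records the best hyperplane seen so far as a side effect), it can serve as the objective for a black-box global optimizer over the unit hypercube. A hypothetical driver sketch, assuming an already-constructed objective instance and scipy's differential_evolution as the optimizer; neither is prescribed by the snippet above:

    import numpy as np
    from scipy.optimize import differential_evolution

    def find_best_hyperplane(objective, seed=0):
        # `objective` is assumed to be an instance of the class owning compute(),
        # built with search_space_is_unit_hypercube=True so that each candidate
        # vector from [0, 1]^(d-1) is mapped onto the half hypersphere of unit
        # normals in R^d inside compute().
        d = objective.X.shape[1]
        bounds = [(0.0, 1.0)] * (d - 1)  # a d-dimensional unit normal needs d-1 cube coordinates
        differential_evolution(objective.compute, bounds, seed=seed)
        return objective.best_hyperplane_normal, objective.best_hyperplane_origin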