Exemplo n.º 1
0
    def _update_centers(self, X, centers, labels, dims, total_iters):
        """Updates the centers of the KMeans algorithm for the current iteration, while satisfying differential
        privacy.

        Differential privacy is satisfied by adding (integer-valued, using :class:`.GeometricFolded`) random noise to
        the count of nearest neighbours to the previous cluster centers, and adding (real-valued, using
        :class:`.LaplaceBoundedDomain`) random noise to the sum of values per dimension.

        Parameters
        ----------
        X : array-like
            Data. Rows are selected with a boolean mask over samples and summed per cluster along axis 0.

        centers : array-like
            Current cluster centers. Rows belonging to non-empty clusters are overwritten in place.

        labels : array-like
            Cluster index assigned to each sample of ``X``.

        dims : int
            Number of dimensions that receive a noisy sum; also passed to ``_split_epsilon``.

        total_iters : int
            Passed to ``_split_epsilon`` when dividing the privacy budget (presumably the total number of
            iterations -- confirm against ``_split_epsilon``).

        Returns
        -------
        centers : array-like
            The same ``centers`` object that was passed in, with the updated rows.

        """
        epsilon_0, epsilon_i = self._split_epsilon(dims, total_iters)
        # NOTE(review): the 0.5 lower bound presumably keeps the noisy count >= 1 so that the division below is
        # always defined -- confirm against GeometricFolded.
        geometric_mech = GeometricFolded().set_sensitivity(1).set_bounds(0.5, float("inf")).set_epsilon(epsilon_0)
        laplace_mech = LaplaceBoundedDomain().set_epsilon(epsilon_i)

        # Guarantees that `labels == cluster` below is an element-wise comparison.
        labels = np.asarray(labels)

        for cluster in range(self.n_clusters):
            # Build the membership mask once and reuse it for the emptiness check, the count and the sum.
            members = labels == cluster
            cluster_count = int(np.count_nonzero(members))

            if cluster_count == 0:
                # No sample is assigned to this cluster: its center is left unchanged.
                continue

            noisy_count = geometric_mech.randomise(cluster_count)

            cluster_sum = np.sum(X[members], axis=0)
            # Use a float accumulator: inheriting an integer dtype from X would truncate the real-valued noisy sums
            # when they are stored.
            noisy_sum = np.zeros_like(cluster_sum, dtype=float)

            for i in range(dims):
                lower, upper = self.bounds[0][i], self.bounds[1][i]
                # The sum over `noisy_count` points is confined to [noisy_count * lower, noisy_count * upper].
                laplace_mech.set_sensitivity(upper - lower) \
                    .set_bounds(noisy_count * lower, noisy_count * upper)
                noisy_sum[i] = laplace_mech.randomise(cluster_sum[i])

            centers[cluster, :] = noisy_sum / noisy_count

        return centers
    def setup_method(self, method):
        """Create a fresh ``GeometricFolded`` mechanism before a test runs.

        When the name of ``method`` ends in ``prob``, ``global_seed`` is called with a fixed value before the
        mechanism is created.
        """
        needs_seed = method.__name__.endswith("prob")
        if needs_seed:
            global_seed(314159)

        self.mech = GeometricFolded()
class TestGeometricFolded(TestCase):
    """Unit tests for the ``GeometricFolded`` mechanism."""

    def setup_method(self, method):
        # Tests whose name ends in "prob" get a fixed seed before the mechanism is built.
        needs_seed = method.__name__.endswith("prob")
        if needs_seed:
            global_seed(314159)

        self.mech = GeometricFolded()

    def teardown_method(self, method):
        # Drop the mechanism created in setup_method.
        del self.mech

    def test_not_none(self):
        mech = self.mech
        self.assertIsNotNone(mech)

    def test_class(self):
        from diffprivlib.mechanisms import DPMechanism

        is_mechanism = issubclass(GeometricFolded, DPMechanism)
        self.assertTrue(is_mechanism)

    def test_no_params(self):
        # Nothing configured: randomising must be rejected.
        self.assertRaises(ValueError, self.mech.randomise, 1)

    def test_no_sensitivity(self):
        self.mech.set_epsilon(1).set_bounds(0, 10)
        self.assertRaises(ValueError, self.mech.randomise, 1)

    def test_non_integer_sensitivity(self):
        self.mech.set_epsilon(1).set_bounds(0, 10)
        self.assertRaises(TypeError, self.mech.set_sensitivity, 0.5)

    def test_no_epsilon(self):
        self.mech.set_sensitivity(1).set_bounds(0, 10)
        self.assertRaises(ValueError, self.mech.randomise, 1)

    def test_non_zero_delta(self):
        self.mech.set_sensitivity(1).set_bounds(0, 10)
        self.assertRaises(ValueError, self.mech.set_epsilon_delta, 1, 0.5)

    def test_neg_epsilon(self):
        self.mech.set_sensitivity(1).set_bounds(0, 10)
        self.assertRaises(ValueError, self.mech.set_epsilon, -1)

    def test_inf_epsilon(self):
        infinite = float("inf")
        self.mech.set_sensitivity(1).set_epsilon(infinite).set_bounds(0, 10)

        # With infinite epsilon every one of the 1000 draws must equal the input.
        for _ in range(1000):
            self.assertEqual(self.mech.randomise(1), 1)

    def test_complex_epsilon(self):
        self.assertRaises(TypeError, self.mech.set_epsilon, 1 + 2j)

    def test_string_epsilon(self):
        self.assertRaises(TypeError, self.mech.set_epsilon, "Two")

    def test_no_bounds(self):
        self.mech.set_sensitivity(1).set_epsilon(1)
        self.assertRaises(ValueError, self.mech.randomise, 1)

    def test_half_integer_bounds(self):
        self.mech.set_sensitivity(1).set_epsilon(1).set_bounds(0, 1.5)
        result = self.mech.randomise(0)
        self.assertIsInstance(result, int)

    def test_non_half_integer_bounds(self):
        self.mech.set_sensitivity(1).set_epsilon(1)
        self.assertRaises(ValueError, self.mech.set_bounds, 1, 2.2)

    def test_non_numeric(self):
        self.mech.set_sensitivity(1).set_epsilon(1).set_bounds(0, 10)
        self.assertRaises(TypeError, self.mech.randomise, "Hello")

    def test_non_integer(self):
        self.mech.set_sensitivity(1).set_epsilon(1).set_bounds(0, 10)
        self.assertRaises(TypeError, self.mech.randomise, 1.0)

    def test_zero_median_prob(self):
        self.mech.set_sensitivity(1).set_bounds(0, 4).set_epsilon(1)

        # The median of 10000 randomised copies of 2 must lie within 0.1 of 2.
        samples = [self.mech.randomise(2) for _ in range(10000)]

        median = float(np.median(samples))
        self.assertAlmostEqual(np.abs(median), 2.0, delta=0.1)

    def test_neighbors_prob(self):
        epsilon = 1
        runs = 10000
        self.mech.set_sensitivity(1).set_epsilon(epsilon).set_bounds(0, 4)
        hits_from_one = 0
        hits_from_two = 0

        # The two inputs are drawn alternately, one draw each per iteration.
        for _ in range(runs):
            if self.mech.randomise(1) <= 1:
                hits_from_one += 1

            if self.mech.randomise(2) <= 1:
                hits_from_two += 1

        self.assertGreater(hits_from_one, hits_from_two)

        # Frequency of outputs <= 1 for input 1 may exceed that for input 2 by at most a factor of
        # exp(epsilon), plus a 0.1 allowance.
        frequency_one = hits_from_one / runs
        ceiling = np.exp(epsilon) * hits_from_two / runs + 0.1
        self.assertLessEqual(frequency_one, ceiling)