def _update_centers(self, X, centers, labels, dims, total_iters):
    """Updates the centers of the KMeans algorithm for the current iteration, while satisfying differential
    privacy.

    Differential privacy is satisfied by adding (integer-valued, using :class:`.GeometricFolded`) random noise to
    the count of nearest neighbours to the previous cluster centers, and adding (real-valued, using
    :class:`.LaplaceBoundedDomain`) random noise to the sum of values per dimension.

    Parameters
    ----------
    X : array-like
        Data points; rows are selected with a boolean mask and summed along axis 0.
    centers : array-like
        Current cluster centers. Row ``cluster`` is overwritten in place for every cluster that has at least one
        member; rows of clusters without members are left untouched.
    labels : array-like
        Cluster label of each row of ``X``.
    dims : int
        Number of dimensions whose sums are perturbed.
    total_iters : int
        Forwarded, together with ``dims``, to ``self._split_epsilon``.

    Returns
    -------
    centers
        The same ``centers`` object that was passed in, after the in-place update.

    """
    epsilon_0, epsilon_i = self._split_epsilon(dims, total_iters)
    geometric_mech = GeometricFolded().set_sensitivity(1).set_bounds(0.5, float("inf")).set_epsilon(epsilon_0)
    laplace_mech = LaplaceBoundedDomain().set_epsilon(epsilon_i)

    # Guarantee an elementwise comparison below even if a plain sequence is passed.
    labels = np.asarray(labels)
    lower, upper = self.bounds[0], self.bounds[1]

    for cluster in range(self.n_clusters):
        # Build the membership mask once; it serves the emptiness check, the count and the row selection.
        members = labels == cluster
        cluster_count = int(np.count_nonzero(members))
        if cluster_count == 0:
            continue

        noisy_count = geometric_mech.randomise(cluster_count)

        cluster_sum = np.sum(X[members], axis=0)
        # The Laplace output is real-valued: an integer buffer (inherited from integer X) would silently
        # truncate the noise on assignment, so fall back to float unless the sum is already floating point.
        sum_dtype = cluster_sum.dtype if np.issubdtype(cluster_sum.dtype, np.floating) else float
        noisy_sum = np.zeros_like(cluster_sum, dtype=sum_dtype)

        for i in range(dims):
            # Sensitivity is the width of the feature's bounds; the output domain is those bounds scaled by
            # the noisy member count.
            laplace_mech.set_sensitivity(upper[i] - lower[i]).set_bounds(
                noisy_count * lower[i], noisy_count * upper[i]
            )
            noisy_sum[i] = laplace_mech.randomise(cluster_sum[i])

        centers[cluster, :] = noisy_sum / noisy_count

    return centers
def setup_method(self, method):
    """Store a new ``GeometricFolded`` instance on ``self.mech``.

    When the name of ``method`` ends with ``"prob"``, the global seed is fixed to 314159 first.
    """
    is_probabilistic = method.__name__.endswith("prob")
    if is_probabilistic:
        global_seed(314159)

    self.mech = GeometricFolded()
class TestGeometricFolded(TestCase):
    """Unit tests for the ``GeometricFolded`` mechanism."""

    def setup_method(self, method):
        """Create ``self.mech``; fix the global seed first for methods named ``*prob``."""
        is_probabilistic = method.__name__.endswith("prob")
        if is_probabilistic:
            global_seed(314159)

        self.mech = GeometricFolded()

    def teardown_method(self, method):
        """Drop the mechanism created in ``setup_method``."""
        del self.mech

    def test_not_none(self):
        """The mechanism instance exists."""
        self.assertIsNotNone(self.mech)

    def test_class(self):
        """``GeometricFolded`` is a subclass of ``DPMechanism``."""
        from diffprivlib.mechanisms import DPMechanism

        self.assertTrue(issubclass(GeometricFolded, DPMechanism))

    def test_no_params(self):
        """Randomising with nothing configured raises ``ValueError``."""
        self.assertRaises(ValueError, self.mech.randomise, 1)

    def test_no_sensitivity(self):
        """Randomising without a sensitivity raises ``ValueError``."""
        self.mech.set_epsilon(1).set_bounds(0, 10)
        self.assertRaises(ValueError, self.mech.randomise, 1)

    def test_non_integer_sensitivity(self):
        """A fractional sensitivity is rejected with ``TypeError``."""
        self.mech.set_epsilon(1).set_bounds(0, 10)
        self.assertRaises(TypeError, self.mech.set_sensitivity, 0.5)

    def test_no_epsilon(self):
        """Randomising without an epsilon raises ``ValueError``."""
        self.mech.set_sensitivity(1).set_bounds(0, 10)
        self.assertRaises(ValueError, self.mech.randomise, 1)

    def test_non_zero_delta(self):
        """A non-zero delta is rejected with ``ValueError``."""
        self.mech.set_sensitivity(1).set_bounds(0, 10)
        self.assertRaises(ValueError, self.mech.set_epsilon_delta, 1, 0.5)

    def test_neg_epsilon(self):
        """A negative epsilon is rejected with ``ValueError``."""
        self.mech.set_sensitivity(1).set_bounds(0, 10)
        self.assertRaises(ValueError, self.mech.set_epsilon, -1)

    def test_inf_epsilon(self):
        """With infinite epsilon the input is returned unchanged on every draw."""
        self.mech.set_sensitivity(1).set_epsilon(float("inf")).set_bounds(0, 10)

        for _ in range(1000):
            self.assertEqual(self.mech.randomise(1), 1)

    def test_complex_epsilon(self):
        """A complex epsilon is rejected with ``TypeError``."""
        self.assertRaises(TypeError, self.mech.set_epsilon, 1 + 2j)

    def test_string_epsilon(self):
        """A string epsilon is rejected with ``TypeError``."""
        self.assertRaises(TypeError, self.mech.set_epsilon, "Two")

    def test_no_bounds(self):
        """Randomising without bounds raises ``ValueError``."""
        self.mech.set_sensitivity(1).set_epsilon(1)
        self.assertRaises(ValueError, self.mech.randomise, 1)

    def test_half_integer_bounds(self):
        """Half-integer bounds are accepted and the output is still an ``int``."""
        self.mech.set_sensitivity(1).set_epsilon(1).set_bounds(0, 1.5)
        val = self.mech.randomise(0)
        self.assertIsInstance(val, int)

    def test_non_half_integer_bounds(self):
        """Bounds that are neither integer nor half-integer raise ``ValueError``."""
        self.mech.set_sensitivity(1).set_epsilon(1)
        self.assertRaises(ValueError, self.mech.set_bounds, 1, 2.2)

    def test_non_numeric(self):
        """A string input to ``randomise`` raises ``TypeError``."""
        self.mech.set_sensitivity(1).set_epsilon(1).set_bounds(0, 10)
        self.assertRaises(TypeError, self.mech.randomise, "Hello")

    def test_non_integer(self):
        """A float input to ``randomise`` raises ``TypeError``."""
        self.mech.set_sensitivity(1).set_epsilon(1).set_bounds(0, 10)
        self.assertRaises(TypeError, self.mech.randomise, 1.0)

    def test_zero_median_prob(self):
        """The median of 10000 draws for input 2 is within 0.1 of 2."""
        self.mech.set_sensitivity(1).set_bounds(0, 4).set_epsilon(1)

        samples = [self.mech.randomise(2) for _ in range(10000)]
        median = float(np.median(samples))

        self.assertAlmostEqual(np.abs(median), 2.0, delta=0.1)

    def test_neighbors_prob(self):
        """Outputs ``<= 1`` for neighbouring inputs 1 and 2 respect the ``exp(epsilon)`` ratio (plus 0.1 slack)."""
        epsilon = 1
        runs = 10000
        self.mech.set_sensitivity(1).set_epsilon(epsilon).set_bounds(0, 4)

        hits_from_one = 0
        hits_from_two = 0

        # The two draws stay interleaved so the seeded random stream is consumed in the same order.
        for _ in range(runs):
            if self.mech.randomise(1) <= 1:
                hits_from_one += 1

            if self.mech.randomise(2) <= 1:
                hits_from_two += 1

        self.assertGreater(hits_from_one, hits_from_two)
        self.assertLessEqual(hits_from_one / runs, np.exp(epsilon) * hits_from_two / runs + 0.1)