Exemplo n.º 1
0
    def test_accountant(self):
        """Check that histogram() charges the accountant it is given.

        Covers a successful spend, a spend that exceeds the remaining budget
        and an ``accountant`` argument of the wrong type.
        """
        budget = BudgetAccountant(1.5, 0)
        data = np.array([1, 2, 3, 4, 5])
        query = dict(epsilon=1, bins=3, range=(0, 10), density=True)

        # First query fits into the budget and is recorded in full.
        histogram(data, accountant=budget, **query)
        self.assertEqual((1, 0), budget.total())

        # A second epsilon=1 query would take the total past 1.5.
        with self.assertRaises(BudgetError):
            histogram(data, accountant=budget, **query)

        # A list wrapping the accountant is not an acceptable accountant.
        with self.assertRaises(TypeError):
            histogram(data, accountant=[budget], **query)
    def test_accountant(self):
        """Check that LinearRegression.fit() charges explicit and scoped accountants."""
        from diffprivlib.accountant import BudgetAccountant

        explicit_acc = BudgetAccountant()
        grid = np.linspace(-1, 1, 100)
        targets = grid.copy()
        features = grid[:, np.newaxis]
        settings = dict(epsilon=2,
                        fit_intercept=False,
                        bounds_X=(-1, 1),
                        bounds_y=(-1, 1))

        # Accountant handed to the estimator directly.
        model = LinearRegression(accountant=explicit_acc, **settings)
        model.fit(features, targets)
        self.assertEqual((2, 0), explicit_acc.total())

        # No accountant argument: the one from the ``with`` block is charged.
        with BudgetAccountant(3, 0) as scoped_acc:
            model = LinearRegression(**settings)
            model.fit(features, targets)
            self.assertEqual((2, 0), scoped_acc.total())

            # A second epsilon=2 fit does not fit into a budget of 3.
            with self.assertRaises(BudgetError):
                model.fit(features, targets)
Exemplo n.º 3
0
    def test_accountant(self):
        """Check that RandomForestClassifier.fit() charges explicit and scoped accountants."""
        from diffprivlib.accountant import BudgetAccountant

        def build_forest(**extra):
            # Every model gets its own freshly built domain mapping.
            return RandomForestClassifier(epsilon=2,
                                          n_estimators=5,
                                          feature_domains={
                                              '0': [2.0, 12.0],
                                              '1': [3.0, 13.0],
                                              '2': [3.0, 16.0]
                                          },
                                          **extra)

        explicit_acc = BudgetAccountant()
        features = np.array([[12, 3, 14], [12, 3, 4], [12, 3, 4], [2, 13, 4],
                             [2, 13, 14], [2, 3, 14], [3, 5, 15]])
        labels = np.array([1, 1, 1, 0, 0, 0, 1])

        # Accountant handed to the estimator directly.
        forest = build_forest(accountant=explicit_acc)
        forest.fit(features, labels)
        self.assertEqual((2, 0), explicit_acc.total())

        # No accountant argument: the one from the ``with`` block is charged.
        with BudgetAccountant(3, 0) as scoped_acc:
            forest = build_forest()
            forest.fit(features, labels)
            self.assertEqual((2, 0), scoped_acc.total())

            # A second epsilon=2 fit does not fit into a budget of 3.
            with self.assertRaises(BudgetError):
                forest.fit(features, labels)
    def test_change_large_slack(self):
        """Assigning slack=0.2 after two (0.1, 0.1) spends must raise BudgetError."""
        budget = BudgetAccountant(1, 0.2, 0)
        for _ in range(2):
            budget.spend(0.1, 0.1)

        with self.assertRaises(BudgetError):
            budget.slack = 0.2
    def test_accountant(self):
        """Check that LogisticRegression.fit() charges explicit and scoped accountants."""
        from diffprivlib.accountant import BudgetAccountant
        explicit_acc = BudgetAccountant()

        raw = np.array([
            0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50, 2.75,
            3.00, 3.25, 3.50, 4.00, 4.25, 4.50, 4.75, 5.00, 5.50
        ])
        labels = np.array(
            [0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1])
        # Column vector, shifted by 3.0 and then divided by 2.5.
        features = (raw[:, np.newaxis] - 3.0) / 2.5

        # Accountant handed to the estimator directly.
        model = LogisticRegression(epsilon=2, data_norm=1.0, accountant=explicit_acc)
        model.fit(features, labels)
        self.assertEqual((2, 0), explicit_acc.total())

        # No accountant argument: the one from the ``with`` block is charged.
        with BudgetAccountant(3, 0) as scoped_acc:
            model = LogisticRegression(epsilon=2, data_norm=1.0)
            model.fit(features, labels)
            self.assertEqual((2, 0), scoped_acc.total())

            # A second epsilon=2 fit does not fit into a budget of 3.
            with self.assertRaises(BudgetError):
                model.fit(features, labels)
    def test_repr(self):
        """Exercise BudgetAccountant.__repr__ for default, configured, pre-spent
        and long-history accountants."""
        prefix = "BudgetAccountant("
        budget_fields = ("epsilon", "delta", "slack")

        # Accountant built with no arguments.
        text = repr(BudgetAccountant())
        self.assertIn(prefix, text)
        self.assertEqual("BudgetAccountant()", text)

        # Explicit budget: the budget fields appear, the spend history does not.
        text = repr(BudgetAccountant(epsilon=1, delta=0.01, slack=0.01))
        self.assertIn(prefix, text)
        for field in budget_fields:
            self.assertIn(field, text)
        self.assertNotIn("spent_budget", text)

        # Only a spend history: the inverse of the previous case.
        text = repr(BudgetAccountant(spent_budget=[(1, 0), (0, 1)]))
        self.assertIn(prefix, text)
        for field in budget_fields:
            self.assertNotIn(field, text)
        self.assertIn("spent_budget", text)

        # Eleven spends, the last being the only one containing a "5".
        history = [(1., 0.) for _ in range(10)]
        history.append((5., 0.5))
        acc = BudgetAccountant(spent_budget=history)
        text = repr(acc)
        self.assertIn(prefix, text)
        self.assertIn("...", text)
        self.assertNotIn("5", text)
        self.assertNotIn("5", acc.__repr__(10))
        self.assertIn("5", acc.__repr__(11))
    def test_accountant_with_axes(self):
        """Check how median() along an axis is recorded by the accountant."""
        from diffprivlib.accountant import BudgetAccountant
        budget = BudgetAccountant()

        samples = np.random.random((1000, 4))
        median(samples, epsilon=1, bounds=(0, 1), axis=0, accountant=budget)

        # axis=0 over a (1000, 4) array gives 4 outputs: expect one recorded
        # spend per output, totalling the requested epsilon.
        total_spent = budget.total()
        self.assertEqual((1, 0), total_spent)
        n_spends = len(budget)
        self.assertEqual(4, n_spends)
    def test_default(self):
        """Walk through explicit, implicit and user-set default accountants.

        The steps share the class-level default accountant, so each one relies
        on the state left behind by the previous step; keep them in order.
        ``sample_model`` / ``sample_model2`` are helpers defined elsewhere; the
        assertions below imply each spends epsilon=1.0 unless told otherwise.
        """
        # Pass the accountant explicitly as an argument
        acc1 = BudgetAccountant(1.5, 0.0)
        self.sample_model(accountant=acc1)
        self.assertEqual((1.0, 0.0), acc1.total())

        # No accountant given and no default set: a default with infinite
        # epsilon is expected to have been charged instead
        self.sample_model()
        acc2 = BudgetAccountant.pop_default()
        self.assertIsNot(acc1, acc2)
        self.assertEqual(float("inf"), acc2.epsilon)
        self.assertEqual((1.0, 0.0), acc2.total())

        # Install our own accountant as the default; set_default() returns
        # the accountant itself, so it can be chained onto the constructor
        acc3 = BudgetAccountant(2.0, 0.0).set_default()
        self.sample_model(epsilon=1.5)
        self.assertEqual((1.5, 0), acc3.total())
        self.assertEqual(2.0, acc3.epsilon)
        self.assertIsNot(acc3, acc2)
        self.assertIsNot(acc3, acc1)

        # The default popped now must be the one we installed, and the
        # previously popped default must be unaffected by the last spend
        acc4 = BudgetAccountant.pop_default()
        self.assertIs(acc3, acc4)
        self.assertEqual((1.0, 0.0), acc2.total())

        # Two different functions run without a default being set: both spends
        # must land on the same, newly created default accountant
        self.sample_model()
        self.sample_model2()
        acc5 = BudgetAccountant.pop_default()
        self.assertIsNot(acc5, acc2)
        self.assertIsNot(acc5, acc3)
        self.assertEqual((2.0, 0), acc5.total())
Exemplo n.º 9
0
    def test_accountant(self):
        """Check that count_nonzero() charges the accountant and fails once it is exhausted."""
        from diffprivlib.accountant import BudgetAccountant
        budget = BudgetAccountant(1.5, 0)

        flags = np.random.random((1000, 5)).round()
        count_nonzero(flags, epsilon=1, accountant=budget)
        self.assertEqual((1.0, 0), budget.total())

        # Inside ``with budget`` no accountant argument is passed, yet a second
        # epsilon=1 query must still be refused.
        with budget, self.assertRaises(BudgetError):
            count_nonzero(flags, epsilon=1)
Exemplo n.º 10
0
    def test_accountant(self):
        """Check that var() charges the accountant and fails once it is exhausted."""
        from diffprivlib.accountant import BudgetAccountant
        budget = BudgetAccountant(1.5, 0)

        samples = np.random.random((1000, 5))
        var(samples, epsilon=1, bounds=(0, 1), accountant=budget)
        self.assertEqual((1.0, 0), budget.total())

        # Inside ``with budget`` no accountant argument is passed, yet a second
        # epsilon=1 query must still be refused.
        with budget, self.assertRaises(BudgetError):
            var(samples, epsilon=1, bounds=(0, 1))
    def test_with_statement_errors(self):
        """Check that BudgetError surfaces inside and through ``with`` blocks."""
        with BudgetAccountant(1.5) as scoped:
            self.assertIsInstance(scoped, BudgetAccountant)

            # First spend of 1 fits into 1.5, the second one does not.
            self.sample_model(1)
            with self.assertRaises(BudgetError):
                self.sample_model(1)

        # A single spend larger than the whole budget must propagate out of
        # the accountant's ``with`` block.
        with self.assertRaises(BudgetError), BudgetAccountant(1):
            self.sample_model(2)
Exemplo n.º 12
0
    def test_default_accountant(self):
        """Check that histogram() falls back to the default accountant."""
        # Discard whatever default accountant is currently registered.
        BudgetAccountant.pop_default()

        values = np.array([1, 2, 3, 4, 5])
        shared = dict(epsilon=1, bins=3, range=(0, 10))

        histogram(values, density=True, **shared)
        first = BudgetAccountant.pop_default()
        self.assertEqual((1, 0), first.total())

        histogram(values, **shared)
        second = BudgetAccountant.pop_default()
        # The accountant popped earlier is untouched by the later query ...
        self.assertEqual((1, 0), first.total())
        # ... and the default popped afterwards is a different object.
        self.assertIsNot(first, second)
Exemplo n.º 13
0
    def test_default_accountant(self):
        """Check that histogramdd() falls back to the default accountant."""
        # Discard whatever default accountant is currently registered.
        BudgetAccountant.pop_default()

        points = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]).T
        shared = dict(epsilon=1, bins=3, range=[(0, 10), (0, 10)])

        histogramdd(points, density=True, **shared)
        popped = BudgetAccountant.pop_default()
        self.assertEqual((1, 0), popped.total())
        # The implicitly created default has an unlimited budget.
        self.assertEqual(popped.epsilon, float("inf"))
        self.assertEqual(popped.delta, 1.0)

        histogramdd(points, **shared)

        # The already-popped accountant must not see the second query.
        self.assertEqual((1, 0), popped.total())
    def test_change_spent_budget(self):
        """Check that ``spent_budget`` cannot be rebound, deleted or mutated from outside."""
        budget = BudgetAccountant(1, 0, spent_budget=[(0.5, 0), (0.5, 0)])

        # Neither assignment nor deletion is permitted.
        with self.assertRaises(AttributeError):
            budget.spent_budget = [(1, 0)]
        with self.assertRaises(AttributeError):
            del budget.spent_budget

        # Appending to the returned list must leave the accountant unchanged.
        exposed = budget.spent_budget
        exposed.append((1, 0))
        self.assertEqual(2, len(budget))

        # The attribute returns an equal list that is a distinct object.
        internal = budget._BudgetAccountant__spent_budget
        self.assertEqual(budget.spent_budget, internal)
        self.assertIsNot(budget.spent_budget, internal)
    def test_correct_composition(self):
        """Compare the composed total of 700 equal spends against a reference value."""
        slack = 2**-25
        per_query_epsilon = 2**-9
        n_queries = 700

        budget = BudgetAccountant(slack=slack)
        for _ in range(n_queries):
            budget.spend(per_query_epsilon, 0)

        spent_epsilon, spent_delta = budget.total()

        # Reference epsilon, compared to 14 decimal places.
        expected_epsilon = 0.27832280615743646366122002955588987576913442137093
        self.assertAlmostEqual(spent_epsilon, expected_epsilon, places=14)
        self.assertEqual(spent_delta, slack)
    def test_change_budget(self):
        """Check that ``epsilon`` and ``delta`` can be neither reassigned nor deleted."""
        budget = BudgetAccountant(1, 0, spent_budget=[(0.5, 0), (0.5, 0)])

        for attribute, new_value in (("epsilon", 2), ("delta", 0.1)):
            with self.assertRaises(AttributeError):
                setattr(budget, attribute, new_value)

            with self.assertRaises(AttributeError):
                delattr(budget, attribute)
    def test_spend_errors(self):
        """Check that spend() rejects each invalid (epsilon, delta) pair with ValueError."""
        budget = BudgetAccountant()

        invalid_spends = (
            (0, 0),
            (-1, 0),
            (1, -1),
            (1, 2),
        )
        for epsilon, delta in invalid_spends:
            with self.assertRaises(ValueError):
                budget.spend(epsilon, delta)
 def test_inf_spend(self):
     """Check that an (inf, 1) spend is reported by total(), remaining() and check()."""
     unbounded = (float("inf"), 1)

     budget = BudgetAccountant()
     budget.spend(*unbounded)

     self.assertEqual(unbounded, budget.total())
     self.assertEqual(unbounded, budget.remaining())
     self.assertEqual(unbounded, budget.remaining(100))
     self.assertTrue(budget.check(*unbounded))
    def test_remaining_budget_inf(self):
        """Check that remaining() reports (inf, 1.0) before and after an infinite spend."""
        unlimited = (float("inf"), 1.0)
        budget = BudgetAccountant()

        def assert_unlimited():
            # Checked both without an argument and with 100.
            self.assertEqual(unlimited, budget.remaining())
            self.assertEqual(unlimited, budget.remaining(100))

        assert_unlimited()
        budget.spend(float("inf"), 1)
        assert_unlimited()
Exemplo n.º 20
0
    def __init__(self,
                 n_estimators=10,
                 *,
                 epsilon=1.0,
                 cat_feature_threshold=10,
                 n_jobs=1,
                 verbose=0,
                 accountant=None,
                 max_depth=15,
                 random_state=None,
                 feature_domains=None,
                 **unused_args):
        """Configure the ensemble and store its privacy-related settings.

        ``n_estimators``, ``n_jobs``, ``random_state`` and ``verbose`` are
        forwarded to the parent constructor together with a fresh
        ``DecisionTreeClassifier()`` as base estimator. ``epsilon``,
        ``cat_feature_threshold``, ``max_depth`` and ``feature_domains`` are
        stored on the instance unchanged. ``accountant`` is passed through
        ``BudgetAccountant.load_default`` and the result is stored. Any extra
        keyword arguments are handed to ``self._warn_unused_args``.

        Every parameter after ``n_estimators`` is keyword-only.

        Side effect: when ``random_state`` is not None, NumPy's *global* random
        generator is re-seeded with it.
        """
        # The names listed in ``estimator_params`` are given to the parent class
        # (presumably to be copied onto each sub-estimator -- confirm against the
        # parent's implementation).
        super().__init__(base_estimator=DecisionTreeClassifier(),
                         n_estimators=n_estimators,
                         estimator_params=("cat_feature_threshold",
                                           "max_depth", "epsilon",
                                           "random_state"),
                         n_jobs=n_jobs,
                         random_state=random_state,
                         verbose=verbose)
        self.epsilon = epsilon
        self.cat_feature_threshold = cat_feature_threshold
        self.max_depth = max_depth
        self.accountant = BudgetAccountant.load_default(accountant)
        self.feature_domains = feature_domains

        # NOTE(review): this seeds the process-wide NumPy RNG from a constructor,
        # which affects all other users of ``np.random`` -- confirm this is intended.
        if random_state is not None:
            np.random.seed(random_state)

        self._warn_unused_args(unused_args)
Exemplo n.º 21
0
    def __init__(self,
                 n_components=None,
                 centered=False,
                 epsilon=1.0,
                 data_norm=None,
                 bounds=None,
                 copy=True,
                 whiten=False,
                 random_state=None,
                 accountant=None,
                 **unused_args):
        """Configure the estimator and store its privacy-related settings.

        ``n_components``, ``copy``, ``whiten`` and ``random_state`` go to the
        parent constructor, which is always called with ``svd_solver='full'``,
        ``tol=0.0`` and ``iterated_power='auto'``. ``centered``, ``epsilon``,
        ``data_norm`` and ``bounds`` are stored unchanged; ``accountant`` is
        resolved through ``BudgetAccountant.load_default``. Extra keyword
        arguments are reported through ``warn_unused_args``.
        """
        # Solver-related options are fixed here rather than exposed to callers.
        fixed_options = dict(svd_solver='full', tol=0.0, iterated_power='auto')
        super().__init__(n_components=n_components,
                         copy=copy,
                         whiten=whiten,
                         random_state=random_state,
                         **fixed_options)

        self.centered, self.epsilon = centered, epsilon
        self.data_norm, self.bounds = data_norm, bounds
        self.accountant = BudgetAccountant.load_default(accountant)

        warn_unused_args(unused_args)
Exemplo n.º 22
0
def _sum(array, epsilon=1.0, bounds=None, accountant=None, axis=None, dtype=None, keepdims=False, nan=False):
    """Return a randomised sum of ``array`` and charge ``epsilon`` to the accountant.

    If ``bounds`` is None they are taken from the data's min/max and a
    ``PrivacyLeakWarning`` is issued. When ``axis`` is given or ``keepdims`` is
    truthy the call is delegated to ``_wrap_axis``. Otherwise the array is
    flattened and clipped to ``bounds``, summed (``np.nansum`` when ``nan`` is
    truthy), and the result is passed through a truncated mechanism with
    sensitivity ``upper - lower``. The budget is checked before the sum is
    computed and spent only after the randomised output exists.
    """
    if bounds is None:
        leak_notice = ("Bounds have not been specified and will be calculated on the data provided. This will "
                       "result in additional privacy leakage. To ensure differential privacy and no additional "
                       "privacy leakage, specify bounds for each dimension.")
        warnings.warn(leak_notice, PrivacyLeakWarning)
        bounds = (np.min(array), np.max(array))

    if axis is not None or keepdims:
        forwarded = dict(epsilon=epsilon, bounds=bounds, accountant=accountant, axis=axis, dtype=dtype,
                         keepdims=keepdims, nan=nan)
        return _wrap_axis(_sum, array, **forwarded)

    lower, upper = check_bounds(bounds, shape=0, dtype=dtype)

    accountant = BudgetAccountant.load_default(accountant)
    accountant.check(epsilon, 0)

    # Work on a clipped, one-dimensional version of the input
    flat = clip_to_bounds(np.ravel(array), bounds)

    if nan:
        reducer = np.nansum
    else:
        reducer = np.sum
    exact_total = reducer(flat, axis=axis, dtype=dtype, keepdims=keepdims)

    # Integral dtypes get the geometric mechanism, everything else Laplace
    if dtype is not None and issubclass(dtype, Integral):
        mechanism_cls = GeometricTruncated
    else:
        mechanism_cls = LaplaceTruncated

    n_values = flat.size
    mechanism = mechanism_cls(epsilon=epsilon, sensitivity=upper - lower, lower=lower * n_values,
                              upper=upper * n_values)
    noisy_total = mechanism.randomise(exact_total)

    accountant.spend(epsilon, 0)

    return noisy_total
Exemplo n.º 23
0
def _var(array, epsilon=1.0, bounds=None, axis=None, dtype=None, keepdims=False, accountant=None, nan=False):
    """Return a randomised variance of ``array`` and charge ``epsilon`` to the accountant.

    If ``bounds`` is None they are taken from the data's min/max and a
    ``PrivacyLeakWarning`` is issued. When ``axis`` is given or ``keepdims`` is
    truthy the call is delegated to ``_wrap_axis``. Otherwise the array is
    flattened and clipped to ``bounds``, its variance is computed
    (``np.nanvar`` when ``nan`` is truthy) and passed through
    ``LaplaceBoundedDomain`` restricted to ``[0, (upper - lower)**2 / 4]``. The
    budget is checked before the variance is computed and spent only after the
    randomised output exists.
    """
    if bounds is None:
        leak_notice = ("Bounds have not been specified and will be calculated on the data provided. This will "
                       "result in additional privacy leakage. To ensure differential privacy and no additional "
                       "privacy leakage, specify bounds for each dimension.")
        warnings.warn(leak_notice, PrivacyLeakWarning)
        bounds = (np.min(array), np.max(array))

    if axis is not None or keepdims:
        forwarded = dict(epsilon=epsilon, bounds=bounds, axis=axis, dtype=dtype, keepdims=keepdims,
                         accountant=accountant, nan=nan)
        return _wrap_axis(_var, array, **forwarded)

    lower, upper = check_bounds(bounds, shape=0, dtype=dtype)

    accountant = BudgetAccountant.load_default(accountant)
    accountant.check(epsilon, 0)

    # Work on a clipped, one-dimensional version of the input
    flat = clip_to_bounds(np.ravel(array), bounds)

    if nan:
        reducer = np.nanvar
    else:
        reducer = np.var
    exact_var = reducer(flat, axis=axis, dtype=dtype, keepdims=keepdims)

    width = upper - lower
    n_values = flat.size
    mechanism = LaplaceBoundedDomain(epsilon=epsilon, delta=0,
                                     sensitivity=(width / n_values) ** 2 * (n_values - 1), lower=0,
                                     upper=(width ** 2) / 4)
    noisy_var = mechanism.randomise(exact_var)

    accountant.spend(epsilon, 0)

    return noisy_var
Exemplo n.º 24
0
    def __init__(self,
                 epsilon=1.0,
                 data_norm=None,
                 tol=1e-4,
                 C=1.0,
                 fit_intercept=True,
                 max_iter=100,
                 verbose=0,
                 warm_start=False,
                 n_jobs=None,
                 accountant=None,
                 **unused_args):
        """Configure the classifier and store its privacy-related settings.

        ``tol``, ``C``, ``fit_intercept``, ``max_iter``, ``verbose``,
        ``warm_start`` and ``n_jobs`` go to the parent constructor; every other
        parent option is fixed here (see ``fixed_options``). ``epsilon`` and
        ``data_norm`` are stored unchanged, ``classes_`` starts out as None and
        ``accountant`` is resolved through ``BudgetAccountant.load_default``.
        Extra keyword arguments are reported through ``warn_unused_args``.
        """
        # Parent options that callers of this class cannot change.
        fixed_options = dict(penalty='l2',
                             dual=False,
                             intercept_scaling=1.0,
                             class_weight=None,
                             random_state=None,
                             solver='lbfgs',
                             multi_class='ovr')
        super().__init__(tol=tol,
                         C=C,
                         fit_intercept=fit_intercept,
                         max_iter=max_iter,
                         verbose=verbose,
                         warm_start=warm_start,
                         n_jobs=n_jobs,
                         **fixed_options)

        self.epsilon, self.data_norm = epsilon, data_norm
        self.classes_ = None
        self.accountant = BudgetAccountant.load_default(accountant)

        warn_unused_args(unused_args)
    def test_get_spent_budget(self):
        """Check that ``spent_budget`` returns the two initial spends as a list."""
        initial_spends = [(0.5, 0), (0.5, 0)]
        budget = BudgetAccountant(1, 0, spent_budget=initial_spends)

        recorded = budget.spent_budget

        self.assertIsInstance(recorded, list)
        self.assertEqual(2, len(recorded))
    def test_spend_exceed(self):
        """Check that spends exceeding the remaining epsilon or delta raise BudgetError."""
        budget = BudgetAccountant(5, 0)
        budget.spend(3, 0)

        # First pair overshoots epsilon (3 + 3 > 5); the second asks for a
        # non-zero delta although the delta budget is 0.
        for epsilon, delta in ((3, 0), (0, 1e-5)):
            with self.assertRaises(BudgetError):
                budget.spend(epsilon, delta)
Exemplo n.º 27
0
    def test_accountant(self):
        """Check that histogramdd() charges the accountant it is given.

        Verifies both the amount recorded by a successful query and that a
        second query exceeding the remaining budget raises ``BudgetError``.
        """
        acc = BudgetAccountant(1.5, 0)

        a = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]).T
        histogramdd(a, epsilon=1, bins=3, range=[(0, 10), (0, 10)], density=True, accountant=acc)
        # Pin the recorded spend, as the one-dimensional histogram test does;
        # without this only the over-budget failure below is checked.
        self.assertEqual((1, 0), acc.total())

        # A second epsilon=1 query would take the total past 1.5.
        with self.assertRaises(BudgetError):
            histogramdd(a, epsilon=1, bins=3, range=[(0, 10), (0, 10)], density=True, accountant=acc)
def _preprocess_data(X,
                     y,
                     fit_intercept,
                     epsilon=1.0,
                     bounds_X=None,
                     bounds_y=None,
                     copy=True,
                     check_input=True,
                     **unused_args):
    """Validate ``X``/``y`` and, when fitting an intercept, clip and centre them.

    Returns ``(X, y, X_offset, y_offset, X_scale)``. ``X_scale`` is always a
    vector of ones. Without an intercept the offsets are zeros and the data is
    returned unclipped. With an intercept, ``X`` and ``y`` are clipped to their
    bounds and the offsets are obtained from ``mean(..., epsilon=epsilon)``
    along axis 0; each of those two calls receives its own newly created
    ``BudgetAccountant()``.
    """
    warn_unused_args(unused_args)

    if check_input:
        X = check_array(X, copy=copy, accept_sparse=False, dtype=FLOAT_DTYPES)
    elif copy:
        X = X.copy(order='K')

    y = np.asarray(y, dtype=X.dtype)
    n_features = X.shape[1]
    X_scale = np.ones(n_features, dtype=X.dtype)

    if not fit_intercept:
        # Nothing to centre: report zero offsets of matching shape and dtype.
        X_offset = np.zeros(n_features, dtype=X.dtype)
        y_offset = X.dtype.type(0) if y.ndim == 1 else np.zeros(y.shape[1], dtype=X.dtype)
        return X, y, X_offset, y_offset, X_scale

    n_targets = y.shape[1] if y.ndim > 1 else 1
    bounds_X = check_bounds(bounds_X, n_features)
    bounds_y = check_bounds(bounds_y, n_targets)

    X = clip_to_bounds(X, bounds_X)
    y = clip_to_bounds(y, bounds_y)

    # X is shifted in place, y is rebound to a new array.
    X_offset = mean(X, axis=0, bounds=bounds_X, epsilon=epsilon, accountant=BudgetAccountant())
    X -= X_offset
    y_offset = mean(y, axis=0, bounds=bounds_y, epsilon=epsilon, accountant=BudgetAccountant())
    y = y - y_offset

    return X, y, X_offset, y_offset, X_scale
Exemplo n.º 29
0
    def test_accountant(self):
        """Check that StandardScaler.fit() charges explicit and scoped accountants."""
        from diffprivlib.accountant import BudgetAccountant
        explicit_acc = BudgetAccountant()

        samples = np.random.rand(10, 5)
        scaler = StandardScaler(epsilon=1, bounds=(0, 1), accountant=explicit_acc)
        scaler.fit(samples)
        self.assertEqual((1, 0), explicit_acc.total())

        # No accountant argument: the one from the ``with`` block is charged.
        with BudgetAccountant(1.5, 0) as scoped_acc:
            scaler = StandardScaler(epsilon=1, bounds=(0, 1))
            scaler.fit(samples)
            self.assertEqual((1, 0), scoped_acc.total())

            # A second epsilon=1 fit does not fit into a budget of 1.5.
            with self.assertRaises(BudgetError):
                scaler.fit(samples)

        # Work done under the scoped accountant never touched the explicit one.
        self.assertEqual((1, 0), explicit_acc.total())
    def test_accountant(self):
        """Check that GaussianNB.fit() charges explicit and scoped accountants."""
        from diffprivlib.accountant import BudgetAccountant
        explicit_acc = BudgetAccountant()

        features = np.random.random((10, 2))
        labels = np.random.randint(2, size=10)

        model = GaussianNB(epsilon=1.0, bounds=(0, 1), accountant=explicit_acc)
        model.fit(features, labels)
        self.assertEqual((1, 0), explicit_acc.total())

        # No accountant argument: the one from the ``with`` block is charged.
        with BudgetAccountant(1.5, 0) as scoped_acc:
            model = GaussianNB(epsilon=1.0, bounds=(0, 1))
            model.fit(features, labels)
            self.assertEqual((1, 0), scoped_acc.total())

            # A second epsilon=1 fit does not fit into a budget of 1.5.
            with self.assertRaises(BudgetError):
                model.fit(features, labels)