def test_sample_error():
    """Requesting a sample larger than the data must raise ``ValueError``.

    The private array holds 100 elements while the sampler is asked for 101;
    the error is expected when ``SampleWithoutReplacement`` is constructed.
    """
    data = np.ones(100)
    holder = DataNode()
    holder.set_private_data(name="array", data=data)
    requested_size = 101  # one more than the 100 elements available

    with pytest.raises(ValueError):
        SampleWithoutReplacement(LaplaceMechanism(1, 1), requested_size,
                                 data.shape)
def test_configure_data_access():
    """Configure adaptive DP access, then query with no mechanism argument.

    A ``ValueError`` is expected. Both the configuration call and the query
    run inside the ``pytest.raises`` block, so the test passes if either of
    them raises.
    """
    policy = AdaptiveDifferentialPrivacy(epsilon_delta=(1, 1))
    node = DataNode()
    node.set_private_data("test", np.array(range(10)))
    with pytest.raises(ValueError):
        # NOTE(review): presumably the query (no mechanism supplied) is the
        # call meant to fail -- confirm, and consider narrowing this block.
        node.configure_data_access("test", policy)
        node.query("test")
def test_exception_budget_2():
    """Repeated queries under an adaptive budget must eventually be refused.

    Up to 999 queries are issued, each with a fresh Gaussian mechanism;
    ``ExceededPrivacyBudgetError`` is expected at some point in that loop.
    """
    policy = AdaptiveDifferentialPrivacy(epsilon_delta=(1, 0.001))
    node = DataNode()
    values = np.array(range(10))
    node.set_private_data("test", values)
    node.configure_data_access("test", policy)

    with pytest.raises(ExceededPrivacyBudgetError):
        for _ in range(999):
            mechanism = GaussianMechanism(1, epsilon_delta=(0.1, 1))
            node.query("test", differentially_private_mechanism=mechanism)
Пример #4
0
def test_exponential_mechanism_pricing():
    """Check which prices the exponential mechanism samples most and least.

    Four bids (1.00, 1.00, 1.00, 3.01) are stored privately and queried
    through an ``ExponentialMechanism`` whose utility is the revenue
    ``r * #{bids >= r}``. Revenue peaks at ``r = 1.00`` and is zero both at
    ``r = 0`` and for every ``r > 3.01``. The sampled prices are histogrammed
    and the test asserts that the densest bin lies next to 1.00 and that the
    sparsest bin lies either above 3.01 or next to 0.00.

    The previous version compared the density array against the selected
    prices (``y_bin == max_price``). That selects nothing, so ``.all()`` on
    the empty result passed vacuously, and the comparison cannot broadcast
    when several bins tie. The assertions now act on the selected bin edges
    directly.
    """
    def u(x, r):
        # Revenue for each candidate price: the price times the number of
        # bids that are at least that price.
        output = np.zeros(len(r))
        for i in range(len(r)):
            output[i] = r[i] * sum(np.greater_equal(x, r[i]))
        return output

    x = [1.00, 1.00, 1.00, 3.01]  # Input dataset: the true bids
    r = np.arange(0, 3.5, 0.001)  # Set the interval of possible outputs r
    delta_u = r.max()  # In this specific case, Delta u = max(r)
    epsilon = 5  # Set a value for epsilon
    size = 10000  # We want to repeat the query this many times

    node = DataNode()
    node.set_private_data(name="bids", data=np.array(x))
    data_access_definition = ExponentialMechanism(u, r, delta_u, epsilon, size)
    node.configure_data_access("bids", data_access_definition)
    result = node.query("bids")
    y_bin, x_bin = np.histogram(a=result,
                                bins=int(round(np.sqrt(len(result)))),
                                density=True)

    # np.histogram returns one more edge than bins; drop the last edge so
    # every density is paired with the left edge of its bin.
    left_edges = x_bin[:-1]
    bin_size = x_bin[1] - x_bin[0]
    max_price = left_edges[y_bin == y_bin.max()]
    min_price = left_edges[y_bin == y_bin.min()]

    # The histogram is a noisy estimate, so neighbouring bins may swap rank.
    # NOTE(review): a three-bin tolerance is a judgement call -- tune if flaky.
    tolerance = 3 * bin_size

    # Check the best price is close to 1.00
    assert (np.abs(1.00 - max_price) <= tolerance).all()
    # Check the no-revenue price is either greater than 3.01 or close to 0.00
    assert ((min_price > (3.01 - bin_size)) | (min_price < tolerance)).all()
def test_sample_without_replacement():
    """Sampling without replacement yields ``sample_size`` rows per mechanism.

    Each of five mechanisms is wrapped in ``SampleWithoutReplacement`` in
    turn, installed on the same node, and queried; the first dimension of the
    result must equal the requested sample size.
    """
    data = np.ones(100)
    node = DataNode()
    node.set_private_data(name="array", data=data)
    sample_size = 50

    def u(x, r):
        # Utility for each candidate in r: the candidate times the number of
        # entries of x that are at least that large.
        utility = np.zeros(len(r))
        for idx in range(len(r)):
            candidate = r[idx]
            utility[idx] = candidate * sum(np.greater_equal(x, candidate))
        return utility

    candidates = np.arange(0, 3.5, 0.001)
    exponential_mechanism = ExponentialMechanism(u,
                                                 candidates,
                                                 candidates.max(),
                                                 5,
                                                 size=sample_size)

    mechanisms = [
        LaplaceMechanism(1, 1),
        GaussianMechanism(1, (0.5, 0.5)),
        RandomizedResponseBinary(0.5, 0.5, 1),
        RandomizedResponseCoins(),
        exponential_mechanism,
    ]

    for mechanism in mechanisms:
        sampler = SampleWithoutReplacement(mechanism, sample_size, data.shape)
        node.configure_data_access("array", sampler)
        sampled = node.query("array")
        assert sampled.shape[0] == sample_size
Пример #6
0
def test_randomize_binary_mechanism_no_binary_scalar():
    """Querying a non-binary scalar (0.1) through binary randomized response
    is expected to raise ``ValueError``."""
    node = DataNode()
    node.set_private_data(name="scalar", data=0.1)
    mechanism = RandomizedResponseBinary(f0=0.5, f1=0.5, epsilon=1)
    node.configure_data_access("scalar", mechanism)

    # Configuration happens outside the block: only the query may raise.
    with pytest.raises(ValueError):
        node.query("scalar")
def test_exception_exceeded_privacy_budget_error():
    """A query through adaptive DP with budget ``(1, 0)`` must be refused.

    The adaptive policy is built with a default Gaussian mechanism using
    ``epsilon_delta=(0.1, 1)``; the very first query is expected to raise
    ``ExceededPrivacyBudgetError``.
    """
    value = 175

    default_mechanism = GaussianMechanism(1, epsilon_delta=(0.1, 1))
    policy = AdaptiveDifferentialPrivacy(
        epsilon_delta=(1, 0),
        differentially_private_mechanism=default_mechanism)
    data_node = DataNode()
    data_node.set_private_data("scalar", value)
    data_node.configure_data_access("scalar", policy)

    with pytest.raises(ExceededPrivacyBudgetError):
        data_node.query("scalar")
Пример #8
0
def test_laplace_scalar_mechanism():
    """A scalar queried through ``LaplaceMechanism(1, 1)`` comes back changed,
    but within 100 of the true value."""
    true_value = 175

    data_node = DataNode()
    data_node.set_private_data("scalar", true_value)
    data_node.configure_data_access("scalar", LaplaceMechanism(1, 1))

    noisy_value = data_node.query("scalar")

    assert true_value != noisy_value
    deviation = np.abs(true_value - noisy_value)
    assert deviation < 100
Пример #9
0
def test_randomize_binary_mechanism_scalar_coins():
    """A scalar queried through ``RandomizedResponseCoins`` stays a scalar
    whose value is 0 or 1."""
    node = DataNode()
    node.set_private_data(name="scalar", data=1)
    node.configure_data_access("scalar", RandomizedResponseCoins())

    answer = node.query(private_property="scalar")

    assert np.isscalar(answer)
    assert answer in (0, 1)
def test_data_access():
    """A single query under adaptive DP with an explicit Gaussian mechanism
    returns something other than ``None``."""
    policy = AdaptiveDifferentialPrivacy(epsilon_delta=(1, 1))
    node = DataNode()
    values = np.array(range(10))
    node.set_private_data("test", values)
    node.configure_data_access("test", policy)

    mechanism = GaussianMechanism(1, epsilon_delta=(0.1, 1))
    answer = node.query("test", differentially_private_mechanism=mechanism)

    assert answer is not None
Пример #11
0
def test_laplace_dictionary_mechanism_wrong_shapes():
    """Per-key sensitivities whose shapes differ from the data must fail.

    The data dictionary holds a length-3 vector and a 2x3 matrix, while the
    sensitivity dictionary holds a 2x3 matrix and a length-5 vector under the
    same keys. Querying is expected to raise ``ValueError``.
    """
    private_data = {
        0: np.array([2, 3, 5]),
        1: np.array([[1, 3, 1], [1, 4, 6]]),
    }
    mismatched_sensitivity = {
        0: np.array([[1, 1, 2], [2, 1, 1]]),
        1: np.array([3, 1, 11, 1, 2]),
    }

    data_node = DataNode()
    data_node.set_private_data("dictionary", private_data)
    data_node.configure_data_access(
        "dictionary", LaplaceMechanism(mismatched_sensitivity, 1))

    with pytest.raises(ValueError):
        data_node.query("dictionary")
Пример #12
0
def test_gaussian_scalar_mechanism():
    """A scalar queried through a Gaussian mechanism comes back changed, but
    within 100 of the true value."""
    true_value = 175

    data_node = DataNode()
    data_node.set_private_data("scalar", true_value)
    mechanism = GaussianMechanism(1, epsilon_delta=(0.1, 1))
    data_node.configure_data_access("scalar", mechanism)

    noisy_value = data_node.query("scalar")

    assert true_value != noisy_value
    deviation = np.abs(true_value - noisy_value)
    assert deviation < 100
Пример #13
0
def test_randomize_binary_random_scalar_0():
    """The scalar 0 queried through ``RandomizedResponseBinary`` with
    ``f0 = f1 = 0.5`` stays a scalar whose value is 0 or 1."""
    node = DataNode()
    node.set_private_data(name="scalar", data=0)
    mechanism = RandomizedResponseBinary(f0=0.5, f1=0.5, epsilon=1)
    node.configure_data_access("scalar", mechanism)

    answer = node.query(private_property="scalar")

    assert np.isscalar(answer)
    assert answer in (0, 1)
def test_sample_without_replacement_multidimensional():
    """Sampling 50 rows from a ``(100, 2)`` array through a Laplace mechanism
    returns a result whose first dimension is 50."""
    data = np.ones((100, 2))
    sample_size = 50
    node = DataNode()
    node.set_private_data(name="array", data=data)

    sampler = SampleWithoutReplacement(LaplaceMechanism(1, 1), sample_size,
                                       data.shape)
    node.configure_data_access("array", sampler)
    sampled = node.query("array")

    assert sampled.shape[0] == sample_size
Пример #15
0
def test_randomize_binary_mechanism_array_almost_always_true_values_coins():
    """With ``prob_head_first=0.01`` the coin mechanism should barely alter
    an all-ones array: the mean of the answer must exceed 0.95."""
    ones = np.ones(1000)
    node = DataNode()
    node.set_private_data(name="array", data=ones)

    # A very low probability of heads on the first toss should keep the
    # answers close to the true values.
    mechanism = RandomizedResponseCoins(prob_head_first=0.01,
                                        prob_head_second=0.9)
    node.configure_data_access("array", mechanism)

    answer = node.query("array")

    shortfall = 1 - np.mean(answer)
    assert shortfall < 0.05
Пример #16
0
def test_laplace_dictionary_mechanism():
    """A dictionary of arrays queried through ``LaplaceMechanism(1, 1)``
    keeps its keys, and the mean of the first entry stays within 5 of the
    true mean.

    The mean comparison uses the absolute difference. The earlier signed
    check passed for any noisy mean larger than the true one, however far
    off it was.
    """
    dictionary = {
        0: np.array([[2, 4, 5], [2, 3, 5]]),
        1: np.array([[1, 3, 1], [1, 4, 6]])
    }

    node = DataNode()
    node.set_private_data("dictionary", dictionary)
    node.configure_data_access("dictionary", LaplaceMechanism(1, 1))

    result = node.query("dictionary")

    assert dictionary.keys() == result.keys()
    assert np.abs(np.mean(dictionary[0]) - np.mean(result[0])) < 5
Пример #17
0
def test_randomize_binary_mechanism_array_almost_always_true_values_ones():
    """With ``f1=0.99`` binary randomized response should barely alter an
    all-ones array: the mean of the answer must exceed 0.95."""
    ones = np.ones(1000)
    node = DataNode()
    node.set_private_data(name="array", data=ones)

    # f1 is set very high, so the answers should stay close to 1.
    mechanism = RandomizedResponseBinary(f0=0.5, f1=0.99, epsilon=5)
    node.configure_data_access("array", mechanism)

    answer = node.query("array")

    shortfall = 1 - np.mean(answer)
    assert shortfall < 0.05
Пример #18
0
def test_randomize_binary_mechanism_array_almost_always_false_values_zeros():
    """With ``f0=0.01`` an all-zeros array queried through binary randomized
    response must come back with a mean within 0.05 of 1."""
    zeros = np.zeros(1000)
    node = DataNode()
    node.set_private_data(name="array", data=zeros)

    # NOTE(review): the earlier comment here said the mean should be near 0,
    # yet the assertion requires it to be near 1. Presumably f0 is the
    # probability of answering 0 for a true 0 -- confirm against the
    # mechanism's documentation.
    mechanism = RandomizedResponseBinary(f0=0.01, f1=0.5, epsilon=1)
    node.configure_data_access("array", mechanism)

    answer = node.query("array")

    distance_from_one = np.abs(1 - np.mean(answer))
    assert distance_from_one < 0.05
Пример #19
0
def test_randomize_binary_mechanism_array_coins():
    """The default coin mechanism flips some, but not all, entries of an
    all-ones array of length 100."""
    ones = np.ones(100)
    node = DataNode()
    node.set_private_data(name="array", data=ones)
    node.configure_data_access("array", RandomizedResponseCoins())

    answer = node.query("array")
    # Count the positions where the answer differs from the true value.
    flipped = sum(1 for idx in range(100) if answer[idx] != ones[idx])

    assert not np.isscalar(answer)
    assert 0 < flipped < 100
    assert np.mean(answer) < 1
Пример #20
0
def test_randomize_binary_random():
    """Binary randomized response with ``f0 = f1 = 0.5`` flips some, but not
    all, entries of an all-ones array."""
    data_size = 100
    ones = np.ones(data_size)
    node = DataNode()
    node.set_private_data(name="A", data=ones)
    mechanism = RandomizedResponseBinary(f0=0.5, f1=0.5, epsilon=1)
    node.configure_data_access("A", mechanism)

    answer = node.query(private_property="A")

    # Count the positions where the answer differs from the true value.
    flipped = sum(1 for idx in range(data_size) if answer[idx] != ones[idx])

    assert 0 < flipped < data_size
    assert np.mean(answer) < 1
Пример #21
0
def test_exponential_mechanism_obtain_laplace():
    """Exponential mechanism with utility ``-|x - r|`` centred on 3.5.

    All 100000 sampled outputs must lie strictly inside the candidate range
    and their mean must be within ``delta_u / epsilon`` of the true value.
    """
    def u_laplacian(x, r):
        # Utility decreases linearly with the distance from the true value.
        return -np.absolute(x - r)

    candidates = np.arange(-20, 20, 0.001)  # interval of possible outputs
    true_value = 3.5
    delta_u = 1  # simply set to one
    epsilon = 1
    repetitions = 100000  # number of times the query is repeated

    data_node = DataNode()
    data_node.set_private_data(name="identity", data=np.array(true_value))

    mechanism = ExponentialMechanism(u_laplacian, candidates, delta_u,
                                     epsilon, repetitions)
    data_node.configure_data_access("identity", mechanism)
    samples = data_node.query("identity")

    # Every output lies strictly within the candidate range.
    assert (samples > candidates.min()).all()
    assert (samples < candidates.max()).all()
    # The mean output is close to the true value.
    mean_error = np.absolute(np.mean(samples) - true_value)
    assert mean_error < (delta_u / epsilon)
Пример #22
0
def test_sensitivity_wrong_input():
    """Invalid sensitivities given to ``GaussianMechanism`` raise ``ValueError``.

    The negative-sensitivity case is expected to fail while the mechanism is
    built or installed on the node. The shape-mismatch cases are installed
    successfully and are expected to fail only when the node is queried.
    """
    epsilon_delta = (0.1, 1)

    def gaussian(sensitivity):
        return GaussianMechanism(sensitivity=sensitivity,
                                 epsilon_delta=epsilon_delta)

    # Negative sensitivity: rejected before any query is made.
    node = DataNode()
    node.set_private_data("scalar", 175)
    with pytest.raises(ValueError):
        node.configure_data_access("scalar", gaussian(-0.1))

    # Each entry is (property name, private data, sensitivity).
    query_time_cases = [
        # Scalar query result, too many sensitivity values provided.
        ("scalar", 175, [0.1, 0.5]),
        # Both are 1D-arrays, but non-broadcastable.
        ("data_array", [10, 10, 10, 10], [0.1, 10, 100, 1000, 1000]),
        # ND-array query result and 1D-array sensitivity, non-broadcastable.
        ("data_ndarray",
         [[10, 10, 10, 10] for _ in range(3)],
         [0.1, 10, 100]),
        # Both are ND-arrays, but their shapes differ.
        ("data_ndarray",
         [[10, 10, 10, 10] for _ in range(3)],
         [[0.1, 10, 100, 1000, 10000] for _ in range(3)]),
    ]

    for name, data, sensitivity in query_time_cases:
        node = DataNode()
        node.set_private_data(name, data)
        node.configure_data_access(name, gaussian(sensitivity))
        with pytest.raises(ValueError):
            node.query(name)
Пример #23
0
def test_sensitivity_wrong_input():
    """Invalid sensitivities or unsupported data must be rejected.

    Covers Gaussian mechanisms with a negative or shape-incompatible
    sensitivity (``ValueError``), and Laplace mechanisms applied to a list of
    arrays with a bad sensitivity (``ValueError`` / ``IndexError``) or to a
    tuple (``NotImplementedError``). Apart from the negative-sensitivity
    case, the error is expected at query time.
    """
    epsilon_delta = (0.1, 1)

    # Negative sensitivity: rejected before any query is made.
    node = DataNode()
    node.set_private_data("scalar", 175)
    with pytest.raises(ValueError):
        node.configure_data_access(
            "scalar",
            GaussianMechanism(sensitivity=-0.1, epsilon_delta=epsilon_delta))

    data_ndarray = [[10, 10, 10, 10] for _ in range(3)]
    # Each entry is (property name, private data, sensitivity).
    gaussian_cases = [
        # Scalar query result, too many sensitivity values provided.
        ("scalar", 175, [0.1, 0.5]),
        # Both are 1D-arrays, but non-broadcastable.
        ("data_array", [10, 10, 10, 10], [0.1, 10, 100, 1000, 1000]),
        # ND-array query result and 1D-array sensitivity, non-broadcastable.
        ("data_ndarray",
         [[10, 10, 10, 10] for _ in range(3)],
         [0.1, 10, 100]),
        # Both are ND-arrays, but their shapes differ (they should match).
        ("data_ndarray",
         data_ndarray,
         [[0.1, 10, 100, 1000, 10000] for _ in range(3)]),
    ]
    for name, data, sensitivity in gaussian_cases:
        node = DataNode()
        node.set_private_data(name, data)
        node.configure_data_access(
            name,
            GaussianMechanism(sensitivity=sensitivity,
                              epsilon_delta=epsilon_delta))
        with pytest.raises(ValueError):
            node.query(name)

    # Query result is a list of arrays: sensitivity must be either a scalar,
    # or a list of the same length as the query.
    data_list = [
        np.random.rand(30, 20),
        np.random.rand(20, 30),
        np.random.rand(50, 40),
    ]
    # Each entry is (property name, private data, sensitivity, expected error).
    laplace_cases = [
        # Array instead of scalar.
        ("data_list", data_list, np.array([1, 1]), ValueError),
        # List of wrong length.
        # NOTE(review): this case stores data_ndarray (the nested list from
        # the last Gaussian case), not data_list -- looks like a copy-paste
        # slip; confirm which input was intended.
        ("data_list", data_ndarray, [1, 1], IndexError),
        # Wrong data structure: so far, tuples are not allowed.
        ("data_tuple", (1, 2, 3, 4, 5), 2, NotImplementedError),
    ]
    for name, data, sensitivity, expected_error in laplace_cases:
        node = DataNode()
        node.set_private_data(name, data)
        node.configure_data_access(
            name, LaplaceMechanism(sensitivity=sensitivity, epsilon=1))
        with pytest.raises(expected_error):
            node.query(name)
Пример #24
0
def test_randomize_binary_deterministic():
    """Constructing ``RandomizedResponseBinary`` with ``f0 = f1 = 1`` is
    expected to raise ``ValueError``."""
    bits = np.array([0, 1])
    node = DataNode()
    node.set_private_data(name="A", data=bits)

    # The mechanism is never installed on the node; construction must fail.
    with pytest.raises(ValueError):
        RandomizedResponseBinary(f0=1, f1=1, epsilon=1)