Example #1
def single_release_comp(sigma_1, sigma_2=None, delta=1e-5):
    """ input arguments """
    acct = rdp_acct.anaRDPacct()

    acct.compose_subsampled_mechanism(lambda x: rdp_bank.RDP_gaussian({'sigma': sigma_1}, x), prob=1.)
    if sigma_2 is not None:
        acct.compose_subsampled_mechanism(lambda x: rdp_bank.RDP_gaussian({'sigma': sigma_2}, x), prob=1.)

    print("Privacy loss is", acct.get_eps(delta))
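A minimal way to call the helper above (a sketch, not part of the original file; the sigma values are illustrative and the standard autodp imports are assumed to be in scope):

# Sketch: compose one or two full-batch Gaussian releases (illustrative sigmas).
from autodp import rdp_acct, rdp_bank

single_release_comp(5.0)                 # one Gaussian release at the default delta
single_release_comp(5.0, sigma_2=8.0)    # two Gaussian releases composed together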
Example #2
def CGF_func(sigma1, sigma2, sigma3, sigma4, num_Clust, num_iter_EM):

    # Gaussians 1 and 2 are for the discriminator update (i.e., the two terms for applying DP-SGD)
    func_gaussian_1 = lambda x: rdp_bank.RDP_gaussian({'sigma': sigma1}, x)
    func_gaussian_2 = lambda x: rdp_bank.RDP_gaussian({'sigma': sigma2}, x)

    # Gaussians 3 and 4 are for the EM updates of the MoG
    func_gaussian_3 = lambda x: rdp_bank.RDP_gaussian({'sigma': sigma3}, x)
    func_gaussian_4 = lambda x: rdp_bank.RDP_gaussian({'sigma': sigma4}, x)

    func = lambda x: func_gaussian_1(x) + func_gaussian_2(x) + num_Clust*num_iter_EM*(func_gaussian_3(x) + func_gaussian_4(x))
    return func
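A sketch of how the combined RDP function returned above might be fed to autodp's analytical accountant (the parameter values are illustrative only):

# Sketch: compose the combined RDP function once and read off the privacy loss.
from autodp import rdp_acct

total_rdp = CGF_func(sigma1=1.0, sigma2=1.0, sigma3=5.0, sigma4=5.0,
                     num_Clust=10, num_iter_EM=20)  # illustrative values
acct = rdp_acct.anaRDPacct()
acct.compose_mechanism(total_rdp)
print('Privacy loss is', acct.get_eps(1e-5))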
Example #3
    def __init__(self, sigma, name='Gaussian',
                 RDP_off=False, approxDP_off=False, fdp_off=True,
                 use_basic_RDP_to_approxDP_conversion=False,
                 use_fDP_based_RDP_to_approxDP_conversion=False):
        # the sigma parameter is the std of the noise divided by the l2 sensitivity
        Mechanism.__init__(self)

        self.name = name # When composing
        self.params = {'sigma': sigma} # This will be useful for the Calibrator
        # TODO: should a generic unspecified mechanism have a name and a param dictionary?

        self.delta0 = 0
        if not RDP_off:
            new_rdp = lambda x: rdp_bank.RDP_gaussian({'sigma': sigma}, x)
            if use_fDP_based_RDP_to_approxDP_conversion:
                # This setting is slightly more involved: it converts RDP to fDP,
                # and then to (eps, delta)-DP via duality
                self.propagate_updates(new_rdp, 'RDP', fDP_based_conversion=True)
            elif use_basic_RDP_to_approxDP_conversion:
                self.propagate_updates(new_rdp, 'RDP', BBGHS_conversion=False)
            else:
                # This is the default setting with fast computation of RDP to approx-DP
                self.propagate_updates(new_rdp, 'RDP')

        if not approxDP_off: # Direct implementation of approxDP
            new_approxdp = lambda x: dp_bank.get_eps_ana_gaussian(sigma, x)
            self.propagate_updates(new_approxdp,'approxDP_func')

        if not fdp_off: # Direct implementation of fDP
            fun1 = lambda x: fdp_bank.log_one_minus_fdp_gaussian({'sigma': sigma}, x)
            fun2 = lambda x: fdp_bank.log_neg_fdp_grad_gaussian({'sigma': sigma}, x)
            self.propagate_updates([fun1,fun2],'fDP_and_grad_log')
            # overwrite the fdp computation with the direct computation
            self.fdp = lambda x: fdp_bank.fDP_gaussian({'sigma': sigma}, x)
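If this constructor is autodp's mechanism_zoo.GaussianMechanism (an assumption based on the signature), a typical use is a sketch like the following:

# Sketch (assumes this __init__ is autodp's mechanism_zoo.GaussianMechanism or equivalent).
from autodp.mechanism_zoo import GaussianMechanism

gm = GaussianMechanism(sigma=2.0, name='GM_example')
print('eps at delta=1e-6:', gm.get_approxDP(1e-6))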
Example #4
def conservative_analysis():
    """ input arguments """

    # (1) privacy parameter (noise scale) of the Gaussian mechanism
    sigma = 10.

    # (2) desired delta level
    delta = 1e-5

    n_epochs = 10  # 5 for DP-MERF and 17 for DP-MERF+AE
    batch_size = 64  # the same across experiments
    acct = rdp_acct.anaRDPacct()

    n_data_by_class = [
        5923, 6742, 5958, 6131, 5842, 5421, 5918, 6265, 5851, 5949
    ]

    start_time = time.time()
    subset_count = 0
    for n_data in n_data_by_class:

        steps_per_epoch = int(np.ceil(n_data / batch_size))
        n_steps = steps_per_epoch * n_epochs
        sampling_rate = batch_size / n_data

        # the last batch of each epoch may be smaller than batch_size (or full, if it divides evenly)
        epoch_last_batch_size = n_data % batch_size if n_data % batch_size else batch_size
        epoch_last_sampling_rate = epoch_last_batch_size / n_data

        # old_time = start_time
        old_time = time.time()
        for i in range(1, n_steps + 1):
            sampling_rate_i = epoch_last_sampling_rate if i % steps_per_epoch == 0 else sampling_rate
            acct.compose_subsampled_mechanism(
                lambda x: rdp_bank.RDP_gaussian({'sigma': sigma}, x),
                sampling_rate_i)
            if i % steps_per_epoch == 0:
                new_time = time.time()
                epochs_done = i // steps_per_epoch
                t_used = new_time - old_time
                t_total = new_time - start_time
                t_total_min = t_total / 60
                print(
                    f'Epoch {epochs_done} done - Time used: {t_used:.2f}, Total: {t_total:.2f} ({t_total_min:.2f} minutes)'
                )
                old_time = new_time

            if i == n_steps:
                pre_eps_time = time.time()
                subset_count += 1
                print(f'[{i}] Privacy loss is {acct.get_eps(delta)}')
                post_eps_time = time.time()
                print('time to get_eps: ', post_eps_time - pre_eps_time)
                old_time = post_eps_time
        print(f'data subset {subset_count} done')
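As a quick check of the bookkeeping above for the first class (n_data = 5923, batch_size = 64): steps_per_epoch = ceil(5923/64) = 93, the last batch of each epoch holds 5923 mod 64 = 35 records, and the two sampling rates are 64/5923 and 35/5923.

# Worked numbers for the first class in n_data_by_class.
import numpy as np

n_data, batch_size = 5923, 64
steps_per_epoch = int(np.ceil(n_data / batch_size))   # 93
last_batch = n_data % batch_size                      # 35
print(steps_per_epoch, batch_size / n_data, last_batch / n_data)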
Example #5
def main(config):
    delta = 1e-5
    batch_size = config['batchsize']
    prob = 1. / config['num_discriminators']  # subsampling rate
    n_steps = config['iterations']  # training iterations
    sigma = 0.4859  # noise scale (hard-coded; config['noise_multiplier'] is not used here)
    func = lambda x: rdp_bank.RDP_gaussian({'sigma': sigma}, x)

    acct = rdp_acct.anaRDPacct()
    acct.compose_subsampled_mechanism(func, prob, coeff=n_steps * batch_size)
    epsilon = acct.get_eps(delta)
    print("Privacy cost is: epsilon={}, delta={}".format(epsilon, delta))
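A hypothetical config dictionary for the function above; the keys mirror the lookups in main and the values are placeholders, not the settings of any original experiment:

# Hypothetical configuration (keys taken from main; values are placeholders).
config = {
    'batchsize': 32,
    'num_discriminators': 1000,
    'iterations': 20000,
    'noise_multiplier': 1.07,  # ignored above, since sigma is hard-coded to 0.4859
}
main(config)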
Example #6
def main():
    """ input arguments """

    # (1) privacy parameter (noise scale) of the Gaussian mechanism
    sigma = 1.2

    # (2) desired delta level
    delta = 1e-5

    # (5) number of training steps
    n_epochs = 10  # 5 for DP-MERF and 17 for DP-MERF+AE
    batch_size = 64  # the same across experiments

    dataset = "intrusion"

    if dataset == "epileptic":
        n_data = 8049
    elif dataset == "isolet":
        n_data = 4366
    elif dataset == "adult":
        n_data = 11077
    elif dataset == "census":
        n_data = 199523
    elif dataset == "cervical":
        n_data = 753
    elif dataset == "credit":
        n_data = 2668
    elif dataset == "intrusion":
        n_data = 394021
    elif dataset == "covtype":
        n_data = 9217
    else:
        raise ValueError(f"unknown dataset: {dataset}")

    steps_per_epoch = n_data // batch_size
    n_steps = steps_per_epoch * n_epochs
    # n_steps = 1

    # (6) sampling rate
    prob = batch_size / n_data
    # prob = 1
    """ end of input arguments """
    """ now use autodp to calculate the cumulative privacy loss """
    # declare the moment accountants
    acct = rdp_acct.anaRDPacct()

    eps_seq = []

    for i in range(1, n_steps + 1):
        acct.compose_subsampled_mechanism(
            lambda x: rdp_bank.RDP_gaussian({'sigma': sigma}, x), prob)
        if i % steps_per_epoch == 0 or i == n_steps:
            eps_seq.append(acct.get_eps(delta))
            print(f'[{i}] Privacy loss is {eps_seq[-1]}')
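Since the RDP of identically subsampled Gaussian steps adds up linearly, the per-step loop above can be collapsed into a single call with the coeff argument (the same pattern as Examples #5 and #9); a sketch that could replace the loop when only the final epsilon is needed:

# Sketch: one-shot composition of n_steps identical subsampled Gaussian steps.
acct_fast = rdp_acct.anaRDPacct()
acct_fast.compose_subsampled_mechanism(
    lambda x: rdp_bank.RDP_gaussian({'sigma': sigma}, x), prob, coeff=n_steps)
print('final eps:', acct_fast.get_eps(delta))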
Example #7
def conservative_analysis_syn2d(sigma, delta, n_epochs, batch_size,
                                n_data_per_class, n_classes,
                                print_intermediate_results):
    """ input arguments """

    # (2) desired delta level
    # delta = 1e-5

    # n_epochs = 20
    # batch_size = 256
    acct = rdp_acct.anaRDPacct()

    n_data_by_class = [n_data_per_class] * n_classes

    start_time = time.time()
    subset_count = 0
    for model_idx, n_data in enumerate(n_data_by_class):

        steps_per_epoch = int(np.ceil(n_data / batch_size))
        n_steps = steps_per_epoch * n_epochs
        sampling_rate = batch_size / n_data

        # the last batch of each epoch may be smaller than batch_size (or full, if it divides evenly)
        epoch_last_batch_size = n_data % batch_size if n_data % batch_size else batch_size
        epoch_last_sampling_rate = epoch_last_batch_size / n_data

        # old_time = start_time
        old_time = time.time()
        for i in range(1, n_steps + 1):
            sampling_rate_i = epoch_last_sampling_rate if i % steps_per_epoch == 0 else sampling_rate
            acct.compose_subsampled_mechanism(
                lambda x: rdp_bank.RDP_gaussian({'sigma': sigma}, x),
                sampling_rate_i)
            if i % steps_per_epoch == 0:
                new_time = time.time()
                epochs_done = i // steps_per_epoch
                t_used = new_time - old_time
                t_total = new_time - start_time
                t_total_min = t_total / 60
                print(
                    f'Epoch {epochs_done} done - Time used: {t_used:.2f}, Total: {t_total:.2f} ({t_total_min:.2f} minutes)'
                )
                old_time = new_time

            if i == n_steps and (print_intermediate_results
                                 or model_idx + 1 == len(n_data_by_class)):
                pre_eps_time = time.time()
                subset_count += 1
                print(f'[{i}] Privacy loss is {acct.get_eps(delta)}')
                post_eps_time = time.time()
                print(f'time to get_eps: {post_eps_time - pre_eps_time:.2f}')
                old_time = post_eps_time
        print(f'data subset {subset_count} done')
Example #8
    def __init__(self, sigma=None, name='Gaussian'):
        # the sigma parameter is the std of the noise divided by the l2 sensitivity
        Mechanism.__init__(self)

        self.name = name # When composing
        self.params = {'sigma': sigma} # This will be useful for the Calibrator
        self.delta0 = 0
        if sigma is not None:
            new_rdp = lambda x: rdp_bank.RDP_gaussian({'sigma': sigma}, x)
            self.propagate_updates(new_rdp, 'RDP')
            # Overwrite the approxDP and fDP with their direct computation
            self.approxDP = lambda x: dp_bank.get_eps_ana_gaussian(sigma, x)
            self.fDP = lambda x: fdp_bank.fDP_gaussian({'sigma': sigma}, x)
Example #9
def direct_readout(ar):
    delta = 1e-5
    batch_size = ar.batchsize
    prob = 1. / ar.num_discriminators  # subsampling rate
    n_steps = ar.iterations  # training iterations
    print(n_steps, batch_size, prob)
    sigma = ar.noise_multiplier  # noise scale
    func = lambda x: rdp_bank.RDP_gaussian({'sigma': sigma}, x)

    acct = rdp_acct.anaRDPacct()
    acct.compose_subsampled_mechanism(func, prob, coeff=n_steps * batch_size)
    epsilon = acct.get_eps(delta)
    print("Privacy cost is: epsilon={}, delta={}".format(epsilon, delta))
Example #10
    def __init__(self, hyperparams, net, params, loss_func, model_ctx,
                 accountant):
        self._hyperparams = hyperparams

        # Store network and parameter info
        self._net = net
        self._params = params
        self._loss_func = loss_func
        self._model_ctx = model_ctx

        # Store privacy info
        self._accountant = accountant
        # self._cgf_func = lambda x: cgfbank.CGF_gaussian({'sigma': self._hyperparams['z']}, x)
        self._cgf_func = lambda x: rdp_bank.RDP_gaussian(
            {'sigma': self._hyperparams['z']}, x)

        # Keep track of the number of steps (i.e., # of updates to the params vector)
        self._step = 0

        # Use a batch_size that fits in GPU memory
        self._batch_size = self._compute_good_batch_size()
Example #11
import network
#from utils import Hamming_Score as hamming_accuracy
import os
from dataset_loader import ImageDataset
import aggregation
import autodp
from autodp import rdp_bank, dp_acct, rdp_acct, privacy_calibrator
#from utils import Hamming_Score as hamming_accuracy
from utils import hamming_precision as hamming_accuracy
from knn_attribute import tau_limit
import sys
sys.path.append('../dataset/duke')
from datafolder.folder import Test_Dataset
nb_teachers = config.nb_teachers
acct = rdp_acct.anaRDPacct()
gaussian = lambda x: rdp_bank.RDP_gaussian(
    {'sigma': int(config.gau_scale / config.tau)}, x)
#acct.compose_mechanism(gaussian,coeff=config.tau*config.stdnt_share)
#print('privacy loss', acct.get_eps(config.delta))
dataset_dict = {
    'market': 'Market-1501',
    'duke': 'DukeMTMC-reID',
}


def ensemble_preds(nb_teachers, stdnt_data):
    """
  Given a dataset, a number of teachers, and some input data, this helper
  function queries each teacher for predictions on the data and returns
  all predictions in a single array. (That can then be aggregated into
  one single prediction per input using aggregation.py (cf. function
  prepare_student_data() below)
Example #12
tf.flags.DEFINE_integer('stdnt_share', 1000,
                        'Student share (last index) of the test data')
tf.flags.DEFINE_integer('extra', 0,
                        'remove extra samples from training to test')
tf.flags.DEFINE_bool('pca', True, 'if true then apply pca as preprocessing')
tf.flags.DEFINE_bool('knn', 1, 'if 1 then replace dnn with knn')
tf.flags.DEFINE_bool('vat', False,
                     'whether to use VAT to label the query; only use after VAT')
tf.flags.DEFINE_boolean('deeper', False, 'Activate deeper CNN model')

FLAGS = tf.flags.FLAGS
prob = 0.2  # subsampling probability
acct = rdp_acct.anaRDPacct()
delta = 1e-8
sigma = FLAGS.gau_scale  # Gaussian noise parameter
gaussian = lambda x: rdp_bank.RDP_gaussian({'sigma': sigma}, x)


def convert_vat(test_data, test_labels, noisy_labels):

    log = {}
    log['labeled_train_images'] = test_data[:FLAGS.stdnt_share]
    log['labeled_train_labels'] = noisy_labels
    log['train_images'] = test_data[FLAGS.stdnt_share:-1000]
    log['train_labels'] = test_labels[FLAGS.stdnt_share:-1000]
    # use the remaining 1000 points for testing
    log['test_images'] = test_data[-1000:]
    print('test_images.size', log['test_images'].shape)
    log['test_labels'] = test_labels[-1000:]
    file_vat = "../vat_tf/log/" + FLAGS.dataset + '_query=' + str(
        FLAGS.stdnt_share) + '.pkl'
Example #13
def get_eps_gaussian(sigma, delta):
    """ This function calculates the eps for Gaussian Mech given sigma and delta"""
    assert (delta >= 0)
    func = lambda x: rdp_bank.RDP_gaussian({'sigma': sigma}, x)
    return get_eps_rdp(func, delta)
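For reference, the Gaussian RDP curve passed in above is alpha / (2 * sigma^2) for unit l2 sensitivity, which is easy to sanity-check:

# Sanity check of the Gaussian RDP formula used throughout these examples.
from autodp import rdp_bank

sigma, alpha = 3.0, 20
assert abs(rdp_bank.RDP_gaussian({'sigma': sigma}, alpha) - alpha / (2 * sigma ** 2)) < 1e-10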
Example #14
    def __init__(self, sigma, name='Gaussian',
                 RDP_off=False, approxDP_off=False, fdp_off=True,
                 use_basic_RDP_to_approxDP_conversion=False,
                 use_fDP_based_RDP_to_approxDP_conversion=False, phi_off=True):
        """
        sigma: the std of the noise divided by the l2 sensitivity.
        coeff: the number of compositions
        RDP_off: if False, then we characterize the mechanism using RDP.
        fdp_off: if False, then we characterize the mechanism using fDP.
        phi_off: if False, then we characterize the mechanism using the phi-function.
        """
        Mechanism.__init__(self)

        self.name = name # When composing
        self.params = {'sigma': sigma} # This will be useful for the Calibrator
        # TODO: should a generic unspecified mechanism have a name and a param dictionary?

        self.delta0 = 0

        if not phi_off:
            """
            Apply the phi-function to analyze the Gaussian mechanism.
            The CDF of the privacy loss R.V. is computed by numerical integration (see details in cdf_bank) via the Levy inversion theorem.
            If self.exactPhi = True, the algorithm provides an exact characterization.
            """
            self.exactPhi = True
            log_phi = lambda x: phi_bank.phi_gaussian({'sigma': sigma}, x)
            self.log_phi_p = self.log_phi_q = log_phi

            # self.cdf tracks the cdf of log(p/q) and the cdf of log(q/p).
            self.propagate_updates((log_phi, log_phi), 'log_phi')
            """
            Moreover, we know the closed-form expression of the CDF of the privacy loss RV:
                the privacy loss RV is distributed as l = log(p/q) ~ N(1/(2*sigma^2), 1/sigma^2).
            We can also use the following closed-form cdf directly.
            """
            #sigma = sigma*1.0/np.sqrt(coeff)
            #mean = 1.0 / (2.0 * sigma ** 2)
            #std = 1.0 / (sigma)
            #cdf = lambda x: norm.cdf((x - mean) / std)
            #self.propagate_updates(cdf, 'cdf', take_log=True)


        if not RDP_off:
            new_rdp = lambda x: rdp_bank.RDP_gaussian({'sigma': sigma}, x)
            if use_fDP_based_RDP_to_approxDP_conversion:
                # This setting is slightly more involved: it converts RDP to fDP,
                # and then to (eps, delta)-DP via duality
                self.propagate_updates(new_rdp, 'RDP', fDP_based_conversion=True)
            elif use_basic_RDP_to_approxDP_conversion:
                self.propagate_updates(new_rdp, 'RDP', BBGHS_conversion=False)
            else:
                # This is the default setting with fast computation of RDP to approx-DP
                self.propagate_updates(new_rdp, 'RDP')

        if not approxDP_off: # Direct implementation of approxDP
            new_approxdp = lambda x: dp_bank.get_eps_ana_gaussian(sigma, x)
            self.propagate_updates(new_approxdp,'approxDP_func')

        if not fdp_off: # Direct implementation of fDP
            fun1 = lambda x: fdp_bank.log_one_minus_fdp_gaussian({'sigma': sigma}, x)
            fun2 = lambda x: fdp_bank.log_neg_fdp_grad_gaussian({'sigma': sigma}, x)
            self.propagate_updates([fun1,fun2],'fDP_and_grad_log')
            # overwrite the fdp computation with the direct computation
            self.fdp = lambda x: fdp_bank.fDP_gaussian({'sigma': sigma}, x)
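The closed-form CDF that is mentioned (but commented out) at the end of the phi-function branch above can be written out directly; a sketch using scipy, with the parameters taken from those commented lines:

# Sketch: closed-form CDF of the privacy loss RV l = log(p/q) ~ N(1/(2*sigma^2), 1/sigma^2).
from scipy.stats import norm

def privacy_loss_cdf(x, sigma):
    mean = 1.0 / (2.0 * sigma ** 2)
    std = 1.0 / sigma
    return norm.cdf((x - mean) / std)

print(privacy_loss_cdf(0.0, sigma=2.0))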
Example #15
def calibrate_epsilon(params, delta):  #lemma_8
    # We use approximate-CDP for the composition, and then calculate the epsilon parameter as a function of delta

    # Input 'params' should contain the following fields:
    # params['config'] keeps the string denoting which configuration it is
    # params['eps_sigma'] keeps the epsilon parameter used by the Laplace mechanism when releasing M2's eigenvalue
    # params['delta_sigma'] denotes the failure probability for the high-probability upper bound of LS
    # params['eps_gamma'] and params['delta_gamma'] are the analogues for M3's eigenvalue
    # params['gaussian'] contains a list of (sensitivity, variance) tuples,
    # because each config often releases more than one quantity
    config = params['config']
    eps_edge_dist = params['eps_dist']
    acct = rdp_acct.anaRDPacct()

    if not config:
        return 0

    delta0 = 0

    if config == 'config4':
        eps_e9 = eps_edge_dist['e9']
        eps_sigma = eps_e9 / 4
        eps_gamma = eps_e9 / 4
        delta_sigma = delta / 4
        delta_gamma = delta / 4
        delta0 = delta_sigma + delta_gamma
        acct.compose_mechanism(
            lambda x: rdp_bank.RDP_pureDP({'eps': eps_sigma}, x))
        acct.compose_mechanism(
            lambda x: rdp_bank.RDP_pureDP({'eps': eps_gamma}, x))

    if config == 'config3':
        eps_e7 = eps_edge_dist['e7']
        eps_sigma = eps_e7 / 3
        eps_gamma = eps_e7 / 3
        delta_sigma = delta / 3
        delta_gamma = delta / 3
        delta0 = delta_sigma + delta_gamma
        acct.compose_mechanism(
            lambda x: rdp_bank.RDP_pureDP({'eps': eps_sigma}, x))
        acct.compose_mechanism(
            lambda x: rdp_bank.RDP_pureDP({'eps': eps_gamma}, x))

    if config == 'config2':
        eps_e6 = eps_edge_dist['e6']
        eps_sigma = eps_e6 / 2
        delta_sigma = delta / 2
        delta0 = delta_sigma
        acct.compose_mechanism(
            lambda x: rdp_bank.RDP_pureDP({'eps': eps_sigma}, x))

    print('delta0:', delta0)

    if delta0 >= delta:
        return np.inf

    for sensitivity, variance in params['gaussian']:
        ## often we pre-emptively calculate sensitivities,
        ## so they might not be zero in places where we aren't adding noise.
        ## variance provides a better check for this.
        if sensitivity == 0 or variance == 0:
            continue

        std = np.sqrt(variance)
        # CDP of gaussian mechanism conditioning on the event is the same as its RDP.
        acct.compose_mechanism(lambda x: rdp_bank.RDP_gaussian(
            {'sigma': std / max(sensitivity,
                                np.finfo(np.float32).eps)}, x))

    # This privacy calculation follows from Lemma 8.8 of Bun and Steinke (2016), https://arxiv.org/pdf/1605.02065.pdf
    return acct.get_eps((delta - delta0) / (1 - delta0))
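A hypothetical invocation of calibrate_epsilon, with the fields laid out as the comments at the top of the function describe (all values are placeholders):

# Hypothetical parameters: 'config2' with one pure-DP release and two Gaussian releases.
params = {
    'config': 'config2',
    'eps_dist': {'e6': 1.0},               # epsilon budget for this configuration
    'gaussian': [(1.0, 4.0), (0.5, 1.0)],  # (sensitivity, variance) pairs
}
print('eps:', calibrate_epsilon(params, delta=1e-6))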
Example #16
        loss_fun = loss_fun * i / (i + 1) + nd.mean(loss).asscalar() / (i + 1)
    return acc.get()[1], loss_fun


# ## Now let's try attaching a privacy accountant to this data set

# declare a moment accountant from pydiffpriv
DPobject = rdp_acct.anaRDPacct()

# Specify privacy specific inputs
thresh = 4.0  # limit the norm of individual gradient
sigma = thresh

delta = 1e-5

func = lambda x: rdp_bank.RDP_gaussian({'sigma': sigma / thresh}, x)  # the effective noise multiplier is sigma / thresh

# ## We now specify the parameters needed for learning

epochs = 10
learning_rate = .1

n = train_data.num_data
batchsz = 100

count = 0
niter = 0
moving_loss = 0

grads = dpdl_utils.initialize_grad(params, ctx=ctx)
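The accountant, RDP function, and training constants defined above would typically be updated once per noisy minibatch; a sketch of that bookkeeping, assuming each step subsamples batchsz of the n training records:

# Sketch: per-step privacy bookkeeping for the training loop (details of the
# gradient clipping and noise addition are elided).
prob = batchsz / n
for step in range(epochs * (n // batchsz)):
    # ... clip per-example gradients to `thresh`, add Gaussian noise with std `sigma`,
    # and apply the update with `learning_rate` ...
    DPobject.compose_subsampled_mechanism(func, prob)

print('eps after training:', DPobject.get_eps(delta))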