def compute_privacy_cost_sgld(dp_sgld_params, Ts):
    import numpy as np
    from privacy.analysis.compute_dp_sgd_privacy import compute_rdp, get_privacy_spent
    ## Compute privacy budget
    N = 1e6  # dataset size
    batch_size = dp_sgld_params['tau']
    noise_sigma = dp_sgld_params['noise_sigma']
    q = batch_size / N  # subsampling ratio
    delta = 2 / N
    rdp_orders = range(2, 500)
    # RDP of a single step; RDP composes additively, so it is scaled by T below.
    rdp_eps0 = compute_rdp(q, noise_sigma, 1, rdp_orders)
    epsilons = np.zeros(len(Ts))
    for i_T, T in enumerate(Ts):
        epsilons[i_T] = get_privacy_spent(rdp_orders,
                                          rdp_eps0 * T,
                                          target_delta=delta)[0]
    return epsilons, delta
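
A minimal usage sketch for the accountant above (hypothetical hyperparameter values; assumes the privacy module is importable as in the function):

dp_sgld_params = {'tau': 1000, 'noise_sigma': 2.0}  # hypothetical values
Ts = [1000, 10000, 100000]
epsilons, delta = compute_privacy_cost_sgld(dp_sgld_params, Ts)
for T, eps in zip(Ts, epsilons):
    print('T={}: epsilon={:.2f} at delta={}'.format(T, eps, delta))
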
def find_sigma(target_eps, U, L, q, T, target_delta, ant_T=1):
    import numpy as np
    from privacy.analysis.compute_dp_sgd_privacy import compute_rdp, get_privacy_spent
    max_iter = 100
    n_iter = 0
    rdp_orders = range(2, 500)
    # Bisection over the noise level: epsilon decreases monotonically in
    # sigma, so [L, U] brackets the target budget.
    while True:
        m = (U + L) / 2
        rdp_eps = compute_rdp(q, m, T, rdp_orders)
        eps = get_privacy_spent(
            rdp_orders, rdp_eps, target_delta=target_delta)[0] * 2 * ant_T
        # Accept once eps is within 0.01 of the target (approaching from above).
        if np.abs(eps - target_eps) < 0.01 and eps > target_eps:
            return m, eps
        if eps > target_eps:
            L = m
        else:
            U = m
        n_iter += 1
        if n_iter == max_iter:
            break
    print("max nmbr of iter exceed")
    return m, eps
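
A hedged sketch of calling find_sigma; the bisection bracket [L, U] and the targets are illustrative, not values from the original experiments:

sigma, eps = find_sigma(target_eps=1.0, U=10.0, L=0.5, q=0.005,
                        T=10000, target_delta=1e-6)
print('sigma={:.3f} reaches epsilon={:.3f}'.format(sigma, eps))
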
def main():
    # Set DPVI params
    T = 10000
    C = 1.0
    lr = 1e-2
    # set number of mixture components
    female_k = 10
    q = 0.005
    sigma = float(sys.argv[1])
    n_runs = int(sys.argv[2])
    seed = int(sys.argv[3])
    delta = 1e-6
    # Set optimizer
    optimizer = torch.optim.Adam
    ## Set random seed
    npr.seed(seed)
    if use_cuda:
        torch.set_default_tensor_type('torch.cuda.DoubleTensor')
        torch.cuda.manual_seed(seed)
    else:
        torch.set_default_tensor_type('torch.DoubleTensor')
        torch.manual_seed(seed)

    ## Compute privacy budget
    from privacy.analysis.compute_dp_sgd_privacy import compute_rdp, get_privacy_spent
    rdp_orders = range(2, 500)
    rdp_eps = compute_rdp(q, sigma, T, rdp_orders)
    # Two generative models (alive and dead) are released, so the budgets
    # compose: epsilon doubles and each release is charged delta / 2.
    epsilon = 2 * get_privacy_spent(
        rdp_orders, rdp_eps, target_delta=delta / 2)[0]
    print("Epsilon : {}".format(epsilon))

    ## Save parameters
    res_dir = './res/'
    params = {'T': T, 'C': C, 'lr': lr, 'female_k': female_k,
              'q': q, 'sigma': sigma, 'epsilon': epsilon,
              'n_runs': n_runs, 'seed': seed}
    ## Determine filename
    fname_i = 0
    date = datetime.date.today().isoformat()
    fname = '{}_{}'.format(date, seed)
    while True:
        try:
            # If the file opens, the name is already taken.
            with open(res_dir + 'params_{}_{}.p'.format(fname, np.round(epsilon, 2)), 'r'):
                pass
            # Strip any previous '_(<i>)' suffix, then append the next index.
            if fname_i == 0:
                fname += '_({})'.format(fname_i)
            else:
                fname = fname[:fname.rfind('_(')] + '_({})'.format(fname_i)
            fname_i += 1
        except FileNotFoundError:
            break

    pickle.dump(
        params,
        open(res_dir + 'params_{}_{}.p'.format(fname, np.round(epsilon, 2)),
             'wb'))
    learn_counter = count()
    alive_female_models = []
    dead_female_models = []
    out_file = open(
        res_dir + 'out_{}_{}.txt'.format(fname, np.round(epsilon, 2)), 'w')
    for i in range(n_runs):
        start_time = time.time()
        print(next(learn_counter))
        # train female and models
        # alives
        alive_female_model = infer(
            T, C, float(sigma), int(q * len(alive_female_df)),
            optimizer, lr, alive_female_df, alive_female_variable_types, female_k)
        alive_female_models.append(alive_female_model)
        with open('./female_models/alive_female_models_{}_{}.p'.format(
                fname, np.round(epsilon, 2)), 'wb') as f:
            pickle.dump(alive_female_models, f)
        # deads
        dead_female_model = infer(
            T, C, float(sigma), int(q * len(dead_female_df)),
            optimizer, lr, dead_female_df, dead_female_variable_types, female_k)
        dead_female_models.append(dead_female_model)
        with open('./female_models/dead_female_models_{}_{}.p'.format(
                fname, np.round(epsilon, 2)), 'wb') as f:
            pickle.dump(dead_female_models, f)
        stop_time = time.time()
        time_delta = stop_time - start_time
        out_file.write(
            "Took {} seconds to learn alive and dead\n".format(time_delta))
        print("Took {} seconds to learn alive and dead\n".format(time_delta))
    out_file.close()
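
For reference, the accounting pattern used above in isolation: compute_rdp gives the Renyi DP of the subsampled Gaussian mechanism composed over T iterations, and doubling epsilon (with delta / 2 per release) covers the two trained models. A minimal sketch with illustrative numbers, assuming the same import path:

from privacy.analysis.compute_dp_sgd_privacy import compute_rdp, get_privacy_spent

q, sigma, T, delta = 0.005, 2.0, 10000, 1e-6  # illustrative values
orders = range(2, 500)
rdp = compute_rdp(q, sigma, T, orders)
eps = get_privacy_spent(orders, rdp, target_delta=delta / 2)[0]
print('total epsilon = {:.3f} at delta = {}'.format(2 * eps, delta))
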
Example #4
def main():
    ###  Set number of mixture components (k)
    k = 20
    ## Training parameters
    T = 30000
    C = 1.0
    q = .001
    lr = .001

    ### Pick dimension from argv
    d = int(sys.argv[1])
    ### Compute privacy budget
    from privacy.analysis.compute_dp_sgd_privacy import compute_rdp, get_privacy_spent
    delta = 1e-5
    rdp_orders = range(2, 500)
    sigma = 2.0
    if sigma > 0:
        print("Sigma : {}".format(sigma))
        rdp_eps = compute_rdp(q, sigma, T, rdp_orders)
        epsilon = 2 * get_privacy_spent(
            rdp_orders, rdp_eps, target_delta=delta / 2)[0]
    ### Check that epsilon < 1.0
    assert epsilon < 1.0

    ### Save log
    date = datetime.date.today().isoformat()
    wall_start = time.time()
    cpu_start = time.process_time()
    out_file = open("out_file_{}_{}.txt".format(date, d), "a")
    sys.stdout = out_file
    ### Load carat-data
    import pandas as pd
    app_data = pd.read_csv(
        '../data/subsets/carat_apps_sub{}.dat'.format(d),
        sep=' ', header=None).astype('float').values
    N = len(app_data)
    batch_size = int(N * q)
    X_apps = torch.tensor(app_data).view([N, 1, d])
    models = []  ## container to save gen_models
    from torch.optim import Adam
    for run in range(10):
        gen_model = infer(T, C, float(sigma), batch_size, Adam, lr, X_apps, k)
        models.append(gen_model)

    wall_end = time.time()
    cpu_end = time.process_time()
    pickle.dump(models, open('models_{}_{}.p'.format(date, d), 'wb'))
    print('Wall time {}'.format(wall_end - wall_start))
    print('CPU time {}'.format(cpu_end - cpu_start))
    out_file.close()
    params = {
        'T': T,
        'C': C,
        'q': q,
        'lr': lr,
        'sigma': sigma,
        'epsilon': epsilon,
        'd': d
    }
    pickle.dump(params, open('params_{}_{}.p'.format(date, d), 'wb'))
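
A hedged sketch for reading back the artifacts this run writes; the date and d values are hypothetical placeholders that should match an actual run:

import pickle

date, d = '2019-01-01', 16  # hypothetical; substitute a real run's values
models = pickle.load(open('models_{}_{}.p'.format(date, d), 'rb'))
params = pickle.load(open('params_{}_{}.p'.format(date, d), 'rb'))
print('{} models at epsilon={:.2f}'.format(len(models), params['epsilon']))
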
Example #5
def main():
    # Set DPVI params
    T = 80000
    C = 2.0
    lr = .0005
    q = 0.005
    batch_size = int(q * N)
    sigma = float(sys.argv[1])
    income = sys.argv[2]
    seed = int(sys.argv[3])
    torch.manual_seed(seed)
    npr.seed(seed)
    # Set number of mixture components
    k = 10
    param_dims = OrderedDict()
    for key, value in variable_types.items():
        if key == 'pi_unconstrained':
            param_dims[key] = [k - 1]
        else:
            if value == 'Bernoulli':
                param_dims[key] = [k]
            elif value == 'Categorical':
                param_dims[key] = [k, len(np.unique(data[key]))]
            elif value == 'Beta':
                param_dims[key] = [2, k]

    input_dim = int(np.sum([np.prod(value) for value in param_dims.values()]))
    flat_param_dims = np.array(
        [np.prod(value) for value in param_dims.values()])

    rich_data = data[data['Target'] == 1]
    batch_size_rich = int(q * len(rich_data))
    poor_data = data[data['Target'] == 0]
    batch_size_poor = int(q * len(poor_data))

    ### Save log
    date = datetime.date.today().isoformat()
    wall_start = time.time()
    cpu_start = time.process_time()
    out_file = open("out_file_{}_{}_{}.txt".format(income, date, sigma), "a")
    sys.stdout = out_file
    print("Sigma : {}".format(sigma))

    ## Containers for models
    models = []

    from torch.optim import Adam as Optimizer
    from dpvi import DPVI
    ## Repeat inference 10 times
    if income == "rich":
        rich_model = ReparamXpand(batch_size_rich, input_dim, param_dims,
                                  flat_param_dims)
        optimizer_rich = Optimizer(rich_model.parameters(), lr=lr)
        # Initialize the variational posterior of the unconstrained mixture
        # fractions to N(0, exp(-2.0)).
        rich_model.reparam.bias.data[:, -(k - 1):] = 0.0 * torch.ones_like(
            rich_model.reparam.bias.data[:, -(k - 1):])
        rich_model.reparam.weight.data[:, -(k - 1):] = -2.0 * torch.ones_like(
            rich_model.reparam.weight.data[:, -(k - 1):])
        rich_model_ = DPVI(rich_model, T, rich_data, batch_size_rich,
                           optimizer_rich, C, sigma, variable_types)
        models.append(rich_model_)
    else:
        poor_model = ReparamXpand(batch_size_poor, input_dim, param_dims,
                                  flat_param_dims)
        optimizer_poor = Optimizer(poor_model.parameters(), lr=lr)
        # Same initialization of the unconstrained mixture fractions as in
        # the rich branch: N(0, exp(-2.0)).
        poor_model.reparam.bias.data[:, -(k - 1):] = 0.0 * torch.ones_like(
            poor_model.reparam.bias.data[:, -(k - 1):])
        poor_model.reparam.weight.data[:, -(k - 1):] = -2.0 * torch.ones_like(
            poor_model.reparam.weight.data[:, -(k - 1):])

        poor_model_ = DPVI(poor_model, T, poor_data, batch_size_poor,
                           optimizer_poor, C, sigma, variable_types)
        models.append(poor_model_)
    wall_end = time.time()
    cpu_end = time.process_time()
    print('Wall time {}'.format(wall_end - wall_start))
    print('CPU time {}'.format(cpu_end - cpu_start))

    ## Compute privacy budget
    from privacy.analysis.compute_dp_sgd_privacy import compute_rdp, get_privacy_spent
    delta = 1e-5
    rdp_orders = range(2, 500)
    rdp_eps = compute_rdp(q, sigma, T, rdp_orders)
    # The factor 2 composes the rich and poor releases, each charged delta / 2.
    epsilon = 2 * get_privacy_spent(
        rdp_orders, rdp_eps, target_delta=delta / 2)[0]

    pickle.dump(
        models,
        open('./res/models_{}_{}_{}_{}.p'.format(income, date, sigma, seed),
             'wb'))
    params = {
        'T': T,
        'C': C,
        'lr': lr,
        'k': k,
        'q': q,
        'sigma': sigma,
        'epsilon': epsilon,
        'seed': seed
    }
    pickle.dump(
        params,
        open('./res/params_{}_{}_{}_{}.p'.format(income, date, sigma, seed),
             'wb'))
    out_file.close()
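
To make the shape bookkeeping in the last example concrete, a small self-contained sketch of how param_dims and flat_param_dims determine the flattened parameter dimension; the variable_types schema and category count here are hypothetical stand-ins for the module-level data:

import numpy as np
from collections import OrderedDict

k = 10
variable_types = OrderedDict([('Age', 'Beta'), ('Sex', 'Bernoulli'),
                              ('Education', 'Categorical'),
                              ('pi_unconstrained', None)])  # hypothetical schema
n_categories = {'Education': 5}  # stands in for len(np.unique(data[key]))
param_dims = OrderedDict()
for key, value in variable_types.items():
    if key == 'pi_unconstrained':
        param_dims[key] = [k - 1]  # k - 1 free mixture weights
    elif value == 'Bernoulli':
        param_dims[key] = [k]
    elif value == 'Categorical':
        param_dims[key] = [k, n_categories[key]]
    elif value == 'Beta':
        param_dims[key] = [2, k]

flat_param_dims = np.array([np.prod(v) for v in param_dims.values()])
input_dim = int(flat_param_dims.sum())
print(input_dim)  # 2k + k + 5k + (k - 1) = 89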