Example #1

import numpy as np
import torch

from syft.frameworks.torch.dp import pate

def test_torch_ref_match():

    # Verify that the torch implementation matches the original NumPy implementation.

    num_teachers, num_examples, num_labels = (100, 50, 10)
    preds = (np.random.rand(num_teachers, num_examples) * num_labels).astype(
        int)  # fake preds

    indices = (np.random.rand(num_examples) * num_labels).astype(
        int)  # true answers

    preds[:, 0:10] *= 0  # force all teachers to agree on the first 10 examples

    data_dep_eps, data_ind_eps = pate.perform_analysis_torch(preds,
                                                             indices,
                                                             noise_eps=0.1,
                                                             delta=1e-5)

    data_dep_eps_ref, data_ind_eps_ref = pate.perform_analysis(preds,
                                                               indices,
                                                               noise_eps=0.1,
                                                               delta=1e-5)

    assert torch.isclose(data_dep_eps,
                         torch.tensor(data_dep_eps_ref, dtype=torch.float32))
    assert torch.isclose(data_ind_eps,
                         torch.tensor(data_ind_eps_ref, dtype=torch.float32))
Example #2

import numpy as np

from syft.frameworks.torch.dp import pate

def test_base_dataset():

    num_teachers, num_examples, num_labels = (100, 50, 10)
    preds = (np.random.rand(num_teachers, num_examples) * num_labels).astype(int)  # fake preds

    indices = (np.random.rand(num_examples) * num_labels).astype(int)  # true answers

    preds[:, 0:10] *= 0  # force all teachers to agree on the first 10 examples

    data_dep_eps, data_ind_eps = pate.perform_analysis(
        teacher_preds=preds, indices=indices, noise_eps=0.1, delta=1e-5
    )

    # with forced consensus on some examples, the data-dependent analysis
    # should yield a tighter (smaller) epsilon than the data-independent bound
    assert data_dep_eps < data_ind_eps
Example #3

import torch

def test_section_1_differential_privacy():
    """This tests the Udacity course content found at
    https://github.com/Udacity/private-ai
    """

    # the number of entries in our database
    num_entries = 5000

    db = torch.rand(num_entries) > 0.5

    def get_parallel_db(db, remove_index):
        return torch.cat((db[0:remove_index], db[remove_index + 1:]))

    get_parallel_db(db, 2352)  # remove a single entry (the index must be < num_entries)

    def get_parallel_dbs(db):
        parallel_dbs = []

        for i in range(len(db)):
            pdb = get_parallel_db(db, i)
            parallel_dbs.append(pdb)

        return parallel_dbs

    pdbs = get_parallel_dbs(db)

    def create_db_and_parallels(num_entries):
        db = torch.rand(num_entries) > 0.5
        pdbs = get_parallel_dbs(db)

        return db, pdbs

    db, pdbs = create_db_and_parallels(20)

    db, pdbs = create_db_and_parallels(5000)

    def query(db):
        return db.sum()

    full_db_result = query(db)

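    # sensitivity: the maximum change in the query's output when any single
    # entry is removed, i.e. the max distance over all parallel databases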
    sensitivity = 0
    for pdb in pdbs:
        pdb_result = query(pdb)

        db_distance = torch.abs(pdb_result - full_db_result)

        if db_distance > sensitivity:
            sensitivity = db_distance

    # generalize the loop above into a reusable function (shadows the scalar above)
    def sensitivity(query, n_entries=1000):

        db, pdbs = create_db_and_parallels(n_entries)

        full_db_result = query(db)

        max_distance = 0
        for pdb in pdbs:
            pdb_result = query(pdb)

            db_distance = torch.abs(pdb_result - full_db_result)

            if db_distance > max_distance:
                max_distance = db_distance

        return max_distance

    def query(db):
        return db.float().mean()

    sensitivity(query)  # for a mean query this is roughly 1 / n_entries

    db, pdbs = create_db_and_parallels(20)

    def query(db, threshold=5):
        return (db.sum() > threshold).float()

    for i in range(10):
        sens_f = sensitivity(query, n_entries=10)
        print(sens_f)

    db, _ = create_db_and_parallels(100)

    pdb = get_parallel_db(db, remove_index=10)

    target_value = db[10]  # the private value the attacker wants to learn

    full_sum = sum(db)

    # differencing attack using a sum query
    sum_attack = full_sum - sum(pdb)

    # differencing attack using a mean query
    mean_attack = (sum(db).float() / len(db)) - (sum(pdb).float() / len(pdb))

    # differencing attack using a threshold query
    threshold_attack = (sum(db).float() > 49).float() - (sum(pdb).float() > 49).float()

    def query(db):

        true_result = torch.mean(db.float())

        first_coin_flip = (torch.rand(len(db)) > 0.5).float()
        second_coin_flip = (torch.rand(len(db)) > 0.5).float()

        augmented_database = db.float() * first_coin_flip + (
            1 - first_coin_flip) * second_coin_flip

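        # each entry answers truthfully with probability 0.5 and otherwise reports a
        # random coin, so E[augmented mean] = 0.5 * true_mean + 0.25; inverting gives
        # true_mean ≈ 2 * mean - 0.5, which the next line computes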
        db_result = torch.mean(augmented_database.float()) * 2 - 0.5

        return db_result, true_result

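    # as the database grows, the de-skewed noisy estimate converges to the true mean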
    db, pdbs = create_db_and_parallels(10)
    private_result, true_result = query(db)
    print("With Noise: " + str(private_result))
    print("Without Noise: " + str(true_result))

    db, pdbs = create_db_and_parallels(100)
    private_result, true_result = query(db)
    print("With Noise: " + str(private_result))
    print("Without Noise: " + str(true_result))

    db, pdbs = create_db_and_parallels(1000)
    private_result, true_result = query(db)
    print("With Noise: " + str(private_result))
    print("Without Noise: " + str(true_result))

    db, pdbs = create_db_and_parallels(10000)
    private_result, true_result = query(db)
    print("With Noise: " + str(private_result))
    print("Without Noise: " + str(true_result))

    def query(db, noise=0.2):

        true_result = torch.mean(db.float())

        first_coin_flip = (torch.rand(len(db)) > noise).float()
        second_coin_flip = (torch.rand(len(db)) > 0.5).float()

        augmented_database = db.float() * first_coin_flip + (
            1 - first_coin_flip) * second_coin_flip

        sk_result = augmented_database.float().mean()

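        # entries answer truthfully with probability (1 - noise), so
        # E[sk_result] = (1 - noise) * true_result + noise * 0.5; solving for the
        # true mean gives (sk_result - 0.5 * noise) / (1 - noise), computed below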
        private_result = ((sk_result / noise) - 0.5) * noise / (1 - noise)

        return private_result, true_result

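    # higher noise gives stronger privacy but a worse estimate; a larger
    # database compensates (compare the last run with 10000 entries)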
    db, pdbs = create_db_and_parallels(100)
    private_result, true_result = query(db, noise=0.1)
    print("With Noise: " + str(private_result))
    print("Without Noise: " + str(true_result))

    db, pdbs = create_db_and_parallels(100)
    private_result, true_result = query(db, noise=0.2)
    print("With Noise: " + str(private_result))
    print("Without Noise: " + str(true_result))

    db, pdbs = create_db_and_parallels(100)
    private_result, true_result = query(db, noise=0.4)
    print("With Noise: " + str(private_result))
    print("Without Noise: " + str(true_result))

    db, pdbs = create_db_and_parallels(100)
    private_result, true_result = query(db, noise=0.8)
    print("With Noise: " + str(private_result))
    print("Without Noise: " + str(true_result))

    db, pdbs = create_db_and_parallels(10000)
    private_result, true_result = query(db, noise=0.8)
    print("With Noise: " + str(private_result))
    print("Without Noise: " + str(true_result))

    db, pdbs = create_db_and_parallels(100)

    def query(db):
        return torch.sum(db.float())

    # a differentially private mechanism M adds noise to the query:
    # def M(db):
    #     return query(db) + noise
    query(db)

    epsilon = 0.0001  # privacy budget; smaller epsilon means more noise

    import numpy as np

    db, pdbs = create_db_and_parallels(100)

    def sum_query(db):
        return db.sum()

    def laplacian_mechanism(db, query, sensitivity):

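        # Laplace mechanism: noise drawn from Lap(b) with scale b = sensitivity / epsilon
        # satisfies epsilon-differential privacy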
        beta = sensitivity / epsilon
        noise = torch.tensor(np.random.laplace(0, beta, 1))

        return query(db) + noise

    def mean_query(db):
        return torch.mean(db.float())

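    # removing one entry changes a sum over a binary db by at most 1 (sensitivity 1)
    # and a mean over 100 entries by roughly 1 / 100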
    laplacian_mechanism(db, sum_query, 1)

    laplacian_mechanism(db, mean_query, 1 / 100)


    num_teachers = 10  # we're working with 10 partner hospitals
    num_examples = 10000  # the size of OUR dataset
    num_labels = 10  # number of labels for our classifier

    preds = ((np.random.rand(num_teachers, num_examples) *
              num_labels).astype(int).transpose(1, 0))  # fake predictions

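    # PATE aggregation (noisy max): for each example, count the teachers' votes per
    # label, add Laplace(1 / epsilon) noise to each count, and release the argmax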
    new_labels = []
    for an_image in preds:

        # cast to float so the Laplace noise added below is not truncated to integers
        label_counts = np.bincount(an_image, minlength=num_labels).astype(float)

        epsilon = 0.1
        beta = 1 / epsilon

        for i in range(len(label_counts)):
            label_counts[i] += np.random.laplace(0, beta, 1)

        new_label = np.argmax(label_counts)

        new_labels.append(new_label)

    labels = np.array([9, 9, 3, 6, 9, 9, 9, 9, 8, 2])
    counts = np.bincount(labels, minlength=10)
    query_result = np.argmax(counts)
    print(query_result)  # the consensus label, without noise

    from syft.frameworks.torch.dp import pate

    num_teachers, num_examples, num_labels = (100, 100, 10)
    preds = (np.random.rand(num_teachers, num_examples) * num_labels).astype(
        int)  # fake preds
    indices = (np.random.rand(num_examples) * num_labels).astype(
        int)  # true answers

    preds[:, 0:10] *= 0  # force teacher consensus on the first 10 examples

    data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=preds,
                                                       indices=indices,
                                                       noise_eps=0.1,
                                                       delta=1e-5)

    assert data_dep_eps < data_ind_eps

    print("Data Independent Epsilon:", data_ind_eps)
    print("Data Dependent Epsilon:", data_dep_eps)

    preds[:, 0:50] *= 0  # consensus on more examples lowers the data-dependent epsilon further

    data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=preds,
                                                       indices=indices,
                                                       noise_eps=0.1,
                                                       delta=1e-5,
                                                       moments=20)
    print("Data Independent Epsilon:", data_ind_eps)
    print("Data Dependent Epsilon:", data_dep_eps)
Example #4

from torch import nn, optim

from syft.frameworks.torch.dp import pate

for i, model in enumerate(teacher_models):
	results = predict(model, data_loader)
	preds[i] = results

# add noise to the predicted labels
student_labels = getStudentLabels(preds, epsilon=EPSILON)


##################################################
#                  PATE Analysis                 #
##################################################

# perform PATE analysis
data_dep_eps, data_ind_eps = pate.perform_analysis(
	teacher_preds=preds, indices=student_labels, 
	noise_eps=EPSILON, delta=DELTA)
print("Data Independent Epsilon:", data_ind_eps)
print("Data Dependent Epsilon:", data_dep_eps)


##################################################
#     Train student model on modified labels     #
##################################################

# train student model on the student train data and the 
# calculated labels
student_model = Network()
criterion = nn.NLLLoss()
optimizer = optim.Adam(student_model.parameters(), 
	lr=LEARNING_RATE)
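
The original snippet ends right after constructing the model, loss, and optimizer. Below is a minimal sketch of the training loop that would typically follow; student_train_loader (a DataLoader over the student data paired with the noisy labels) and EPOCHS are assumed names, not part of the original code, and Network is assumed to return log-probabilities since nn.NLLLoss is used.

for epoch in range(EPOCHS):
	running_loss = 0.0
	for images, labels in student_train_loader:
		optimizer.zero_grad()
		log_probs = student_model(images)  # assumed: Network outputs log-probabilities
		loss = criterion(log_probs, labels)
		loss.backward()
		optimizer.step()
		running_loss += loss.item()
	print("Epoch:", epoch + 1, "Loss:", running_loss / len(student_train_loader))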