def test_slice_by_correcness(self):
  """Slicing by classification correctness yields exactly two slices."""
  spec = SlicingSpec(entire_dataset=False, by_classification_correctness=True)
  slices = get_single_slice_specs(spec)
  self.assertLen(slices, 2)
  want_first = SingleSliceSpec(SlicingFeature.CORRECTLY_CLASSIFIED, True)
  self.assertTrue(_are_all_fields_equal(slices[0], want_first))
def run_membership_probability_analysis(
    attack_input: AttackInputData,
    slicing_spec: SlicingSpec = None) -> MembershipProbabilityResults:
  """Performs membership probability analysis on all given slice types.

  Args:
    attack_input: input data for computing membership probabilities.
    slicing_spec: specifies attack_input slices; defaults to the
      entire-dataset slice when not provided.

  Returns:
    The membership probability results, one entry per slice.
  """
  attack_input.validate()
  if slicing_spec is None:
    slicing_spec = SlicingSpec(entire_dataset=True)
  # num_classes is only needed when slicing by class.
  num_classes = attack_input.num_classes if slicing_spec.by_class else None

  results = []
  for slice_spec in get_single_slice_specs(slicing_spec, num_classes):
    data_slice = get_slice(attack_input, slice_spec)
    results.append(_compute_membership_probability(data_slice))
  return MembershipProbabilityResults(membership_prob_results=results)
def test_slice_by_percentiles(self):
  """Percentile slicing produces ten decile slices in ascending order."""
  spec = SlicingSpec(entire_dataset=False, by_percentiles=True)
  slices = get_single_slice_specs(spec)
  self.assertLen(slices, 10)
  self.assertTrue(
      _are_all_fields_equal(
          slices[0], SingleSliceSpec(SlicingFeature.PERCENTILE, (0, 10))))
  self.assertTrue(
      _are_all_fields_equal(
          slices[5], SingleSliceSpec(SlicingFeature.PERCENTILE, (50, 60))))
def test_slice_by_classes(self):
  """Class slicing yields the entire-dataset slice plus one slice per class."""
  n_classes = 5
  spec = SlicingSpec(by_class=True)
  want = [self.ENTIRE_DATASET_SLICE]
  want.extend(
      SingleSliceSpec(SlicingFeature.CLASS, c) for c in range(n_classes))
  got = get_single_slice_specs(spec, n_classes)
  self.assertTrue(_are_lists_equal(got, want))
def test_run_attack_by_slice(self):
  """Class slicing attacks the entire dataset plus each class slice."""
  results = mia.run_attacks(
      get_test_input(100, 100), SlicingSpec(by_class=True),
      (AttackType.THRESHOLD_ATTACK,))
  self.assertLen(results.single_attack_results, 6)
  # The entire-dataset slice comes first, so index 3 is the class-2 slice.
  self.assertEqual(results.single_attack_results[3].slice_spec,
                   SingleSliceSpec(SlicingFeature.CLASS, 2))
def test_run_attack_data_size(self):
  """data_size reflects the size of the slice each attack ran on."""
  results = mia.run_attacks(
      get_test_input(100, 80), SlicingSpec(by_class=True),
      (AttackType.THRESHOLD_ATTACK,))
  # Entire-dataset slice keeps the full train/test sizes.
  self.assertEqual(results.single_attack_results[0].data_size,
                   DataSize(ntrain=100, ntest=80))
  # A per-class slice holds one fifth of each set (100/5, 80/5).
  self.assertEqual(results.single_attack_results[3].data_size,
                   DataSize(ntrain=20, ntest=16))
def main(unused_argv):
  """Trains a CIFAR-10 CNN with an MIA callback, then attacks the final model."""
  # Load training and test data.
  x_train, y_train, x_test, y_test = load_cifar10()

  # Get model, optimizer and specify loss.
  model = small_cnn()
  # `learning_rate` replaces the deprecated `lr` keyword alias.
  optimizer = tf.keras.optimizers.SGD(
      learning_rate=FLAGS.learning_rate, momentum=0.9)
  loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
  model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

  # Get callback for membership inference attack, run during training.
  mia_callback = MembershipInferenceCallback(
      (x_train, y_train), (x_test, y_test),
      slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
      attack_types=[
          AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
      ],
      tensorboard_dir=FLAGS.model_dir,
      tensorboard_merge_classifiers=FLAGS.tensorboard_merge_classifiers)

  # Train model with Keras.
  model.fit(
      x_train,
      y_train,
      epochs=FLAGS.epochs,
      validation_data=(x_test, y_test),
      batch_size=FLAGS.batch_size,
      callbacks=[mia_callback],
      verbose=2)

  # Run a final attack on the trained model and print flattened metrics.
  print('End of training attack:')
  attack_results = run_attack_on_keras_model(
      model, (x_train, y_train), (x_test, y_test),
      slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
      attack_types=[
          AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
      ])
  att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
      attack_results)
  print('\n'.join([
      ' %s: %.4f' % (', '.join([s, t, m]), v)
      for t, s, m, v in zip(att_types, att_slices, att_metrics, att_values)
  ]))
def test_slicing_by_multiple_features(self):
  """Enabled slicing features contribute additively to the slice count."""
  n_classes = 10
  spec = SlicingSpec(
      entire_dataset=True,
      by_class=True,
      by_percentiles=True,
      by_classification_correctness=True)
  # 1 entire-dataset slice + one slice per class + 10 percentile slices
  # + 2 classification-correctness slices.
  want_len = 1 + n_classes + 10 + 2
  self.assertLen(get_single_slice_specs(spec, n_classes), want_len)
def run_attacks(attack_input: AttackInputData,
                slicing_spec: SlicingSpec = None,
                attack_types: Iterable[AttackType] = (
                    AttackType.THRESHOLD_ATTACK,),
                privacy_report_metadata: PrivacyReportMetadata = None,
                balance_attacker_training: bool = True,
                min_num_samples: int = 1) -> AttackResults:
  """Runs membership inference attacks on a classification model.

  Every attack in attack_types is executed against every attack_input slice
  described by slicing_spec.

  Args:
    attack_input: input data for running an attack.
    slicing_spec: specifies attack_input slices to run attack on; defaults to
      the entire-dataset slice.
    attack_types: attacks to run.
    privacy_report_metadata: the metadata of the model under attack.
    balance_attacker_training: whether the training and test sets for the
      membership inference attacker should have a balanced (roughly equal)
      number of samples from the training and test sets used to develop the
      model under attack.
    min_num_samples: minimum number of examples in either training or test
      data.

  Returns:
    The attack result.
  """
  attack_input.validate()
  if slicing_spec is None:
    slicing_spec = SlicingSpec(entire_dataset=True)
  # num_classes is only needed when slicing by class.
  num_classes = attack_input.num_classes if slicing_spec.by_class else None

  results = []
  for slice_spec in get_single_slice_specs(slicing_spec, num_classes):
    sliced_input = get_slice(attack_input, slice_spec)
    for attack_type in attack_types:
      outcome = _run_attack(sliced_input, attack_type,
                            balance_attacker_training, min_num_samples)
      # Attacks may decline to run (e.g. too little data); skip those.
      if outcome is not None:
        results.append(outcome)

  privacy_report_metadata = _compute_missing_privacy_report_metadata(
      privacy_report_metadata, attack_input)
  return AttackResults(
      single_attack_results=results,
      privacy_report_metadata=privacy_report_metadata)
def run_attacks(
    attack_input: AttackInputData,
    slicing_spec: SlicingSpec = None,
    attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,)
) -> AttackResults:
  """Run all attacks.

  Args:
    attack_input: input data for running the attacks.
    slicing_spec: specifies attack_input slices; defaults to the
      entire-dataset slice.
    attack_types: attacks to run on each slice.

  Returns:
    The combined attack results.
  """
  attack_input.validate()
  attack_results = []

  if slicing_spec is None:
    slicing_spec = SlicingSpec(entire_dataset=True)
  # Only consult num_classes when class slicing is requested, mirroring the
  # guarded pattern used by the other run_attacks variants in this codebase.
  num_classes = attack_input.num_classes if slicing_spec.by_class else None
  input_slice_specs = get_single_slice_specs(slicing_spec, num_classes)
  for single_slice_spec in input_slice_specs:
    attack_input_slice = get_slice(attack_input, single_slice_spec)
    for attack_type in attack_types:
      attack_results.append(run_attack(attack_input_slice, attack_type))

  return AttackResults(single_attack_results=attack_results)
def main(unused_argv):
  """Trains an MNIST Keras model with an MIA callback, then runs a final attack."""
  # Load training and test data.
  train_x, train_y, test_x, test_y = load_mnist()

  # Get model, optimizer and specify loss.
  model = cnn_model()
  optimizer = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate)
  loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
  model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

  # Get callback for membership inference attack.
  mia_callback = MembershipInferenceCallback(
      (train_x, train_y), (test_x, test_y),
      attack_types=[AttackType.THRESHOLD_ATTACK],
      tensorboard_dir=FLAGS.model_dir)

  # Train model with Keras.
  model.fit(
      train_x,
      train_y,
      epochs=FLAGS.epochs,
      validation_data=(test_x, test_y),
      batch_size=FLAGS.batch_size,
      callbacks=[mia_callback],
      verbose=2)

  # Run a final attack on the trained model and print the metrics.
  print('End of training attack:')
  attack_results = run_attack_on_keras_model(
      model, (train_x, train_y), (test_x, test_y),
      slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
      attack_types=[
          AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
      ])
  attack_properties, attack_values = get_flattened_attack_metrics(
      attack_results)
  print('\n'.join([
      ' %s: %.4f' % (', '.join(p), r)
      for p, r in zip(attack_properties, attack_values)
  ]))
def run_attacks(
    attack_input: AttackInputData,
    slicing_spec: SlicingSpec = None,
    attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK,),
    privacy_report_metadata: PrivacyReportMetadata = None
) -> AttackResults:
  """Runs membership inference attacks on a classification model.

  It runs attacks specified by attack_types on each attack_input slice which is
  specified by slicing_spec.

  Args:
    attack_input: input data for running an attack
    slicing_spec: specifies attack_input slices to run attack on
    attack_types: attacks to run
    privacy_report_metadata: the metadata of the model under attack.

  Returns:
    the attack result.
  """
  attack_input.validate()
  attack_results = []

  if slicing_spec is None:
    slicing_spec = SlicingSpec(entire_dataset=True)
  # Only consult num_classes when class slicing is requested, mirroring the
  # guarded pattern used by the other run_attacks variants in this codebase.
  num_classes = attack_input.num_classes if slicing_spec.by_class else None
  input_slice_specs = get_single_slice_specs(slicing_spec, num_classes)
  for single_slice_spec in input_slice_specs:
    attack_input_slice = get_slice(attack_input, single_slice_spec)
    for attack_type in attack_types:
      attack_results.append(_run_attack(attack_input_slice, attack_type))

  privacy_report_metadata = _compute_missing_privacy_report_metadata(
      privacy_report_metadata, attack_input)

  return AttackResults(
      single_attack_results=attack_results,
      privacy_report_metadata=privacy_report_metadata)
def test_run_attacks_size(self):
  """One result per attack type on the default entire-dataset slice."""
  results = mia.run_attacks(
      get_test_input(100, 100), SlicingSpec(),
      (AttackType.THRESHOLD_ATTACK, AttackType.LOGISTIC_REGRESSION))
  self.assertLen(results.single_attack_results, 2)
def test_no_slices(self):
  """With every slicing feature disabled, no slices are produced."""
  spec = SlicingSpec(entire_dataset=False)
  self.assertTrue(_are_lists_equal(get_single_slice_specs(spec), []))
def test_trained_attacks_logits_only_size(self):
  """A trained attack runs on input that provides logits only."""
  results = mia.run_attacks(
      get_test_input_logits_only(100, 100), SlicingSpec(),
      (AttackType.LOGISTIC_REGRESSION,))
  self.assertLen(results.single_attack_results, 1)
def crossentropy(true_labels, predictions):
  # Per-example binary cross-entropy between the one-hot encoding of
  # true_labels and the predicted probabilities, evaluated via the Keras
  # backend. `num_clusters` is presumably a module-level constant — confirm.
  return keras.backend.eval(
      keras.losses.binary_crossentropy(
          keras.backend.variable(to_categorical(true_labels, num_clusters)),
          keras.backend.variable(predictions)))


# Run threshold and logistic-regression attacks on the entire dataset and on
# each class slice, feeding both logits and per-example losses.
attack_results = mia.run_attacks(
    AttackInputData(
        labels_train=training_labels,
        labels_test=test_labels,
        logits_train=training_pred,
        logits_test=test_pred,
        loss_train=crossentropy(training_labels, training_pred),
        loss_test=crossentropy(test_labels, test_pred)),
    SlicingSpec(entire_dataset=True, by_class=True),
    attack_types=(AttackType.THRESHOLD_ATTACK,
                  AttackType.LOGISTIC_REGRESSION))

# Example of saving the results to the file and loading them back.
with tempfile.TemporaryDirectory() as tmpdirname:
  filepath = os.path.join(tmpdirname, "results.pickle")
  attack_results.save(filepath)
  loaded_results = AttackResults.load(filepath)

# Print attack metrics
for attack_result in attack_results.single_attack_results:
  print("Slice: %s" % attack_result.slice_spec)
  print("Attack type: %s" % attack_result.attack_type)
  print("AUC: %.2f" % attack_result.roc_curve.get_auc())
  # NOTE(review): the source appears truncated mid-statement below — the
  # right-hand operand of `%` is missing in the provided text.
  print("Attacker advantage: %.2f\n" %
def main(unused_argv):
  """Trains an MNIST tf.Estimator with an MIA training hook, then attacks it."""
  # Silence TF/absl console noise and route absl logs to a file.
  tf.logging.set_verbosity(tf.logging.ERROR)
  logging.set_verbosity(logging.ERROR)
  logging.set_stderrthreshold(logging.ERROR)
  logging.get_absl_handler().use_absl_log_file()

  # Load training and test data.
  train_data, train_labels, test_data, test_labels = load_mnist()

  # Instantiate the tf.Estimator.
  mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn,
                                            model_dir=FLAGS.model_dir)

  # A function to construct input_fn given (data, label), to be used by the
  # membership inference training hook.
  def input_fn_constructor(x, y):
    return tf.estimator.inputs.numpy_input_fn(x={'x': x}, y=y, shuffle=False)

  with tf.Graph().as_default():
    # Get a summary writer for the hook to write to tensorboard.
    # Can set summary_writer to None if not needed.
    if FLAGS.model_dir:
      summary_writer = tf.summary.FileWriter(FLAGS.model_dir)
    else:
      summary_writer = None
    # Hook that runs the membership inference attack during training.
    mia_hook = MembershipInferenceTrainingHook(
        mnist_classifier, (train_data, train_labels),
        (test_data, test_labels),
        input_fn_constructor,
        attack_types=[AttackType.THRESHOLD_ATTACK],
        writer=summary_writer)

  # Create tf.Estimator input functions for the training and test data.
  train_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={'x': train_data},
      y=train_labels,
      batch_size=FLAGS.batch_size,
      num_epochs=FLAGS.epochs,
      shuffle=True)
  eval_input_fn = tf.estimator.inputs.numpy_input_fn(x={'x': test_data},
                                                     y=test_labels,
                                                     num_epochs=1,
                                                     shuffle=False)

  # Training loop. 60000 matches the MNIST training-set size used here.
  steps_per_epoch = 60000 // FLAGS.batch_size
  for epoch in range(1, FLAGS.epochs + 1):
    # Train the model, with the membership inference hook.
    mnist_classifier.train(input_fn=train_input_fn,
                           steps=steps_per_epoch,
                           hooks=[mia_hook])

    # Evaluate the model and print results
    eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
    test_accuracy = eval_results['accuracy']
    print('Test accuracy after %d epochs is: %.3f' % (epoch, test_accuracy))

  # Final attack on the fully trained model.
  print('End of training attack')
  attack_results = run_attack_on_tf_estimator_model(
      mnist_classifier, (train_data, train_labels),
      (test_data, test_labels),
      input_fn_constructor,
      slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
      attack_types=[
          AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
      ])
  attack_properties, attack_values = get_all_attack_results(attack_results)
  print('\n'.join([
      ' %s: %.4f' % (', '.join(p), r)
      for p, r in zip(attack_properties, attack_values)
  ]))
def test_entire_dataset(self):
  """The default slicing spec produces only the entire-dataset slice."""
  slices = get_single_slice_specs(SlicingSpec())
  self.assertTrue(_are_lists_equal(slices, [self.ENTIRE_DATASET_SLICE]))
def main(unused_argv):
  """Trains a CIFAR-10 tf.Estimator with an MIA training hook, then attacks it."""
  # Silence TF/absl console noise and route absl logs to a file.
  tf.logging.set_verbosity(tf.logging.ERROR)
  logging.set_verbosity(logging.ERROR)
  logging.set_stderrthreshold(logging.ERROR)
  logging.get_absl_handler().use_absl_log_file()

  # Load training and test data.
  x_train, y_train, x_test, y_test = load_cifar10()

  # Instantiate the tf.Estimator.
  classifier = tf.estimator.Estimator(model_fn=small_cnn_fn,
                                      model_dir=FLAGS.model_dir)

  # A function to construct input_fn given (data, label), to be used by the
  # membership inference training hook.
  def input_fn_constructor(x, y):
    return tf.estimator.inputs.numpy_input_fn(x={'x': x}, y=y, shuffle=False)

  # Get hook for membership inference attack.
  mia_hook = MembershipInferenceTrainingHook(
      classifier, (x_train, y_train), (x_test, y_test),
      input_fn_constructor,
      slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
      attack_types=[
          AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
      ],
      tensorboard_dir=FLAGS.model_dir,
      tensorboard_merge_classifiers=FLAGS.tensorboard_merge_classifiers)

  # Create tf.Estimator input functions for the training and test data.
  train_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={'x': x_train},
      y=y_train,
      batch_size=FLAGS.batch_size,
      num_epochs=FLAGS.epochs,
      shuffle=True)
  eval_input_fn = tf.estimator.inputs.numpy_input_fn(x={'x': x_test},
                                                     y=y_test,
                                                     num_epochs=1,
                                                     shuffle=False)

  # Training loop. 50000 matches the CIFAR-10 training-set size used here.
  steps_per_epoch = 50000 // FLAGS.batch_size
  for epoch in range(1, FLAGS.epochs + 1):
    # Train the model, with the membership inference hook.
    classifier.train(input_fn=train_input_fn,
                     steps=steps_per_epoch,
                     hooks=[mia_hook])

    # Evaluate the model and print results
    eval_results = classifier.evaluate(input_fn=eval_input_fn)
    test_accuracy = eval_results['accuracy']
    print('Test accuracy after %d epochs is: %.3f' % (epoch, test_accuracy))

  # Final attack on the fully trained model.
  print('End of training attack')
  attack_results = run_attack_on_tf_estimator_model(
      classifier, (x_train, y_train), (x_test, y_test),
      input_fn_constructor,
      slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
      attack_types=[
          AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
      ])
  att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
      attack_results)
  print('\n'.join([
      ' %s: %.4f' % (', '.join([s, t, m]), v)
      for t, s, m, v in zip(att_types, att_slices, att_metrics, att_values)
  ]))
def main(unused_argv):
  """Trains model variants incrementally, attacking after every few epochs,
  then renders privacy reports and prints/plots the final attack results."""
  epoch_results = AttackResultsCollection([])
  num_epochs = 2
  models = {
      "two layer model": two_layer_model,
      "three layer model": three_layer_model,
  }
  for model_name in models:
    # Incrementally train the model and store privacy metrics every num_epochs.
    for i in range(1, 6):
      models[model_name].fit(
          training_features,
          to_categorical(training_labels, num_clusters),
          validation_data=(test_features,
                          to_categorical(test_labels, num_clusters)),
          batch_size=64,
          epochs=num_epochs,
          shuffle=True)

      training_pred = models[model_name].predict(training_features)
      test_pred = models[model_name].predict(test_features)

      # Add metadata to generate a privacy report.
      privacy_report_metadata = PrivacyReportMetadata(
          accuracy_train=metrics.accuracy_score(
              training_labels, np.argmax(training_pred, axis=1)),
          accuracy_test=metrics.accuracy_score(test_labels,
                                               np.argmax(test_pred, axis=1)),
          # Cumulative epoch count after this training increment.
          epoch_num=num_epochs * i,
          model_variant_label=model_name)

      attack_results = mia.run_attacks(
          AttackInputData(
              labels_train=training_labels,
              labels_test=test_labels,
              probs_train=training_pred,
              probs_test=test_pred,
              loss_train=crossentropy(training_labels, training_pred),
              loss_test=crossentropy(test_labels, test_pred)),
          SlicingSpec(entire_dataset=True, by_class=True),
          attack_types=(AttackType.THRESHOLD_ATTACK,
                        AttackType.LOGISTIC_REGRESSION),
          privacy_report_metadata=privacy_report_metadata)
      epoch_results.append(attack_results)

  # Generate privacy reports
  epoch_figure = privacy_report.plot_by_epochs(
      epoch_results, [PrivacyMetric.ATTACKER_ADVANTAGE, PrivacyMetric.AUC])
  epoch_figure.show()
  privacy_utility_figure = privacy_report.plot_privacy_vs_accuracy_single_model(
      epoch_results, [PrivacyMetric.ATTACKER_ADVANTAGE, PrivacyMetric.AUC])
  privacy_utility_figure.show()

  # Example of saving the results to the file and loading them back.
  with tempfile.TemporaryDirectory() as tmpdirname:
    filepath = os.path.join(tmpdirname, "results.pickle")
    attack_results.save(filepath)
    loaded_results = AttackResults.load(filepath)
    print(loaded_results.summary(by_slices=False))

  # Print attack metrics (for the last attack_results from the loop above).
  for attack_result in attack_results.single_attack_results:
    print("Slice: %s" % attack_result.slice_spec)
    print("Attack type: %s" % attack_result.attack_type)
    print("AUC: %.2f" % attack_result.roc_curve.get_auc())
    print("Attacker advantage: %.2f\n" %
          attack_result.roc_curve.get_attacker_advantage())

  max_auc_attacker = attack_results.get_result_with_max_auc()
  print("Attack type with max AUC: %s, AUC of %.2f" %
        (max_auc_attacker.attack_type, max_auc_attacker.roc_curve.get_auc()))

  max_advantage_attacker = attack_results.get_result_with_max_attacker_advantage(
  )
  print("Attack type with max advantage: %s, Attacker advantage of %.2f" %
        (max_advantage_attacker.attack_type,
         max_advantage_attacker.roc_curve.get_attacker_advantage()))

  # Print summary
  print("Summary without slices: \n")
  print(attack_results.summary(by_slices=False))
  print("Summary by slices: \n")
  print(attack_results.summary(by_slices=True))

  # Print pandas data frame
  print("Pandas frame: \n")
  pd.set_option("display.max_rows", None, "display.max_columns", None)
  print(attack_results.calculate_pd_dataframe())

  # Example of ROC curve plotting.
  figure = plotting.plot_roc_curve(
      attack_results.single_attack_results[0].roc_curve)
  figure.show()
  plt.show()