Example #1
 def test_slice_by_correcness(self):
     """Slicing by classification correctness yields exactly two slices."""
     spec = SlicingSpec(
         entire_dataset=False, by_classification_correctness=True)
     slices = get_single_slice_specs(spec)
     self.assertLen(slices, 2)
     want = SingleSliceSpec(SlicingFeature.CORRECTLY_CLASSIFIED, True)
     self.assertTrue(_are_all_fields_equal(slices[0], want))
def run_membership_probability_analysis(
        attack_input: AttackInputData,
        slicing_spec: SlicingSpec = None) -> MembershipProbabilityResults:
    """Performs membership probability analysis on all given slice types.

    Args:
      attack_input: input data for computing membership probabilities.
      slicing_spec: specifies attack_input slices; defaults to a single
        entire-dataset slice when None.

    Returns:
      The membership probability results, one entry per slice.
    """
    attack_input.validate()

    if slicing_spec is None:
        slicing_spec = SlicingSpec(entire_dataset=True)
    # num_classes is only needed when slicing per class.
    num_classes = attack_input.num_classes if slicing_spec.by_class else None

    membership_prob_results = [
        _compute_membership_probability(get_slice(attack_input, spec))
        for spec in get_single_slice_specs(slicing_spec, num_classes)
    ]
    return MembershipProbabilityResults(
        membership_prob_results=membership_prob_results)
Example #3
 def test_slice_by_percentiles(self):
     """Percentile slicing produces ten decile slices in ascending order."""
     spec = SlicingSpec(entire_dataset=False, by_percentiles=True)
     slices = get_single_slice_specs(spec)
     self.assertLen(slices, 10)
     self.assertTrue(_are_all_fields_equal(
         slices[0], SingleSliceSpec(SlicingFeature.PERCENTILE, (0, 10))))
     self.assertTrue(_are_all_fields_equal(
         slices[5], SingleSliceSpec(SlicingFeature.PERCENTILE, (50, 60))))
Example #4
 def test_slice_by_classes(self):
     """Class slicing prepends the entire-dataset slice to per-class ones."""
     n_classes = 5
     spec = SlicingSpec(by_class=True)
     per_class = [
         SingleSliceSpec(SlicingFeature.CLASS, c) for c in range(n_classes)
     ]
     slices = get_single_slice_specs(spec, n_classes)
     self.assertTrue(
         _are_lists_equal(slices, [self.ENTIRE_DATASET_SLICE] + per_class))
  def test_run_attack_by_slice(self):
    """Class slicing yields one result per slice (entire set + 5 classes)."""
    result = mia.run_attacks(
        get_test_input(100, 100), SlicingSpec(by_class=True),
        (AttackType.THRESHOLD_ATTACK,))

    self.assertLen(result.single_attack_results, 6)
    # Index 3 is the third class slice (index 0 is the entire dataset).
    self.assertEqual(result.single_attack_results[3].slice_spec,
                     SingleSliceSpec(SlicingFeature.CLASS, 2))
 def test_run_attack_data_size(self):
   """Data sizes are recorded per slice (full set and a per-class slice)."""
   result = mia.run_attacks(
       get_test_input(100, 80), SlicingSpec(by_class=True),
       (AttackType.THRESHOLD_ATTACK,))
   sizes = [r.data_size for r in result.single_attack_results]
   self.assertEqual(sizes[0], DataSize(ntrain=100, ntest=80))
   self.assertEqual(sizes[3], DataSize(ntrain=20, ntest=16))
Example #7
def main(unused_argv):
    """Trains a CIFAR-10 CNN with in-training membership inference attacks.

    Trains a small Keras CNN on CIFAR-10 with a membership inference
    callback, then runs a final attack on the trained model and prints
    the flattened attack metrics, one per line.
    """
    # Load training and test data.
    x_train, y_train, x_test, y_test = load_cifar10()

    # Get model, optimizer and specify loss.
    model = small_cnn()
    # Fix: use the canonical `learning_rate` keyword — `lr` is a deprecated
    # alias that recent Keras optimizer releases have removed.
    optimizer = tf.keras.optimizers.SGD(learning_rate=FLAGS.learning_rate,
                                        momentum=0.9)
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

    # Get callback for membership inference attack.
    mia_callback = MembershipInferenceCallback(
        (x_train, y_train), (x_test, y_test),
        slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
        attack_types=[
            AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
        ],
        tensorboard_dir=FLAGS.model_dir,
        tensorboard_merge_classifiers=FLAGS.tensorboard_merge_classifiers)

    # Train model with Keras, running the attack callback during training.
    model.fit(x_train,
              y_train,
              epochs=FLAGS.epochs,
              validation_data=(x_test, y_test),
              batch_size=FLAGS.batch_size,
              callbacks=[mia_callback],
              verbose=2)

    # Run the end-of-training attack and print one metric per line.
    print('End of training attack:')
    attack_results = run_attack_on_keras_model(
        model, (x_train, y_train), (x_test, y_test),
        slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
        attack_types=[
            AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
        ])
    att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
        attack_results)
    print('\n'.join([
        '  %s: %.4f' % (', '.join([s, t, m]), v)
        for t, s, m, v in zip(att_types, att_slices, att_metrics, att_values)
    ]))
Example #8
 def test_slicing_by_multiple_features(self):
     """All slicing features combine additively into one slice list."""
     spec = SlicingSpec(entire_dataset=True,
                        by_class=True,
                        by_percentiles=True,
                        by_classification_correctness=True)
     n_classes = 10
     # 1 entire-dataset slice + one per class + 10 percentile slices
     # + 2 classification-correctness slices.
     want = 1 + n_classes + 10 + 2
     self.assertLen(get_single_slice_specs(spec, n_classes), want)
def run_attacks(attack_input: AttackInputData,
                slicing_spec: SlicingSpec = None,
                attack_types: Iterable[AttackType] = (
                    AttackType.THRESHOLD_ATTACK, ),
                privacy_report_metadata: PrivacyReportMetadata = None,
                balance_attacker_training: bool = True,
                min_num_samples: int = 1) -> AttackResults:
    """Runs membership inference attacks on a classification model.

    Every attack in attack_types is run on every attack_input slice
    described by slicing_spec.

    Args:
      attack_input: input data for running an attack.
      slicing_spec: specifies attack_input slices to run attack on.
      attack_types: attacks to run.
      privacy_report_metadata: the metadata of the model under attack.
      balance_attacker_training: whether the training and test sets for the
        membership inference attacker should have a balanced (roughly equal)
        number of samples from the training and test sets used to develop
        the model under attack.
      min_num_samples: minimum number of examples in either training or test
        data.

    Returns:
      The attack result.
    """
    attack_input.validate()

    if slicing_spec is None:
        slicing_spec = SlicingSpec(entire_dataset=True)
    # num_classes is only needed when slicing per class.
    num_classes = attack_input.num_classes if slicing_spec.by_class else None
    slice_specs = get_single_slice_specs(slicing_spec, num_classes)

    attack_results = []
    for slice_spec in slice_specs:
        sliced_input = get_slice(attack_input, slice_spec)
        for attack_type in attack_types:
            result = _run_attack(sliced_input, attack_type,
                                 balance_attacker_training, min_num_samples)
            # Skip attacks that could not be run (signalled by None).
            if result is not None:
                attack_results.append(result)

    privacy_report_metadata = _compute_missing_privacy_report_metadata(
        privacy_report_metadata, attack_input)

    return AttackResults(single_attack_results=attack_results,
                         privacy_report_metadata=privacy_report_metadata)
Example #10
def run_attacks(
    attack_input: AttackInputData,
    slicing_spec: SlicingSpec = None,
    attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK, )
) -> AttackResults:
    """Run all attacks."""
    attack_input.validate()

    if slicing_spec is None:
        slicing_spec = SlicingSpec(entire_dataset=True)

    # One result per (slice, attack type) pair.
    attack_results = [
        run_attack(get_slice(attack_input, spec), attack_type)
        for spec in get_single_slice_specs(slicing_spec,
                                           attack_input.num_classes)
        for attack_type in attack_types
    ]
    return AttackResults(single_attack_results=attack_results)
Example #11
def main(unused_argv):
    """Trains an MNIST Keras model, then attacks it after training."""
    # Load training and test data.
    train_data, train_labels, test_data, test_labels = load_mnist()

    # Build and compile the model.
    model = cnn_model()
    model.compile(
        optimizer=GradientDescentOptimizer(learning_rate=FLAGS.learning_rate),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy'])

    # Callback that runs membership inference attacks during training.
    mia_callback = MembershipInferenceCallback(
        (train_data, train_labels), (test_data, test_labels),
        attack_types=[AttackType.THRESHOLD_ATTACK],
        tensorboard_dir=FLAGS.model_dir)

    # Train model with Keras, attack callback attached.
    model.fit(
        train_data,
        train_labels,
        epochs=FLAGS.epochs,
        validation_data=(test_data, test_labels),
        batch_size=FLAGS.batch_size,
        callbacks=[mia_callback],
        verbose=2)

    # Run the end-of-training attack and print one metric per line.
    print('End of training attack:')
    attack_results = run_attack_on_keras_model(
        model, (train_data, train_labels), (test_data, test_labels),
        slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
        attack_types=[
            AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
        ])

    attack_properties, attack_values = get_flattened_attack_metrics(
        attack_results)
    print('\n'.join([
        '  %s: %.4f' % (', '.join(p), r)
        for p, r in zip(attack_properties, attack_values)
    ]))
Example #12
def run_attacks(
        attack_input: AttackInputData,
        slicing_spec: SlicingSpec = None,
        attack_types: Iterable[AttackType] = (AttackType.THRESHOLD_ATTACK, ),
        privacy_report_metadata: PrivacyReportMetadata = None
) -> AttackResults:
    """Runs membership inference attacks on a classification model.

    Every attack in attack_types is run on every attack_input slice
    described by slicing_spec.

    Args:
      attack_input: input data for running an attack.
      slicing_spec: specifies attack_input slices to run attack on.
      attack_types: attacks to run.
      privacy_report_metadata: the metadata of the model under attack.

    Returns:
      The attack result.
    """
    attack_input.validate()

    if slicing_spec is None:
        slicing_spec = SlicingSpec(entire_dataset=True)

    slice_specs = get_single_slice_specs(slicing_spec,
                                         attack_input.num_classes)
    # One result per (slice, attack type) pair.
    attack_results = [
        _run_attack(get_slice(attack_input, spec), attack_type)
        for spec in slice_specs
        for attack_type in attack_types
    ]

    privacy_report_metadata = _compute_missing_privacy_report_metadata(
        privacy_report_metadata, attack_input)

    return AttackResults(single_attack_results=attack_results,
                         privacy_report_metadata=privacy_report_metadata)
    def test_run_attacks_size(self):
        """Two attack types on the default slice yield two results."""
        result = mia.run_attacks(
            get_test_input(100, 100), SlicingSpec(),
            (AttackType.THRESHOLD_ATTACK, AttackType.LOGISTIC_REGRESSION))
        self.assertLen(result.single_attack_results, 2)
Example #14
 def test_no_slices(self):
     """A SlicingSpec with every feature disabled yields no slices."""
     output = get_single_slice_specs(SlicingSpec(entire_dataset=False))
     self.assertTrue(_are_lists_equal(output, []))
    def test_trained_attacks_logits_only_size(self):
        """A trained attack runs on logits-only input and yields one result."""
        result = mia.run_attacks(get_test_input_logits_only(100, 100),
                                 SlicingSpec(),
                                 (AttackType.LOGISTIC_REGRESSION, ))
        self.assertLen(result.single_attack_results, 1)
Example #16
def crossentropy(true_labels, predictions):
  """Per-example binary cross-entropy between one-hot labels and predictions.

  NOTE(review): `num_clusters` is a module-level name defined elsewhere.
  """
  one_hot_labels = keras.backend.variable(
      to_categorical(true_labels, num_clusters))
  return keras.backend.eval(
      keras.losses.binary_crossentropy(one_hot_labels,
                                       keras.backend.variable(predictions)))


# Run threshold and logistic-regression attacks on the entire dataset and on
# each class slice; losses are computed with the crossentropy helper above.
attack_results = mia.run_attacks(
    AttackInputData(
        labels_train=training_labels,
        labels_test=test_labels,
        logits_train=training_pred,
        logits_test=test_pred,
        loss_train=crossentropy(training_labels, training_pred),
        loss_test=crossentropy(test_labels, test_pred)),
    SlicingSpec(entire_dataset=True, by_class=True),
    attack_types=(AttackType.THRESHOLD_ATTACK, AttackType.LOGISTIC_REGRESSION))

# Example of saving the results to the file and loading them back.
with tempfile.TemporaryDirectory() as tmpdirname:
  filepath = os.path.join(tmpdirname, "results.pickle")
  attack_results.save(filepath)
  loaded_results = AttackResults.load(filepath)

# Print attack metrics
for attack_result in attack_results.single_attack_results:
  print("Slice: %s" % attack_result.slice_spec)
  print("Attack type: %s" % attack_result.attack_type)
  print("AUC: %.2f" % attack_result.roc_curve.get_auc())

  # NOTE(review): this statement is truncated in this excerpt — the argument
  # to `%` (presumably roc_curve.get_attacker_advantage()) is missing.
  print("Attacker advantage: %.2f\n" %
def main(unused_argv):
    """Trains an MNIST tf.Estimator with membership inference attacks.

    Trains a CNN with a membership-inference training hook, evaluates after
    each epoch, then runs a final attack on the trained model and prints the
    attack metrics one per line.
    """
    # Quiet TF/absl logging and route absl logs to a file.
    tf.logging.set_verbosity(tf.logging.ERROR)
    logging.set_verbosity(logging.ERROR)
    logging.set_stderrthreshold(logging.ERROR)
    logging.get_absl_handler().use_absl_log_file()

    # Load training and test data.
    train_data, train_labels, test_data, test_labels = load_mnist()

    # Instantiate the tf.Estimator.
    mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn,
                                              model_dir=FLAGS.model_dir)

    # A function to construct input_fn given (data, label), to be used by the
    # membership inference training hook.
    def input_fn_constructor(x, y):
        return tf.estimator.inputs.numpy_input_fn(x={'x': x},
                                                  y=y,
                                                  shuffle=False)

    with tf.Graph().as_default():
        # Get a summary writer for the hook to write to tensorboard.
        # Can set summary_writer to None if not needed.
        if FLAGS.model_dir:
            summary_writer = tf.summary.FileWriter(FLAGS.model_dir)
        else:
            summary_writer = None
        mia_hook = MembershipInferenceTrainingHook(
            mnist_classifier, (train_data, train_labels),
            (test_data, test_labels),
            input_fn_constructor,
            attack_types=[AttackType.THRESHOLD_ATTACK],
            writer=summary_writer)

    # Create tf.Estimator input functions for the training and test data.
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'x': train_data},
        y=train_labels,
        batch_size=FLAGS.batch_size,
        num_epochs=FLAGS.epochs,
        shuffle=True)
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(x={'x': test_data},
                                                       y=test_labels,
                                                       num_epochs=1,
                                                       shuffle=False)

    # Training loop.
    # 60000 is presumably the MNIST training-set size, i.e. one full pass
    # per epoch — TODO confirm against load_mnist().
    steps_per_epoch = 60000 // FLAGS.batch_size
    for epoch in range(1, FLAGS.epochs + 1):
        # Train the model, with the membership inference hook.
        mnist_classifier.train(input_fn=train_input_fn,
                               steps=steps_per_epoch,
                               hooks=[mia_hook])

        # Evaluate the model and print results
        eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
        test_accuracy = eval_results['accuracy']
        print('Test accuracy after %d epochs is: %.3f' %
              (epoch, test_accuracy))

    # Final attack on the trained model; metrics are printed one per line.
    print('End of training attack')
    attack_results = run_attack_on_tf_estimator_model(
        mnist_classifier, (train_data, train_labels), (test_data, test_labels),
        input_fn_constructor,
        slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
        attack_types=[
            AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
        ])
    attack_properties, attack_values = get_all_attack_results(attack_results)
    print('\n'.join([
        '  %s: %.4f' % (', '.join(p), r)
        for p, r in zip(attack_properties, attack_values)
    ]))
Example #18
 def test_entire_dataset(self):
     """The default SlicingSpec yields only the entire-dataset slice."""
     output = get_single_slice_specs(SlicingSpec())
     self.assertTrue(_are_lists_equal(output, [self.ENTIRE_DATASET_SLICE]))
def main(unused_argv):
    """Trains a CIFAR-10 tf.Estimator with membership inference attacks.

    Trains a small CNN with a membership-inference training hook, evaluates
    after each epoch, then runs a final attack on the trained model and
    prints the flattened attack metrics one per line.
    """
    # Quiet TF/absl logging and route absl logs to a file.
    tf.logging.set_verbosity(tf.logging.ERROR)
    logging.set_verbosity(logging.ERROR)
    logging.set_stderrthreshold(logging.ERROR)
    logging.get_absl_handler().use_absl_log_file()

    # Load training and test data.
    x_train, y_train, x_test, y_test = load_cifar10()

    # Instantiate the tf.Estimator.
    classifier = tf.estimator.Estimator(model_fn=small_cnn_fn,
                                        model_dir=FLAGS.model_dir)

    # A function to construct input_fn given (data, label), to be used by the
    # membership inference training hook.
    def input_fn_constructor(x, y):
        return tf.estimator.inputs.numpy_input_fn(x={'x': x},
                                                  y=y,
                                                  shuffle=False)

    # Get hook for membership inference attack.
    mia_hook = MembershipInferenceTrainingHook(
        classifier, (x_train, y_train), (x_test, y_test),
        input_fn_constructor,
        slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
        attack_types=[
            AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
        ],
        tensorboard_dir=FLAGS.model_dir,
        tensorboard_merge_classifiers=FLAGS.tensorboard_merge_classifiers)

    # Create tf.Estimator input functions for the training and test data.
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'x': x_train},
        y=y_train,
        batch_size=FLAGS.batch_size,
        num_epochs=FLAGS.epochs,
        shuffle=True)
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(x={'x': x_test},
                                                       y=y_test,
                                                       num_epochs=1,
                                                       shuffle=False)

    # Training loop.
    # 50000 is presumably the CIFAR-10 training-set size, i.e. one full
    # pass per epoch — TODO confirm against load_cifar10().
    steps_per_epoch = 50000 // FLAGS.batch_size
    for epoch in range(1, FLAGS.epochs + 1):
        # Train the model, with the membership inference hook.
        classifier.train(input_fn=train_input_fn,
                         steps=steps_per_epoch,
                         hooks=[mia_hook])

        # Evaluate the model and print results
        eval_results = classifier.evaluate(input_fn=eval_input_fn)
        test_accuracy = eval_results['accuracy']
        print('Test accuracy after %d epochs is: %.3f' %
              (epoch, test_accuracy))

    # Final attack on the trained model; metrics are printed one per line.
    print('End of training attack')
    attack_results = run_attack_on_tf_estimator_model(
        classifier, (x_train, y_train), (x_test, y_test),
        input_fn_constructor,
        slicing_spec=SlicingSpec(entire_dataset=True, by_class=True),
        attack_types=[
            AttackType.THRESHOLD_ATTACK, AttackType.K_NEAREST_NEIGHBORS
        ])
    att_types, att_slices, att_metrics, att_values = get_flattened_attack_metrics(
        attack_results)
    print('\n'.join([
        '  %s: %.4f' % (', '.join([s, t, m]), v)
        for t, s, m, v in zip(att_types, att_slices, att_metrics, att_values)
    ]))
Example #20
def main(unused_argv):
    """Trains two models, attacking each periodically, then reports privacy.

    NOTE(review): `two_layer_model`, `three_layer_model`, the training/test
    data, `num_clusters`, and `crossentropy` are module-level names defined
    elsewhere in the file.
    """
    epoch_results = AttackResultsCollection([])

    num_epochs = 2
    models = {
        "two layer model": two_layer_model,
        "three layer model": three_layer_model,
    }
    for model_name in models:
        # Incrementally train the model and store privacy metrics every num_epochs.
        for i in range(1, 6):
            models[model_name].fit(
                training_features,
                to_categorical(training_labels, num_clusters),
                validation_data=(test_features,
                                 to_categorical(test_labels, num_clusters)),
                batch_size=64,
                epochs=num_epochs,
                shuffle=True)

            training_pred = models[model_name].predict(training_features)
            test_pred = models[model_name].predict(test_features)

            # Add metadata to generate a privacy report.
            privacy_report_metadata = PrivacyReportMetadata(
                accuracy_train=metrics.accuracy_score(
                    training_labels, np.argmax(training_pred, axis=1)),
                accuracy_test=metrics.accuracy_score(
                    test_labels, np.argmax(test_pred, axis=1)),
                epoch_num=num_epochs * i,
                model_variant_label=model_name)

            # Attack the current snapshot on the entire dataset and per class.
            attack_results = mia.run_attacks(
                AttackInputData(labels_train=training_labels,
                                labels_test=test_labels,
                                probs_train=training_pred,
                                probs_test=test_pred,
                                loss_train=crossentropy(
                                    training_labels, training_pred),
                                loss_test=crossentropy(test_labels,
                                                       test_pred)),
                SlicingSpec(entire_dataset=True, by_class=True),
                attack_types=(AttackType.THRESHOLD_ATTACK,
                              AttackType.LOGISTIC_REGRESSION),
                privacy_report_metadata=privacy_report_metadata)
            epoch_results.append(attack_results)

    # Generate privacy reports
    epoch_figure = privacy_report.plot_by_epochs(
        epoch_results, [PrivacyMetric.ATTACKER_ADVANTAGE, PrivacyMetric.AUC])
    epoch_figure.show()
    privacy_utility_figure = privacy_report.plot_privacy_vs_accuracy_single_model(
        epoch_results, [PrivacyMetric.ATTACKER_ADVANTAGE, PrivacyMetric.AUC])
    privacy_utility_figure.show()

    # Example of saving the results to the file and loading them back.
    # Note: only the last `attack_results` (final model, final round) is saved.
    with tempfile.TemporaryDirectory() as tmpdirname:
        filepath = os.path.join(tmpdirname, "results.pickle")
        attack_results.save(filepath)
        loaded_results = AttackResults.load(filepath)
        print(loaded_results.summary(by_slices=False))

    # Print attack metrics
    for attack_result in attack_results.single_attack_results:
        print("Slice: %s" % attack_result.slice_spec)
        print("Attack type: %s" % attack_result.attack_type)
        print("AUC: %.2f" % attack_result.roc_curve.get_auc())

        print("Attacker advantage: %.2f\n" %
              attack_result.roc_curve.get_attacker_advantage())

    # Report the single strongest attacks by AUC and by attacker advantage.
    max_auc_attacker = attack_results.get_result_with_max_auc()
    print("Attack type with max AUC: %s, AUC of %.2f" %
          (max_auc_attacker.attack_type, max_auc_attacker.roc_curve.get_auc()))

    max_advantage_attacker = attack_results.get_result_with_max_attacker_advantage(
    )
    print("Attack type with max advantage: %s, Attacker advantage of %.2f" %
          (max_advantage_attacker.attack_type,
           max_advantage_attacker.roc_curve.get_attacker_advantage()))

    # Print summary
    print("Summary without slices: \n")
    print(attack_results.summary(by_slices=False))

    print("Summary by slices: \n")
    print(attack_results.summary(by_slices=True))

    # Print pandas data frame
    print("Pandas frame: \n")
    pd.set_option("display.max_rows", None, "display.max_columns", None)
    print(attack_results.calculate_pd_dataframe())

    # Example of ROC curve plotting.
    figure = plotting.plot_roc_curve(
        attack_results.single_attack_results[0].roc_curve)
    figure.show()
    plt.show()