def _run_attack(attack_input: AttackInputData,
                attack_type: AttackType,
                balance_attacker_training: bool = True,
                min_num_samples: int = 1):
    """Validates the input and dispatches one membership inference attack.

    Args:
      attack_input: data the attack is run on; `validate()` is called on it
        before anything else.
      attack_type: which attack to run.
      balance_attacker_training: forwarded to the trained attack only. Whether
        the attacker's own training and test sets should hold a balanced
        (roughly equal) number of samples from the training and test sets of
        the model under attack.
      min_num_samples: smallest acceptable number of examples in both the
        training and the test data.

    Returns:
      The attack result, or None when the training or the test data contains
      fewer than `min_num_samples` examples.
    """
    attack_input.validate()

    smallest_split = min(attack_input.get_train_size(),
                         attack_input.get_test_size())
    if smallest_split < min_num_samples:
        # Not enough examples on at least one side: skip the attack.
        return None

    if attack_type.is_trained_attack:
        result = _run_trained_attack(attack_input, attack_type,
                                     balance_attacker_training)
    elif attack_type == AttackType.THRESHOLD_ENTROPY_ATTACK:
        result = _run_threshold_entropy_attack(attack_input)
    else:
        # Every remaining attack type falls back to the loss threshold attack.
        result = _run_threshold_attack(attack_input)
    return result
示例#2
0
def create_attacker_data(attack_input_data: AttackInputData,
                         test_fraction: float = 0.25) -> AttackerData:
    """Builds the feature and label sets used to train ML attackers.

    The logits (or probabilities) and the losses of each split are combined
    with `_column_stack`, the two splits are concatenated, and the result is
    randomly divided into an attacker train part and an attacker test part.

    Args:
      attack_input_data: Original AttackInputData.
      test_fraction: Fraction of the combined dataset to put in the test split.

    Returns:
      AttackerData built from the split features and labels.
    """
    member_features = _column_stack(attack_input_data.logits_or_probs_train,
                                    attack_input_data.get_loss_train())
    non_member_features = _column_stack(
        attack_input_data.logits_or_probs_test,
        attack_input_data.get_loss_test())
    features_all = np.concatenate((member_features, non_member_features))

    # Label 0 marks rows from the training data, label 1 rows from the test
    # data of the model under attack.
    member_labels = np.zeros(attack_input_data.get_train_size())
    non_member_labels = np.ones(attack_input_data.get_test_size())
    labels_all = np.concatenate((member_labels, non_member_labels))

    # Random train-test split of the combined data.
    split = model_selection.train_test_split(
        features_all, labels_all, test_size=test_fraction)
    features_train, features_test, labels_train, labels_test = split

    return AttackerData(features_train, labels_train,
                        features_test, labels_test)
示例#3
0
    def test_get_probs_sizes(self):
        """Checks the reported sizes for 2 train rows and 1 test row of probs."""
        probs_train = np.array([[0.1, 0.1, 0.8],
                                [0.8, 0.2, 0]])
        probs_test = np.array([[0, 0.0001, 0.9999]])

        attack_input = AttackInputData(probs_train=probs_train,
                                       probs_test=probs_test,
                                       labels_train=np.array([1, 0]),
                                       labels_test=np.array([0]))

        expected_train_size, expected_test_size = 2, 1
        np.testing.assert_equal(attack_input.get_train_size(),
                                expected_train_size)
        np.testing.assert_equal(attack_input.get_test_size(),
                                expected_test_size)
示例#4
0
def _run_threshold_entropy_attack(attack_input: AttackInputData):
    """Runs a threshold attack that scores examples by their entropy.

    Args:
      attack_input: data providing the train/test sizes and entropies.

    Returns:
      A SingleAttackResult of type THRESHOLD_ENTROPY_ATTACK carrying the ROC
      curve computed from the entropies.
    """
    # Ground truth for the ROC computation: 0 for training rows, 1 for test
    # rows, in the same order as the concatenated entropies below.
    membership_labels = np.concatenate(
        (np.zeros(attack_input.get_train_size()),
         np.ones(attack_input.get_test_size())))
    entropy_scores = np.concatenate(
        (attack_input.get_entropy_train(),
         attack_input.get_entropy_test()))

    fpr, tpr, thresholds = metrics.roc_curve(membership_labels,
                                             entropy_scores)
    entropy_roc = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)

    return SingleAttackResult(
        slice_spec=_get_slice_spec(attack_input),
        attack_type=AttackType.THRESHOLD_ENTROPY_ATTACK,
        roc_curve=entropy_roc)
def _run_threshold_attack(attack_input: AttackInputData):
    """Runs a threshold attack on loss.

    Args:
      attack_input: data providing the train/test sizes and per-example
        losses.

    Returns:
      A SingleAttackResult of type THRESHOLD_ATTACK carrying the data sizes,
      the ROC curve computed from the losses, and the negated losses as
      membership scores.

    Raises:
      ValueError: if the train or test losses are not available (None).
    """
    ntrain, ntest = attack_input.get_train_size(), attack_input.get_test_size()

    # Fetch each loss array once and reuse it for both the ROC computation and
    # the membership scores, instead of calling the getters twice.
    loss_train = attack_input.get_loss_train()
    loss_test = attack_input.get_loss_test()
    if loss_train is None or loss_test is None:
        # Fail with a clear message rather than an obscure error raised from
        # the concatenation or the unary minus below.
        raise ValueError(
            'Not possible to run threshold attack without losses.')

    # Labels: 0 for training rows, 1 for test rows, matching the order of the
    # concatenated losses.
    fpr, tpr, thresholds = metrics.roc_curve(
        np.concatenate((np.zeros(ntrain), np.ones(ntest))),
        np.concatenate((loss_train, loss_test)))

    roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)

    return SingleAttackResult(
        slice_spec=_get_slice_spec(attack_input),
        data_size=DataSize(ntrain=ntrain, ntest=ntest),
        attack_type=AttackType.THRESHOLD_ATTACK,
        membership_scores_train=-loss_train,
        membership_scores_test=-loss_test,
        roc_curve=roc_curve)
示例#6
0
def create_attacker_data(attack_input_data: AttackInputData,
                         test_fraction: float = 0.25,
                         balance: bool = True) -> AttackerData:
    """Builds the feature and label sets used to train ML attackers.

    The logits (or probabilities) and the losses of each split are combined
    with `_column_stack`, optionally balanced, concatenated, and then divided
    by a random split stratified on the membership labels.

    Args:
      attack_input_data: Original AttackInputData.
      test_fraction: Fraction of the combined dataset to put in the test split.
      balance: Whether the training and test sets for the membership inference
        attacker should have a balanced (roughly equal) number of samples from
        the training and test sets used to develop the model under attack.

    Returns:
      AttackerData built from the split features and labels, together with the
      number of train and test rows that went into the split.
    """
    member_features = _column_stack(attack_input_data.logits_or_probs_train,
                                    attack_input_data.get_loss_train())
    non_member_features = _column_stack(
        attack_input_data.logits_or_probs_test,
        attack_input_data.get_loss_test())

    if balance:
        # Both sides are sampled to the size of the smaller one.
        # NOTE(review): exact sampling semantics live in
        # _sample_multidimensional_array, which is not visible here.
        target_size = min(attack_input_data.get_train_size(),
                          attack_input_data.get_test_size())
        member_features = _sample_multidimensional_array(
            member_features, target_size)
        non_member_features = _sample_multidimensional_array(
            non_member_features, target_size)

    ntrain = member_features.shape[0]
    ntest = non_member_features.shape[0]

    features_all = np.concatenate((member_features, non_member_features))
    # Label 0 marks rows from the training data, label 1 rows from the test
    # data of the model under attack.
    labels_all = np.concatenate((np.zeros(ntrain), np.ones(ntest)))

    # Random train-test split, stratified on the membership labels.
    split = model_selection.train_test_split(
        features_all, labels_all, test_size=test_fraction,
        stratify=labels_all)
    features_train, features_test, labels_train, labels_test = split

    return AttackerData(features_train, labels_train,
                        features_test, labels_test,
                        DataSize(ntrain=ntrain, ntest=ntest))
示例#7
0
def _run_threshold_attack(attack_input: AttackInputData):
    """Runs a threshold attack on loss.

    Args:
      attack_input: data providing the train/test sizes and per-example
        losses.

    Returns:
      A SingleAttackResult of type THRESHOLD_ATTACK carrying the data sizes,
      the ROC curve computed from the losses, and the negated losses as
      membership scores.

    Raises:
      ValueError: if the train or test losses are not available (None).
    """
    ntrain, ntest = attack_input.get_train_size(), attack_input.get_test_size()
    loss_train = attack_input.get_loss_train()
    loss_test = attack_input.get_loss_test()
    if loss_train is None or loss_test is None:
        raise ValueError(
            'Not possible to run threshold attack without losses.')

    # Labels: 0 for training rows, 1 for test rows, matching the order of the
    # concatenated losses.
    fpr, tpr, thresholds = metrics.roc_curve(
        np.concatenate((np.zeros(ntrain), np.ones(ntest))),
        np.concatenate((loss_train, loss_test)))

    roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)

    # Reuse the arrays validated above rather than calling the getters again,
    # so the scores are exactly the values that were checked and fed to the
    # ROC computation.
    return SingleAttackResult(
        slice_spec=_get_slice_spec(attack_input),
        data_size=DataSize(ntrain=ntrain, ntest=ntest),
        attack_type=AttackType.THRESHOLD_ATTACK,
        membership_scores_train=-loss_train,
        membership_scores_test=-loss_test,
        roc_curve=roc_curve)