Example #1
  def test_get_customized_loss(self, loss_function_using_logits, expected_train,
                               expected_test):

    def fake_loss(x, y):
      return 2 * x + y

    attack_input = AttackInputData(
        logits_train=np.array([
            123.,
        ]),
        logits_test=np.array([
            123.,
        ]),
        probs_train=np.array([
            456.,
        ]),
        probs_test=np.array([
            456.,
        ]),
        labels_train=np.array([1.]),
        labels_test=np.array([-1.]),
        loss_function=fake_loss,
        loss_function_using_logits=loss_function_using_logits,
    )
    np.testing.assert_allclose(attack_input.get_loss_train(), expected_train)
    np.testing.assert_allclose(attack_input.get_loss_test(), expected_test)
Example #2
def _compute_membership_probability(
        attack_input: AttackInputData,
        num_bins: int = 15) -> SingleMembershipProbabilityResult:
    """Computes each individual point's likelihood of being a member (denoted as privacy risk score in https://arxiv.org/abs/2003.10595).

  For an individual sample, its privacy risk score is computed as the posterior
  probability of being in the training set
  after observing its prediction output by the target machine learning model.

  Args:
    attack_input: input data for compute membership probability
    num_bins: the number of bins used to compute the training/test histogram

  Returns:
    membership probability results
  """

    # Uses the provided loss or entropy. Otherwise computes the loss.
    if attack_input.loss_train is not None and attack_input.loss_test is not None:
        train_values = attack_input.loss_train
        test_values = attack_input.loss_test
    elif attack_input.entropy_train is not None and attack_input.entropy_test is not None:
        train_values = attack_input.entropy_train
        test_values = attack_input.entropy_test
    else:
        train_values = attack_input.get_loss_train()
        test_values = attack_input.get_loss_test()

    # Compute the histogram in the log scale
    small_value = 1e-10
    train_values = np.maximum(train_values, small_value)
    test_values = np.maximum(test_values, small_value)

    min_value = min(train_values.min(), test_values.min())
    max_value = max(train_values.max(), test_values.max())
    bins_hist = np.logspace(np.log10(min_value), np.log10(max_value),
                            num_bins + 1)

    train_hist, _ = np.histogram(train_values, bins=bins_hist)
    train_hist = train_hist / (len(train_values) + 0.0)
    train_hist_indices = np.fmin(np.digitize(train_values, bins=bins_hist),
                                 num_bins) - 1

    test_hist, _ = np.histogram(test_values, bins=bins_hist)
    test_hist = test_hist / (len(test_values) + 0.0)
    test_hist_indices = np.fmin(np.digitize(test_values, bins=bins_hist),
                                num_bins) - 1

    combined_hist = train_hist + test_hist
    combined_hist[combined_hist == 0] = small_value
    membership_prob_list = train_hist / (combined_hist + 0.0)
    train_membership_probs = membership_prob_list[train_hist_indices]
    test_membership_probs = membership_prob_list[test_hist_indices]

    return SingleMembershipProbabilityResult(
        slice_spec=_get_slice_spec(attack_input),
        train_membership_probs=train_membership_probs,
        test_membership_probs=test_membership_probs)
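The core of the computation above is a histogram ratio: a sample's membership probability is the normalized training-histogram mass of its loss bin divided by the combined training-plus-test mass of that bin. Below is a minimal, self-contained sketch of that idea on toy loss arrays; the library's slice and result classes are not reproduced.

import numpy as np

# Toy illustration of the histogram-ratio idea in _compute_membership_probability.
train_losses = np.array([0.05, 0.1, 0.2, 0.3])
test_losses = np.array([0.4, 0.8, 1.5, 3.0])
num_bins = 4

# Log-spaced bins spanning both sets, as in the function above.
lo = min(train_losses.min(), test_losses.min())
hi = max(train_losses.max(), test_losses.max())
bins = np.logspace(np.log10(lo), np.log10(hi), num_bins + 1)

# Normalized histograms and their per-bin ratio.
train_hist = np.histogram(train_losses, bins=bins)[0] / len(train_losses)
test_hist = np.histogram(test_losses, bins=bins)[0] / len(test_losses)
combined = train_hist + test_hist
combined[combined == 0] = 1e-10  # avoid division by zero in empty bins
per_bin_prob = train_hist / combined

# Each training sample is assigned the probability of its own bin.
train_bin_idx = np.fmin(np.digitize(train_losses, bins=bins), num_bins) - 1
print(per_bin_prob[train_bin_idx])  # [1. 1. 1. 1.]: the low-loss bins contain only training points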
Example #3
  def test_get_loss_explicitly_provided(self):
    attack_input = AttackInputData(
        loss_train=np.array([1.0, 3.0, 6.0]),
        loss_test=np.array([1.0, 4.0, 6.0]))

    np.testing.assert_equal(attack_input.get_loss_train().tolist(),
                            [1.0, 3.0, 6.0])
    np.testing.assert_equal(attack_input.get_loss_test().tolist(),
                            [1.0, 4.0, 6.0])
Example #4
  def test_get_xe_loss_from_probs(self):
    attack_input = AttackInputData(
        probs_train=np.array([[0.1, 0.1, 0.8], [0.8, 0.2, 0]]),
        probs_test=np.array([[0, 0.0001, 0.9999], [0.07, 0.18, 0.75]]),
        labels_train=np.array([1, 0]),
        labels_test=np.array([0, 2]))

    np.testing.assert_allclose(
        attack_input.get_loss_train(), [2.30258509, 0.2231436], atol=1e-7)
    np.testing.assert_allclose(
        attack_input.get_loss_test(), [18.42068074, 0.28768207], atol=1e-7)
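The expected values above are the negative log of the probability assigned to the true class; probabilities of exactly zero are clipped so the log stays finite (18.42 ≈ -ln(1e-8), so a clip of about 1e-8 is assumed here). A quick check with plain numpy:

import numpy as np

# Multiclass cross-entropy from probabilities: -log(p[true_label]),
# with tiny probabilities clipped (1e-8 assumed) to keep the log finite.
probs_test = np.array([[0, 0.0001, 0.9999], [0.07, 0.18, 0.75]])
labels_test = np.array([0, 2])
p_true = np.maximum(probs_test[np.arange(len(labels_test)), labels_test], 1e-8)
print(-np.log(p_true))  # ~[18.4207, 0.2877], matching the assertion above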
Example #5
  def test_get_xe_loss_from_logits(self):
    attack_input = AttackInputData(
        logits_train=np.array([[-0.3, 1.5, 0.2], [2, 3, 0.5]]),
        logits_test=np.array([[2, 0.3, 0.2], [0.3, -0.5, 0.2]]),
        labels_train=np.array([1, 0]),
        labels_test=np.array([0, 2]))

    np.testing.assert_allclose(
        attack_input.get_loss_train(), [0.36313551, 1.37153903], atol=1e-7)
    np.testing.assert_allclose(
        attack_input.get_loss_test(), [0.29860897, 0.95618669], atol=1e-7)
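For logits, the expected values follow from the negative log-softmax of the true-class logit. A quick numerically stable check with plain numpy (not the library's helper):

import numpy as np

# Multiclass cross-entropy from logits: -log(softmax(logits)[true_label]).
logits_train = np.array([[-0.3, 1.5, 0.2], [2, 3, 0.5]])
labels_train = np.array([1, 0])
shifted = logits_train - logits_train.max(axis=1, keepdims=True)  # for stability
log_softmax = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
print(-log_softmax[np.arange(len(labels_train)), labels_train])  # ~[0.3631, 1.3715]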
Example #6
  def test_get_binary_xe_loss_from_logits(self):
    attack_input = AttackInputData(
        logits_train=np.array([-10, -5, 0., 5, 10]),
        logits_test=np.array([-10, -5, 0., 5, 10]),
        labels_train=np.zeros((5,)),
        labels_test=np.ones((5,)),
        loss_function_using_logits=True)
    expected_loss0 = np.array([0.000045398, 0.006715348, 0.6931471825, 5, 10])
    np.testing.assert_allclose(
        attack_input.get_loss_train(), expected_loss0, rtol=1e-2)
    np.testing.assert_allclose(
        attack_input.get_loss_test(), expected_loss0[::-1], rtol=1e-2)
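The expected values here are binary cross-entropy computed directly from logits: log(1 + exp(z)) for label 0 and log(1 + exp(-z)) for label 1, which is why the test losses are just the train losses reversed. A quick check:

import numpy as np

# Binary cross-entropy from a logit z: log(1 + exp(z)) for y = 0,
# log(1 + exp(-z)) for y = 1 (computed stably with logaddexp).
logits = np.array([-10., -5., 0., 5., 10.])
print(np.logaddexp(0, logits))   # y = 0: ~[4.5e-05, 0.0067, 0.693, 5.007, 10.0]
print(np.logaddexp(0, -logits))  # y = 1: the reverse, since the logits are symmetric about 0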
Example #7
def _run_threshold_attack(attack_input: AttackInputData):
    """Runs a threshold attack on loss."""
    ntrain, ntest = attack_input.get_train_size(), attack_input.get_test_size()
    loss_train = attack_input.get_loss_train()
    loss_test = attack_input.get_loss_test()
    if loss_train is None or loss_test is None:
        raise ValueError(
            'Not possible to run threshold attack without losses.')
    fpr, tpr, thresholds = metrics.roc_curve(
        np.concatenate((np.zeros(ntrain), np.ones(ntest))),
        np.concatenate((loss_train, loss_test)))

    roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)

    return SingleAttackResult(
        slice_spec=_get_slice_spec(attack_input),
        data_size=DataSize(ntrain=ntrain, ntest=ntest),
        attack_type=AttackType.THRESHOLD_ATTACK,
        membership_scores_train=attack_input.get_loss_train(),
        membership_scores_test=attack_input.get_loss_test(),
        roc_curve=roc_curve)
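The labels passed to roc_curve encode train examples as 0 and test examples as 1, so the curve measures how well a single threshold on the loss separates members (typically lower loss) from non-members; RocCurve, SingleAttackResult and _get_slice_spec are library internals. A toy sketch of the sklearn call on made-up losses:

import numpy as np
from sklearn import metrics

# Toy threshold attack: label train samples 0, test samples 1, score by loss.
loss_train = np.array([0.1, 0.2, 0.3, 0.5])
loss_test = np.array([0.4, 0.9, 1.1, 2.0])
labels = np.concatenate((np.zeros(len(loss_train)), np.ones(len(loss_test))))
scores = np.concatenate((loss_train, loss_test))
fpr, tpr, thresholds = metrics.roc_curve(labels, scores)
print(metrics.auc(fpr, tpr))  # ~0.94: a loss threshold separates the two sets well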
Example #8
def create_attacker_data(attack_input_data: data_structures.AttackInputData,
                         balance: bool = True) -> AttackerData:
    """Prepare AttackInputData to train ML attackers.

  Combines logits and losses and performs a random train-test split.

  Args:
    attack_input_data: Original AttackInputData
    balance: Whether the training and test sets for the membership inference
      attacker should have a balanced (roughly equal) number of samples from the
      training and test sets used to develop the model under attack.

  Returns:
    AttackerData.
  """
    attack_input_train = _column_stack(attack_input_data.logits_or_probs_train,
                                       attack_input_data.get_loss_train())
    attack_input_test = _column_stack(attack_input_data.logits_or_probs_test,
                                      attack_input_data.get_loss_test())

    ntrain, ntest = attack_input_train.shape[0], attack_input_test.shape[0]
    features_all = np.concatenate((attack_input_train, attack_input_test))
    labels_all = np.concatenate((np.zeros(ntrain), np.ones(ntest)))

    fold_indices = np.arange(ntrain + ntest)
    left_out_indices = np.asarray([], dtype=np.int32)

    if balance:
        idx_train, idx_test = range(ntrain), range(ntrain, ntrain + ntest)
        min_size = min(ntrain, ntest)
        if ntrain > min_size:
            left_out_size = ntrain - min_size
            perm_train = np.random.permutation(idx_train)  # shuffle training
            left_out_indices = perm_train[:left_out_size]
            fold_indices = np.concatenate(
                (perm_train[left_out_size:], idx_test))
        elif ntest > min_size:
            left_out_size = ntest - min_size
            perm_test = np.random.permutation(idx_test)  # shuffle test
            left_out_indices = perm_test[:left_out_size]
            fold_indices = np.concatenate(
                (perm_test[left_out_size:], idx_train))

    # Shuffle indices for the downstream attackers.
    fold_indices = np.random.permutation(fold_indices)

    return AttackerData(features_all=features_all,
                        labels_all=labels_all,
                        fold_indices=fold_indices,
                        left_out_indices=left_out_indices,
                        data_size=data_structures.DataSize(ntrain=ntrain,
                                                           ntest=ntest))
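The balancing branch leaves out a random subset of whichever side is larger, so the folds seen by the attackers contain roughly equal numbers of train and test examples; the dropped indices are kept in left_out_indices. A stripped-down sketch of just that index bookkeeping (ntrain=5, ntest=3 chosen arbitrarily):

import numpy as np

# Index bookkeeping of the balancing step: drop (ntrain - ntest) random
# train indices, keep the rest plus all test indices for the folds.
ntrain, ntest = 5, 3
idx_train = np.arange(ntrain)
idx_test = np.arange(ntrain, ntrain + ntest)
min_size = min(ntrain, ntest)
perm_train = np.random.permutation(idx_train)
left_out_indices = perm_train[:ntrain - min_size]
fold_indices = np.concatenate((perm_train[ntrain - min_size:], idx_test))
print(len(left_out_indices), len(fold_indices))  # 2 6: balanced folds, 2 train rows left out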
Example #9
  def test_default_loss_function_using_logits(self, logits, probs, expected):
    """Tests for `loss_function_using_logits = None`. Should prefer logits."""
    attack_input = AttackInputData(
        logits_train=logits,
        logits_test=logits,
        probs_train=probs,
        probs_test=probs,
        labels_train=np.array([1, 0.]),
        labels_test=np.array([1, 0.]),
        loss_function=LossFunction.SQUARED,
    )
    np.testing.assert_allclose(attack_input.get_loss_train(), expected)
    np.testing.assert_allclose(attack_input.get_loss_test(), expected)
Example #10
  def test_get_squared_loss(self, loss_function_using_logits, expected_train,
                            expected_test):
    attack_input = AttackInputData(
        logits_train=np.array([0, 0.]),
        logits_test=np.array([0, 0.]),
        probs_train=np.array([1, 1.]),
        probs_test=np.array([1, 1.]),
        labels_train=np.array([1, 0.]),
        labels_test=np.array([0, 2.]),
        loss_function=LossFunction.SQUARED,
        loss_function_using_logits=loss_function_using_logits,
    )
    np.testing.assert_allclose(attack_input.get_loss_train(), expected_train)
    np.testing.assert_allclose(attack_input.get_loss_test(), expected_test)
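The parameterized expectations (not shown in this snippet) are presumably the elementwise squared error (prediction − label)², taken against the logits or the probs depending on loss_function_using_logits. Under that assumption, the values work out as follows:

import numpy as np

# Assuming LossFunction.SQUARED is elementwise (prediction - label)**2:
labels_train, labels_test = np.array([1, 0.]), np.array([0, 2.])
logits, probs = np.array([0, 0.]), np.array([1, 1.])
print((logits - labels_train) ** 2, (logits - labels_test) ** 2)  # [1. 0.] [0. 4.]
print((probs - labels_train) ** 2, (probs - labels_test) ** 2)    # [0. 1.] [1. 1.]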
Example #11
def _slice_by_percentiles(data: AttackInputData, from_percentile: float,
                          to_percentile: float):
    """Slices samples by loss percentiles."""

    # Find from_percentile and to_percentile percentiles in losses.
    loss_train = data.get_loss_train()
    loss_test = data.get_loss_test()
    losses = np.concatenate((loss_train, loss_test))
    from_loss = np.percentile(losses, from_percentile)
    to_loss = np.percentile(losses, to_percentile)

    idx_train = (from_loss <= loss_train) & (loss_train <= to_loss)
    idx_test = (from_loss <= loss_test) & (loss_test <= to_loss)

    return _slice_data_by_indices(data, idx_train, idx_test)
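The slicer keeps only samples whose loss lies between the two requested percentiles of the combined train-plus-test loss distribution; _slice_data_by_indices (a library internal) then applies the resulting boolean masks. A toy run of the masking logic, with a 0–50 percentile range chosen for illustration:

import numpy as np

# Keep only samples whose loss falls in the lower half of all losses.
loss_train = np.array([0.1, 0.4, 0.9, 2.0])
loss_test = np.array([0.2, 0.6, 1.5, 3.0])
losses = np.concatenate((loss_train, loss_test))
from_loss, to_loss = np.percentile(losses, 0), np.percentile(losses, 50)
idx_train = (from_loss <= loss_train) & (loss_train <= to_loss)
idx_test = (from_loss <= loss_test) & (loss_test <= to_loss)
print(idx_train, idx_test)  # [ True  True False False] [ True  True False False]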
Example #12
def _compute_missing_privacy_report_metadata(
        metadata: PrivacyReportMetadata,
        attack_input: AttackInputData) -> PrivacyReportMetadata:
    """Populates metadata fields if they are missing."""
    if metadata is None:
        metadata = PrivacyReportMetadata()
    if metadata.accuracy_train is None:
        metadata.accuracy_train = _get_accuracy(attack_input.logits_train,
                                                attack_input.labels_train)
    if metadata.accuracy_test is None:
        metadata.accuracy_test = _get_accuracy(attack_input.logits_test,
                                               attack_input.labels_test)
    loss_train = attack_input.get_loss_train()
    loss_test = attack_input.get_loss_test()
    if metadata.loss_train is None and loss_train is not None:
        metadata.loss_train = np.average(loss_train)
    if metadata.loss_test is None and loss_test is not None:
        metadata.loss_test = np.average(loss_test)
    return metadata
Example #13
  def test_get_binary_xe_loss_from_probs(self):
    attack_input = AttackInputData(
        probs_train=np.array([0.2, 0.7, 0.1, 0.99, 0.002, 0.008]),
        probs_test=np.array([0.2, 0.7, 0.1, 0.99, 0.002, 0.008]),
        labels_train=np.zeros((6,)),
        labels_test=np.ones((6,)),
        loss_function_using_logits=False)

    expected_loss0 = np.array([
        0.2231435513, 1.2039728043, 0.1053605157, 4.6051701860, 0.0020020027,
        0.0080321717
    ])
    expected_loss1 = np.array([
        1.6094379124, 0.3566749439, 2.3025850930, 0.0100503359, 6.2146080984,
        4.8283137373
    ])
    np.testing.assert_allclose(
        attack_input.get_loss_train(), expected_loss0, atol=1e-7)
    np.testing.assert_allclose(
        attack_input.get_loss_test(), expected_loss1, atol=1e-7)
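As a final sanity check, the expected losses above are the standard binary cross-entropy of a probability p under label y, i.e. −(y·log p + (1−y)·log(1−p)), which reduces to −log(1−p) for the all-zero train labels and −log p for the all-one test labels:

import numpy as np

# Binary cross-entropy from probabilities: -log(1 - p) for y = 0, -log(p) for y = 1.
probs = np.array([0.2, 0.7, 0.1, 0.99, 0.002, 0.008])
print(-np.log(1 - probs))  # matches expected_loss0 (labels_train are all zeros)
print(-np.log(probs))      # matches expected_loss1 (labels_test are all ones)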