def test_attacker_advantage_perfect_classifier(self):
    """Check the attacker advantage reported for a perfect classifier's ROC curve.

    The test name promises the attacker advantage, but the assertion used to
    check ``get_auc()``, which left the advantage computation untested here.
    """
    roc = RocCurve(
        tpr=np.array([0.0, 1.0, 1.0]),
        fpr=np.array([1.0, 1.0, 0.0]),
        thresholds=np.array([0, 1, 2]))

    # NOTE(review): assumes RocCurve exposes get_attacker_advantage(), the same
    # accessor name SingleAttackResult offers - confirm against the class.
    self.assertEqual(roc.get_attacker_advantage(), 1.0)
def test_auc_random_classifier(self):
    """A ROC curve whose tpr equals its fpr must report an AUC of 0.5."""
    diagonal_points = [0.0, 0.5, 1.0]
    random_roc = RocCurve(
        tpr=np.array(diagonal_points),
        fpr=np.array(diagonal_points),
        thresholds=np.array([0, 1, 2]))

    observed_auc = random_roc.get_auc()
    self.assertEqual(observed_auc, 0.5)
def __init__(self, *args, **kwargs):
    """Create the attack results shared by the privacy report tests.

    Two single-attack fixtures are built (one with an AUC of 0.5, one with an
    AUC of 1.0) and wrapped into AttackResults objects that differ in their
    report metadata (accuracies, epoch number, model variant label). One
    extra AttackResults object carries no metadata at all.
    """
    super(PrivacyReportTest, self).__init__(*args, **kwargs)

    def build_threshold_result(tpr_points, fpr_points):
        # All fixtures share slice, attack type, thresholds and data size;
        # only the ROC points vary.
        return SingleAttackResult(
            slice_spec=SingleSliceSpec(None),
            attack_type=AttackType.THRESHOLD_ATTACK,
            roc_curve=RocCurve(
                tpr=np.array(tpr_points),
                fpr=np.array(fpr_points),
                thresholds=np.array([0, 1, 2])),
            data_size=DataSize(ntrain=1, ntest=1))

    def build_results(single_result, train_acc, test_acc, epoch, label):
        # Wrap one single-attack result together with its report metadata.
        return AttackResults(
            single_attack_results=[single_result],
            privacy_report_metadata=PrivacyReportMetadata(
                accuracy_train=train_acc,
                accuracy_test=test_acc,
                epoch_num=epoch,
                model_variant_label=label))

    # Classifier that achieves an AUC of 0.5.
    self.imperfect_classifier_result = build_threshold_result(
        [0.0, 0.5, 1.0], [0.0, 0.5, 1.0])

    # Classifier that achieves an AUC of 1.0.
    self.perfect_classifier_result = build_threshold_result(
        [0.0, 1.0, 1.0], [1.0, 1.0, 0.0])

    self.results_epoch_0 = build_results(
        self.imperfect_classifier_result, 0.4, 0.3, 0, 'default')
    self.results_epoch_10 = build_results(
        self.imperfect_classifier_result, 0.4, 0.3, 10, 'default')
    self.results_epoch_15 = build_results(
        self.perfect_classifier_result, 0.5, 0.4, 15, 'default')
    self.results_epoch_15_model_2 = build_results(
        self.perfect_classifier_result, 0.6, 0.7, 15, 'model 2')

    self.attack_results_no_metadata = AttackResults(
        single_attack_results=[self.perfect_classifier_result])
def __init__(self, *args, **kwargs):
    """Create the attack results and collections used by the collection tests.

    A single threshold-attack result is reused in every AttackResults
    fixture. Two fixtures carry report metadata (epochs 10 and 15), one
    carries none, and two collections are assembled from them.
    """
    super(AttackResultsCollectionTest, self).__init__(*args, **kwargs)

    roc_points = [0.0, 0.5, 1.0]
    self.some_attack_result = SingleAttackResult(
        slice_spec=SingleSliceSpec(None),
        attack_type=AttackType.THRESHOLD_ATTACK,
        roc_curve=RocCurve(
            tpr=np.array(roc_points),
            fpr=np.array(roc_points),
            thresholds=np.array([0, 1, 2])))

    metadata_epoch_10 = PrivacyReportMetadata(
        accuracy_train=0.4,
        accuracy_test=0.3,
        epoch_num=10,
        model_variant_label='default')
    self.results_epoch_10 = AttackResults(
        single_attack_results=[self.some_attack_result],
        privacy_report_metadata=metadata_epoch_10)

    metadata_epoch_15 = PrivacyReportMetadata(
        accuracy_train=0.5,
        accuracy_test=0.4,
        epoch_num=15,
        model_variant_label='default')
    self.results_epoch_15 = AttackResults(
        single_attack_results=[self.some_attack_result],
        privacy_report_metadata=metadata_epoch_15)

    self.attack_results_no_metadata = AttackResults(
        single_attack_results=[self.some_attack_result])

    # One collection whose members all carry metadata ...
    with_metadata = [self.results_epoch_10, self.results_epoch_15]
    self.collection_with_metadata = AttackResultsCollection(with_metadata)

    # ... and one that holds the metadata-free result twice.
    without_metadata = [
        self.attack_results_no_metadata,
        self.attack_results_no_metadata,
    ]
    self.collection_no_metadata = AttackResultsCollection(without_metadata)
def run_trained_attack(attack_input: AttackInputData, attack_type: AttackType):
    """Classification attack done by ML models.

    Picks the attacker model matching `attack_type`, trains it on the data
    prepared by `models.create_attacker_data`, scores the prepared test
    features and turns the scores into a ROC curve.

    Args:
      attack_input: input data for running the attack.
      attack_type: which attacker model to train.

    Returns:
      A SingleAttackResult holding the slice spec, the attack type and the
      ROC curve of the trained attacker.

    Raises:
      NotImplementedError: if `attack_type` has no attacker model here.
    """
    # Ordered (attack type, attacker class) table; scanned with `==` so the
    # matching rule is the same as a chain of equality checks.
    attacker_table = (
        (AttackType.LOGISTIC_REGRESSION, models.LogisticRegressionAttacker),
        (AttackType.MULTI_LAYERED_PERCEPTRON,
         models.MultilayerPerceptronAttacker),
        (AttackType.RANDOM_FOREST, models.RandomForestAttacker),
        (AttackType.K_NEAREST_NEIGHBORS, models.KNearestNeighborsAttacker),
    )
    for known_type, attacker_cls in attacker_table:
        if attack_type == known_type:
            attacker = attacker_cls()
            break
    else:
        raise NotImplementedError(
            'Attack type %s not implemented yet.' % attack_type)

    attacker_data = models.create_attacker_data(attack_input)
    attacker.train_model(
        attacker_data.features_train,
        attacker_data.is_training_labels_train)

    # Score the (permuted) test examples with the trained attacker.
    test_predictions = attacker.predict(attacker_data.features_test)

    # Build the ROC curve from the attacker's predictions.
    fpr, tpr, thresholds = metrics.roc_curve(
        attacker_data.is_training_labels_test, test_predictions)
    attack_roc = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)

    return SingleAttackResult(
        slice_spec=_get_slice_spec(attack_input),
        attack_type=attack_type,
        roc_curve=attack_roc)
def __init__(self, *args, **kwargs):
    """Create the two single-attack results used by the AttackResults tests."""
    super(AttackResultsTest, self).__init__(*args, **kwargs)

    # ROC curve of a perfect classifier, attached to the slice built from
    # SlicingFeature.CORRECTLY_CLASSIFIED with value True.
    perfect_roc = RocCurve(
        tpr=np.array([0.0, 1.0, 1.0]),
        fpr=np.array([1.0, 1.0, 0.0]),
        thresholds=np.array([0, 1, 2]))
    self.perfect_classifier_result = SingleAttackResult(
        slice_spec=SingleSliceSpec(SlicingFeature.CORRECTLY_CLASSIFIED, True),
        attack_type=AttackType.THRESHOLD_ATTACK,
        roc_curve=perfect_roc)

    # ROC curve of a random classifier, attached to the slice built from None.
    random_roc = RocCurve(
        tpr=np.array([0.0, 0.5, 1.0]),
        fpr=np.array([0.0, 0.5, 1.0]),
        thresholds=np.array([0, 1, 2]))
    self.random_classifier_result = SingleAttackResult(
        slice_spec=SingleSliceSpec(None),
        attack_type=AttackType.THRESHOLD_ATTACK,
        roc_curve=random_roc)
def test_attacker_advantage_random_classifier(self):
    """An attack whose ROC has tpr equal to fpr must report zero advantage."""
    diagonal_points = [0.0, 0.5, 1.0]
    random_roc = RocCurve(
        tpr=np.array(diagonal_points),
        fpr=np.array(diagonal_points),
        thresholds=np.array([0, 1, 2]))

    attack_result = SingleAttackResult(
        roc_curve=random_roc,
        slice_spec=SingleSliceSpec(None),
        attack_type=AttackType.THRESHOLD_ATTACK)

    observed_advantage = attack_result.get_attacker_advantage()
    self.assertEqual(observed_advantage, 0.0)
def test_auc_random_classifier(self):
    """An attack result whose ROC has tpr equal to fpr must report an AUC of 0.5."""
    diagonal_points = [0.0, 0.5, 1.0]
    random_roc = RocCurve(
        tpr=np.array(diagonal_points),
        fpr=np.array(diagonal_points),
        thresholds=np.array([0, 1, 2]))

    attack_result = SingleAttackResult(
        roc_curve=random_roc,
        slice_spec=SingleSliceSpec(None),
        attack_type=AttackType.THRESHOLD_ATTACK,
        data_size=DataSize(ntrain=1, ntest=1))

    observed_auc = attack_result.get_auc()
    self.assertEqual(observed_auc, 0.5)
def run_seq2seq_attack(
        attack_input: Seq2SeqAttackInputData,
        privacy_report_metadata: PrivacyReportMetadata = None,
        balance_attacker_training: bool = True) -> AttackResults:
    """Runs membership inference attacks on a seq2seq model.

    The input is validated, a logistic regression attacker is trained on the
    data prepared by `create_seq2seq_attacker_data`, and the attacker's
    predictions on the prepared test features are turned into a ROC curve.

    Args:
      attack_input: input data for running an attack.
      privacy_report_metadata: the metadata of the model under attack. A
        fresh PrivacyReportMetadata is created when a falsy value is given.
      balance_attacker_training: Whether the training and test sets for the
        membership inference attacker should have a balanced (roughly equal)
        number of samples from the training and test sets used to develop
        the model under attack.

    Returns:
      An AttackResults object holding a single logistic regression attack
      result together with the privacy report metadata.
    """
    attack_input.validate()

    # The attacker uses the average rank (a single number) of a seq2seq
    # dataset record to determine membership, so logistic regression is the
    # only supported attacker: it suits single-number features best.
    lr_attacker = models.LogisticRegressionAttacker()

    # Make sure there is a metadata object to hand to the data preparation
    # step, which populates its fields.
    if not privacy_report_metadata:
        privacy_report_metadata = PrivacyReportMetadata()
    attacker_data = create_seq2seq_attacker_data(
        attack_input_data=attack_input,
        balance=balance_attacker_training,
        privacy_report_metadata=privacy_report_metadata)

    lr_attacker.train_model(
        attacker_data.features_train,
        attacker_data.is_training_labels_train)

    # Score the (permuted) test examples with the trained attacker.
    test_predictions = lr_attacker.predict(attacker_data.features_test)

    # Build the ROC curve from the attacker's predictions.
    fpr, tpr, thresholds = metrics.roc_curve(
        attacker_data.is_training_labels_test, test_predictions)
    attack_roc = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)

    single_result = SingleAttackResult(
        slice_spec=SingleSliceSpec(),
        attack_type=AttackType.LOGISTIC_REGRESSION,
        roc_curve=attack_roc,
        data_size=attacker_data.data_size)

    return AttackResults(
        single_attack_results=[single_result],
        privacy_report_metadata=privacy_report_metadata)
def _run_threshold_entropy_attack(attack_input: AttackInputData):
    """Runs a threshold attack that uses entropy as the score.

    Training examples are labelled 0 and test examples 1; the entropies of
    both sets, concatenated in the same order, are the scores passed to
    `metrics.roc_curve`.

    Args:
      attack_input: input data providing set sizes and entropies.

    Returns:
      A SingleAttackResult of type THRESHOLD_ENTROPY_ATTACK with the ROC
      curve of the attack.
    """
    # Labels: 0 for every training example, 1 for every test example.
    train_labels = np.zeros(attack_input.get_train_size())
    test_labels = np.ones(attack_input.get_test_size())
    labels = np.concatenate((train_labels, test_labels))

    # Scores: entropies in the same train-then-test order as the labels.
    entropies = np.concatenate(
        (attack_input.get_entropy_train(), attack_input.get_entropy_test()))

    fpr, tpr, thresholds = metrics.roc_curve(labels, entropies)
    entropy_roc = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)

    return SingleAttackResult(
        slice_spec=_get_slice_spec(attack_input),
        attack_type=AttackType.THRESHOLD_ENTROPY_ATTACK,
        roc_curve=entropy_roc)
def _run_threshold_attack(attack_input: AttackInputData):
    """Runs a threshold attack on loss.

    Training examples are labelled 0 and test examples 1; the losses of both
    sets, concatenated in the same order, are the scores passed to
    `metrics.roc_curve`. The negated losses are reported as membership scores.

    Args:
      attack_input: input data providing set sizes and per-example losses.

    Returns:
      A SingleAttackResult of type THRESHOLD_ATTACK with data size,
      membership scores and the ROC curve of the attack.

    Raises:
      ValueError: if the train or test losses are not available.
    """
    ntrain, ntest = attack_input.get_train_size(), attack_input.get_test_size()
    loss_train = attack_input.get_loss_train()
    loss_test = attack_input.get_loss_test()
    # Fail early with a clear message instead of letting np.concatenate raise
    # an obscure error about its inputs when a loss is missing.
    if loss_train is None or loss_test is None:
        raise ValueError(
            'Not possible to run threshold attack without losses.')

    fpr, tpr, thresholds = metrics.roc_curve(
        np.concatenate((np.zeros(ntrain), np.ones(ntest))),
        np.concatenate((loss_train, loss_test)))
    roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)

    return SingleAttackResult(
        slice_spec=_get_slice_spec(attack_input),
        data_size=DataSize(ntrain=ntrain, ntest=ntest),
        attack_type=AttackType.THRESHOLD_ATTACK,
        # Reuse the losses fetched above rather than querying them again.
        membership_scores_train=-loss_train,
        membership_scores_test=-loss_test,
        roc_curve=roc_curve)
def _run_trained_attack(attack_input: AttackInputData,
                        attack_type: AttackType,
                        balance_attacker_training: bool = True):
    """Classification attack done by ML models.

    Picks the attacker model matching `attack_type`, trains it on the data
    prepared by `models.create_attacker_data`, scores the prepared test
    features and turns the scores into a ROC curve.

    Args:
      attack_input: input data for running the attack.
      attack_type: which attacker model to train.
      balance_attacker_training: forwarded as `balance` to
        `models.create_attacker_data`.

    Returns:
      A SingleAttackResult holding the slice spec, the data size of the
      prepared attacker data, the attack type and the ROC curve.

    Raises:
      NotImplementedError: if `attack_type` has no attacker model here.
    """
    # Ordered (attack type, attacker class) table; scanned with `==` so the
    # matching rule is the same as a chain of equality checks.
    attacker_table = (
        (AttackType.LOGISTIC_REGRESSION, models.LogisticRegressionAttacker),
        (AttackType.MULTI_LAYERED_PERCEPTRON,
         models.MultilayerPerceptronAttacker),
        (AttackType.RANDOM_FOREST, models.RandomForestAttacker),
        (AttackType.K_NEAREST_NEIGHBORS, models.KNearestNeighborsAttacker),
    )
    for known_type, attacker_cls in attacker_table:
        if attack_type == known_type:
            attacker = attacker_cls()
            break
    else:
        raise NotImplementedError(
            'Attack type %s not implemented yet.' % attack_type)

    attacker_data = models.create_attacker_data(
        attack_input, balance=balance_attacker_training)
    attacker.train_model(
        attacker_data.features_train,
        attacker_data.is_training_labels_train)

    # Score the (permuted) test examples with the trained attacker.
    test_predictions = attacker.predict(attacker_data.features_test)

    # Build the ROC curve from the attacker's predictions.
    fpr, tpr, thresholds = metrics.roc_curve(
        attacker_data.is_training_labels_test, test_predictions)
    attack_roc = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)

    # NOTE: Membership scores cannot be produced for all samples in this
    # setup because some samples were used to train the attacker. Training
    # several attackers so that each sample is held out of exactly one of
    # them (i.e. cross-validation) would fix this.
    # TODO(b/175870479): Implement membership scores for predicted attackers.

    return SingleAttackResult(
        slice_spec=_get_slice_spec(attack_input),
        data_size=attacker_data.data_size,
        attack_type=attack_type,
        roc_curve=attack_roc)
def _run_threshold_attack(attack_input: AttackInputData):
    """Runs a threshold attack on loss.

    Training examples are labelled 0 and test examples 1; the losses of both
    sets, concatenated in the same order, are the scores passed to
    `metrics.roc_curve`. The negated losses are reported as membership scores.

    Args:
      attack_input: input data providing set sizes and per-example losses.

    Returns:
      A SingleAttackResult of type THRESHOLD_ATTACK with data size,
      membership scores and the ROC curve of the attack.

    Raises:
      ValueError: if the train or test losses are not available.
    """
    ntrain, ntest = attack_input.get_train_size(), attack_input.get_test_size()
    loss_train = attack_input.get_loss_train()
    loss_test = attack_input.get_loss_test()
    if loss_train is None or loss_test is None:
        raise ValueError(
            'Not possible to run threshold attack without losses.')

    fpr, tpr, thresholds = metrics.roc_curve(
        np.concatenate((np.zeros(ntrain), np.ones(ntest))),
        np.concatenate((loss_train, loss_test)))
    roc_curve = RocCurve(tpr=tpr, fpr=fpr, thresholds=thresholds)

    return SingleAttackResult(
        slice_spec=_get_slice_spec(attack_input),
        data_size=DataSize(ntrain=ntrain, ntest=ntest),
        attack_type=AttackType.THRESHOLD_ATTACK,
        # Negate the losses that were validated above, so the values checked
        # for None are the very values used, and the getters run only once.
        membership_scores_train=-loss_train,
        membership_scores_test=-loss_test,
        roc_curve=roc_curve)