from contextlib import contextmanager
from unittest.mock import Mock

import numpy as np
import pytest

from foolbox import attacks, Adversarial
from foolbox.distances import MAE, MSE


def test_base_attack(model, criterion, image, label):
    attack = attacks.FGSM(model, criterion)
    assert attack.name() == "GradientSignAttack"

    with pytest.raises(ValueError):
        attack(image)
    with pytest.raises(TypeError):
        attack(label=label)

    wrong_label = label + 1

    # with the correct label, no adversarial can be found
    adv = attack(image, label=label)
    assert adv is None

    adv = attack(image, label=wrong_label)
    assert adv.shape == image.shape

    adv = attack(image, label=wrong_label, unpack=False)
    assert adv.perturbed.shape == image.shape

    # an Adversarial instance can be passed directly
    adv = Adversarial(model, criterion, image, wrong_label)
    adv = attack(adv)
    assert adv.shape == image.shape

    # passing both an Adversarial instance and a label is an error
    adv = Adversarial(model, criterion, image, wrong_label)
    with pytest.raises(ValueError):
        attack(adv, label=wrong_label)

    # an attack created without a model requires an Adversarial instance
    attack = attacks.FGSM()
    with pytest.raises(ValueError):
        attack(image, label=wrong_label)

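# Note: `attacks.FGSM` is an alias for `GradientSignAttack`, hence the
# `name()` assertion above. The call conventions exercised by the test, as a
# usage sketch (the `model`/`criterion` objects are assumed to come from the
# parametrized fixtures):
#
#     attack = attacks.FGSM(model, criterion)
#     adv = attack(image, label=label)                # perturbed array, or
#                                                     # None if the attack fails
#     adv = attack(image, label=label, unpack=False)  # the Adversarial object
#                                                     # instead of the array
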
@pytest.fixture
def bn_trivial(bn_trivial_criterion, bn_image, bn_label):
    criterion = bn_trivial_criterion
    image = bn_image
    label = bn_label
    cm_model = contextmanager(bn_model)
    with cm_model() as model:
        adv = Adversarial(model, criterion, image, label)
        # reset the (name-mangled) private attributes so that the original
        # is not yet considered adversarial and the attack implementation
        # is actually called
        adv._Adversarial__best_adversarial = None
        adv._Adversarial__best_distance = MSE(value=np.inf)
        yield adv

@pytest.fixture
def binarized2_bn_adversarial(bn_criterion, bn_image, binarized2_bn_label):
    criterion = bn_criterion
    image = bn_image
    label = binarized2_bn_label
    cm_model = contextmanager(binarized2_bn_model)
    with cm_model() as model:
        yield Adversarial(model, criterion, image, label)

@pytest.fixture
def bn_impossible(bn_impossible_criterion, bn_image, bn_label):
    criterion = bn_impossible_criterion
    image = bn_image
    label = bn_label
    cm_model = contextmanager(bn_model)
    with cm_model() as model:
        yield Adversarial(model, criterion, image, label)

@pytest.fixture
def bn_adversarial_mae(bn_criterion, bn_image, bn_label):
    criterion = bn_criterion
    image = bn_image
    label = bn_label
    distance = MAE
    cm_model = contextmanager(bn_model)
    with cm_model() as model:
        yield Adversarial(model, criterion, image, label, distance=distance)

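# Passing `distance=MAE` swaps the distance measure used to judge
# perturbations, so `adv.distance` becomes a mean absolute error instead of
# the default MSE. A hedged sketch (same fixture names as above):
#
#     adv = Adversarial(model, criterion, image, label, distance=MAE)
#     adv.normalized_distance(perturbed)  # -> MAE(value=...) rather than MSE
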
@pytest.fixture
def bn_targeted_adversarial_pytorch(
    bn_model_pytorch, bn_targeted_criterion, bn_image_pytorch, bn_label_pytorch
):
    model = bn_model_pytorch
    criterion = bn_targeted_criterion
    image = bn_image_pytorch
    label = bn_label_pytorch
    adv = Adversarial(model, criterion, image, label)
    assert adv.perturbed is None
    assert adv.distance.value == np.inf
    return adv

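# A freshly constructed Adversarial has found nothing yet: `perturbed` is
# None and the best distance is infinite. Attacks update both in place as
# soon as they find an adversarial, which is what the asserts above pin down.
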
@pytest.fixture
def eg_bn_adversarial(request, bn_criterion, bn_image, bn_label):
    criterion = bn_criterion
    image = bn_image
    label = bn_label
    eg_bn_model = eg_bn_model_factory(request)
    cm_model = contextmanager(eg_bn_model)
    with cm_model() as model:
        yield Adversarial(model, criterion, image, label)

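# The model factories used above (`bn_model`, `binarized2_bn_model`,
# `eg_bn_model_factory(request)`) are assumed to be plain generator
# functions rather than pytest fixtures, which is why each fixture wraps
# them with `contextlib.contextmanager` before entering them. A minimal
# sketch of the assumed shape (the body is an assumption, not the real
# conftest):
#
#     def bn_model():
#         model = create_test_model()  # hypothetical helper
#         yield model                  # teardown code could follow the yield
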
def test_early_stopping(bn_model, bn_criterion, bn_image, bn_label):
    attack = attacks.FGSM()

    model = bn_model
    criterion = bn_criterion
    image = bn_image
    label = bn_label

    wrong_label = label + 1
    adv = Adversarial(model, criterion, image, wrong_label)
    attack(adv)
    assert adv.distance.value == 0
    assert not adv.reached_threshold()  # because no threshold specified

    adv = Adversarial(model, criterion, image, wrong_label, threshold=1e10)
    attack(adv)
    assert adv.distance.value == 0
    assert adv.reached_threshold()

    adv = Adversarial(model, criterion, image, label)
    attack(adv)
    assert adv.distance.value > 0
    assert not adv.reached_threshold()  # because no threshold specified

    c = adv._total_prediction_calls
    d = adv.distance.value

    large_d = 10 * d
    small_d = d / 2

    adv = Adversarial(
        model, criterion, image, label, threshold=adv._distance(value=large_d)
    )
    attack(adv)
    assert 0 < adv.distance.value <= large_d
    assert adv.reached_threshold()
    assert adv._total_prediction_calls < c

    adv = Adversarial(model, criterion, image, label, threshold=large_d)
    attack(adv)
    assert 0 < adv.distance.value <= large_d
    assert adv.reached_threshold()
    assert adv._total_prediction_calls < c

    adv = Adversarial(model, criterion, image, label, threshold=small_d)
    attack(adv)
    assert small_d < adv.distance.value <= large_d
    assert not adv.reached_threshold()
    assert adv._total_prediction_calls == c
    assert adv.distance.value == d

    adv = Adversarial(
        model, criterion, image, label, threshold=adv._distance(value=large_d)
    )
    attack(adv)
    assert adv.reached_threshold()
    c = adv._total_prediction_calls
    attack(adv)
    assert adv._total_prediction_calls == c  # no new calls

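# Threshold semantics exercised above: `threshold` may be a Distance instance
# (e.g. `adv._distance(value=large_d)`, the adversarial's own distance type)
# or a plain number interpreted in that distance measure. Once the best
# distance drops below the threshold, `reached_threshold()` becomes True and
# further prediction calls are skipped, which is why `_total_prediction_calls`
# shrinks. Usage sketch (fixture names as above; the 1e-4 value is arbitrary):
#
#     adv = Adversarial(model, criterion, image, label, threshold=1e-4)
#     attacks.FGSM()(adv)
#     if adv.reached_threshold():
#         ...  # the attack stopped as soon as the distance fell to <= 1e-4
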
def test_adversarial(model, criterion, image, label):
    # model = bn_model
    # criterion = bn_criterion
    # image = bn_image
    # label = bn_label

    adversarial = Adversarial(model, criterion, image, label, verbose=False)

    assert not adversarial.forward_one(image)[1]

    assert adversarial.perturbed is None
    assert adversarial.output is None
    assert adversarial.adversarial_class is None
    assert adversarial.distance == MSE(value=np.inf)
    assert adversarial.unperturbed is image
    assert adversarial.original_class == label
    assert adversarial.target_class is None
    assert adversarial.normalized_distance(image) == MSE(value=0)
    assert adversarial.normalized_distance(image).value == 0

    np.random.seed(22)
    perturbation = np.random.uniform(-1, 1, size=image.shape)
    perturbed = np.clip(image + perturbation, 0, 255).astype(np.float32)
    d1 = adversarial.normalized_distance(perturbed).value
    assert d1 != 0

    assert adversarial.unperturbed.dtype == np.float32
    adversarial.set_distance_dtype(np.float32)
    assert adversarial.normalized_distance(perturbed).value == d1
    adversarial.set_distance_dtype(np.float64)
    assert adversarial.normalized_distance(perturbed).value != d1
    adversarial.reset_distance_dtype()
    assert adversarial.normalized_distance(perturbed).value == d1

    true_label = label
    label = 22  # wrong label
    adversarial = Adversarial(model, criterion, image, label, verbose=True)

    assert adversarial.perturbed is not None
    assert adversarial.output is not None
    assert adversarial.adversarial_class == true_label
    assert adversarial.adversarial_class == np.argmax(adversarial.output)
    assert adversarial.distance == MSE(value=0)
    assert adversarial.unperturbed is image
    assert adversarial.original_class == label
    assert adversarial.target_class is None
    assert adversarial.normalized_distance(image) == MSE(value=0)
    assert adversarial.normalized_distance(image).value == 0

    predictions, is_adversarial = adversarial.forward_one(image)
    first_predictions = predictions
    assert is_adversarial

    predictions, is_adversarial, _, _ = adversarial.forward_one(
        image, return_details=True
    )
    first_predictions = predictions
    assert is_adversarial

    predictions, is_adversarial = adversarial.forward(image[np.newaxis])
    assert (predictions == first_predictions[np.newaxis]).all()
    assert np.all(is_adversarial == np.array([True]))

    predictions, is_adversarial, index = adversarial.forward(
        image[np.newaxis], greedy=True
    )
    assert (predictions == first_predictions[np.newaxis]).all()
    assert is_adversarial
    assert index == 0

    predictions, is_adversarial, index, _, _ = adversarial.forward(
        image[np.newaxis], greedy=True, return_details=True
    )
    assert (predictions == first_predictions[np.newaxis]).all()
    assert is_adversarial
    assert index == 0

    predictions, gradient, is_adversarial = adversarial.forward_and_gradient_one(
        image, label
    )
    assert (predictions == first_predictions).all()
    assert gradient.shape == image.shape
    assert is_adversarial

    predictions, gradient, is_adversarial, _, _ = adversarial.forward_and_gradient_one(
        image, label, return_details=True
    )
    assert (predictions == first_predictions).all()
    assert gradient.shape == image.shape
    assert is_adversarial

    images = image[np.newaxis]
    predictions, gradient, is_adversarial, _, _ = adversarial.forward_and_gradient(
        images, [label], return_details=True
    )
    assert (predictions == first_predictions).all()
    assert gradient.shape == images.shape
    assert is_adversarial[0]

    predictions, gradient, is_adversarial = adversarial.forward_and_gradient_one()
    assert (predictions == first_predictions).all()
    assert gradient.shape == image.shape
    assert is_adversarial
    gradient_pre = np.ones_like(predictions) * 0.3
    gradient = adversarial.backward_one(gradient_pre, image)
    gradient2 = adversarial.backward_one(gradient_pre)
    assert gradient.shape == image.shape
    assert (gradient == gradient2).all()

    gradient = adversarial.gradient_one()
    assert gradient.shape == image.shape
    assert is_adversarial

    assert adversarial.num_classes() == 1000

    assert adversarial.has_gradient()

    assert adversarial.channel_axis(batch=True) == 3
    assert adversarial.channel_axis(batch=False) == 2

    # without adversarials
    criterion.is_adversarial = Mock(return_value=False)
    adversarial = Adversarial(model, criterion, image, label)

    predictions, is_adversarial, index = adversarial.forward(
        image[np.newaxis], greedy=True
    )
    assert (predictions == first_predictions[np.newaxis]).all()
    assert not is_adversarial
    assert index is None

    del model.gradient
    assert not adversarial.has_gradient()

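# Patching `criterion.is_adversarial` with a Mock that always returns False
# forces the "no adversarial found" code path: `forward(..., greedy=True)`
# then reports `is_adversarial` as False and `index is None`. Similarly,
# deleting the model's `gradient` attribute makes `has_gradient()` return
# False, which is how gradient-free models are detected. Sketch (names as
# above):
#
#     criterion.is_adversarial = Mock(return_value=False)
#     adv = Adversarial(model, criterion, image, label)
#     assert adv.perturbed is None  # nothing will ever count as adversarial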