def test_pointwise_attack_on_mnist():
    """
    PointWise-Attack test

    Loads a trained LeNet5 checkpoint, measures the prediction accuracy on
    the first ``batch_num`` batches of the MNIST test set, runs
    ``PointWiseAttack`` on those samples and logs the accuracy after the
    attack together with the metrics reported by ``AttackEvaluate``.
    """
    # upload trained network
    ckpt_name = './trained_ckpt_file/checkpoint_lenet-10_1875.ckpt'
    net = LeNet5()
    load_dict = load_checkpoint(ckpt_name)
    load_param_into_net(net, load_dict)

    # get test data
    data_list = "./MNIST_unzip/test"
    batch_size = 32
    ds = generate_mnist_dataset(data_list, batch_size=batch_size)

    # prediction accuracy before attack
    model = ModelToBeAttacked(net)
    batch_num = 3  # the number of batches of attacking samples
    test_images = []
    test_labels = []
    predict_labels = []
    for i, data in enumerate(ds.create_tuple_iterator(), start=1):
        images = data[0].astype(np.float32)
        labels = data[1]
        test_images.append(images)
        test_labels.append(labels)
        pred_labels = np.argmax(model.predict(images), axis=1)
        predict_labels.append(pred_labels)
        if i >= batch_num:
            break
    predict_labels = np.concatenate(predict_labels)
    true_labels = np.concatenate(test_labels)
    accuracy = np.mean(np.equal(predict_labels, true_labels))
    LOGGER.info(TAG, "prediction accuracy before attacking is : %g", accuracy)

    # attacking
    is_target = False
    attack = PointWiseAttack(model=model, is_targeted=is_target)
    if is_target:
        # draw random target classes and shift any that coincide with the
        # true class, so every target differs from the ground truth
        targeted_labels = np.random.randint(0, 10, size=len(true_labels))
        for i, true_l in enumerate(true_labels):
            if targeted_labels[i] == true_l:
                targeted_labels[i] = (targeted_labels[i] + 1) % 10
    else:
        targeted_labels = true_labels
    success_list, adv_data, query_list = attack.generate(
        np.concatenate(test_images), targeted_labels)
    # turn the per-sample success mask into the indices of successful samples
    success_list = np.arange(success_list.shape[0])[success_list]
    LOGGER.info(TAG, 'success_list: %s', success_list)
    LOGGER.info(TAG, 'average of query times is : %s', np.mean(query_list))

    adv_preds = []
    for ite_data in adv_data:
        pred_logits_adv = model.predict(ite_data)
        # rescale predict confidences into (0, 1).
        pred_logits_adv = softmax(pred_logits_adv, axis=1)
        adv_preds.extend(pred_logits_adv)
    # compare predicted class indices (argmax) with the labels; taking the
    # max would compare a confidence value against a class id
    accuracy_adv = np.mean(np.equal(np.argmax(adv_preds, axis=1), true_labels))
    LOGGER.info(TAG, "prediction accuracy after attacking is : %g",
                accuracy_adv)

    test_labels_onehot = np.eye(10)[true_labels]
    attack_evaluate = AttackEvaluate(np.concatenate(test_images),
                                     test_labels_onehot, adv_data,
                                     adv_preds, targeted=is_target,
                                     target_label=targeted_labels)
    LOGGER.info(TAG, 'mis-classification rate of adversaries is : %s',
                attack_evaluate.mis_classification_rate())
    LOGGER.info(TAG, 'The average confidence of adversarial class is : %s',
                attack_evaluate.avg_conf_adv_class())
    LOGGER.info(TAG, 'The average confidence of true class is : %s',
                attack_evaluate.avg_conf_true_class())
    LOGGER.info(
        TAG, 'The average distance (l0, l2, linf) between original '
        'samples and adversarial samples are: %s',
        attack_evaluate.avg_lp_distance())
def test_genetic_attack_on_mnist():
    """
    Genetic-Attack test

    Loads a trained LeNet5 checkpoint, measures the prediction accuracy on
    the first ``batch_num`` batches of the MNIST test set, runs a targeted
    ``GeneticAttack`` on those samples and logs the accuracy after the
    attack, the ``AttackEvaluate`` metrics and the average time per sample.
    """
    # upload trained network
    ckpt_name = './trained_ckpt_file/checkpoint_lenet-10_1875.ckpt'
    net = LeNet5()
    load_dict = load_checkpoint(ckpt_name)
    load_param_into_net(net, load_dict)

    # get test data
    data_list = "./MNIST_unzip/test"
    batch_size = 32
    ds = generate_mnist_dataset(data_list, batch_size=batch_size)

    # prediction accuracy before attack
    model = ModelToBeAttacked(net)
    batch_num = 3  # the number of batches of attacking samples
    test_images = []
    test_labels = []
    predict_labels = []
    for i, data in enumerate(ds.create_tuple_iterator(), start=1):
        images = data[0].astype(np.float32)
        labels = data[1]
        test_images.append(images)
        test_labels.append(labels)
        pred_labels = np.argmax(model.predict(images), axis=1)
        predict_labels.append(pred_labels)
        if i >= batch_num:
            break
    predict_labels = np.concatenate(predict_labels)
    true_labels = np.concatenate(test_labels)
    accuracy = np.mean(np.equal(predict_labels, true_labels))
    LOGGER.info(TAG, "prediction accuracy before attacking is : %g", accuracy)

    # attacking
    attack = GeneticAttack(model=model, pop_size=6, mutation_rate=0.05,
                           per_bounds=0.1, step_size=0.25, temp=0.1,
                           sparse=True)
    # draw random target classes and shift any that coincide with the true
    # class, so every target differs from the ground truth
    targeted_labels = np.random.randint(0, 10, size=len(true_labels))
    for i, true_l in enumerate(true_labels):
        if targeted_labels[i] == true_l:
            targeted_labels[i] = (targeted_labels[i] + 1) % 10
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # recommended replacement for measuring elapsed time
    start_time = time.perf_counter()
    success_list, adv_data, query_list = attack.generate(
        np.concatenate(test_images), targeted_labels)
    stop_time = time.perf_counter()
    LOGGER.info(TAG, 'success_list: %s', success_list)
    LOGGER.info(TAG, 'average of query times is : %s', np.mean(query_list))

    pred_logits_adv = model.predict(adv_data)
    # rescale predict confidences into (0, 1).
    pred_logits_adv = softmax(pred_logits_adv, axis=1)
    pred_labels_adv = np.argmax(pred_logits_adv, axis=1)
    accuracy_adv = np.mean(np.equal(pred_labels_adv, true_labels))
    LOGGER.info(TAG, "prediction accuracy after attacking is : %g",
                accuracy_adv)

    test_labels_onehot = np.eye(10)[true_labels]
    attack_evaluate = AttackEvaluate(np.concatenate(test_images),
                                     test_labels_onehot, adv_data,
                                     pred_logits_adv, targeted=True,
                                     target_label=targeted_labels)
    LOGGER.info(TAG, 'mis-classification rate of adversaries is : %s',
                attack_evaluate.mis_classification_rate())
    LOGGER.info(TAG, 'The average confidence of adversarial class is : %s',
                attack_evaluate.avg_conf_adv_class())
    LOGGER.info(TAG, 'The average confidence of true class is : %s',
                attack_evaluate.avg_conf_true_class())
    LOGGER.info(
        TAG, 'The average distance (l0, l2, linf) between original '
        'samples and adversarial samples are: %s',
        attack_evaluate.avg_lp_distance())
    LOGGER.info(
        TAG, 'The average structural similarity between original '
        'samples and adversarial samples are: %s',
        attack_evaluate.avg_ssim())
    LOGGER.info(TAG, 'The average costing time is %s',
                (stop_time - start_time) / (batch_num * batch_size))
def test_lbfgs_attack():
    """
    LBFGS-Attack test

    Loads a trained LeNet5 checkpoint, measures the prediction accuracy on
    the first ``batch_num`` batches of the MNIST test set, runs the
    ``LBFGS`` attack on those samples and logs the accuracy after the
    attack, the ``AttackEvaluate`` metrics and the average time per sample.
    """
    # upload trained network
    ckpt_name = './trained_ckpt_file/checkpoint_lenet-10_1875.ckpt'
    net = LeNet5()
    load_dict = load_checkpoint(ckpt_name)
    load_param_into_net(net, load_dict)

    # get test data
    data_list = "./MNIST_unzip/test"
    batch_size = 32
    ds = generate_mnist_dataset(data_list, batch_size=batch_size, sparse=False)

    # prediction accuracy before attack
    model = Model(net)
    batch_num = 3  # the number of batches of attacking samples
    test_images = []
    test_labels = []
    predict_labels = []
    for i, data in enumerate(ds.create_tuple_iterator(), start=1):
        images = data[0].astype(np.float32)
        labels = data[1]
        test_images.append(images)
        test_labels.append(labels)
        pred_labels = np.argmax(model.predict(Tensor(images)).asnumpy(),
                                axis=1)
        predict_labels.append(pred_labels)
        if i >= batch_num:
            break
    predict_labels = np.concatenate(predict_labels)
    # labels are 2-D here (presumably one-hot, since the dataset is built
    # with sparse=False); reduce them to class indices
    true_labels = np.argmax(np.concatenate(test_labels), axis=1)
    accuracy = np.mean(np.equal(predict_labels, true_labels))
    LOGGER.info(TAG, "prediction accuracy before attacking is : %s", accuracy)

    # attacking
    is_targeted = True
    if is_targeted:
        # draw random target classes and shift any that coincide with the
        # true class, so every target differs from the ground truth
        targeted_labels = np.random.randint(
            0, 10, size=len(true_labels)).astype(np.int32)
        for i, true_l in enumerate(true_labels):
            if targeted_labels[i] == true_l:
                targeted_labels[i] = (targeted_labels[i] + 1) % 10
    else:
        targeted_labels = true_labels.astype(np.int32)
    # the attack is fed one-hot float32 labels
    targeted_labels = np.eye(10)[targeted_labels].astype(np.float32)
    attack = LBFGS(net, is_targeted=is_targeted)
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # recommended replacement for measuring elapsed time
    start_time = time.perf_counter()
    adv_data = attack.batch_generate(np.concatenate(test_images),
                                     targeted_labels,
                                     batch_size=batch_size)
    stop_time = time.perf_counter()

    pred_logits_adv = model.predict(Tensor(adv_data)).asnumpy()
    # rescale predict confidences into (0, 1).
    pred_logits_adv = softmax(pred_logits_adv, axis=1)
    pred_labels_adv = np.argmax(pred_logits_adv, axis=1)
    accuracy_adv = np.mean(np.equal(pred_labels_adv, true_labels))
    LOGGER.info(TAG, "prediction accuracy after attacking is : %s",
                accuracy_adv)

    attack_evaluate = AttackEvaluate(
        np.concatenate(test_images).transpose(0, 2, 3, 1),
        np.concatenate(test_labels),
        adv_data.transpose(0, 2, 3, 1),
        pred_logits_adv,
        targeted=is_targeted,
        target_label=np.argmax(targeted_labels, axis=1))
    LOGGER.info(TAG, 'mis-classification rate of adversaries is : %s',
                attack_evaluate.mis_classification_rate())
    LOGGER.info(TAG, 'The average confidence of adversarial class is : %s',
                attack_evaluate.avg_conf_adv_class())
    LOGGER.info(TAG, 'The average confidence of true class is : %s',
                attack_evaluate.avg_conf_true_class())
    LOGGER.info(
        TAG, 'The average distance (l0, l2, linf) between original '
        'samples and adversarial samples are: %s',
        attack_evaluate.avg_lp_distance())
    LOGGER.info(
        TAG, 'The average structural similarity between original '
        'samples and adversarial samples are: %s',
        attack_evaluate.avg_ssim())
    LOGGER.info(TAG, 'The average costing time is %s',
                (stop_time - start_time) / (batch_num * batch_size))
def test_momentum_diverse_input_iterative_method():
    """
    M-DI2-FGSM Attack Test for CPU device.

    Loads a trained LeNet5 checkpoint, measures the prediction accuracy on
    the first ``batch_num`` batches of the MNIST test set, runs
    ``MomentumDiverseInputIterativeMethod`` on those samples and logs the
    accuracy after the attack, the ``AttackEvaluate`` metrics and the
    average time per sample.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    # upload trained network
    ckpt_name = './trained_ckpt_file/checkpoint_lenet-10_1875.ckpt'
    net = LeNet5()
    load_dict = load_checkpoint(ckpt_name)
    load_param_into_net(net, load_dict)

    # get test data
    data_list = "./MNIST_unzip/test"
    batch_size = 32
    ds = generate_mnist_dataset(data_list, batch_size)

    # prediction accuracy before attack
    model = Model(net)
    batch_num = 32  # the number of batches of attacking samples
    test_images = []
    test_labels = []
    predict_labels = []
    for i, data in enumerate(ds.create_tuple_iterator(), start=1):
        images = data[0].astype(np.float32)
        labels = data[1]
        test_images.append(images)
        test_labels.append(labels)
        pred_labels = np.argmax(model.predict(Tensor(images)).asnumpy(),
                                axis=1)
        predict_labels.append(pred_labels)
        if i >= batch_num:
            break

    predict_labels = np.concatenate(predict_labels)
    true_labels = np.concatenate(test_labels)
    accuracy = np.mean(np.equal(predict_labels, true_labels))
    LOGGER.info(TAG, "prediction accuracy before attacking is : %s", accuracy)

    # attacking
    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    attack = MomentumDiverseInputIterativeMethod(net, loss_fn=loss)
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # recommended replacement for measuring elapsed time
    start_time = time.perf_counter()
    # reuse batch_size so the attack batches match the timing denominator
    adv_data = attack.batch_generate(np.concatenate(test_images),
                                     true_labels, batch_size=batch_size)
    stop_time = time.perf_counter()

    pred_logits_adv = model.predict(Tensor(adv_data)).asnumpy()
    # rescale predict confidences into (0, 1).
    pred_logits_adv = softmax(pred_logits_adv, axis=1)
    pred_labels_adv = np.argmax(pred_logits_adv, axis=1)
    accuracy_adv = np.mean(np.equal(pred_labels_adv, true_labels))
    LOGGER.info(TAG, "prediction accuracy after attacking is : %s",
                accuracy_adv)

    attack_evaluate = AttackEvaluate(
        np.concatenate(test_images).transpose(0, 2, 3, 1),
        np.eye(10)[true_labels],
        adv_data.transpose(0, 2, 3, 1),
        pred_logits_adv)
    LOGGER.info(TAG, 'mis-classification rate of adversaries is : %s',
                attack_evaluate.mis_classification_rate())
    LOGGER.info(TAG, 'The average confidence of adversarial class is : %s',
                attack_evaluate.avg_conf_adv_class())
    LOGGER.info(TAG, 'The average confidence of true class is : %s',
                attack_evaluate.avg_conf_true_class())
    LOGGER.info(
        TAG, 'The average distance (l0, l2, linf) between original '
        'samples and adversarial samples are: %s',
        attack_evaluate.avg_lp_distance())
    LOGGER.info(
        TAG, 'The average structural similarity between original '
        'samples and adversarial samples are: %s',
        attack_evaluate.avg_ssim())
    LOGGER.info(TAG, 'The average costing time is %s',
                (stop_time - start_time) / (batch_num * batch_size))