def run_experiment(adv_attack, mfe, batch_size=64, seed=0, info_msg=''):
    """Fit an attack on the training images and predict on perturbed images.

    Args:
        adv_attack: Attack object exposing ``fit``, ``get_perturbation``,
            ``predict_raw`` and a ``power_method.eigen_val`` attribute.
        mfe: Feature extractor handed to ``adv_attack.fit`` and
            ``adv_attack.predict_raw``.
        batch_size: Batch size of the dataloader used for fitting.
        seed: Value passed to ``fix_seed`` before the fitting dataloader
            is created.
        info_msg: Optional message printed first; skipped when empty.

    Returns:
        dict with keys ``perturbation`` (the generated perturbation moved
        to CPU), ``eigen_value`` and ``perturbated_answers`` (the output of
        ``predict_raw`` on the perturbed images).
    """
    if info_msg:
        print(info_msg)

    fix_seed(seed)
    clean_transforms = get_images_transforms()
    fit_loader = get_images_dataloader(
        DATA_PATH, batch_size, transforms=clean_transforms)

    print('Starting power method...')
    adv_attack.fit(mfe, fit_loader)
    print('Done power method!')

    generated_pert = adv_attack.get_perturbation().cpu()

    # Evaluation always uses a batch size of 128, independent of batch_size.
    perturbed_transforms = get_images_transforms(generated_pert)
    perturbed_loader = get_images_dataloader(
        DATA_PATH, 128, transforms=perturbed_transforms)

    print('Starting predicting classes of perturbated images...')
    perturbed_predictions = adv_attack.predict_raw(mfe, perturbed_loader)
    print('Done predicting!')

    outcome = {
        'perturbation': generated_pert,
        'eigen_value': adv_attack.power_method.eigen_val,
        'perturbated_answers': perturbed_predictions,
    }
    print('Done with experiment')
    print('=' * 50)
    return outcome
def run_experiment():
    """Plot VGG19 class probabilities as a perturbation is scaled up.

    A perturbation is generated for ``features[4]`` of a pretrained VGG19
    with ``adv_norm=1`` and multiplied by each of 192 evenly spaced factors
    in ``[0, 30]``. For every factor, one image (index ``-2`` of the first
    batch from ``./random_samples/``) is collected; the stacked images are
    classified in a single forward pass and the probabilities are handed to
    ``make_plot``. Every 32nd perturbed image is also saved under
    ``exps_results/top5_probs_exp``.

    NOTE(review): another ``run_experiment`` taking ``(adv_attack, mfe, ...)``
    is defined earlier in this view. If both really live in one module, this
    definition shadows it -- confirm.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = torchvision.models.vgg19(pretrained=True)
    layer = model.features[4]
    perturbation = get_perturbation_with_norm(model, layer, adv_norm=1)
    pert_norm_grid = np.linspace(0, 30, 192)

    print('Generating imgs with perturbation...')
    all_perturbed_imgs = []
    for norm_value in pert_norm_grid:
        # The perturbation is applied inside the transforms, so a new
        # dataloader is needed for every scale factor.
        dl = get_images_dataloader(
            './random_samples/', 16,
            transforms=get_images_transforms(perturbation * norm_value)
        )
        img = next(iter(dl))['image'][-2]
        all_perturbed_imgs.append(img.unsqueeze(0))
    all_perturbed_imgs = torch.cat(all_perturbed_imgs)

    print('Evaluating...')
    mfe = ModelFeatureExtracter(model, layer).to(device)
    # Freshly constructed modules are in training mode; switch to inference
    # mode so stochastic layers do not randomise the reported probabilities.
    # Harmless if ModelFeatureExtracter already does this.
    model.eval()
    # Pure inference over 192 images: without no_grad the whole autograd
    # graph would be kept alive for no benefit.
    with torch.no_grad():
        logits = mfe(all_perturbed_imgs.to(device))
        probs = torch.softmax(logits, -1)

    exp_dir_name = 'exps_results/top5_probs_exp'
    os.makedirs(exp_dir_name, exist_ok=True)
    make_plot(probs, pert_norm_grid, exp_dir_name)

    for i, img in enumerate(all_perturbed_imgs[::32]):
        # One figure per image so that earlier drawings (including whatever
        # make_plot left behind) are neither painted over nor accumulated.
        plt.figure()
        plt.axis('off')
        plt.imshow(normalize_image(img))
        plt.savefig(os.path.join(
            exp_dir_name,
            f'img_with_inf_norm_{pert_norm_grid[i * 32]}.jpg'))
        plt.close()
def evaluate_perturbation_on_samples(mfe, perturbation, directory):
    """Save the perturbation, the perturbed samples and the model's answers.

    Args:
        mfe: Callable model; invoked on each image batch to obtain logits.
        perturbation: 3-D tensor that supports ``.numpy()``; it is permuted
            with ``(1, 2, 0)`` before display (presumably C x H x W ->
            H x W x C -- confirm).
        directory: Existing directory that receives ``perturbation.jpg``,
            one image per sample (named after ``batch['name'][0]``) and
            ``model_ans.json``.

    The JSON file maps each sample name to its predicted label (looked up in
    ``idx2label``), class id and softmax probability.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Min-max rescale the perturbation to [0, 1] for display.
    p = perturbation.permute(1, 2, 0).numpy()
    mx = p.max()
    mn = p.min()
    plt.figure()
    plt.imshow((p - mn) / (mx - mn))
    plt.savefig(os.path.join(directory, 'perturbation.jpg'), dpi=200)
    plt.close()

    dataloader = get_images_dataloader(
        './random_samples/', 1,
        transforms=get_images_transforms(perturbation))

    answers = {}
    for batch in dataloader:
        # Inference only: do not record an autograd graph.
        with torch.no_grad():
            logits = mfe(batch['image'].to(device))
            probs = torch.softmax(logits, -1)
        prob, target_class = map(lambda x: x.item(),
                                 torch.max(probs, dim=-1))
        answers[batch['name'][0]] = {
            'prediction': idx2label[target_class],
            'class_id': target_class,
            'probability': prob
        }

        img_pert = batch['image'][0]
        mx = img_pert.max()
        mn = img_pert.min()
        img_pert = (img_pert - mn) / (mx - mn)
        # Draw every sample on its own figure; reusing the implicit one
        # stacks each new image on top of all previously drawn ones.
        plt.figure()
        plt.axis('off')
        plt.imshow(img_pert.permute(1, 2, 0))
        plt.savefig(os.path.join(directory, batch['name'][0]), dpi=200)
        plt.close()

    with open(os.path.join(directory, 'model_ans.json'), 'w') as f:
        json.dump(answers, f, indent=4)
def create_adversarial_attack(mfe, q=10, device=torch.device('cpu'), verbose=1):
    """Build an ``AdversarialAttack`` sized for ``mfe``'s input and layer output.

    ``mfe`` is moved to ``device`` as a side effect. A single image is read
    from ``DATA_PATH`` to probe the input shape and the shape returned by
    ``mfe.extract_layer_output``; the leading (batch) dimension is dropped
    from both.

    Args:
        mfe: Feature extractor providing ``to`` and ``extract_layer_output``.
        q: Forwarded to ``AdversarialAttack`` as ``q``.
        device: Device for the probe image and the attack.
        verbose: Forwarded to ``AdversarialAttack`` as ``verbose``.

    Returns:
        A new ``AdversarialAttack`` created with ``pm_maxiter=20``.
    """
    mfe.to(device)

    probe_loader = get_images_dataloader(
        DATA_PATH, 1, transforms=get_images_transforms())
    first_batch = next(iter(probe_loader))
    probe_img = first_batch['image'].to(device)
    layer_output = mfe.extract_layer_output(probe_img)

    return AdversarialAttack(
        probe_img.shape[1:],
        layer_output.shape[1:],
        q=q,
        pm_maxiter=20,
        device=device,
        verbose=verbose,
    )
def get_perturbation_with_norm(model, layer, seed=0, batch_size=64, adv_norm=1):
    """Fit an attack for ``layer`` of ``model`` and return its perturbation.

    Args:
        model: Network wrapped in a ``ModelFeatureExtracter``.
        layer: Layer of ``model`` given to the feature extractor.
        seed: Value passed to ``fix_seed`` before the dataloader is built.
        batch_size: Batch size of the fitting dataloader.
        adv_norm: Forwarded to ``get_perturbation`` as ``adv_norm``.

    Returns:
        The perturbation from ``get_perturbation(adv_norm=adv_norm)``,
        moved to CPU.
    """
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'

    extractor = ModelFeatureExtracter(model, layer)
    attack = create_adversarial_attack(extractor, device=device)

    fix_seed(seed)
    fit_loader = get_images_dataloader(
        DATA_PATH, batch_size, transforms=get_images_transforms())

    print('Generating perturbation...')
    attack.fit(extractor, fit_loader)
    print('Done power method!')

    scaled_pert = attack.get_perturbation(adv_norm=adv_norm)
    return scaled_pert.cpu()
def run_all_experiments():
    """Evaluate each model's perturbation against each of the models.

    Pretrained VGG16, VGG19 and ResNet50 are loaded, one perturbation per
    model is obtained from ``get_all_perturbations``, and every
    (evaluated model, perturbation) pair is scored with
    ``AdversarialAttack.fooling_rate`` by comparing predictions on raw and
    perturbed images from ``DATA_PATH``.

    Returns:
        Mapping ``results[model_name][pert_name] -> fooling rate``.
    """
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    results = defaultdict(dict)

    vgg16 = torchvision.models.vgg16(pretrained=True)
    vgg19 = torchvision.models.vgg19(pretrained=True)
    resnet50 = torchvision.models.resnet50(pretrained=True)
    models = {'vgg16': vgg16, 'vgg19': vgg19, 'resnet50': resnet50}
    # Layer of each network whose output the attack is built on.
    layers = {
        'vgg16': vgg16.features[4],
        'vgg19': vgg19.features[4],
        'resnet50': resnet50.maxpool,
    }

    all_perturbations = get_all_perturbations(models, layers)
    raw_imgs_dataloader = get_images_dataloader(
        DATA_PATH, 128, transforms=get_images_transforms())

    for model_name, model in models.items():
        extractor = ModelFeatureExtracter(model, layers[model_name])
        attack = create_adversarial_attack(extractor, device=device)

        print(f'Getting initial predictions for {model_name}...')
        clean_preds = attack.predict_raw(extractor, raw_imgs_dataloader)

        for pert_name, perturbation in all_perturbations.items():
            perturbed_loader = get_images_dataloader(
                DATA_PATH, 128,
                transforms=get_images_transforms(perturbation))

            print(f'Getting perturbated predictions for {model_name} with perturbation `{pert_name}`...')
            perturbed_preds = attack.predict_raw(extractor, perturbed_loader)

            fooling_rate = AdversarialAttack.fooling_rate(
                clean_preds['predictions'],
                perturbed_preds['predictions'],
            )
            print(f'Got {fooling_rate} fooling_rate for perturbation `{pert_name}` when evaluating on {model_name}')
            results[model_name][pert_name] = fooling_rate

    return results
def get_all_perturbations(models, layers, seed=0, batch_size=64):
    """Fit one attack per model and collect the resulting perturbations.

    Args:
        models: Mapping of model name to network.
        layers: Mapping of model name to the layer given to
            ``ModelFeatureExtracter`` for that model.
        seed: Value passed to ``fix_seed`` before each fitting dataloader
            is created, so every model is seeded identically.
        batch_size: Batch size of the fitting dataloaders.

    Returns:
        dict mapping each model name to its perturbation, moved to CPU.
    """
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'

    print('Getting perturbations for all models...')
    perturbation_by_model = {}
    for model_name in models:
        print(f'Starting power method for {model_name}...')
        extractor = ModelFeatureExtracter(models[model_name], layers[model_name])
        attack = create_adversarial_attack(extractor, device=device)

        fix_seed(seed)
        fit_loader = get_images_dataloader(
            DATA_PATH, batch_size, transforms=get_images_transforms())
        attack.fit(extractor, fit_loader)
        print('Done power method!')

        perturbation = attack.get_perturbation()
        perturbation_by_model[model_name] = perturbation.cpu()

    return perturbation_by_model
def get_model_predictions_on_samples(mfe):
    """Classify the unperturbed images in ``./random_samples/``.

    Args:
        mfe: Callable model; invoked on each single-image batch to obtain
            logits. It is not moved here, so it must already be on the
            device chosen below (CUDA when available).

    Returns:
        dict mapping each sample name (``batch['name'][0]``) to a dict with
        ``prediction`` (label from ``idx2label``), ``class_id`` and
        ``probability`` (softmax probability of the top class).
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    dataloader = get_images_dataloader(
        './random_samples/', 1, transforms=get_images_transforms())

    answers = {}
    for batch in dataloader:
        # Inference only: skip building an autograd graph for every image.
        with torch.no_grad():
            logits = mfe(batch['image'].to(device))
            probs = torch.softmax(logits, -1)
        prob, target_class = map(lambda x: x.item(),
                                 torch.max(probs, dim=-1))
        answers[batch['name'][0]] = {
            'prediction': idx2label[target_class],
            'class_id': target_class,
            'probability': prob
        }
    return answers
def run_all_experiments_with_model(model, layers, model_name):
    """Run the attack experiment for every layer of one model.

    For each entry of ``layers`` an attack is created and run through
    ``run_experiment``, the fooling rate against the model's predictions on
    raw images is printed, and sample images plus predictions are written to
    ``./exps_results/{model_name}_{layer_name}_exp``.

    Args:
        model: Network under attack.
        layers: Non-empty mapping of layer name to layer. The first layer is
            only used to build the extractor for the initial predictions.
        model_name: Name used in log messages and output directory names.

    Returns:
        dict mapping each layer name to the result dict returned by
        ``run_experiment``. Earlier versions collected these results but
        discarded them and returned ``None``.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f'Started exp with {model_name}')

    init_mfe = ModelFeatureExtracter(model, list(layers.values())[0]).to(device)
    raw_imgs_dataloader = get_images_dataloader(
        DATA_PATH, 128, transforms=get_images_transforms())
    model_initial_predictions = create_adversarial_attack(
        init_mfe, device=device).predict_raw(init_mfe, raw_imgs_dataloader)

    exp_results = {}
    for layer_name, layer in layers.items():
        msg = f'Running {model_name} exp with {layer_name} layer'
        mfe = ModelFeatureExtracter(model, layer)
        adv_attack = create_adversarial_attack(mfe, device=device)
        res = run_experiment(adv_attack, mfe, info_msg=msg)
        exp_results[layer_name] = res

        fooling_rate = AdversarialAttack.fooling_rate(
            model_initial_predictions['predictions'],
            res['perturbated_answers']['predictions'])
        print(f'Fooling rate is {fooling_rate}')

        print('Evaluating on samples...')
        exp_dir_name = f'./exps_results/{model_name}_{layer_name}_exp'
        os.makedirs(exp_dir_name, exist_ok=True)
        evaluate_perturbation_on_samples(mfe, res['perturbation'], exp_dir_name)

        init_answers = get_model_predictions_on_samples(mfe)
        with open(os.path.join(exp_dir_name, 'initial_predictions.json'), 'w') as f:
            json.dump(init_answers, f, indent=4)
        print(
            f'Done evaluating. See {exp_dir_name} for samples and model predictions'
        )
        print("=" * 50)

    print(f'Done exp with {model_name}')
    return exp_results
def make_exp():
    """Measure how the fooling rate depends on the attack's fitting batch size.

    For every batch size in ``BATCH_GRID`` a new ``AdversarialAttack`` is
    fitted on images from ``IMAGES_PATH`` against a pretrained VGG19
    (layer ``features[LAYER_FOR_EXTRACTION_NUM]``), and the fooling rate
    between predictions on raw and perturbed images is recorded.

    Written to ``EXP_PATH`` (which must already exist):
        * ``fooling_rate_dependency.png`` -- fooling rate vs. batch size;
        * ``sample_perturbations.png`` -- every 4th perturbation;
        * ``exp_results`` -- JSON with the fooling rates and the batch grid.
    """
    # preparing model
    raw_transforms = get_images_transforms()
    raw_dataloader = get_images_dataloader(IMAGES_PATH, BATCH_SIZE, transforms=raw_transforms)

    model = torchvision.models.vgg19(pretrained=True)
    layer_to_extract_from = model.features[LAYER_FOR_EXTRACTION_NUM]
    mfe = ModelFeatureExtracter(model, layer_to_extract_from).to(DEVICE)

    # One batch is enough to probe the per-sample input and layer-output
    # shapes (the leading batch dimension is dropped).
    input_img = next(iter(raw_dataloader))['image'].to(DEVICE)
    input_shape = input_img.shape[1:]
    output_shape = mfe.extract_layer_output(input_img).shape[1:]

    # running experiment
    fix_seed(999)
    fooling_rates = []
    perturbations = []
    for batch_sz in BATCH_GRID:
        print(f'Trying to attack with batch {batch_sz}')
        start = time()

        raw_dataloader = get_images_dataloader(IMAGES_PATH, batch_sz, transforms=raw_transforms)
        adv_attack = AdversarialAttack(input_shape, output_shape, device=DEVICE, verbose=1)
        adv_attack.fit(mfe, raw_dataloader)
        pert = adv_attack.get_perturbation().cpu()
        perturbations.append(pert)

        pert_transforms = get_images_transforms(perturbation=pert)
        pert_dataloader = get_images_dataloader(IMAGES_PATH, 128, transforms=pert_transforms)
        raw_dataloader_big = get_images_dataloader(IMAGES_PATH, 128, transforms=raw_transforms)

        print('Trying to evaluate raw')
        raw_pred = adv_attack.predict_raw(mfe, raw_dataloader_big)
        print('Trying to evaluate perturbed')
        pert_pred = adv_attack.predict_raw(mfe, pert_dataloader)

        fooling_rate = adv_attack.fooling_rate(raw_pred['predictions'], pert_pred['predictions'])
        fooling_rates.append(fooling_rate)
        print(
            f'Ended attacking with batch {batch_sz}, fooling rate {fooling_rate}, time spent {(time() - start) / 60} mins'
        )
        print()

    # saving experiment results
    plt.plot(BATCH_GRID, fooling_rates)
    # Positional form: the `b=` keyword was renamed to `visible=` in
    # Matplotlib 3.5 and is rejected by later releases.
    plt.grid(True)
    plt.xlabel('batch size')
    plt.ylabel('fooling rate')
    plt.title('fooling rate dependency by batch size')
    plt.savefig(EXP_PATH + '/fooling_rate_dependency.png', dpi=200)

    # Show every 4th perturbation. The number of panels follows the grid
    # length instead of being fixed at 3, so grids of any size work.
    sample_indices = list(range(0, len(BATCH_GRID), 4))
    fig, ax = plt.subplots(1, max(len(sample_indices), 1),
                           figsize=(13, 13), squeeze=False)
    fig.tight_layout()
    for panel, idx in zip(ax[0], sample_indices):
        panel.imshow(normalize_image(perturbations[idx]))
        panel.set_axis_off()
        panel.set_title(f'Batch size {BATCH_GRID[idx]}')
    fig.savefig(EXP_PATH + '/sample_perturbations.png', dpi=200)

    with open(EXP_PATH + '/exp_results', 'w') as f:
        json.dump(
            {
                'fooling_rates': fooling_rates,
                'batch_grid': list(BATCH_GRID)
            }, f)