def test(epoch):
    print('\nEpoch: %d' % epoch)
    global best_acc
    global best_epoch
    global iteration
    # teacher.eval()
    student.eval()
    clean_acc = 0
    adv_acc = 0
    total = 0
    atk_test = PGD(student, eps=8 / 255, alpha=4 / 255, steps=16)
    for batch_idx, (inputs, targets) in enumerate(testloader):
        inputs, targets = inputs.cuda(), targets.cuda()
        acc = clean_accuracy(student, inputs, targets)
        clean_acc += acc
        adv_images = atk_test(inputs, targets)
        acc = clean_accuracy(student, adv_images, targets)
        adv_acc += acc
        total += 1
    print('Clean Test Accuracy: %.4f' % (float(clean_acc) / float(total) * 100))
    print('Perturbed Test Accuracy: %.4f' % (float(adv_acc) / float(total) * 100))
    acc = clean_acc + adv_acc
    if acc > best_acc:
        print('saving')
        torch.save(student, './result2/WRN28_2_PGD.pth')
        best_acc = acc
        best_epoch = epoch
    print('best epoch : %d | best Acc : %.3f | factor - with KD' %
          (best_epoch, best_acc))
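
# Minimal driver sketch for `test(epoch)` (illustrative only): the function
# reads `student`, `testloader`, `best_acc`, `best_epoch` and `iteration` as
# globals, so they must be initialised first. `build_student` and `train` are
# hypothetical stand-ins, not part of this file.
#
#   best_acc, best_epoch, iteration = 0, 0, 0
#   student = build_student().cuda()   # e.g. a WRN-28-2, matching the checkpoint name
#   for epoch in range(200):
#       train(epoch)                   # hypothetical training step
#       test(epoch)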

def test_clean_acc_jsons_fast(self):
    config = get_test_config()
    n_ex = 200
    x_test, y_test = load_cifar10(n_ex, config['data_dir'])
    for norm in model_dicts.keys():
        print('Test models robust wrt {}'.format(norm))
        models = list(model_dicts[norm].keys())
        models.remove(
            'Standard')  # removed temporarily to avoid an error for pytorch 1.4.0
        n_tests_passed = 0
        for model_name in models:
            model = load_model(model_name, config['model_dir'],
                               norm).cuda().eval()
            acc = clean_accuracy(model,
                                 x_test,
                                 y_test,
                                 batch_size=config['batch_size'])
            self.assertGreater(round(acc * 100., 2), 70.0)
            success = round(acc * 100., 2) > 70.0
            n_tests_passed += success
            print('{}: clean accuracy {:.2%} (on {} examples), test passed: {}'
                  .format(model_name, acc, n_ex, success))
        print('Test is passed for {}/{} models.'.format(
            n_tests_passed, len(models)))

def _accuracy_computation(success_criterion: Callable[[str, float, str, str],
                                                      bool],
                          n_ex: int) -> None:
    config = get_test_config()
    device = torch.device(config["device"])
    tot_models = 0
    n_tests_passed = 0
    for dataset, dataset_dict in model_dicts.items():
        print(f"Test models trained on {dataset.value}")
        x_test, y_test = load_clean_dataset(dataset, n_ex, config["data_dir"])
        for threat_model, threat_model_dict in dataset_dict.items():
            print(f"Test models robust wrt {threat_model.value}")
            models = list(threat_model_dict.keys())
            tot_models += len(models)
            for model_name in models:
                model = load_model(model_name, config["model_dir"], dataset,
                                   threat_model).to(device)
                acc = clean_accuracy(model,
                                     x_test,
                                     y_test,
                                     batch_size=config["batch_size"],
                                     device=device)
                success = success_criterion(model_name, acc, dataset.value,
                                            threat_model.value)
                n_tests_passed += int(success)
                print(f"{model_name}: clean accuracy {acc:.2%} (on {n_ex} examples),"
                      f" test passed: {success}")
    print(f"Test is passed for {n_tests_passed}/{tot_models} models.")
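
# Illustrative `success_criterion` for `_accuracy_computation` above. The
# callable receives (model_name, accuracy, dataset, threat_model) and returns
# whether the test passes; the 70% threshold is an arbitrary example, not the
# criterion used by the actual tests.
def _example_threshold_criterion(model_name: str, acc: float, dataset: str,
                                 threat_model: str) -> bool:
    # Pass when clean accuracy exceeds 70% on the evaluated examples.
    return round(acc * 100., 2) > 70.0

# e.g.: _accuracy_computation(_example_threshold_criterion, n_ex=200)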

def corruptions_evaluation(batch_size: int, data_dir: str,
                           dataset: BenchmarkDataset, device: torch.device,
                           model: nn.Module, n_examples: int, to_disk: bool,
                           model_name: Optional[str]) -> float:
    if to_disk and model_name is None:
        raise ValueError(
            "If `to_disk` is True, `model_name` should be specified.")

    model_results_dict: Dict[Tuple[str, int], float] = {}
    for corruption in tqdm(CORRUPTIONS):
        for severity in range(1, 6):
            x_corrupt, y_corrupt = load_corruptions_dataset(
                dataset, n_examples, severity, data_dir,
                corruptions=[corruption])
            corruption_severity_accuracy = clean_accuracy(
                model, x_corrupt, y_corrupt, batch_size=batch_size,
                device=device)
            model_results_dict[(corruption,
                                severity)] = corruption_severity_accuracy

    model_results = pd.DataFrame(model_results_dict, index=[model_name])
    adv_accuracy = model_results.values.mean()

    if not to_disk:
        return adv_accuracy

    # Save unaggregated results on disk
    existing_results_path = Path(
        "model_info") / dataset.value / "corruptions" / "unaggregated_results.csv"
    if not existing_results_path.parent.exists():
        existing_results_path.parent.mkdir(parents=True, exist_ok=True)
    try:
        existing_results = pd.read_csv(existing_results_path,
                                       header=[0, 1],
                                       index_col=0)
        existing_results.columns = existing_results.columns.set_levels([
            existing_results.columns.levels[0],
            existing_results.columns.levels[1].astype(int)
        ])
        full_results = pd.concat([existing_results, model_results])
    except FileNotFoundError:
        full_results = model_results
    full_results.to_csv(existing_results_path)

    return adv_accuracy
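
# Sketch of an invocation of `corruptions_evaluation` (model choice and paths
# are assumptions for illustration, not prescribed by this module):
#
#   device = torch.device("cuda:0")
#   model = load_model("Standard", "./models", BenchmarkDataset.cifar_10,
#                      ThreatModel.corruptions).to(device)
#   mean_acc = corruptions_evaluation(
#       batch_size=256, data_dir="./data", dataset=BenchmarkDataset.cifar_10,
#       device=device, model=model, n_examples=1000, to_disk=False,
#       model_name=None)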

def test_clean_acc_jsons_exact(self):
    config = get_test_config()
    device = torch.device(config['device'])
    n_ex = 10000
    x_test, y_test = load_cifar10(n_ex, config['data_dir'])
    for norm in model_dicts.keys():
        print('Test models robust wrt {}'.format(norm))
        models = list(model_dicts[norm].keys())
        models.remove(
            'Standard')  # removed temporarily to avoid an error for pytorch 1.4.0
        n_tests_passed = 0
        for model_name in models:
            model = load_model(model_name, config['model_dir'],
                               norm).to(device)
            acc = clean_accuracy(model,
                                 x_test,
                                 y_test,
                                 batch_size=config['batch_size'],
                                 device=device)
            with open('./model_info/{}/{}.json'.format(norm, model_name),
                      'r') as model_info:
                json_dict = json.load(model_info)
            success = abs(round(acc * 100., 2) -
                          float(json_dict['clean_acc'])) <= 0.05
            print('{}: clean accuracy {:.2%}, test passed: {}'.format(
                model_name, acc, success))
            self.assertLessEqual(
                abs(round(acc * 100., 2) - float(json_dict['clean_acc'])),
                0.05)
            n_tests_passed += success
        print('Test is passed for {}/{} models.'.format(
            n_tests_passed, len(models)))
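
# For reference, an illustrative (not verbatim) `model_info/{norm}/{name}.json`
# entry as read above; `clean_acc` is stored as a percentage, so it is compared
# against `round(acc * 100., 2)` with a 0.05-point tolerance:
#
#   {
#       "clean_acc": "84.92",
#       ...
#   }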

    try:
        if model.training:
            warnings.warn(Warning("The given model is *not* in eval mode."))
    except AttributeError:
        warnings.warn(
            Warning("It is not possible to assess if the model is in eval mode"))

    dataset_: BenchmarkDataset = BenchmarkDataset(dataset)
    threat_model_: ThreatModel = ThreatModel(threat_model)

    device = device or torch.device("cpu")
    model = model.to(device)

    clean_x_test, clean_y_test = load_clean_dataset(dataset_, None, data_dir)

    accuracy = clean_accuracy(model,
                              clean_x_test,
                              clean_y_test,
                              batch_size=batch_size,
                              device=device)
    print(f'Clean accuracy: {accuracy:.2%}')

    if threat_model_ in {ThreatModel.Linf, ThreatModel.L2}:
        if eps is None:
            raise ValueError(
                "If the threat model is L2 or Linf, `eps` must be specified.")

        adversary = AutoAttack(model,
                               norm=threat_model_.value,
                               eps=eps,
                               version='standard',
                               device=device)
        x_adv = adversary.run_standard_evaluation(clean_x_test, clean_y_test)
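
        # Plausible continuation (a sketch, not part of the fragment above):
        # evaluate the same model on the adversarial examples produced by
        # AutoAttack, reusing the `clean_accuracy` helper from the clean pass.
        adv_accuracy = clean_accuracy(model,
                                      x_adv,
                                      clean_y_test,
                                      batch_size=batch_size,
                                      device=device)
        print(f'Adversarial accuracy: {adv_accuracy:.2%}')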

    parser.add_argument('--device',
                        type=str,
                        default='cuda:0',
                        help='device to use for computations')
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()
    device = torch.device(args.device)

    x_test, y_test = load_cifar10(args.n_ex, args.data_dir)
    x_test, y_test = x_test.to(device), y_test.to(device)

    model = load_model(args.model_name, args.model_dir,
                       args.norm).to(device).eval()

    acc = clean_accuracy(model,
                         x_test,
                         y_test,
                         batch_size=args.batch_size,
                         device=device)
    print('Clean accuracy: {:.2%}'.format(acc))

    adversary = AutoAttack(model,
                           norm=args.norm,
                           eps=args.eps,
                           version='standard',
                           device=device)
    x_adv = adversary.run_standard_evaluation(x_test, y_test)
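
    # An assumed invocation of this script (the file name and model are
    # illustrative; eps 0.0314 corresponds to the common 8/255 budget):
    #   python eval_cifar10.py --model_name Carmon2019Unlabeled \
    #       --norm Linf --eps 0.0314 --n_ex 1000 --device cuda:0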