Example #1
def test(epoch):
    print('\nEpoch: %d' % epoch)
    global best_acc
    global best_epoch
    global iteration
    # teacher.eval()
    student.eval()

    clean_acc = 0
    adv_acc = 0
    total = 0
    atk_test = PGD(student, eps=8 / 255, alpha=4 / 255, steps=16)
    for batch_idx, (inputs, targets) in enumerate(testloader):
        inputs, targets = inputs.cuda(), targets.cuda()
        # accuracy on the clean inputs
        acc = clean_accuracy(student, inputs, targets)
        clean_acc += acc

        # accuracy on PGD adversarial examples
        adv_images = atk_test(inputs, targets)
        acc = clean_accuracy(student, adv_images, targets)
        adv_acc += acc

        total += 1

    print('Clean Test Accuracy: %.4f' % (float(clean_acc) / float(total) * 100))
    print('Perturbed Test Accuracy: %.4f' % (float(adv_acc) / float(total) * 100))

    acc = clean_acc + adv_acc
    if acc > best_acc:
        print('saving')
        torch.save(student, './result2/WRN28_2_PGD.pth')
        best_acc = acc
        best_epoch = epoch
    print('best epoch: %d | best acc: %.3f | factor - with KD' % (
        best_epoch, best_acc))
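
The snippet above depends on globals defined elsewhere in its training script (student, testloader, best_acc). A minimal self-contained sketch of the same clean-vs-PGD evaluation, assuming robustbench's load_cifar10 and clean_accuracy plus torchattacks' PGD; the checkpoint path is illustrative:

import torch
from robustbench.data import load_cifar10
from robustbench.utils import clean_accuracy
from torchattacks import PGD

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Illustrative checkpoint saved with torch.save(model, path);
# on torch >= 2.6 pass weights_only=False to torch.load.
model = torch.load('./result2/WRN28_2_PGD.pth', map_location=device).eval()

x_test, y_test = load_cifar10(n_examples=256, data_dir='./data')
x_test, y_test = x_test.to(device), y_test.to(device)

# accuracy on unmodified test images
clean_acc = clean_accuracy(model, x_test, y_test, batch_size=128, device=device)

# accuracy on PGD adversarial examples crafted against the same model
atk = PGD(model, eps=8 / 255, alpha=4 / 255, steps=16)
x_adv = atk(x_test, y_test)
adv_acc = clean_accuracy(model, x_adv, y_test, batch_size=128, device=device)

print('Clean: {:.2%} | PGD: {:.2%}'.format(clean_acc, adv_acc))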
Example #2

    def test_clean_acc_jsons_fast(self):
        config = get_test_config()
        n_ex = 200
        x_test, y_test = load_cifar10(n_ex, config['data_dir'])

        for norm in model_dicts.keys():
            print('Test models robust wrt {}'.format(norm))
            models = list(model_dicts[norm].keys())
            models.remove(
                'Standard'
            )  # removed temporarily to avoid an error for pytorch 1.4.0

            n_tests_passed = 0
            for model_name in models:
                model = load_model(model_name, config['model_dir'],
                                   norm).cuda().eval()

                acc = clean_accuracy(model,
                                     x_test,
                                     y_test,
                                     batch_size=config['batch_size'])

                self.assertGreater(round(acc * 100., 2), 70.0)
                success = round(acc * 100., 2) > 70.0
                n_tests_passed += success
                print(
                    '{}: clean accuracy {:.2%} (on {} examples), test passed: {}'
                    .format(model_name, acc, n_ex, success))

            print('Test is passed for {}/{} models.'.format(
                n_tests_passed, len(models)))
Example #3
def _accuracy_computation(success_criterion: Callable[[str, float, str, str],
                                                      bool],
                          n_ex: int) -> None:
    config = get_test_config()
    device = torch.device(config["device"])

    tot_models = 0
    n_tests_passed = 0

    for dataset, dataset_dict in model_dicts.items():
        print(f"Test models trained on {dataset.value}")
        x_test, y_test = load_clean_dataset(dataset, n_ex, config["data_dir"])

        for threat_model, threat_model_dict in dataset_dict.items():
            print(f"Test models robust wrt {threat_model.value}")
            models = list(threat_model_dict.keys())
            tot_models += len(models)

            for model_name in models:
                model = load_model(model_name, config["model_dir"], dataset,
                                   threat_model).to(device)
                acc = clean_accuracy(model,
                                     x_test,
                                     y_test,
                                     batch_size=config["batch_size"],
                                     device=device)

                success = success_criterion(model_name, acc, dataset.value,
                                            threat_model.value)
                n_tests_passed += int(success)
                print(
                    f"{model_name}: clean accuracy {acc:.2%} (on {n_ex} examples),"
                    f" test passed: {success}")

    print(f"Test is passed for {n_tests_passed}/{tot_models} models.")
Example #4
def corruptions_evaluation(batch_size: int, data_dir: str,
                           dataset: BenchmarkDataset, device: torch.device,
                           model: nn.Module, n_examples: int, to_disk: bool,
                           model_name: Optional[str]) -> float:

    if to_disk and model_name is None:
        raise ValueError(
            "If `to_disk` is True, `model_name` should be specified.")

    model_results_dict: Dict[Tuple[str, int], float] = {}
    for corruption in tqdm(CORRUPTIONS):
        for severity in range(1, 6):
            x_corrupt, y_corrupt = load_corruptions_dataset(
                dataset,
                n_examples,
                severity,
                data_dir,
                corruptions=[corruption])

            corruption_severity_accuracy = clean_accuracy(
                model,
                x_corrupt,
                y_corrupt,
                batch_size=batch_size,
                device=device)

            model_results_dict[(corruption,
                                severity)] = corruption_severity_accuracy

    model_results = pd.DataFrame(model_results_dict, index=[model_name])
    adv_accuracy = model_results.values.mean()

    if not to_disk:
        return adv_accuracy

    # Save unaggregated results on disk
    existing_results_path = Path(
        "model_info"
    ) / dataset.value / "corruptions" / "unaggregated_results.csv"
    if not existing_results_path.parent.exists():
        existing_results_path.parent.mkdir(parents=True, exist_ok=True)
    try:
        existing_results = pd.read_csv(existing_results_path,
                                       header=[0, 1],
                                       index_col=0)
        existing_results.columns = existing_results.columns.set_levels([
            existing_results.columns.levels[0],
            existing_results.columns.levels[1].astype(int)
        ])
        full_results = pd.concat([existing_results, model_results])
    except FileNotFoundError:
        full_results = model_results
    full_results.to_csv(existing_results_path)

    return adv_accuracy
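
A hypothetical invocation of corruptions_evaluation, assuming robustbench's load_model and the BenchmarkDataset/ThreatModel enums used above; the model name, paths, and sample count are illustrative:

import torch
from robustbench.model_zoo.enums import BenchmarkDataset, ThreatModel
from robustbench.utils import load_model

device = torch.device('cuda:0')
dataset = BenchmarkDataset.cifar_10
# 'Standard' is an illustrative model name from the robustbench zoo.
model = load_model('Standard', './models', dataset,
                   ThreatModel.corruptions).to(device).eval()

mean_corruption_acc = corruptions_evaluation(batch_size=256,
                                             data_dir='./data',
                                             dataset=dataset,
                                             device=device,
                                             model=model,
                                             n_examples=1000,
                                             to_disk=False,
                                             model_name=None)
print(f'Mean accuracy over corruptions: {mean_corruption_acc:.2%}')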
Example #5
    def test_clean_acc_jsons_exact(self):
        config = get_test_config()
        device = torch.device(config['device'])
        n_ex = 10000
        x_test, y_test = load_cifar10(n_ex, config['data_dir'])

        for norm in model_dicts.keys():
            print('Test models robust wrt {}'.format(norm))
            models = list(model_dicts[norm].keys())
            models.remove(
                'Standard'
            )  # removed temporarily to avoid an error for pytorch 1.4.0

            n_tests_passed = 0
            for model_name in models:
                model = load_model(model_name, config['model_dir'],
                                   norm).to(device)

                acc = clean_accuracy(model,
                                     x_test,
                                     y_test,
                                     batch_size=config['batch_size'],
                                     device=device)
                with open('./model_info/{}/{}.json'.format(norm, model_name),
                          'r') as model_info:
                    json_dict = json.load(model_info)

                success = abs(
                    round(acc * 100., 2) -
                    float(json_dict['clean_acc'])) <= 0.05
                print('{}: clean accuracy {:.2%}, test passed: {}'.format(
                    model_name, acc, success))
                self.assertLessEqual(
                    abs(round(acc * 100., 2) - float(json_dict['clean_acc'])),
                    0.05)
                n_tests_passed += success

            print('Test is passed for {}/{} models.'.format(
                n_tests_passed, len(models)))
Example #6
    try:
        if model.training:
            warnings.warn(Warning("The given model is *not* in eval mode."))
    except AttributeError:
        warnings.warn(
            Warning("It is not possible to assess if the model is in eval mode."))

    dataset_: BenchmarkDataset = BenchmarkDataset(dataset)
    threat_model_: ThreatModel = ThreatModel(threat_model)

    device = device or torch.device("cpu")
    model = model.to(device)

    clean_x_test, clean_y_test = load_clean_dataset(dataset_, None, data_dir)

    accuracy = clean_accuracy(model,
                              clean_x_test,
                              clean_y_test,
                              batch_size=batch_size,
                              device=device)
    print(f'Clean accuracy: {accuracy:.2%}')

    if threat_model_ in {ThreatModel.Linf, ThreatModel.L2}:
        if eps is None:
            raise ValueError(
                "If the threat model is L2 or Linf, `eps` must be specified.")

        adversary = AutoAttack(model,
                               norm=threat_model_.value,
                               eps=eps,
                               version='standard',
                               device=device)
        x_adv = adversary.run_standard_evaluation(clean_x_test, clean_y_test)
Example #7
    parser.add_argument('--device',
                        type=str,
                        default='cuda:0',
                        help='device to use for computations')
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()
    device = torch.device(args.device)

    x_test, y_test = load_cifar10(args.n_ex, args.data_dir)
    x_test, y_test = x_test.to(device), y_test.to(device)
    model = load_model(args.model_name, args.model_dir,
                       args.norm).to(device).eval()

    acc = clean_accuracy(model,
                         x_test,
                         y_test,
                         batch_size=args.batch_size,
                         device=device)
    print('Clean accuracy: {:.2%}'.format(acc))

    adversary = AutoAttack(model,
                           norm=args.norm,
                           eps=args.eps,
                           version='standard',
                           device=device)
    x_adv = adversary.run_standard_evaluation(x_test, y_test)
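
The script stops after generating the adversarial examples. Assuming run_standard_evaluation returns x_adv in the same order as y_test (AutoAttack keeps the original indexing), robust accuracy can be reported with the same helper, continuing inside the __main__ block:

    # score the model on the adversarial examples produced by AutoAttack
    robust_acc = clean_accuracy(model,
                                x_adv,
                                y_test,
                                batch_size=args.batch_size,
                                device=device)
    print('Robust accuracy: {:.2%}'.format(robust_acc))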