Пример #1
0
def test_filter_by_str_slice():
    """Check string-slice filtering of the MNIST test split.

    ``datasets.filter_by_str_slice`` must raise ``ValueError`` for the slice
    ``"[10000:]"`` on a dataset of size 10000.  Valid slices passed through the
    ``index`` argument of ``datasets.mnist`` must yield exactly the matching
    labels of the unfiltered dataset.
    """
    dataset_size = 10000
    tf_ds = datasets.mnist(
        "test", shuffle_files=False, preprocessing_fn=None, framework="tf"
    )

    with pytest.raises(ValueError):
        datasets.filter_by_str_slice(tf_ds, "[10000:]", dataset_size)

    reference = datasets.mnist("test", shuffle_files=False, preprocessing_fn=None)
    assert reference.size == dataset_size
    # Labels returned by the first 10 draws from the unfiltered dataset.
    first_labels = [next(reference)[1] for _ in range(10)]
    ys = np.hstack(first_labels)

    # Slice string handed to the loader -> equivalent slice over `ys`.
    slice_cases = {
        "[:5]": slice(None, 5),
        "[3:8]": slice(3, 8),
        "[0:5]": slice(0, 5),
    }
    for index, bounds in slice_cases.items():
        target = ys[bounds]
        filtered = datasets.mnist(
            "test", shuffle_files=False, preprocessing_fn=None, index=index
        )
        assert filtered.size == len(target)
        ys_index = np.hstack([labels for _, labels in filtered])
        assert (target == ys_index).all()
Пример #2
0
def test_tf1_mnist():
    """Fit the ``tf_graph`` MNIST baseline for one epoch and check accuracy.

    The model from ``armory.baseline_models.tf_graph.mnist`` is trained on the
    "train" split and must exceed 0.9 mean per-batch accuracy on the "test"
    split.
    """
    module = import_module("armory.baseline_models.tf_graph.mnist")
    classifier = module.get_art_model(model_kwargs={}, wrapper_kwargs={})

    train_dataset = datasets.mnist(
        split="train", epochs=1, batch_size=600, dataset_dir=DATASET_DIR
    )
    test_dataset = datasets.mnist(
        split="test", epochs=1, batch_size=100, dataset_dir=DATASET_DIR
    )

    classifier.fit_generator(train_dataset, nb_epochs=1)

    num_batches = test_dataset.batches_per_epoch
    batch_accuracies = []
    for _ in range(num_batches):
        x, y = test_dataset.get_batch()
        predicted = np.argmax(classifier.predict(x), axis=1)
        # Fraction of this batch whose predicted class matches the label.
        batch_accuracies.append(np.sum(predicted == y) / len(y))
    assert (sum(batch_accuracies) / num_batches) > 0.9
def test_tf_pytorch_equality():
    """Verify the "tf" and "pytorch" MNIST loaders yield identical batches.

    Both loaders are created over the "test" split with the same batch size
    and ``shuffle_files=False``, then walked in lockstep.  Every pair of
    batches must match exactly in both images and labels.

    Raises:
        AssertionError: if any batch differs, or if no batch was compared.
    """
    batch_size = 10
    # Everything except `framework` is shared, so the two loaders cannot
    # drift apart through a copy/paste edit.
    shared_kwargs = dict(
        split_type="test",
        batch_size=batch_size,
        dataset_dir=DATASET_DIR,
        shuffle_files=False,
    )
    ds_tf = datasets.mnist(framework="tf", **shared_kwargs)
    ds_pytorch = iter(datasets.mnist(framework="pytorch", **shared_kwargs))

    batches_compared = 0
    for ex_tf, ex_pytorch in zip(ds_tf, ds_pytorch):
        img_tf = ex_tf[0].numpy()
        label_tf = ex_tf[1].numpy()
        img_pytorch = ex_pytorch[0].numpy()
        label_pytorch = ex_pytorch[1].numpy()

        # A maximum absolute difference of zero means element-wise equality.
        assert np.amax(np.abs(img_tf - img_pytorch)) == 0
        assert np.amax(np.abs(label_tf - label_pytorch)) == 0
        batches_compared += 1

    # zip() stops silently at the shorter iterable, so without this check the
    # test would pass even when one of the loaders produced no batches at all.
    assert batches_compared > 0, "no batches were compared"
Пример #4
0
def test_filter_by_index():
    """Check list-index filtering of the MNIST test split.

    ``datasets.filter_by_index`` must raise ``ValueError`` for an empty list,
    a list containing a negative value, and a list containing a string.
    Valid index lists passed through the ``index`` argument of
    ``datasets.mnist`` must yield the labels at the sorted, de-duplicated
    positions of the unfiltered dataset.
    """
    dataset_size = 10000
    tf_ds = datasets.mnist(
        "test", shuffle_files=False, preprocessing_fn=None, framework="tf"
    )

    invalid_indices = ([], [-4, 5, 6], ["1:3"])
    for bad_index in invalid_indices:
        with pytest.raises(ValueError):
            datasets.filter_by_index(tf_ds, bad_index, dataset_size)

    reference = datasets.mnist("test", shuffle_files=False, preprocessing_fn=None)
    assert reference.size == dataset_size
    # Labels returned by the first 10 draws from the unfiltered dataset.
    ys = np.hstack([next(reference)[1] for _ in range(10)])

    valid_indices = (
        [1, 3, 6, 5],
        [0],
        [6, 7, 8, 9, 9, 8, 7, 6],
        list(range(10)),
    )
    for requested in valid_indices:
        filtered = datasets.mnist(
            "test", shuffle_files=False, preprocessing_fn=None, index=requested
        )
        # The expectation is built from unique positions in ascending order,
        # regardless of how the request was written.
        positions = sorted(set(requested))
        assert filtered.size == len(positions)
        ys_index = np.hstack([labels for _, labels in filtered])
        assert (ys[positions] == ys_index).all()
Пример #5
0
def test_tf_generator():
    """Requesting the "tf" framework must return a ``tf.data.Dataset``."""
    mnist_kwargs = dict(
        split_type="train",
        epochs=1,
        batch_size=16,
        dataset_dir=DATASET_DIR,
        framework="tf",
    )
    dataset = datasets.mnist(**mnist_kwargs)
    assert isinstance(dataset, tf.data.Dataset)
Пример #6
0
def test_pytorch_generator():
    """Requesting the "pytorch" framework must raise ``NotImplementedError``."""
    mnist_kwargs = dict(
        split_type="train",
        epochs=1,
        batch_size=16,
        dataset_dir=DATASET_DIR,
        framework="pytorch",
    )
    # The return value is irrelevant; only the raised exception is checked.
    with pytest.raises(NotImplementedError):
        datasets.mnist(**mnist_kwargs)
Пример #7
0
def test_numpy_generator():
    """With the "numpy" framework, a batch's inputs must be an ``np.ndarray``."""
    mnist_kwargs = dict(
        split="train",
        epochs=1,
        batch_size=16,
        dataset_dir=DATASET_DIR,
        framework="numpy",
    )
    dataset = datasets.mnist(**mnist_kwargs)
    # Only the inputs are inspected; the labels are discarded.
    x, _ = dataset.get_batch()
    assert isinstance(x, np.ndarray)
Пример #8
0
def test_generator():
    """Iterate the MNIST "train" generator and check the first batch's shapes.

    The first batch must have inputs of shape ``(batch_size, 28, 28, 1)`` and
    labels of shape ``(batch_size,)``.

    Raises:
        AssertionError: if a shape is wrong or the generator yields no batch.
    """
    batch_size = 600
    # The listed size is not used by this test; only iteration is exercised.
    for split, _size in [("train", 60000)]:
        dataset = datasets.mnist(
            split=split, epochs=1, batch_size=batch_size, dataset_dir=DATASET_DIR,
        )

        batches_checked = 0
        for x, y in dataset:
            assert x.shape == (batch_size, 28, 28, 1)
            assert y.shape == (batch_size,)
            batches_checked += 1
            break  # one batch is enough to validate the shapes

        # A bare for/break passes vacuously when the generator yields nothing,
        # so require that the shape assertions above actually ran.
        assert batches_checked == 1, f"{split} generator yielded no batches"
Пример #9
0
def test_parse_split_index_ordering():
    """
    Ensure that output order is deterministic for multiple splits

    The inputs at the positions listed in ``index`` are first collected by
    walking the unshuffled "test" split.  The same positions are then
    requested through a ``"test[[...]]"`` split string, and both sequences
    must match element by element.
    """
    index = [5, 37, 38, 56, 111]  # test has max index 9999
    split = "test"
    kwargs = dict(epochs=1, batch_size=1, dataset_dir=DATASET_DIR, shuffle_files=False)

    # Loop invariants, computed once instead of on every iteration.
    wanted = set(index)
    last_wanted = max(index)

    ds = datasets.mnist(split=split, **kwargs)
    fixed_order = []
    for i, (x, _) in enumerate(ds):
        if i in wanted:
            fixed_order.append(x)
        if i >= last_wanted:
            break  # nothing past the largest requested position is needed

    sliced_split = f"{split}[{index}]"
    ds = datasets.mnist(split=sliced_split, **kwargs)
    output_x = [x for (x, _) in ds]
    assert len(fixed_order) == len(output_x)
    for x_i, x_j in zip(fixed_order, output_x):
        assert (x_i == x_j).all()
Пример #10
0
def test_tf_generator():
    """Requesting the "tf" framework must return a TF ``Dataset`` (v1 or v2).

    Both preprocessing hooks are explicitly disabled for this check.
    """
    mnist_kwargs = dict(
        split="train",
        epochs=1,
        batch_size=16,
        dataset_dir=DATASET_DIR,
        framework="tf",
        preprocessing_fn=None,
        fit_preprocessing_fn=None,
    )
    dataset = datasets.mnist(**mnist_kwargs)

    accepted_types = (tf.compat.v2.data.Dataset, tf.compat.v1.data.Dataset)
    assert isinstance(dataset, accepted_types)
Пример #11
0
def test_mnist():
    """Check size, batching metadata and batch shapes of both MNIST splits.

    For "train" (60000 examples) and "test" (10000 examples) the generator
    must report the expected size, batch size and number of batches per
    epoch, and ``get_batch`` must return arrays of the expected shapes.
    """
    batch_size = 600
    expected_sizes = {"train": 60000, "test": 10000}
    for split, size in expected_sizes.items():
        dataset = datasets.mnist(
            split=split, epochs=1, batch_size=batch_size, dataset_dir=DATASET_DIR,
        )
        assert dataset.size == size
        assert dataset.batch_size == batch_size

        # Ceiling division: a trailing partial batch counts as one more batch.
        full_batches, leftover = divmod(size, batch_size)
        expected_batches = full_batches + (1 if leftover else 0)
        assert dataset.batches_per_epoch == expected_batches

        x, y = dataset.get_batch()
        assert x.shape == (batch_size, 28, 28, 1)
        assert y.shape == (batch_size,)
Пример #12
0
def test_pytorch_generator_mnist():
    """Check type, dtypes and shapes of the "pytorch" MNIST loader.

    The loader must be a ``torch.utils.data.DataLoader`` whose first batch
    has int64 labels of shape ``(batch_size,)`` and uint8 images of shape
    ``(batch_size, 28, 28, 1)``.
    """
    batch_size = 16
    expected_label_shape = (batch_size,)
    expected_image_shape = (batch_size, 28, 28, 1)

    dataset = datasets.mnist(
        split_type="train",
        epochs=1,
        batch_size=batch_size,
        dataset_dir=DATASET_DIR,
        framework="pytorch",
    )
    assert isinstance(dataset, torch.utils.data.DataLoader)

    first_batch = next(iter(dataset))
    images, labels = first_batch

    assert labels.dtype == torch.int64
    assert labels.shape == expected_label_shape

    assert images.dtype == torch.uint8
    assert images.shape == expected_image_shape
Пример #13
0
def test_keras_mnist_pretrained():
    """Evaluate the Keras MNIST baseline with pretrained weights.

    The weights file "undefended_mnist_5epochs.h5" is obtained through
    ``maybe_download_weights_from_s3``; the resulting model must exceed 0.98
    mean per-batch accuracy on the "test" split.
    """
    module = import_module("armory.baseline_models.keras.mnist")
    weights_path = maybe_download_weights_from_s3("undefended_mnist_5epochs.h5")
    classifier = module.get_art_model(
        model_kwargs={}, wrapper_kwargs={}, weights_path=weights_path
    )

    test_dataset = datasets.mnist(
        split="test", epochs=1, batch_size=100, dataset_dir=DATASET_DIR,
    )

    num_batches = test_dataset.batches_per_epoch
    batch_accuracies = []
    for _ in range(num_batches):
        x, y = test_dataset.get_batch()
        predicted = np.argmax(classifier.predict(x), axis=1)
        # Fraction of this batch whose predicted class matches the label.
        batch_accuracies.append(np.sum(predicted == y) / len(y))
    assert (sum(batch_accuracies) / num_batches) > 0.98
Пример #14
0
def test_pytorch_generator_epochs():
    """Check the "pytorch" loader over two epochs of the MNIST test split.

    With ``epochs=2`` and a batch size of 10 the loader must yield exactly
    2000 batches, and the labels of batch 0 and batch 1000 (zero-based) must
    not be all equal.
    """
    batch_size = 10
    dataset = datasets.mnist(
        split_type="test",
        epochs=2,
        batch_size=batch_size,
        dataset_dir=DATASET_DIR,
        framework="pytorch",
    )

    # `cnt` is the one-based position of the current batch; it stays 0 if the
    # loader yields nothing, so the count assertion below still fails cleanly.
    cnt = 0
    for cnt, (_, labels) in enumerate(dataset, start=1):
        if cnt == 1:
            first_batch = labels
        elif cnt == 1001:
            # Presumably the first batch of the second epoch - TODO confirm.
            second_batch = labels

    assert cnt == 2000
    assert not torch.all(torch.eq(first_batch, second_batch))
Пример #15
0
def test_pytorch_mnist_pretrained():
    """Evaluate the PyTorch MNIST baseline with pretrained weights.

    The model is built with ``weights_file="undefended_mnist_5epochs.pth"``
    and the module's own ``preprocessing_fn`` is handed to the dataset; the
    model must exceed 0.98 mean per-batch accuracy on the "test" split.
    """
    module = import_module("armory.baseline_models.pytorch.mnist")
    classifier = module.get_art_model(
        model_kwargs={}, wrapper_kwargs={}, weights_file="undefended_mnist_5epochs.pth"
    )
    preprocessing_fn = module.preprocessing_fn

    test_dataset = datasets.mnist(
        split_type="test",
        epochs=1,
        batch_size=100,
        dataset_dir=DATASET_DIR,
        preprocessing_fn=preprocessing_fn,
    )

    num_batches = test_dataset.batches_per_epoch
    batch_accuracies = []
    for _ in range(num_batches):
        x, y = test_dataset.get_batch()
        predicted = np.argmax(classifier.predict(x), axis=1)
        # Fraction of this batch whose predicted class matches the label.
        batch_accuracies.append(np.sum(predicted == y) / len(y))
    assert (sum(batch_accuracies) / num_batches) > 0.98