示例#1
0
def test_mixup_batch_fail3():
    """
    Negative test for the MixUpBatch op.

    MixUpBatch is mapped over the "image" column only, so iterating the
    pipeline is expected to raise a RuntimeError whose message states that
    both the images and labels columns are required.
    """
    logger.info("test_mixup_batch_fail3")

    # Reference pipeline: same samples, batched but not mixed.
    ds_original = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    ds_original = ds_original.batch(5, drop_remainder=True)

    images_original = None
    for batch_no, (batch_images, _) in enumerate(ds_original):
        images_original = (batch_images if batch_no == 0
                           else np.append(images_original, batch_images, axis=0))

    # MixUp pipeline: one-hot labels, batch, then MixUpBatch on "image" alone.
    data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    data1 = data1.map(input_columns=["label"],
                      operations=data_trans.OneHot(num_classes=10))
    mixup_batch_op = vision.MixUpBatch()
    data1 = data1.batch(5, drop_remainder=True)
    data1 = data1.map(input_columns=["image"], operations=mixup_batch_op)

    with pytest.raises(RuntimeError) as error:
        images_mixup = np.array([])
        for batch_no, (batch_images, _) in enumerate(data1):
            images_mixup = (batch_images if batch_no == 0
                            else np.append(images_mixup, batch_images, axis=0))

    # Checked after the context manager exits, once the exception is captured.
    expected_fragment = "Both images and labels columns are required"
    assert expected_fragment in str(error.value)
示例#2
0
    def loader(self):
        """Dataloader attribute which is a unified interface to generate the data.

        Wraps ``self.dataset`` in a ``GeneratorDataset`` exposing "image" and
        "label" columns, runs it through ``self.convert_dtype``, optionally
        shuffles it, and batches it by ``self.args.batch_size``. When ``mixup``
        is set in ``self.args``, labels are one-hot encoded before batching and
        ``MixUpBatch`` is applied to the batched "image"/"label" columns.

        :return: the configured (batched) dataset pipeline
        """
        # NOTE: set_dataset_size(len(self.dataset)) was only needed on
        # mindspore 0.5 and is deliberately not called here.
        pipeline = self.convert_dtype(
            GeneratorDataset(self.dataset, ["image", "label"], sampler=self.sampler))

        if self.args.shuffle:
            pipeline = pipeline.shuffle(
                buffer_size=self.args.get("buffer_size", len(self.dataset)))

        use_mixup = self.args.get("mixup", False)
        mixup_batch_op = None
        if use_mixup:
            # MixUpBatch is fed one-hot labels, encoded per sample before batching.
            label_encoder = C2.OneHot(num_classes=self.args.get("num_class"))
            pipeline = pipeline.map(operations=label_encoder,
                                    input_columns=["label"])
            mixup_batch_op = vision.MixUpBatch(2)

        pipeline = pipeline.batch(self.args.batch_size)

        if use_mixup:
            pipeline = pipeline.map(operations=mixup_batch_op,
                                    input_columns=["image", "label"])

        # Route len() on batch/map datasets to get_dataset_size().
        from mindspore.dataset.engine.datasets import BatchDataset, MapDataset
        BatchDataset.__len__ = BatchDataset.get_dataset_size
        MapDataset.__len__ = MapDataset.get_dataset_size
        return pipeline
def test_random_choice():
    """
    Test RandomChoice op

    Each scenario builds a NumpySlicesDataset, maps RandomChoice over the
    "col" column and compares the collected rows with the set of outcomes
    that any of the candidate operations could produce.
    """
    ds.config.set_seed(0)

    def test_config(arr, op_list):
        """Return the mapped rows as lists, or the error text on TypeError/ValueError."""
        try:
            pipeline = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False)
            pipeline = pipeline.map(operations=ops.RandomChoice(op_list),
                                    input_columns=["col"])
            return [row["col"].tolist()
                    for row in pipeline.create_dict_iterator(num_epochs=1,
                                                             output_numpy=True)]
        except (TypeError, ValueError) as err:
            return str(err)

    # Either operation may be picked, so both possible results are accepted
    # to keep the test from failing at random.
    pad_or_slice = test_config([[0, 1, 2]],
                               [ops.PadEnd([4], 0), ops.Slice([0, 2])])
    assert pad_or_slice in [[[0, 1, 2, 0]], [[0, 2]]]

    # Nested structure: every candidate is itself a Compose.
    nested_candidates = [
        ops.Compose([ops.Duplicate(), ops.Concatenate()]),
        ops.Compose([ops.Slice([0, 1]), ops.OneHot(2)]),
    ]
    nested_result = test_config([[0, 1, 2]], nested_candidates)
    assert nested_result in [[[[1, 0], [0, 1]]], [[0, 1, 2, 0, 1, 2]]]

    # With a single candidate the outcome is deterministic.
    assert test_config([[4, 3], [2, 1]], [ops.Slice([0])]) == [[4], [2]]
def test_mnist_dataset(remove_json_files=True):
    """
    Round-trip an MNIST pipeline through serialize/deserialize.

    The pipeline (one-hot labels, batches of 10) is serialized to JSON,
    deserialized, serialized again and deserialized once more. Both JSON files
    must validate and be identical, and all three pipelines must yield the
    same "image" and "label" data over 10 batches.

    :param remove_json_files: when true, call delete_json_files() at the end.
    """
    data_dir = "../data/dataset/testMnistData"
    first_json = "mnist_dataset_pipeline.json"
    second_json = "mnist_dataset_pipeline_1.json"
    ds.config.set_seed(1)

    data1 = ds.MnistDataset(data_dir, 100)
    # num_classes is the OneHot input argument
    data1 = data1.map(input_columns="label", operations=c.OneHot(10))
    # batch_size is the batch input argument
    data1 = data1.batch(batch_size=10, drop_remainder=True)

    ds.serialize(data1, first_json)
    assert validate_jsonfile(first_json) is True

    data2 = ds.deserialize(json_filepath=first_json)
    ds.serialize(data2, second_json)
    assert validate_jsonfile(second_json) is True
    assert filecmp.cmp(first_json, second_json)

    data3 = ds.deserialize(json_filepath=second_json)

    num = 0
    row_triples = zip(data1.create_dict_iterator(),
                      data2.create_dict_iterator(),
                      data3.create_dict_iterator())
    for row1, row2, row3 in row_triples:
        for column in ("image", "label"):
            assert np.array_equal(row1[column], row2[column])
            assert np.array_equal(row1[column], row3[column])
        num += 1

    logger.info("mnist total num samples is {}".format(str(num)))
    assert num == 10

    if remove_json_files:
        delete_json_files()
示例#5
0
def test_mixup_batch_fail4():
    """
    Test MixUpBatch Fail 4
    We expect this to fail because alpha is zero

    Constructing ``vision.MixUpBatch(0.0)`` must raise ValueError; the error
    text is inspected after the ``pytest.raises`` block has exited.
    """
    logger.info("test_mixup_batch_fail4")

    # Original Images
    ds_original = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    ds_original = ds_original.batch(5)

    images_original = np.array([])
    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
            images_original = image
        else:
            images_original = np.append(images_original, image, axis=0)

    # MixUp Images
    data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

    one_hot_op = data_trans.OneHot(num_classes=10)
    data1 = data1.map(input_columns=["label"], operations=one_hot_op)
    with pytest.raises(ValueError) as error:
        vision.MixUpBatch(0.0)
    # The message check must sit outside the `with` block: statements placed
    # after the raising call inside the block are never executed.
    # NOTE(review): this check was previously unreachable, so the expected
    # text has never been exercised - confirm it against the validator.
    error_message = "Input is not within the required interval"
    assert error_message in str(error.value)
示例#6
0
def test_cutmix_batch_nchw_md5():
    """
    Test CutMixBatch on a batch of CHW images with MD5:

    Seed and parallel-worker settings are pinned (0 and 1) for the duration
    of the test and restored afterwards, even when the MD5 comparison fails,
    so a failure here cannot leak configuration into other tests.
    """
    logger.info("test_cutmix_batch_nchw_md5")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # CutMixBatch Images
        data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
        hwc2chw_op = vision.HWC2CHW()
        data = data.map(input_columns=["image"], operations=hwc2chw_op)
        one_hot_op = data_trans.OneHot(num_classes=10)
        data = data.map(input_columns=["label"], operations=one_hot_op)
        cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NCHW)
        data = data.batch(5, drop_remainder=True)
        data = data.map(input_columns=["image", "label"],
                        operations=cutmix_batch_op)

        filename = "cutmix_batch_c_nchw_result.npz"
        save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
    finally:
        # Restore config setting
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
示例#7
0
def test_one_hot():
    """
    Test OneHot Tensor Operator

    The "label" column of one TFRecord pipeline is encoded with the OneHot op
    and compared, row by row, with the `one_hot` helper applied to the raw
    labels of a second pipeline. The MSE must be 0 on each of the 3 rows.
    """
    logger.info("test_one_hot")

    depth = 10

    # Pipeline under test: labels encoded by the dataset op.
    encoded = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
    encoded = encoded.map(input_columns=["label"],
                          operations=data_trans.OneHot(num_classes=depth),
                          columns_order=["label"])

    # Reference pipeline: raw labels only.
    raw = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR,
                             columns_list=["label"], shuffle=False)

    num_iter = 0
    row_pairs = zip(encoded.create_dict_iterator(), raw.create_dict_iterator())
    for encoded_row, raw_row in row_pairs:
        assert len(encoded_row) == len(raw_row)
        label1 = encoded_row["label"]
        label2 = one_hot(raw_row["label"][0], depth)
        mse = diff_mse(label1, label2)
        logger.info("DE one_hot: {}, Numpy one_hot: {}, diff: {}".format(
            label1, label2, mse))
        assert mse == 0
        num_iter += 1
    assert num_iter == 3
示例#8
0
def test_cutmix_batch_fail5():
    """
    Negative test for the CutMixBatch op.

    CutMixBatch is mapped over the "image" column only, so iterating the
    pipeline is expected to raise a RuntimeError whose message states that
    both image and label columns are required.
    """
    logger.info("test_cutmix_batch_fail5")

    # CutMixBatch pipeline: one-hot labels, batch, then CutMixBatch on "image" alone.
    data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    data1 = data1.map(operations=data_trans.OneHot(num_classes=10),
                      input_columns=["label"])
    cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
    data1 = data1.batch(5, drop_remainder=True)
    data1 = data1.map(operations=cutmix_batch_op, input_columns=["image"])

    with pytest.raises(RuntimeError) as error:
        images_cutmix = np.array([])
        for batch_no, (batch, _) in enumerate(data1):
            pixels = batch.asnumpy()
            images_cutmix = (pixels if batch_no == 0
                             else np.append(images_cutmix, pixels, axis=0))

    # Checked after the context manager exits, once the exception is captured.
    expected_fragment = "both image and label columns are required"
    assert expected_fragment in str(error.value)
示例#9
0
def test_cutmix_batch_fail6():
    """
    Negative test for the CutMixBatch op.

    The op is created with ImageBatchFormat.NCHW while the images are not
    converted with HWC2CHW beforehand, so iterating the pipeline is expected
    to raise a RuntimeError about the image format mismatch.
    """
    logger.info("test_cutmix_batch_fail6")

    # CutMixBatch pipeline: one-hot labels, batch, then CutMixBatch (NCHW).
    data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    data1 = data1.map(input_columns=["label"],
                      operations=data_trans.OneHot(num_classes=10))
    cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NCHW)
    data1 = data1.batch(5, drop_remainder=True)
    data1 = data1.map(input_columns=["image", "label"],
                      operations=cutmix_batch_op)

    with pytest.raises(RuntimeError) as error:
        images_cutmix = np.array([])
        for batch_no, (batch_images, _) in enumerate(data1):
            images_cutmix = (batch_images if batch_no == 0
                             else np.append(images_cutmix, batch_images, axis=0))

    # Checked after the context manager exits, once the exception is captured.
    expected_fragment = "CutMixBatch: Image doesn't match the given image format."
    assert expected_fragment in str(error.value)
示例#10
0
def test_manifest_sampler_chain_batch_repeat():
    """
    Test ManifestDataset sampler chain DistributedSampler->SequentialSampler, with batch then repeat

    Only the reported dataset size is verified: after batching by 5 and
    repeating twice it must be 2.
    """
    logger.info("test_manifest_sampler_chain_batch_repeat")
    manifest_file = "../data/dataset/testManifestData/test5trainimgs.json"

    # Sampler chain: DistributedSampler (parent) -> SequentialSampler (child)
    sampler = ds.DistributedSampler(num_shards=1, shard_id=0,
                                    shuffle=False, num_samples=5)
    sampler.add_child(ds.SequentialSampler())

    # ManifestDataset driven by the sampler chain: one-hot, batch, repeat
    data1 = ds.ManifestDataset(manifest_file, decode=True, sampler=sampler)
    data1 = data1.map(operations=c_transforms.OneHot(3), input_columns=["label"])
    data1 = data1.batch(batch_size=5, drop_remainder=False).repeat(count=2)

    # Verify dataset size
    data1_size = data1.get_dataset_size()
    logger.info("dataset size is: {}".format(data1_size))
    assert data1_size == 2
示例#11
0
def test_cutmix_batch_fail1():
    """
    Test CutMixBatch Fail 1
    We expect this to fail because the images and labels are not batched

    Mapping CutMixBatch over an unbatched pipeline and iterating it must
    raise RuntimeError; the error text is inspected after the
    ``pytest.raises`` block has exited.
    """
    logger.info("test_cutmix_batch_fail1")

    # CutMixBatch Images
    data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

    one_hot_op = data_trans.OneHot(num_classes=10)
    data1 = data1.map(operations=one_hot_op, input_columns=["label"])
    cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
    with pytest.raises(RuntimeError) as error:
        data1 = data1.map(operations=cutmix_batch_op,
                          input_columns=["image", "label"])
        for idx, (image, _) in enumerate(data1):
            if idx == 0:
                images_cutmix = image.asnumpy()
            else:
                images_cutmix = np.append(images_cutmix,
                                          image.asnumpy(),
                                          axis=0)
    # The message check must sit outside the `with` block: once the expected
    # exception propagates out of the loop, nothing after it inside the block
    # is executed.
    # NOTE(review): this check was previously unreachable, so the expected
    # text has never been exercised - confirm it against the op's message.
    error_message = "You must make sure images are HWC or CHW and batch "
    assert error_message in str(error.value)
示例#12
0
def test_mixup_batch_fail1():
    """
    Test MixUpBatch Fail 1
    We expect this to fail because the images and labels are not batched

    Mapping MixUpBatch over an unbatched pipeline and iterating it must raise
    RuntimeError; the error text is inspected after the ``pytest.raises``
    block has exited.
    """
    logger.info("test_mixup_batch_fail1")

    # Original Images
    ds_original = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    ds_original = ds_original.batch(5)

    images_original = np.array([])
    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
            images_original = image
        else:
            images_original = np.append(images_original, image, axis=0)

    # MixUp Images
    data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

    one_hot_op = data_trans.OneHot(num_classes=10)
    data1 = data1.map(input_columns=["label"], operations=one_hot_op)
    mixup_batch_op = vision.MixUpBatch(0.1)
    with pytest.raises(RuntimeError) as error:
        data1 = data1.map(input_columns=["image", "label"], operations=mixup_batch_op)
        for idx, (image, _) in enumerate(data1):
            if idx == 0:
                images_mixup = image
            else:
                images_mixup = np.append(images_mixup, image, axis=0)
    # The message check must sit outside the `with` block: once the expected
    # exception propagates out of the loop, nothing after it inside the block
    # is executed.
    # NOTE(review): this check was previously unreachable, so the expected
    # text has never been exercised - confirm it against the op's message.
    error_message = "You must make sure images are HWC or CHW and batch"
    assert error_message in str(error.value)
def test_mix_up_multi():
    """
    Test multi batch mix up op

    Two iterators are run in lockstep over the same batched pipeline, one
    with ``py_vision.MixUp(is_single=False)`` applied and one without. For the
    second batch, the mixing factor is recovered from the label difference and
    each mixed image is compared with
    ``lam * <unmixed image> + (1 - lam) * <first mixed batch image>``.
    """
    logger.info("Test several batch mix up op")

    resize_height = 224
    resize_width = 224

    # Create dataset and define map operations
    ds1 = ds.ImageFolderDatasetV2(DATA_DIR_2)

    num_classes = 3
    decode_op = c_vision.Decode()
    resize_op = c_vision.Resize((resize_height, resize_width),
                                c_vision.Inter.LINEAR)
    one_hot_encode = c.OneHot(num_classes)  # num_classes is input argument

    ds1 = ds1.map(input_columns=["image"], operations=decode_op)
    ds1 = ds1.map(input_columns=["image"], operations=resize_op)
    ds1 = ds1.map(input_columns=["label"], operations=one_hot_encode)

    # apply batch operations
    batch_size = 3
    ds1 = ds1.batch(batch_size, drop_remainder=True)

    # ds2 keeps the un-mixed pipeline for reference
    ds2 = ds1
    alpha = 0.2
    transforms = [
        py_vision.MixUp(batch_size=batch_size, alpha=alpha, is_single=False)
    ]
    ds1 = ds1.map(input_columns=["image", "label"], operations=transforms)
    num_iter = 0
    batch1_image1 = 0
    for data1, data2 in zip(ds1.create_dict_iterator(),
                            ds2.create_dict_iterator()):
        image1 = data1["image"]
        label1 = data1["label"]
        logger.info("label: {}".format(label1))

        image2 = data2["image"]
        label2 = data2["label"]
        logger.info("label2: {}".format(label2))

        if num_iter == 0:
            # Remember the first mixed batch; the second batch is checked against it.
            batch1_image1 = image1

        if num_iter == 1:
            lam = np.abs(label2 - label1)
            logger.info("lam value in multi: {}".format(lam))
            for index in range(batch_size):
                # Samples whose label was left unchanged carry no recoverable
                # mixing factor and are skipped.
                if np.square(lam[index]).mean() != 0:
                    lam_value = 1 - np.sum(lam[index]) / 2
                    img_golden = lam_value * image2[index] + (
                        1 - lam_value) * batch1_image1[index]
                    # Compare the arrays element-wise. The previous
                    # `a.all() == b.all()` only compared two booleans and
                    # could not detect a wrong mix.
                    assert np.allclose(image1[index], img_golden)
                    logger.info("====test several batch mixup ok====")
            break
        num_iter = num_iter + 1
示例#14
0
def test_cutmix_batch_success4(plot=False):
    """
    Test CutMixBatch on a dataset where OneHot returns a 2D vector

    CelebA images (decoded, resized to 224x224, batches of 2) are run with and
    without CutMixBatch (NHWC). The mean per-image MSE between the two runs is
    logged; nothing is asserted on it.

    :param plot: when true, show original and cut-mixed images via visualize_list.
    """
    logger.info("test_cutmix_batch_success4")

    # Reference pipeline without CutMix
    ds_original = ds.CelebADataset(DATA_DIR3, shuffle=False)
    ds_original = ds_original.map(operations=[vision.Decode()],
                                  input_columns=["image"])
    ds_original = ds_original.map(operations=[vision.Resize([224, 224])],
                                  input_columns=["image"])
    ds_original = ds_original.batch(2, drop_remainder=True)

    images_original = None
    for batch_no, (batch, _) in enumerate(ds_original):
        pixels = batch.asnumpy()
        images_original = (pixels if batch_no == 0
                           else np.append(images_original, pixels, axis=0))

    # CutMix pipeline: decode, resize, one-hot "attr", batch, CutMixBatch
    data1 = ds.CelebADataset(dataset_dir=DATA_DIR3, shuffle=False)
    data1 = data1.map(operations=[vision.Decode()], input_columns=["image"])
    data1 = data1.map(operations=[vision.Resize([224, 224])],
                      input_columns=["image"])
    data1 = data1.map(operations=data_trans.OneHot(num_classes=100),
                      input_columns=["attr"])
    cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 0.5, 0.9)
    data1 = data1.batch(2, drop_remainder=True)
    data1 = data1.map(operations=cutmix_batch_op,
                      input_columns=["image", "attr"])

    images_cutmix = None
    for batch_no, (batch, _) in enumerate(data1):
        pixels = batch.asnumpy()
        images_cutmix = (pixels if batch_no == 0
                         else np.append(images_cutmix, pixels, axis=0))

    if plot:
        visualize_list(images_original, images_cutmix)

    # Per-image MSE between cut-mixed and original images
    per_image_mse = np.zeros(images_original.shape[0])
    for i in range(per_image_mse.size):
        per_image_mse[i] = diff_mse(images_cutmix[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(per_image_mse))))
示例#15
0
def test_cutmix_batch_success3(plot=False):
    """
    Test CutMixBatch op with default values for alpha and prob on a batch of HWC images on ImageFolderDataset

    Images (decoded, resized to 224x224, batches of 4) are run with and without
    CutMixBatch (NHWC). The mean per-image MSE between the two runs is logged;
    nothing is asserted on it.

    :param plot: when true, show original and cut-mixed images via visualize_list.
    """
    logger.info("test_cutmix_batch_success3")

    # Reference pipeline without CutMix
    ds_original = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
    ds_original = ds_original.map(operations=[vision.Decode()],
                                  input_columns=["image"])
    ds_original = ds_original.map(operations=[vision.Resize([224, 224])],
                                  input_columns=["image"])
    ds_original = ds_original.batch(4, pad_info={}, drop_remainder=True)

    images_original = None
    for batch_no, (batch, _) in enumerate(ds_original):
        pixels = batch.asnumpy()
        images_original = (pixels if batch_no == 0
                           else np.append(images_original, pixels, axis=0))

    # CutMix pipeline: decode, resize, one-hot "label", batch, CutMixBatch
    data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
    data1 = data1.map(operations=[vision.Decode()], input_columns=["image"])
    data1 = data1.map(operations=[vision.Resize([224, 224])],
                      input_columns=["image"])
    data1 = data1.map(operations=data_trans.OneHot(num_classes=10),
                      input_columns=["label"])
    cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
    data1 = data1.batch(4, pad_info={}, drop_remainder=True)
    data1 = data1.map(operations=cutmix_batch_op,
                      input_columns=["image", "label"])

    images_cutmix = None
    for batch_no, (batch, _) in enumerate(data1):
        pixels = batch.asnumpy()
        images_cutmix = (pixels if batch_no == 0
                         else np.append(images_cutmix, pixels, axis=0))

    if plot:
        visualize_list(images_original, images_cutmix)

    # Per-image MSE between cut-mixed and original images
    per_image_mse = np.zeros(images_original.shape[0])
    for i in range(per_image_mse.size):
        per_image_mse[i] = diff_mse(images_cutmix[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(per_image_mse))))
示例#16
0
def test_manifest_dataset_multi_label_onehot():
    """
    Check multi-label one-hot encoding on a ManifestDataset.

    Labels are passed through OneHot(3) and then `multi_label_hot`, batched by
    2, and each batch's "label" column is compared with the expected nested
    list for that batch.
    """
    expect_label = [[[0, 1, 0], [1, 0, 0]], [[1, 0, 0], [1, 0, 1]]]

    data = ds.ManifestDataset(DATA_FILE, decode=True, shuffle=False)
    data = data.map(operations=data_trans.OneHot(3), input_columns=["label"])
    data = data.map(operations=multi_label_hot, input_columns=["label"])
    data = data.batch(2)

    batches = data.create_dict_iterator(num_epochs=1, output_numpy=True)
    for count, item in enumerate(batches):
        assert item["label"].tolist() == expect_label[count]
        logger.info("item[image] is {}".format(item["image"]))
示例#17
0
def generate_mnist_dataset(data_path,
                           batch_size=32,
                           repeat_size=1,
                           samples=None,
                           num_parallel_workers=1,
                           sparse=True):
    """
    Create an MNIST dataset pipeline for training or testing.

    Labels are cast to int32 when ``sparse`` is true; otherwise they are
    one-hot encoded over 10 classes and cast to float32. Images are resized
    to 32x32 (linear interpolation), rescaled by 1/255 and converted from HWC
    to CHW. The result is shuffled (buffer 10000), batched with
    ``drop_remainder=True`` and repeated ``repeat_size`` times.

    :param data_path: path handed to ``ds.MnistDataset``
    :param batch_size: number of samples per batch
    :param repeat_size: count passed to ``repeat``
    :param samples: forwarded as ``num_samples`` to ``ds.MnistDataset``
    :param num_parallel_workers: workers used by every map operation
    :param sparse: keep integer labels (True) or one-hot float labels (False)
    :return: the configured dataset pipeline
    """
    pipeline = ds.MnistDataset(data_path, num_samples=samples)

    # Image operations, in the order they are applied.
    image_ops = (
        CV.Resize((32, 32), interpolation=Inter.LINEAR),
        CV.Rescale(1.0 / 255.0, 0.0),
        CV.HWC2CHW(),
    )

    # Label operations: optional one-hot encoding followed by a type cast.
    if sparse:
        label_cast_op = C.TypeCast(mstype.int32)
    else:
        pipeline = pipeline.map(input_columns="label",
                                operations=C.OneHot(10),
                                num_parallel_workers=num_parallel_workers)
        label_cast_op = C.TypeCast(mstype.float32)
    pipeline = pipeline.map(input_columns="label",
                            operations=label_cast_op,
                            num_parallel_workers=num_parallel_workers)

    for image_op in image_ops:
        pipeline = pipeline.map(input_columns="image",
                                operations=image_op,
                                num_parallel_workers=num_parallel_workers)

    # Dataset-level operations
    pipeline = pipeline.shuffle(buffer_size=10000)
    pipeline = pipeline.batch(batch_size, drop_remainder=True)
    return pipeline.repeat(repeat_size)
示例#18
0
def test_mixup_batch_success4(plot=False):
    """
    Test MixUpBatch op on a dataset where OneHot returns a 2D vector.
    Alpha parameter will be selected by default in this case

    CelebA images (decoded, batches of 2) are run with and without MixUpBatch.
    The mean per-image MSE between the two runs is logged; nothing is asserted
    on it.

    :param plot: when true, show original and mixed images via visualize_list.
    """
    logger.info("test_mixup_batch_success4")

    # Reference pipeline without MixUp
    ds_original = ds.CelebADataset(DATA_DIR3, shuffle=False)
    ds_original = ds_original.map(operations=[vision.Decode()],
                                  input_columns=["image"])
    ds_original = ds_original.batch(2, drop_remainder=True)

    images_original = None
    for batch_no, (batch, _) in enumerate(ds_original):
        pixels = batch.asnumpy()
        images_original = (pixels if batch_no == 0
                           else np.append(images_original, pixels, axis=0))

    # MixUp pipeline: decode, one-hot "attr", batch, MixUpBatch
    data1 = ds.CelebADataset(DATA_DIR3, shuffle=False)
    data1 = data1.map(operations=[vision.Decode()], input_columns=["image"])
    data1 = data1.map(operations=data_trans.OneHot(num_classes=100),
                      input_columns=["attr"])
    mixup_batch_op = vision.MixUpBatch()
    data1 = data1.batch(2, drop_remainder=True)
    data1 = data1.map(operations=mixup_batch_op,
                      input_columns=["image", "attr"])

    images_mixup = np.array([])
    for batch_no, (batch, _) in enumerate(data1):
        pixels = batch.asnumpy()
        images_mixup = (pixels if batch_no == 0
                        else np.append(images_mixup, pixels, axis=0))

    if plot:
        visualize_list(images_original, images_mixup)

    # Per-image MSE between mixed and original images
    per_image_mse = np.zeros(images_original.shape[0])
    for i in range(per_image_mse.size):
        per_image_mse[i] = diff_mse(images_mixup[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(per_image_mse))))
示例#19
0
def test_mixup_batch_success2(plot=False):
    """
    Test MixUpBatch op with specified alpha parameter on ImageFolderDataset

    Images (decoded, batches of 4) are run with and without MixUpBatch(2.0).
    The mean per-image MSE between the two runs is logged; nothing is asserted
    on it.

    :param plot: when true, show original and mixed images via visualize_list.
    """
    logger.info("test_mixup_batch_success2")

    # Reference pipeline without MixUp
    ds_original = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
    ds_original = ds_original.map(operations=[vision.Decode()],
                                  input_columns=["image"])
    ds_original = ds_original.batch(4, pad_info={}, drop_remainder=True)

    images_original = None
    for batch_no, (batch, _) in enumerate(ds_original):
        pixels = batch.asnumpy()
        images_original = (pixels if batch_no == 0
                           else np.append(images_original, pixels, axis=0))

    # MixUp pipeline: decode, one-hot "label", batch, MixUpBatch
    data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
    data1 = data1.map(operations=[vision.Decode()], input_columns=["image"])
    data1 = data1.map(operations=data_trans.OneHot(num_classes=10),
                      input_columns=["label"])
    mixup_batch_op = vision.MixUpBatch(2.0)
    data1 = data1.batch(4, pad_info={}, drop_remainder=True)
    data1 = data1.map(operations=mixup_batch_op,
                      input_columns=["image", "label"])

    images_mixup = None
    for batch_no, (batch, _) in enumerate(data1):
        pixels = batch.asnumpy()
        images_mixup = (pixels if batch_no == 0
                        else np.append(images_mixup, pixels, axis=0))

    if plot:
        visualize_list(images_original, images_mixup)

    # Per-image MSE between mixed and original images
    per_image_mse = np.zeros(images_original.shape[0])
    for i in range(per_image_mse.size):
        per_image_mse[i] = diff_mse(images_mixup[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(per_image_mse))))
def test_mix_up_single():
    """
    Test single batch mix up op

    Two iterators are run in lockstep over the same batched pipeline, one
    with ``py_vision.MixUp(is_single=True)`` applied and one without. For each
    batch, the mixing factor is recovered from the label difference and each
    mixed image is compared with
    ``lam * <unmixed image i> + (1 - lam) * <unmixed image i + 1>``.
    """
    logger.info("Test single batch mix up op")

    resize_height = 224
    resize_width = 224

    # Create dataset and define map operations
    ds1 = ds.ImageFolderDataset(DATA_DIR_2)

    num_classes = 10
    decode_op = c_vision.Decode()
    resize_op = c_vision.Resize((resize_height, resize_width), c_vision.Inter.LINEAR)
    one_hot_encode = c.OneHot(num_classes)  # num_classes is input argument

    ds1 = ds1.map(operations=decode_op, input_columns=["image"])
    ds1 = ds1.map(operations=resize_op, input_columns=["image"])
    ds1 = ds1.map(operations=one_hot_encode, input_columns=["label"])

    # apply batch operations
    batch_size = 3
    ds1 = ds1.batch(batch_size, drop_remainder=True)

    # ds2 keeps the un-mixed pipeline for reference
    ds2 = ds1
    alpha = 0.2
    transforms = [py_vision.MixUp(batch_size=batch_size, alpha=alpha, is_single=True)
                  ]
    ds1 = ds1.map(operations=transforms, input_columns=["image", "label"])

    for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1, output_numpy=True),
                            ds2.create_dict_iterator(num_epochs=1, output_numpy=True)):
        image1 = data1["image"]
        label = data1["label"]
        logger.info("label is {}".format(label))

        image2 = data2["image"]
        label2 = data2["label"]
        logger.info("label2 is {}".format(label2))

        lam = np.abs(label - label2)
        # The last sample is not checked: the golden value below needs index + 1.
        for index in range(batch_size - 1):
            # Samples whose label was left unchanged carry no recoverable
            # mixing factor and are skipped.
            if np.square(lam[index]).mean() != 0:
                lam_value = 1 - np.sum(lam[index]) / 2
                img_golden = lam_value * image2[index] + (1 - lam_value) * image2[index + 1]
                # Compare the arrays element-wise. The previous
                # `a.all() == b.all()` only compared two booleans and could
                # not detect a wrong mix.
                assert np.allclose(image1[index], img_golden)
                logger.info("====test single batch mixup ok====")
示例#21
0
def test_cutmix_batch_fail4():
    """
    Test CutMixBatch Fail 4
    We expect this to fail because prob is negative

    Constructing ``vision.CutMixBatch(NHWC, 1, -1)`` must raise ValueError;
    the error text is inspected after the ``pytest.raises`` block has exited.
    """
    logger.info("test_cutmix_batch_fail4")

    # CutMixBatch Images
    data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

    one_hot_op = data_trans.OneHot(num_classes=10)
    data1 = data1.map(input_columns=["label"], operations=one_hot_op)
    with pytest.raises(ValueError) as error:
        vision.CutMixBatch(mode.ImageBatchFormat.NHWC, 1, -1)
    # The message check must sit outside the `with` block: statements placed
    # after the raising call inside the block are never executed.
    # NOTE(review): this check was previously unreachable, so the expected
    # text has never been exercised - confirm it against the validator.
    error_message = "Input is not within the required interval"
    assert error_message in str(error.value)
示例#22
0
def test_case_3():
    """
    Test Map

    Decodes, rescales and resizes the "image" column, one-hot encodes the
    "label" column, then shuffles and batches the TFRecord pipeline. Every
    batch is logged and the number of batches is reported; nothing is asserted.
    """
    logger.info("Test Map Rescale and Resize, then Shuffle")
    pipeline = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)

    # Image operations, in application order: decode, rescale by 1/255
    # (shift 0), resize to 224x224.
    image_ops = [
        vision.Decode(),
        vision.Rescale(1.0 / 255.0, 0.0),
        vision.Resize((224, 224)),
    ]
    for image_op in image_ops:
        pipeline = pipeline.map(input_columns=["image"], operations=image_op)

    # One-hot encode the labels; 4 is the num_classes argument.
    pipeline = pipeline.map(input_columns=["label"],
                            operations=data_trans.OneHot(4))

    # Dataset-level operations: seeded shuffle, then batches of 2.
    ds.config.set_seed(10)
    pipeline = pipeline.shuffle(buffer_size=100)
    pipeline = pipeline.batch(2, drop_remainder=True)

    num_iter = 0
    # Each item is a dictionary with the keys "image" and "label".
    for num_iter, item in enumerate(pipeline.create_dict_iterator(), start=1):
        logger.info("image is: {}".format(item["image"]))
        logger.info("label is: {}".format(item["label"]))

    logger.info("Number of data in data1: {}".format(num_iter))
示例#23
0
def test_cutmix_batch_success1(plot=False):
    """
    Test CutMixBatch op with specified alpha and prob parameters on a batch of CHW images

    Args:
        plot: when truthy, pass the original and augmented images to
            visualize_list.
    """
    logger.info("test_cutmix_batch_success1")

    # Reference images: same samples, batched but without CutMix
    reference = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    reference = reference.batch(5, drop_remainder=True)

    images_original = None
    for batch_no, (batch, _) in enumerate(reference):
        pixels = batch.asnumpy()
        if batch_no == 0:
            images_original = pixels
        else:
            images_original = np.append(images_original, pixels, axis=0)

    # CutMix pipeline: HWC -> CHW, one-hot labels, then CutMixBatch on batches
    augmented = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    to_chw = vision.HWC2CHW()
    augmented = augmented.map(operations=to_chw, input_columns=["image"])
    to_one_hot = data_trans.OneHot(num_classes=10)
    augmented = augmented.map(operations=to_one_hot, input_columns=["label"])
    cutmix = vision.CutMixBatch(mode.ImageBatchFormat.NCHW, 2.0, 0.5)
    augmented = augmented.batch(5, drop_remainder=True)
    augmented = augmented.map(operations=cutmix,
                              input_columns=["image", "label"])

    images_cutmix = None
    for batch_no, (batch, _) in enumerate(augmented):
        # reorder axes to (0, 2, 3, 1) before accumulating
        pixels = batch.asnumpy().transpose(0, 2, 3, 1)
        if batch_no == 0:
            images_cutmix = pixels
        else:
            images_cutmix = np.append(images_cutmix, pixels, axis=0)

    if plot:
        visualize_list(images_original, images_cutmix)

    # per-sample MSE between augmented and original images (logged only)
    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for sample in range(num_samples):
        mse[sample] = diff_mse(images_cutmix[sample], images_original[sample])
    mean_mse = np.mean(mse)
    logger.info("MSE= {}".format(str(mean_mse)))
示例#24
0
def test_cutmix_batch_success2(plot=False):
    """
    Test CutMixBatch op with default values for alpha and prob on a batch of rescaled HWC images

    Args:
        plot: when truthy, pass the original and augmented images to
            visualize_list.
    """
    logger.info("test_cutmix_batch_success2")

    # Reference images: same samples, batched but without CutMix
    reference = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    reference = reference.batch(5, drop_remainder=True)

    images_original = None
    for batch_no, (batch, _) in enumerate(reference):
        pixels = batch.asnumpy()
        if batch_no == 0:
            images_original = pixels
        else:
            images_original = np.append(images_original, pixels, axis=0)

    # CutMix pipeline: one-hot labels, rescale images, CutMixBatch on batches
    augmented = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    to_one_hot = data_trans.OneHot(num_classes=10)
    augmented = augmented.map(operations=to_one_hot, input_columns=["label"])
    to_unit_range = vision.Rescale((1.0 / 255.0), 0.0)
    augmented = augmented.map(operations=to_unit_range, input_columns=["image"])
    cutmix = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
    augmented = augmented.batch(5, drop_remainder=True)
    augmented = augmented.map(operations=cutmix,
                              input_columns=["image", "label"])

    images_cutmix = None
    for batch_no, (batch, _) in enumerate(augmented):
        pixels = batch.asnumpy()
        if batch_no == 0:
            images_cutmix = pixels
        else:
            images_cutmix = np.append(images_cutmix, pixels, axis=0)

    if plot:
        visualize_list(images_original, images_cutmix)

    # per-sample MSE between augmented and original images (logged only)
    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for sample in range(num_samples):
        mse[sample] = diff_mse(images_cutmix[sample], images_original[sample])
    mean_mse = np.mean(mse)
    logger.info("MSE= {}".format(str(mean_mse)))
示例#25
0
def test_one_hot_post_aug():
    """
    Test One Hot Encoding after Multiple Data Augmentation Operators
    """
    logger.info("test_one_hot_post_aug")
    pipeline = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)

    # Image operators, in application order: decode, rescale, resize
    scale_factor, offset = 1.0 / 255.0, 0.0
    output_size = (224, 224)
    image_ops = [
        c_vision.Decode(),
        c_vision.Rescale(scale_factor, offset),
        c_vision.Resize(output_size),
    ]
    for image_op in image_ops:
        pipeline = pipeline.map(input_columns=["image"], operations=image_op)

    # One-hot encode the labels with a depth of 4
    label_encoder = data_trans.OneHot(4)
    pipeline = pipeline.map(input_columns=["label"], operations=label_encoder)

    # Seed, shuffle, then batch in pairs
    ds.config.set_seed(10)
    pipeline = pipeline.shuffle(buffer_size=100)
    pipeline = pipeline.batch(2, drop_remainder=True)

    batches_seen = 0
    for row in pipeline.create_dict_iterator():
        logger.info("image is: {}".format(row["image"]))
        logger.info("label is: {}".format(row["label"]))
        batches_seen += 1

    # the test expects the pipeline to yield exactly one batch
    assert batches_seen == 1
示例#26
0
def test_mixup_batch_success2(plot=False):
    """
    Test MixUpBatch op without specified alpha parameter.
    Alpha parameter will be selected by default in this case

    Args:
        plot: when truthy, pass the original and mixed images to
            visualize_list.
    """
    logger.info("test_mixup_batch_success2")

    # Reference images: same samples, batched but without MixUp
    reference = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    reference = reference.batch(5, drop_remainder=True)

    images_original = None
    for batch_no, (batch, _) in enumerate(reference):
        if batch_no == 0:
            images_original = batch
        else:
            images_original = np.append(images_original, batch, axis=0)

    # MixUp pipeline: one-hot labels, batch, then MixUpBatch on both columns
    mixed = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)
    to_one_hot = data_trans.OneHot(num_classes=10)
    mixed = mixed.map(input_columns=["label"], operations=to_one_hot)
    mixup = vision.MixUpBatch()
    mixed = mixed.batch(5, drop_remainder=True)
    mixed = mixed.map(input_columns=["image", "label"],
                      operations=mixup)

    images_mixup = np.array([])
    for batch_no, (batch, _) in enumerate(mixed):
        if batch_no == 0:
            images_mixup = batch
        else:
            images_mixup = np.append(images_mixup, batch, axis=0)

    if plot:
        visualize_list(images_original, images_mixup)

    # per-sample MSE between mixed and original images (logged only)
    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for sample in range(num_samples):
        mse[sample] = diff_mse(images_mixup[sample], images_original[sample])
    mean_mse = np.mean(mse)
    logger.info("MSE= {}".format(str(mean_mse)))
示例#27
0
def test_one_hot():
    """
    Test OneHot Tensor Operator

    Compares a dataset whose "label" column went through the OneHot op
    against a label-only dataset, using dataset_equal_with_function with
    the one_hot reference function.
    """
    logger.info("test_one_hot")

    num_classes = 10

    # Dataset under test: labels encoded by the OneHot operator
    encoded = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
    encoder = data_trans.OneHot(num_classes=num_classes)
    encoded = encoded.map(
        operations=encoder,
        input_columns=["label"],
        column_order=["label"],
    )

    # Baseline dataset: only the "label" column, left unencoded
    raw_labels = ds.TFRecordDataset(
        DATA_DIR,
        SCHEMA_DIR,
        columns_list=["label"],
        shuffle=False,
    )

    datasets_match = dataset_equal_with_function(
        encoded, raw_labels, 0, one_hot, num_classes)
    assert datasets_match
示例#28
0
def test_mixup_batch_md5():
    """
    Test MixUpBatch with MD5:
    build a one-hot + batch + MixUpBatch pipeline and check its output
    against the stored result file via save_and_check_md5.

    The seed and num_parallel_workers overrides are restored in a
    ``finally`` clause, so a failing check does not leak them into
    other tests.
    """
    logger.info("test_mixup_batch_md5")
    # The helpers' return values are kept as the previous settings and
    # handed back to ds.config once the check is done.
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # MixUp Images
        data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

        one_hot_op = data_trans.OneHot(num_classes=10)
        data = data.map(input_columns=["label"], operations=one_hot_op)
        mixup_batch_op = vision.MixUpBatch()
        data = data.batch(5, drop_remainder=True)
        data = data.map(input_columns=["image", "label"],
                        operations=mixup_batch_op)

        filename = "mixup_batch_c_result.npz"
        save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
    finally:
        # Restore config setting even when the pipeline or the check raises
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)