示例#1
0
def test_random_vertical_flip_with_bbox_op_rand_c(plot_vis=False):
    """
    Prints images and bboxes side by side with and without RandomVerticalFlipWithBBox Op applied,
    tests with MD5 check, expected to pass
    """
    logger.info("test_random_vertical_flip_with_bbox_op_rand_c")
    original_seed = config_get_set_seed(29847)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Load dataset
    dataVoc1 = ds.VOCDataset(DATA_DIR_VOC,
                             task="Detection",
                             mode="train",
                             decode=True,
                             shuffle=False)

    dataVoc2 = ds.VOCDataset(DATA_DIR_VOC,
                             task="Detection",
                             mode="train",
                             decode=True,
                             shuffle=False)

    test_op = c_vision.RandomVerticalFlipWithBBox(0.8)

    # map to apply ops
    dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
                            output_columns=["image", "bbox"],
                            columns_order=["image", "bbox"],
                            operations=[test_op])

    filename = "random_vertical_flip_with_bbox_01_c_result.npz"
    save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)

    unaugSamp, augSamp = [], []

    for unAug, Aug in zip(dataVoc1.create_dict_iterator(),
                          dataVoc2.create_dict_iterator()):
        unaugSamp.append(unAug)
        augSamp.append(Aug)

    if plot_vis:
        visualize_with_bounding_boxes(unaugSamp, augSamp)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
示例#2
0
def test_bounding_box_augment_with_rotation_op(plot_vis=False):
    """
    Test BoundingBoxAugment op (passing rotation op as transform)
    Prints images side by side with and without Aug applied + bboxes to compare and test
    """
    logger.info("test_bounding_box_augment_with_rotation_op")

    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    dataVoc1 = ds.VOCDataset(DATA_DIR,
                             task="Detection",
                             mode="train",
                             decode=True,
                             shuffle=False)
    dataVoc2 = ds.VOCDataset(DATA_DIR,
                             task="Detection",
                             mode="train",
                             decode=True,
                             shuffle=False)

    # Ratio is set to 1 to apply rotation on all bounding boxes.
    test_op = c_vision.BoundingBoxAugment(c_vision.RandomRotation(90), 1)

    # map to apply ops
    dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
                            output_columns=["image", "bbox"],
                            columns_order=["image", "bbox"],
                            operations=[test_op])

    filename = "bounding_box_augment_rotation_c_result.npz"
    save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)

    unaugSamp, augSamp = [], []

    for unAug, Aug in zip(dataVoc1.create_dict_iterator(),
                          dataVoc2.create_dict_iterator()):
        unaugSamp.append(unAug)
        augSamp.append(Aug)

    if plot_vis:
        visualize_with_bounding_boxes(unaugSamp, augSamp)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
示例#3
0
def test_compare_random_color_op(degrees=None, plot=False):
    """
    Compare Random Color op in Python and Cpp
    """

    logger.info("test_random_color_op")

    original_seed = config_get_set_seed(5)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Decode with rgb format set to True
    data1 = ds.TFRecordDataset(C_DATA_DIR, C_SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = ds.TFRecordDataset(C_DATA_DIR, C_SCHEMA_DIR, columns_list=["image"], shuffle=False)

    if degrees is None:
        c_op = vision.RandomColor()
        p_op = F.RandomColor()
    else:
        c_op = vision.RandomColor(degrees)
        p_op = F.RandomColor(degrees)

    transforms_random_color_py = F.ComposeOp([lambda img: img.astype(np.uint8), F.ToPIL(),
                                              p_op, np.array])

    data1 = data1.map(input_columns=["image"], operations=[vision.Decode(), c_op])
    data2 = data2.map(input_columns=["image"], operations=[vision.Decode()])
    data2 = data2.map(input_columns=["image"], operations=transforms_random_color_py())

    image_random_color_op = []
    image = []

    for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
        actual = item1["image"]
        expected = item2["image"]
        image_random_color_op.append(actual)
        image.append(expected)
        assert actual.shape == expected.shape
        mse = diff_mse(actual, expected)
        logger.info("MSE= {}".format(str(np.mean(mse))))

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)

    if plot:
        visualize_list(image, image_random_color_op)
def test_textline_dataset_shuffle_files4():
    original_num_parallel_workers = config_get_set_num_parallel_workers(4)
    original_seed = config_get_set_seed(135)
    data = ds.TextFileDataset(DATA_ALL_FILE, shuffle=ds.Shuffle.FILES)
    count = 0
    line = [
        "This is a text file.", "Another file.", "Be happy every day.",
        "End of file.", "Good luck to everyone."
    ]
    for i in data.create_dict_iterator(num_epochs=1, output_numpy=True):
        strs = i["text"].item().decode("utf8")
        assert strs == line[count]
        count += 1
    assert count == 5
    # Restore configuration
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
    ds.config.set_seed(original_seed)
示例#5
0
def test_random_resize_with_bbox_op_voc_c(plot_vis=False):
    """
    Prints images and bboxes side by side with and without RandomResizeWithBBox Op applied
    testing with VOC dataset
    """
    logger.info("test_random_resize_with_bbox_op_voc_c")
    original_seed = config_get_set_seed(123)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)
    # Load dataset
    dataVoc1 = ds.VOCDataset(DATA_DIR,
                             task="Detection",
                             usage="train",
                             shuffle=False,
                             decode=True)

    dataVoc2 = ds.VOCDataset(DATA_DIR,
                             task="Detection",
                             usage="train",
                             shuffle=False,
                             decode=True)

    test_op = c_vision.RandomResizeWithBBox(100)

    # map to apply ops
    dataVoc2 = dataVoc2.map(operations=[test_op],
                            input_columns=["image", "bbox"],
                            output_columns=["image", "bbox"],
                            column_order=["image", "bbox"])

    filename = "random_resize_with_bbox_op_01_c_voc_result.npz"
    save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)

    unaugSamp, augSamp = [], []

    for unAug, Aug in zip(
            dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True),
            dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)):
        unaugSamp.append(unAug)
        augSamp.append(Aug)

    if plot_vis:
        visualize_with_bounding_boxes(unaugSamp, augSamp)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
示例#6
0
def skip_test_random_posterize_op_c(plot=False, run_golden=True):
    """
    Test RandomPosterize in C transformations
    """
    logger.info("test_random_posterize_op_c")

    original_seed = config_get_set_seed(55)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # define map operations
    transforms1 = [c_vision.Decode(), c_vision.RandomPosterize((1, 8))]

    #  First dataset
    data1 = ds.TFRecordDataset(DATA_DIR,
                               SCHEMA_DIR,
                               columns_list=["image"],
                               shuffle=False)
    data1 = data1.map(input_columns=["image"], operations=transforms1)
    #  Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR,
                               SCHEMA_DIR,
                               columns_list=["image"],
                               shuffle=False)
    data2 = data2.map(input_columns=["image"], operations=[c_vision.Decode()])

    image_posterize = []
    image_original = []
    for item1, item2 in zip(data1.create_dict_iterator(),
                            data2.create_dict_iterator()):
        image1 = item1["image"]
        image2 = item2["image"]
        image_posterize.append(image1)
        image_original.append(image2)

    if run_golden:
        # check results with md5 comparison
        filename = "random_posterize_01_result_c.npz"
        save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)

    if plot:
        visualize_list(image_original, image_posterize)

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
示例#7
0
def test_random_grayscale_input_grayscale_images():
    """
    Test RandomGrayscale Op: valid parameter with grayscale images as input, expect to pass
    """
    logger.info("test_random_grayscale_input_grayscale_images")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    transforms1 = [
        py_vision.Decode(),
        py_vision.Grayscale(1),
        # Note: If the input images is grayscale image with 1 channel.
        py_vision.RandomGrayscale(0.5),
        py_vision.ToTensor()
    ]
    transform1 = py_vision.ComposeOp(transforms1)
    data1 = data1.map(input_columns=["image"], operations=transform1())

    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    transforms2 = [
        py_vision.Decode(),
        py_vision.ToTensor()
    ]
    transform2 = py_vision.ComposeOp(transforms2)
    data2 = data2.map(input_columns=["image"], operations=transform2())

    image_gray = []
    image = []
    for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
        image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
        image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
        image_gray.append(image1)
        image.append(image2)

        assert len(image1.shape) == 3
        assert image1.shape[2] == 1
        assert len(image2.shape) == 3
        assert image2.shape[2] == 3

    # Restore config
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_rotation_md5():
    """
    Test RandomRotation with md5 check
    """
    logger.info("Test RandomRotation with md5 check")
    original_seed = config_get_set_seed(5)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Fisrt dataset
    data1 = ds.TFRecordDataset(DATA_DIR,
                               SCHEMA_DIR,
                               columns_list=["image"],
                               shuffle=False)
    decode_op = c_vision.Decode()
    resize_op = c_vision.RandomRotation((0, 90),
                                        expand=True,
                                        resample=Inter.BILINEAR,
                                        center=(50, 50),
                                        fill_value=150)
    data1 = data1.map(operations=decode_op, input_columns=["image"])
    data1 = data1.map(operations=resize_op, input_columns=["image"])

    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
    transform2 = mindspore.dataset.transforms.py_transforms.Compose([
        py_vision.Decode(),
        py_vision.RandomRotation((0, 90),
                                 expand=True,
                                 resample=Inter.BILINEAR,
                                 center=(50, 50),
                                 fill_value=150),
        py_vision.ToTensor()
    ])
    data2 = data2.map(operations=transform2, input_columns=["image"])

    # Compare with expected md5 from images
    filename1 = "random_rotation_01_c_result.npz"
    save_and_check_md5(data1, filename1, generate_golden=GENERATE_GOLDEN)
    filename2 = "random_rotation_01_py_result.npz"
    save_and_check_md5(data2, filename2, generate_golden=GENERATE_GOLDEN)

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_crop_with_bbox_op2_c(plot_vis=False):
    """
     Prints images and bboxes side by side with and without RandomCropWithBBox Op applied,
     with md5 check, expected to pass
    """
    logger.info("test_random_crop_with_bbox_op2_c")
    original_seed = config_get_set_seed(593447)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Load dataset
    dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False)
    dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False)

    # define test OP with values to match existing Op unit - test
    test_op = c_vision.RandomCropWithBBox(512, [200, 200, 200, 200], fill_value=(255, 255, 255))

    dataVoc1 = dataVoc1.map(input_columns=["annotation"],
                            output_columns=["annotation"],
                            operations=fix_annotate)
    dataVoc2 = dataVoc2.map(input_columns=["annotation"],
                            output_columns=["annotation"],
                            operations=fix_annotate)
    # map to apply ops
    dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"],
                            output_columns=["image", "annotation"],
                            columns_order=["image", "annotation"],
                            operations=[test_op])  # Add column for "annotation"

    filename = "random_crop_with_bbox_01_c_result.npz"
    save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)

    unaugSamp, augSamp = [], []

    for unAug, Aug in zip(dataVoc1.create_dict_iterator(), dataVoc2.create_dict_iterator()):
        unaugSamp.append(unAug)
        augSamp.append(Aug)

    if plot_vis:
        visualize_with_bounding_boxes(unaugSamp, augSamp)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_crop_and_resize_02():
    """
    Test RandomCropAndResize with md5 check:Image interpolation mode is Inter.NEAREST,
    expected to pass
    """
    logger.info("test_random_crop_and_resize_02")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR,
                               SCHEMA_DIR,
                               columns_list=["image"],
                               shuffle=False)
    decode_op = c_vision.Decode()
    random_crop_and_resize_op = c_vision.RandomResizedCrop(
        (256, 512), interpolation=mode.Inter.NEAREST)
    data1 = data1.map(input_columns=["image"], operations=decode_op)
    data1 = data1.map(input_columns=["image"],
                      operations=random_crop_and_resize_op)

    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR,
                               SCHEMA_DIR,
                               columns_list=["image"],
                               shuffle=False)
    transforms = [
        py_vision.Decode(),
        py_vision.RandomResizedCrop((256, 512),
                                    interpolation=mode.Inter.NEAREST),
        py_vision.ToTensor()
    ]
    transform = py_vision.ComposeOp(transforms)
    data2 = data2.map(input_columns=["image"], operations=transform())

    filename1 = "random_crop_and_resize_02_c_result.npz"
    filename2 = "random_crop_and_resize_02_py_result.npz"
    save_and_check_md5(data1, filename1, generate_golden=GENERATE_GOLDEN)
    save_and_check_md5(data2, filename2, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
示例#11
0
def test_random_color_c(degrees=(0.1, 1.9), plot=False, run_golden=True):
    """
    Test Cpp RandomColor
    """
    logger.info("test_random_color_op")

    original_seed = config_get_set_seed(10)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Decode with rgb format set to True
    data1 = ds.TFRecordDataset(C_DATA_DIR, C_SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = ds.TFRecordDataset(C_DATA_DIR, C_SCHEMA_DIR, columns_list=["image"], shuffle=False)

    # Serialize and Load dataset requires using vision.Decode instead of vision.Decode().
    if degrees is None:
        c_op = vision.RandomColor()
    else:
        c_op = vision.RandomColor(degrees)

    data1 = data1.map(operations=[vision.Decode()], input_columns=["image"])
    data2 = data2.map(operations=[vision.Decode(), c_op], input_columns=["image"])

    image_random_color_op = []
    image = []

    for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
                            data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
        actual = item1["image"]
        expected = item2["image"]
        image.append(actual)
        image_random_color_op.append(expected)

    if run_golden:
        # Compare with expected md5 from images
        filename = "random_color_op_02_result.npz"
        save_and_check_md5(data2, filename, generate_golden=GENERATE_GOLDEN)

    if plot:
        visualize_list(image, image_random_color_op)

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers((original_num_parallel_workers))
def test_random_crop_decode_resize_md5():
    """
    Test RandomCropDecodeResize with md5 check
    """
    logger.info("Test RandomCropDecodeResize with md5 check")
    original_seed = config_get_set_seed(10)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    random_crop_decode_resize_op = vision.RandomCropDecodeResize((256, 512), (1, 1), (0.5, 0.5))
    data = data.map(operations=random_crop_decode_resize_op, input_columns=["image"])
    # Compare with expected md5 from images
    filename = "random_crop_decode_resize_01_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers((original_num_parallel_workers))
示例#13
0
def test_random_crop_and_resize_03():
    """
    Test RandomCropAndResize with md5 check: max_attempts is 1, expected to pass
    """
    logger.info("test_random_crop_and_resize_03")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR,
                               SCHEMA_DIR,
                               columns_list=["image"],
                               shuffle=False)
    decode_op = c_vision.Decode()
    random_crop_and_resize_op = c_vision.RandomResizedCrop((256, 512),
                                                           max_attempts=1)
    data1 = data1.map(operations=decode_op, input_columns=["image"])
    data1 = data1.map(operations=random_crop_and_resize_op,
                      input_columns=["image"])

    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR,
                               SCHEMA_DIR,
                               columns_list=["image"],
                               shuffle=False)
    transforms = [
        py_vision.Decode(),
        py_vision.RandomResizedCrop((256, 512), max_attempts=1),
        py_vision.ToTensor()
    ]
    transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
    data2 = data2.map(operations=transform, input_columns=["image"])

    filename1 = "random_crop_and_resize_03_c_result.npz"
    filename2 = "random_crop_and_resize_03_py_result.npz"
    save_and_check_md5(data1, filename1, generate_golden=GENERATE_GOLDEN)
    save_and_check_md5(data2, filename2, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_serdes_cifar10_dataset(remove_json_files=True):
    """
    Test serdes on Cifar10 dataset pipeline
    """
    data_dir = "../data/dataset/testCifar10Data"
    original_seed = config_get_set_seed(1)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    data1 = ds.Cifar10Dataset(data_dir, num_samples=10, shuffle=False)
    data1 = data1.take(6)

    trans = [
        vision.RandomCrop((32, 32), (4, 4, 4, 4)),
        vision.Resize((224, 224)),
        vision.Rescale(1.0 / 255.0, 0.0),
        vision.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
        vision.HWC2CHW()
    ]

    type_cast_op = c.TypeCast(mstype.int32)
    data1 = data1.map(operations=type_cast_op, input_columns="label")
    data1 = data1.map(operations=trans, input_columns="image")
    data1 = data1.batch(3, drop_remainder=True)
    data1 = data1.repeat(1)
    data2 = util_check_serialize_deserialize_file(data1,
                                                  "cifar10_dataset_pipeline",
                                                  remove_json_files)

    num_samples = 0
    # Iterate and compare the data in the original pipeline (data1) against the deserialized pipeline (data2)
    for item1, item2 in zip(
            data1.create_dict_iterator(num_epochs=1, output_numpy=True),
            data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
        np.testing.assert_array_equal(item1['image'], item2['image'])
        num_samples += 1

    assert num_samples == 2

    # Restore configuration num_parallel_workers
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_serdes_voc_dataset(remove_json_files=True):
    """
    Test serdes on VOC dataset pipeline.
    """
    data_dir = "../data/dataset/testVOC2012"
    original_seed = config_get_set_seed(1)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # define map operations
    random_color_adjust_op = vision.RandomColorAdjust(brightness=(0.5, 0.5))
    random_rotation_op = vision.RandomRotation((0, 90),
                                               expand=True,
                                               resample=Inter.BILINEAR,
                                               center=(50, 50),
                                               fill_value=150)

    data1 = ds.VOCDataset(data_dir,
                          task="Detection",
                          usage="train",
                          decode=True)
    data1 = data1.map(operations=random_color_adjust_op,
                      input_columns=["image"])
    data1 = data1.map(operations=random_rotation_op, input_columns=["image"])
    data1 = data1.skip(2)
    data2 = util_check_serialize_deserialize_file(data1,
                                                  "voc_dataset_pipeline",
                                                  remove_json_files)

    num_samples = 0
    # Iterate and compare the data in the original pipeline (data1) against the deserialized pipeline (data2)
    for item1, item2 in zip(
            data1.create_dict_iterator(num_epochs=1, output_numpy=True),
            data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
        np.testing.assert_array_equal(item1['image'], item2['image'])
        num_samples += 1

    assert num_samples == 7

    # Restore configuration num_parallel_workers
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_serdes_random_crop():
    """
    Test serdes on RandomCrop pipeline.
    """
    logger.info("test_random_crop")
    DATA_DIR = [
        "../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"
    ]
    SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"
    original_seed = config_get_set_seed(1)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"])
    decode_op = vision.Decode()
    random_crop_op = vision.RandomCrop([512, 512], [200, 200, 200, 200])
    data1 = data1.map(operations=decode_op, input_columns="image")
    data1 = data1.map(operations=random_crop_op, input_columns="image")

    # Serializing into python dictionary
    ds1_dict = ds.serialize(data1)
    # Serializing into json object
    _ = json.dumps(ds1_dict, indent=2)

    # Reconstruct dataset pipeline from its serialized form
    data1_1 = ds.deserialize(input_dict=ds1_dict)

    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"])
    data2 = data2.map(operations=decode_op, input_columns="image")

    for item1, item1_1, item2 in zip(
            data1.create_dict_iterator(num_epochs=1, output_numpy=True),
            data1_1.create_dict_iterator(num_epochs=1, output_numpy=True),
            data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
        np.testing.assert_array_equal(item1['image'], item1_1['image'])
        _ = item2["image"]

    # Restore configuration num_parallel_workers
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
示例#17
0
def test_random_vertical_valid_prob_c():
    """
    Test RandomVerticalFlip op with c_transforms: valid non-default input, expect to pass
    """
    logger.info("test_random_vertical_valid_prob_c")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    decode_op = c_vision.Decode()
    random_horizontal_op = c_vision.RandomVerticalFlip(0.8)
    data = data.map(input_columns=["image"], operations=decode_op)
    data = data.map(input_columns=["image"], operations=random_horizontal_op)

    filename = "random_vertical_01_c_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
示例#18
0
def test_case_0():
    """
    Test 1D Generator.
    Test without explicit kwargs for input args.
    """
    original_seed = config_get_set_seed(55)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # apply dataset qoperations
    data1 = ds.GeneratorDataset(generator_1d, ["data"])
    data1 = data1.shuffle(2)
    data1 = data1.map((lambda x: x), ["data"])
    data1 = data1.batch(2)

    expected_data = np.array([[[1], [2]], [[3], [0]]])
    for i, data_row in enumerate(
            data1.create_tuple_iterator(output_numpy=True)):
        np.testing.assert_array_equal(data_row[0], expected_data[i])

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers((original_num_parallel_workers))
示例#19
0
def test_random_color_adjust_md5():
    """
    Test RandomColorAdjust with md5 check
    """
    logger.info("Test RandomColorAdjust with md5 check")
    original_seed = config_get_set_seed(10)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR,
                               SCHEMA_DIR,
                               columns_list=["image"],
                               shuffle=False)
    decode_op = c_vision.Decode()
    random_adjust_op = c_vision.RandomColorAdjust(0.4, 0.4, 0.4, 0.1)
    data1 = data1.map(operations=decode_op, input_columns=["image"])
    data1 = data1.map(operations=random_adjust_op, input_columns=["image"])

    # Second dataset
    transforms = [
        py_vision.Decode(),
        py_vision.RandomColorAdjust(0.4, 0.4, 0.4, 0.1),
        py_vision.ToTensor()
    ]
    transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
    data2 = ds.TFRecordDataset(DATA_DIR,
                               SCHEMA_DIR,
                               columns_list=["image"],
                               shuffle=False)
    data2 = data2.map(operations=transform, input_columns=["image"])
    # Compare with expected md5 from images
    filename = "random_color_adjust_01_c_result.npz"
    save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)
    filename = "random_color_adjust_01_py_result.npz"
    save_and_check_md5(data2, filename, generate_golden=GENERATE_GOLDEN)

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
示例#20
0
def test_random_crop_03_c():
    """
    Test RandomCrop op with c_transforms: input image size == crop size, expected to pass
    """
    logger.info("test_random_crop_03_c")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    # Note: The size of the image is 4032*2268
    random_crop_op = c_vision.RandomCrop([2268, 4032])
    decode_op = c_vision.Decode()
    data = data.map(operations=decode_op, input_columns=["image"])
    data = data.map(operations=random_crop_op, input_columns=["image"])

    filename = "random_crop_03_c_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
示例#21
0
def test_random_crop_02_c():
    """
    Test RandomCrop op with c_transforms: size is a list/tuple with length 2, expected to pass
    """
    logger.info("test_random_crop_02_c")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    # Note: If size is a sequence of length 2, it should be (height, width).
    random_crop_op = c_vision.RandomCrop([512, 375])
    decode_op = c_vision.Decode()
    data = data.map(operations=decode_op, input_columns=["image"])
    data = data.map(operations=random_crop_op, input_columns=["image"])

    filename = "random_crop_02_c_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_sharpness_c_md5():
    """
    Test RandomSharpness cpp op with md5 comparison
    """
    logger.info("Test RandomSharpness cpp op with md5 comparison")
    original_seed = config_get_set_seed(200)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # define map operations
    transforms = [C.Decode(), C.RandomSharpness((10.0, 15.0))]

    #  Generate dataset
    data = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data = data.map(operations=transforms, input_columns=["image"])

    # check results with md5 comparison
    filename = "random_sharpness_cpp_01_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
示例#23
0
def test_random_crop_01_c():
    """
    Test RandomCrop op with c_transforms: size is a single integer, expected to pass
    """
    logger.info("test_random_crop_01_c")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    # Note: If size is an int, a square crop of size (size, size) is returned.
    random_crop_op = c_vision.RandomCrop(512)
    decode_op = c_vision.Decode()
    data = data.map(operations=decode_op, input_columns=["image"])
    data = data.map(operations=random_crop_op, input_columns=["image"])

    filename = "random_crop_01_c_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_solarize_md5():
    """
    Test RandomSolarize
    """
    logger.info("Test RandomSolarize")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    data1 = ds.TFRecordDataset(DATA_DIR,
                               SCHEMA_DIR,
                               columns_list=["image"],
                               shuffle=False)
    decode_op = vision.Decode()
    random_solarize_op = vision.RandomSolarize((10, 150))
    data1 = data1.map(input_columns=["image"], operations=decode_op)
    data1 = data1.map(input_columns=["image"], operations=random_solarize_op)
    # Compare with expected md5 from images
    filename = "random_solarize_01_result.npz"
    save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
示例#25
0
def test_mixup_batch_md5():
    """
    Test MixUpBatch with MD5:
    """
    logger.info("test_mixup_batch_md5")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # MixUp Images
    data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

    one_hot_op = data_trans.OneHot(num_classes=10)
    data = data.map(input_columns=["label"], operations=one_hot_op)
    mixup_batch_op = vision.MixUpBatch()
    data = data.batch(5, drop_remainder=True)
    data = data.map(input_columns=["image", "label"], operations=mixup_batch_op)

    filename = "mixup_batch_c_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_sharpness_py_md5():
    """
    Test RandomSharpness python op with md5 comparison
    """
    logger.info("Test RandomSharpness python op with md5 comparison")
    original_seed = config_get_set_seed(5)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # define map operations
    transforms = [F.Decode(), F.RandomSharpness((20.0, 25.0)), F.ToTensor()]
    transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)

    #  Generate dataset
    data = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data = data.map(operations=transform, input_columns=["image"])

    # check results with md5 comparison
    filename = "random_sharpness_py_01_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
示例#27
0
def test_random_color_md5():
    """
    Test RandomColor with md5 check
    """
    logger.info("Test RandomColor with md5 check")
    original_seed = config_get_set_seed(10)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    data = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)

    transforms = F.ComposeOp(
        [F.Decode(), F.RandomColor((0.1, 1.9)),
         F.ToTensor()])

    data = data.map(input_columns="image", operations=transforms())
    # Compare with expected md5 from images
    filename = "random_color_01_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers((original_num_parallel_workers))
示例#28
0
def test_cutmix_batch_nhwc_md5():
    """
    Test CutMixBatch on a batch of HWC images with MD5:
    """
    logger.info("test_cutmix_batch_nhwc_md5")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # CutMixBatch Images
    data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

    one_hot_op = data_trans.OneHot(num_classes=10)
    data = data.map(operations=one_hot_op, input_columns=["label"])
    cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
    data = data.batch(5, drop_remainder=True)
    data = data.map(operations=cutmix_batch_op, input_columns=["image", "label"])

    filename = "cutmix_batch_c_nhwc_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
示例#29
0
def test_random_crop_08_c():
    """
    Test RandomCrop op with c_transforms: padding_mode is Border.EDGE,
    expected to pass
    """
    logger.info("test_random_crop_08_c")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    # Note: The padding_mode is Border.EDGE.
    random_crop_op = c_vision.RandomCrop(512, [200, 200, 200, 200], padding_mode=mode.Border.EDGE)
    decode_op = c_vision.Decode()
    data = data.map(operations=decode_op, input_columns=["image"])
    data = data.map(operations=random_crop_op, input_columns=["image"])

    filename = "random_crop_08_c_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
示例#30
0
def test_bounding_box_augment_valid_ratio_c(plot_vis=False):
    """
    Test BoundingBoxAugment op (testing with valid ratio, less than 1.
    Prints images side by side with and without Aug applied + bboxes to compare and test
    """
    logger.info("test_bounding_box_augment_valid_ratio_c")

    original_seed = config_get_set_seed(1)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False, decode=True)
    dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False, decode=True)

    test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 0.9)

    # map to apply ops
    dataVoc2 = dataVoc2.map(operations=[test_op], input_columns=["image", "bbox"],
                            output_columns=["image", "bbox"],
                            column_order=["image", "bbox"])  # Add column for "bbox"

    filename = "bounding_box_augment_valid_ratio_c_result.npz"
    save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)

    unaugSamp, augSamp = [], []

    for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True),
                          dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)):
        unaugSamp.append(unAug)
        augSamp.append(Aug)

    if plot_vis:
        visualize_with_bounding_boxes(unaugSamp, augSamp)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)