def test_random_vertical_flip_with_bbox_op_rand_c(plot_vis=False):
    """
    Prints images and bboxes side by side with and without RandomVerticalFlipWithBBox Op applied,
    tests with MD5 check, expected to pass

    Args:
        plot_vis (bool): when True, pass the collected unaugmented and augmented samples
            to visualize_with_bounding_boxes.
    """
    logger.info("test_random_vertical_flip_with_bbox_op_rand_c")
    original_seed = config_get_set_seed(29847)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # Load dataset
        dataVoc1 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False)
        dataVoc2 = ds.VOCDataset(DATA_DIR_VOC, task="Detection", mode="train", decode=True, shuffle=False)

        test_op = c_vision.RandomVerticalFlipWithBBox(0.8)

        # map to apply ops
        dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
                                output_columns=["image", "bbox"],
                                columns_order=["image", "bbox"],
                                operations=[test_op])

        filename = "random_vertical_flip_with_bbox_01_c_result.npz"
        save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)

        unaugSamp, augSamp = [], []
        for unAug, Aug in zip(dataVoc1.create_dict_iterator(), dataVoc2.create_dict_iterator()):
            unaugSamp.append(unAug)
            augSamp.append(Aug)

        if plot_vis:
            visualize_with_bounding_boxes(unaugSamp, augSamp)
    finally:
        # Restore config setting even if the test fails part-way, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_bounding_box_augment_with_rotation_op(plot_vis=False):
    """
    Test BoundingBoxAugment op (passing rotation op as transform)
    Prints images side by side with and without Aug applied + bboxes to compare and test

    Args:
        plot_vis (bool): when True, pass the collected unaugmented and augmented samples
            to visualize_with_bounding_boxes.
    """
    logger.info("test_bounding_box_augment_with_rotation_op")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False)
        dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False)

        # Ratio is set to 1 to apply rotation on all bounding boxes.
        test_op = c_vision.BoundingBoxAugment(c_vision.RandomRotation(90), 1)

        # map to apply ops
        dataVoc2 = dataVoc2.map(input_columns=["image", "bbox"],
                                output_columns=["image", "bbox"],
                                columns_order=["image", "bbox"],
                                operations=[test_op])

        filename = "bounding_box_augment_rotation_c_result.npz"
        save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)

        unaugSamp, augSamp = [], []
        for unAug, Aug in zip(dataVoc1.create_dict_iterator(), dataVoc2.create_dict_iterator()):
            unaugSamp.append(unAug)
            augSamp.append(Aug)

        if plot_vis:
            visualize_with_bounding_boxes(unaugSamp, augSamp)
    finally:
        # Restore config setting even if the test fails part-way, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_compare_random_color_op(degrees=None, plot=False):
    """
    Compare Random Color op in Python and Cpp

    Args:
        degrees: forwarded to both RandomColor ops; when None the ops are built
            with their default arguments.
        plot (bool): when True, pass the collected images to visualize_list.
    """
    logger.info("test_random_color_op")

    original_seed = config_get_set_seed(5)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    image_random_color_op = []
    image = []

    try:
        # Decode with rgb format set to True
        data1 = ds.TFRecordDataset(C_DATA_DIR, C_SCHEMA_DIR, columns_list=["image"], shuffle=False)
        data2 = ds.TFRecordDataset(C_DATA_DIR, C_SCHEMA_DIR, columns_list=["image"], shuffle=False)

        if degrees is None:
            c_op = vision.RandomColor()
            p_op = F.RandomColor()
        else:
            c_op = vision.RandomColor(degrees)
            p_op = F.RandomColor(degrees)

        transforms_random_color_py = F.ComposeOp([lambda img: img.astype(np.uint8), F.ToPIL(),
                                                  p_op, np.array])

        data1 = data1.map(input_columns=["image"], operations=[vision.Decode(), c_op])
        data2 = data2.map(input_columns=["image"], operations=[vision.Decode()])
        data2 = data2.map(input_columns=["image"], operations=transforms_random_color_py())

        for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
            actual = item1["image"]
            expected = item2["image"]
            image_random_color_op.append(actual)
            image.append(expected)
            assert actual.shape == expected.shape
            # The MSE is only logged; it is not asserted on.
            mse = diff_mse(actual, expected)
            logger.info("MSE= {}".format(str(np.mean(mse))))
    finally:
        # Restore configuration even if the shape assertion fails, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)

    if plot:
        visualize_list(image, image_random_color_op)
def test_textline_dataset_shuffle_files4():
    """
    Test TextFileDataset with shuffle=ds.Shuffle.FILES, seed 135 and 4 parallel workers:
    the decoded "text" column must match the expected five lines in order.
    """
    original_num_parallel_workers = config_get_set_num_parallel_workers(4)
    original_seed = config_get_set_seed(135)

    try:
        data = ds.TextFileDataset(DATA_ALL_FILE, shuffle=ds.Shuffle.FILES)
        count = 0
        line = [
            "This is a text file.",
            "Another file.",
            "Be happy every day.",
            "End of file.",
            "Good luck to everyone."
        ]
        for i in data.create_dict_iterator(num_epochs=1, output_numpy=True):
            strs = i["text"].item().decode("utf8")
            assert strs == line[count]
            count += 1
        assert count == 5
    finally:
        # Restore configuration even if an assertion fails, so later tests are not affected
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
        ds.config.set_seed(original_seed)
def test_random_resize_with_bbox_op_voc_c(plot_vis=False):
    """
    Prints images and bboxes side by side with and without RandomResizeWithBBox Op applied
    testing with VOC dataset

    Args:
        plot_vis (bool): when True, pass the collected unaugmented and augmented samples
            to visualize_with_bounding_boxes.
    """
    logger.info("test_random_resize_with_bbox_op_voc_c")
    original_seed = config_get_set_seed(123)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # Load dataset
        dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False, decode=True)
        dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False, decode=True)

        test_op = c_vision.RandomResizeWithBBox(100)

        # map to apply ops
        dataVoc2 = dataVoc2.map(operations=[test_op],
                                input_columns=["image", "bbox"],
                                output_columns=["image", "bbox"],
                                column_order=["image", "bbox"])

        filename = "random_resize_with_bbox_op_01_c_voc_result.npz"
        save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)

        unaugSamp, augSamp = [], []
        for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True),
                              dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)):
            unaugSamp.append(unAug)
            augSamp.append(Aug)

        if plot_vis:
            visualize_with_bounding_boxes(unaugSamp, augSamp)
    finally:
        # Restore config setting even if the test fails part-way, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def skip_test_random_posterize_op_c(plot=False, run_golden=True):
    """
    Test RandomPosterize in C transformations

    Args:
        plot (bool): when True, pass the original and posterized images to visualize_list.
        run_golden (bool): when True, run the md5 comparison on the posterized dataset.
    """
    logger.info("test_random_posterize_op_c")

    original_seed = config_get_set_seed(55)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # define map operations
        transforms1 = [
            c_vision.Decode(),
            c_vision.RandomPosterize((1, 8))
        ]

        # First dataset
        data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        data1 = data1.map(input_columns=["image"], operations=transforms1)
        # Second dataset
        data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        data2 = data2.map(input_columns=["image"], operations=[c_vision.Decode()])

        image_posterize = []
        image_original = []
        for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
            image_posterize.append(item1["image"])
            image_original.append(item2["image"])

        if run_golden:
            # check results with md5 comparison
            filename = "random_posterize_01_result_c.npz"
            save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)

        if plot:
            visualize_list(image_original, image_posterize)
    finally:
        # Restore configuration even if the test fails part-way, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_grayscale_input_grayscale_images():
    """
    Test RandomGrayscale Op: valid parameter with grayscale images as input, expect to pass
    """
    logger.info("test_random_grayscale_input_grayscale_images")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # First dataset
        data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        transforms1 = [
            py_vision.Decode(),
            py_vision.Grayscale(1),
            # Note: If the input images is grayscale image with 1 channel.
            py_vision.RandomGrayscale(0.5),
            py_vision.ToTensor()
        ]
        transform1 = py_vision.ComposeOp(transforms1)
        data1 = data1.map(input_columns=["image"], operations=transform1())

        # Second dataset
        data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        transforms2 = [
            py_vision.Decode(),
            py_vision.ToTensor()
        ]
        transform2 = py_vision.ComposeOp(transforms2)
        data2 = data2.map(input_columns=["image"], operations=transform2())

        image_gray = []
        image = []
        for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()):
            # Move the leading axis last and rescale by 255 before casting to uint8
            image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
            image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
            image_gray.append(image1)
            image.append(image2)

            assert len(image1.shape) == 3
            assert image1.shape[2] == 1
            assert len(image2.shape) == 3
            assert image2.shape[2] == 3
    finally:
        # Restore config even if an assertion fails, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_rotation_md5():
    """
    Test RandomRotation with md5 check
    """
    logger.info("Test RandomRotation with md5 check")
    original_seed = config_get_set_seed(5)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # First dataset
        data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        decode_op = c_vision.Decode()
        rotation_op = c_vision.RandomRotation((0, 90),
                                              expand=True,
                                              resample=Inter.BILINEAR,
                                              center=(50, 50),
                                              fill_value=150)
        data1 = data1.map(operations=decode_op, input_columns=["image"])
        data1 = data1.map(operations=rotation_op, input_columns=["image"])

        # Second dataset
        data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
        transform2 = mindspore.dataset.transforms.py_transforms.Compose([
            py_vision.Decode(),
            py_vision.RandomRotation((0, 90),
                                     expand=True,
                                     resample=Inter.BILINEAR,
                                     center=(50, 50),
                                     fill_value=150),
            py_vision.ToTensor()
        ])
        data2 = data2.map(operations=transform2, input_columns=["image"])

        # Compare with expected md5 from images
        filename1 = "random_rotation_01_c_result.npz"
        save_and_check_md5(data1, filename1, generate_golden=GENERATE_GOLDEN)
        filename2 = "random_rotation_01_py_result.npz"
        save_and_check_md5(data2, filename2, generate_golden=GENERATE_GOLDEN)
    finally:
        # Restore configuration even if an md5 check fails, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_crop_with_bbox_op2_c(plot_vis=False):
    """
    Prints images and bboxes side by side with and without RandomCropWithBBox Op applied,
    with md5 check, expected to pass

    Args:
        plot_vis (bool): when True, pass the collected unaugmented and augmented samples
            to visualize_with_bounding_boxes.
    """
    logger.info("test_random_crop_with_bbox_op2_c")
    original_seed = config_get_set_seed(593447)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # Load dataset
        dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False)
        dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", mode="train", decode=True, shuffle=False)

        # define test OP with values to match existing Op unit - test
        test_op = c_vision.RandomCropWithBBox(512, [200, 200, 200, 200], fill_value=(255, 255, 255))

        dataVoc1 = dataVoc1.map(input_columns=["annotation"],
                                output_columns=["annotation"],
                                operations=fix_annotate)
        dataVoc2 = dataVoc2.map(input_columns=["annotation"],
                                output_columns=["annotation"],
                                operations=fix_annotate)
        # map to apply ops
        dataVoc2 = dataVoc2.map(input_columns=["image", "annotation"],
                                output_columns=["image", "annotation"],
                                columns_order=["image", "annotation"],
                                operations=[test_op])  # Add column for "annotation"

        filename = "random_crop_with_bbox_01_c_result.npz"
        save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)

        unaugSamp, augSamp = [], []
        for unAug, Aug in zip(dataVoc1.create_dict_iterator(), dataVoc2.create_dict_iterator()):
            unaugSamp.append(unAug)
            augSamp.append(Aug)

        if plot_vis:
            visualize_with_bounding_boxes(unaugSamp, augSamp)
    finally:
        # Restore config setting even if the test fails part-way, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_crop_and_resize_02():
    """
    Test RandomCropAndResize with md5 check:Image interpolation mode is Inter.NEAREST,
    expected to pass
    """
    logger.info("test_random_crop_and_resize_02")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # First dataset
        data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        decode_op = c_vision.Decode()
        random_crop_and_resize_op = c_vision.RandomResizedCrop((256, 512), interpolation=mode.Inter.NEAREST)
        data1 = data1.map(input_columns=["image"], operations=decode_op)
        data1 = data1.map(input_columns=["image"], operations=random_crop_and_resize_op)

        # Second dataset
        data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        transforms = [
            py_vision.Decode(),
            py_vision.RandomResizedCrop((256, 512), interpolation=mode.Inter.NEAREST),
            py_vision.ToTensor()
        ]
        transform = py_vision.ComposeOp(transforms)
        data2 = data2.map(input_columns=["image"], operations=transform())

        filename1 = "random_crop_and_resize_02_c_result.npz"
        filename2 = "random_crop_and_resize_02_py_result.npz"
        save_and_check_md5(data1, filename1, generate_golden=GENERATE_GOLDEN)
        save_and_check_md5(data2, filename2, generate_golden=GENERATE_GOLDEN)
    finally:
        # Restore config setting even if an md5 check fails, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_color_c(degrees=(0.1, 1.9), plot=False, run_golden=True):
    """
    Test Cpp RandomColor

    Args:
        degrees: forwarded to vision.RandomColor; when None the op is built with
            its default arguments.
        plot (bool): when True, pass the original and color-adjusted images to visualize_list.
        run_golden (bool): when True, run the md5 comparison on the color-adjusted dataset.
    """
    logger.info("test_random_color_op")

    original_seed = config_get_set_seed(10)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # Decode with rgb format set to True
        data1 = ds.TFRecordDataset(C_DATA_DIR, C_SCHEMA_DIR, columns_list=["image"], shuffle=False)
        data2 = ds.TFRecordDataset(C_DATA_DIR, C_SCHEMA_DIR, columns_list=["image"], shuffle=False)

        # Serialize and Load dataset requires using vision.Decode instead of vision.Decode().
        if degrees is None:
            c_op = vision.RandomColor()
        else:
            c_op = vision.RandomColor(degrees)

        # data1 is only decoded; data2 is decoded and then passed through RandomColor
        data1 = data1.map(operations=[vision.Decode()], input_columns=["image"])
        data2 = data2.map(operations=[vision.Decode(), c_op], input_columns=["image"])

        image_random_color_op = []
        image = []

        for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
                                data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
            image.append(item1["image"])
            image_random_color_op.append(item2["image"])

        if run_golden:
            # Compare with expected md5 from images
            filename = "random_color_op_02_result.npz"
            save_and_check_md5(data2, filename, generate_golden=GENERATE_GOLDEN)

        if plot:
            visualize_list(image, image_random_color_op)
    finally:
        # Restore configuration even if the test fails part-way, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_crop_decode_resize_md5():
    """
    Test RandomCropDecodeResize with md5 check
    """
    logger.info("Test RandomCropDecodeResize with md5 check")
    original_seed = config_get_set_seed(10)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # Generate dataset
        data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        random_crop_decode_resize_op = vision.RandomCropDecodeResize((256, 512), (1, 1), (0.5, 0.5))
        data = data.map(operations=random_crop_decode_resize_op, input_columns=["image"])

        # Compare with expected md5 from images
        filename = "random_crop_decode_resize_01_result.npz"
        save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
    finally:
        # Restore configuration even if the md5 check fails, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_crop_and_resize_03():
    """
    Test RandomCropAndResize with md5 check: max_attempts is 1, expected to pass
    """
    logger.info("test_random_crop_and_resize_03")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # First dataset
        data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        decode_op = c_vision.Decode()
        random_crop_and_resize_op = c_vision.RandomResizedCrop((256, 512), max_attempts=1)
        data1 = data1.map(operations=decode_op, input_columns=["image"])
        data1 = data1.map(operations=random_crop_and_resize_op, input_columns=["image"])

        # Second dataset
        data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        transforms = [
            py_vision.Decode(),
            py_vision.RandomResizedCrop((256, 512), max_attempts=1),
            py_vision.ToTensor()
        ]
        transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
        data2 = data2.map(operations=transform, input_columns=["image"])

        filename1 = "random_crop_and_resize_03_c_result.npz"
        filename2 = "random_crop_and_resize_03_py_result.npz"
        save_and_check_md5(data1, filename1, generate_golden=GENERATE_GOLDEN)
        save_and_check_md5(data2, filename2, generate_golden=GENERATE_GOLDEN)
    finally:
        # Restore config setting even if an md5 check fails, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_serdes_cifar10_dataset(remove_json_files=True):
    """
    Test serdes on Cifar10 dataset pipeline

    Args:
        remove_json_files (bool): forwarded to util_check_serialize_deserialize_file.
    """
    data_dir = "../data/dataset/testCifar10Data"
    original_seed = config_get_set_seed(1)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        data1 = ds.Cifar10Dataset(data_dir, num_samples=10, shuffle=False)
        data1 = data1.take(6)

        trans = [
            vision.RandomCrop((32, 32), (4, 4, 4, 4)),
            vision.Resize((224, 224)),
            vision.Rescale(1.0 / 255.0, 0.0),
            vision.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
            vision.HWC2CHW()
        ]

        type_cast_op = c.TypeCast(mstype.int32)
        data1 = data1.map(operations=type_cast_op, input_columns="label")
        data1 = data1.map(operations=trans, input_columns="image")
        data1 = data1.batch(3, drop_remainder=True)
        data1 = data1.repeat(1)
        data2 = util_check_serialize_deserialize_file(data1, "cifar10_dataset_pipeline", remove_json_files)

        num_samples = 0
        # Iterate and compare the data in the original pipeline (data1) against the deserialized pipeline (data2)
        for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
                                data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
            np.testing.assert_array_equal(item1['image'], item2['image'])
            num_samples += 1

        # take(6) followed by batch(3, drop_remainder=True) gives two batches
        assert num_samples == 2
    finally:
        # Restore configuration even if a comparison fails, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_serdes_voc_dataset(remove_json_files=True):
    """
    Test serdes on VOC dataset pipeline.

    Args:
        remove_json_files (bool): forwarded to util_check_serialize_deserialize_file.
    """
    data_dir = "../data/dataset/testVOC2012"
    original_seed = config_get_set_seed(1)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # define map operations
        random_color_adjust_op = vision.RandomColorAdjust(brightness=(0.5, 0.5))
        random_rotation_op = vision.RandomRotation((0, 90),
                                                   expand=True,
                                                   resample=Inter.BILINEAR,
                                                   center=(50, 50),
                                                   fill_value=150)

        data1 = ds.VOCDataset(data_dir, task="Detection", usage="train", decode=True)
        data1 = data1.map(operations=random_color_adjust_op, input_columns=["image"])
        data1 = data1.map(operations=random_rotation_op, input_columns=["image"])
        data1 = data1.skip(2)
        data2 = util_check_serialize_deserialize_file(data1, "voc_dataset_pipeline", remove_json_files)

        num_samples = 0
        # Iterate and compare the data in the original pipeline (data1) against the deserialized pipeline (data2)
        for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
                                data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
            np.testing.assert_array_equal(item1['image'], item2['image'])
            num_samples += 1

        assert num_samples == 7
    finally:
        # Restore configuration even if a comparison fails, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_serdes_random_crop():
    """
    Test serdes on RandomCrop pipeline.
    """
    logger.info("test_random_crop")
    DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
    SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"
    original_seed = config_get_set_seed(1)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # First dataset
        data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"])
        decode_op = vision.Decode()
        random_crop_op = vision.RandomCrop([512, 512], [200, 200, 200, 200])
        data1 = data1.map(operations=decode_op, input_columns="image")
        data1 = data1.map(operations=random_crop_op, input_columns="image")

        # Serializing into python dictionary
        ds1_dict = ds.serialize(data1)
        # Serializing into json object
        _ = json.dumps(ds1_dict, indent=2)

        # Reconstruct dataset pipeline from its serialized form
        data1_1 = ds.deserialize(input_dict=ds1_dict)

        # Second dataset
        data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"])
        data2 = data2.map(operations=decode_op, input_columns="image")

        for item1, item1_1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
                                         data1_1.create_dict_iterator(num_epochs=1, output_numpy=True),
                                         data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
            np.testing.assert_array_equal(item1['image'], item1_1['image'])
            # The decode-only image is read but not compared
            _ = item2["image"]
    finally:
        # Restore configuration even if a comparison fails, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_vertical_valid_prob_c():
    """
    Test RandomVerticalFlip op with c_transforms: valid non-default input, expect to pass
    """
    logger.info("test_random_vertical_valid_prob_c")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # Generate dataset
        data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        decode_op = c_vision.Decode()
        random_vertical_op = c_vision.RandomVerticalFlip(0.8)
        data = data.map(input_columns=["image"], operations=decode_op)
        data = data.map(input_columns=["image"], operations=random_vertical_op)

        filename = "random_vertical_01_c_result.npz"
        save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
    finally:
        # Restore config setting even if the md5 check fails, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_case_0():
    """
    Test 1D Generator. Test without explicit kwargs for input args.
    """
    original_seed = config_get_set_seed(55)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # apply dataset operations
        data1 = ds.GeneratorDataset(generator_1d, ["data"])
        data1 = data1.shuffle(2)
        data1 = data1.map((lambda x: x), ["data"])
        data1 = data1.batch(2)

        # Expected batches for the seed set above
        expected_data = np.array([[[1], [2]], [[3], [0]]])
        for i, data_row in enumerate(data1.create_tuple_iterator(output_numpy=True)):
            np.testing.assert_array_equal(data_row[0], expected_data[i])
    finally:
        # Restore configuration even if a comparison fails, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_color_adjust_md5():
    """
    Test RandomColorAdjust with md5 check
    """
    logger.info("Test RandomColorAdjust with md5 check")
    original_seed = config_get_set_seed(10)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # First dataset
        data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        decode_op = c_vision.Decode()
        random_adjust_op = c_vision.RandomColorAdjust(0.4, 0.4, 0.4, 0.1)
        data1 = data1.map(operations=decode_op, input_columns=["image"])
        data1 = data1.map(operations=random_adjust_op, input_columns=["image"])

        # Second dataset
        transforms = [
            py_vision.Decode(),
            py_vision.RandomColorAdjust(0.4, 0.4, 0.4, 0.1),
            py_vision.ToTensor()
        ]
        transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
        data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        data2 = data2.map(operations=transform, input_columns=["image"])

        # Compare with expected md5 from images
        filename = "random_color_adjust_01_c_result.npz"
        save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)
        filename = "random_color_adjust_01_py_result.npz"
        save_and_check_md5(data2, filename, generate_golden=GENERATE_GOLDEN)
    finally:
        # Restore configuration even if an md5 check fails, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_crop_03_c():
    """
    Test RandomCrop op with c_transforms: input image size == crop size, expected to pass
    """
    logger.info("test_random_crop_03_c")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # Generate dataset
        data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        # Note: The size of the image is 4032*2268
        random_crop_op = c_vision.RandomCrop([2268, 4032])
        decode_op = c_vision.Decode()
        data = data.map(operations=decode_op, input_columns=["image"])
        data = data.map(operations=random_crop_op, input_columns=["image"])

        filename = "random_crop_03_c_result.npz"
        save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
    finally:
        # Restore config setting even if the md5 check fails, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_crop_02_c():
    """
    Test RandomCrop op with c_transforms: size is a list/tuple with length 2, expected to pass
    """
    logger.info("test_random_crop_02_c")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # Generate dataset
        data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        # Note: If size is a sequence of length 2, it should be (height, width).
        random_crop_op = c_vision.RandomCrop([512, 375])
        decode_op = c_vision.Decode()
        data = data.map(operations=decode_op, input_columns=["image"])
        data = data.map(operations=random_crop_op, input_columns=["image"])

        filename = "random_crop_02_c_result.npz"
        save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
    finally:
        # Restore config setting even if the md5 check fails, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_sharpness_c_md5():
    """
    Test RandomSharpness cpp op with md5 comparison
    """
    logger.info("Test RandomSharpness cpp op with md5 comparison")
    original_seed = config_get_set_seed(200)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # define map operations
        transforms = [
            C.Decode(),
            C.RandomSharpness((10.0, 15.0))
        ]

        # Generate dataset
        data = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
        data = data.map(operations=transforms, input_columns=["image"])

        # check results with md5 comparison
        filename = "random_sharpness_cpp_01_result.npz"
        save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
    finally:
        # Restore configuration even if the md5 check fails, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_crop_01_c():
    """
    Test RandomCrop op with c_transforms: size is a single integer, expected to pass
    """
    logger.info("test_random_crop_01_c")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # Generate dataset
        data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        # Note: If size is an int, a square crop of size (size, size) is returned.
        random_crop_op = c_vision.RandomCrop(512)
        decode_op = c_vision.Decode()
        data = data.map(operations=decode_op, input_columns=["image"])
        data = data.map(operations=random_crop_op, input_columns=["image"])

        filename = "random_crop_01_c_result.npz"
        save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
    finally:
        # Restore config setting even if the md5 check fails, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_solarize_md5():
    """
    Test RandomSolarize
    """
    logger.info("Test RandomSolarize")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)

        decode_op = vision.Decode()
        random_solarize_op = vision.RandomSolarize((10, 150))
        data1 = data1.map(input_columns=["image"], operations=decode_op)
        data1 = data1.map(input_columns=["image"], operations=random_solarize_op)

        # Compare with expected md5 from images
        filename = "random_solarize_01_result.npz"
        save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)
    finally:
        # Restore config setting even if the md5 check fails, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_mixup_batch_md5():
    """
    Test MixUpBatch with MD5:
    """
    logger.info("test_mixup_batch_md5")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # MixUp Images
        data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

        one_hot_op = data_trans.OneHot(num_classes=10)
        data = data.map(input_columns=["label"], operations=one_hot_op)
        mixup_batch_op = vision.MixUpBatch()
        # Batch first; the MixUpBatch op is mapped over the batched image and label columns
        data = data.batch(5, drop_remainder=True)
        data = data.map(input_columns=["image", "label"], operations=mixup_batch_op)

        filename = "mixup_batch_c_result.npz"
        save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
    finally:
        # Restore config setting even if the md5 check fails, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_sharpness_py_md5():
    """
    Test RandomSharpness python op with md5 comparison
    """
    logger.info("Test RandomSharpness python op with md5 comparison")
    original_seed = config_get_set_seed(5)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # define map operations
        transforms = [
            F.Decode(),
            F.RandomSharpness((20.0, 25.0)),
            F.ToTensor()
        ]
        transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)

        # Generate dataset
        data = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
        data = data.map(operations=transform, input_columns=["image"])

        # check results with md5 comparison
        filename = "random_sharpness_py_01_result.npz"
        save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
    finally:
        # Restore configuration even if the md5 check fails, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_color_md5():
    """
    Test RandomColor with md5 check
    """
    logger.info("Test RandomColor with md5 check")
    original_seed = config_get_set_seed(10)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # Generate dataset
        data = de.ImageFolderDatasetV2(dataset_dir=DATA_DIR, shuffle=False)

        transforms = F.ComposeOp([F.Decode(),
                                  F.RandomColor((0.1, 1.9)),
                                  F.ToTensor()])

        data = data.map(input_columns="image", operations=transforms())

        # Compare with expected md5 from images
        filename = "random_color_01_result.npz"
        save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
    finally:
        # Restore configuration even if the md5 check fails, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_cutmix_batch_nhwc_md5():
    """
    Test CutMixBatch on a batch of HWC images with MD5:
    """
    logger.info("test_cutmix_batch_nhwc_md5")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # CutMixBatch Images
        data = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False)

        one_hot_op = data_trans.OneHot(num_classes=10)
        data = data.map(operations=one_hot_op, input_columns=["label"])
        cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
        # Batch first; the CutMixBatch op is mapped over the batched image and label columns
        data = data.batch(5, drop_remainder=True)
        data = data.map(operations=cutmix_batch_op, input_columns=["image", "label"])

        filename = "cutmix_batch_c_nhwc_result.npz"
        save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
    finally:
        # Restore config setting even if the md5 check fails, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_random_crop_08_c():
    """
    Test RandomCrop op with c_transforms: padding_mode is Border.EDGE,
    expected to pass
    """
    logger.info("test_random_crop_08_c")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        # Generate dataset
        data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
        # Note: The padding_mode is Border.EDGE.
        random_crop_op = c_vision.RandomCrop(512, [200, 200, 200, 200], padding_mode=mode.Border.EDGE)
        decode_op = c_vision.Decode()
        data = data.map(operations=decode_op, input_columns=["image"])
        data = data.map(operations=random_crop_op, input_columns=["image"])

        filename = "random_crop_08_c_result.npz"
        save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
    finally:
        # Restore config setting even if the md5 check fails, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_bounding_box_augment_valid_ratio_c(plot_vis=False):
    """
    Test BoundingBoxAugment op (testing with valid ratio, less than 1).
    Prints images side by side with and without Aug applied + bboxes to compare and test

    Args:
        plot_vis (bool): when True, pass the collected unaugmented and augmented samples
            to visualize_with_bounding_boxes.
    """
    logger.info("test_bounding_box_augment_valid_ratio_c")
    original_seed = config_get_set_seed(1)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    try:
        dataVoc1 = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False, decode=True)
        dataVoc2 = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False, decode=True)

        test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 0.9)

        # map to apply ops
        dataVoc2 = dataVoc2.map(operations=[test_op],
                                input_columns=["image", "bbox"],
                                output_columns=["image", "bbox"],
                                column_order=["image", "bbox"])  # Add column for "bbox"

        filename = "bounding_box_augment_valid_ratio_c_result.npz"
        save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)

        unaugSamp, augSamp = [], []
        for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True),
                              dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)):
            unaugSamp.append(unAug)
            augSamp.append(Aug)

        if plot_vis:
            visualize_with_bounding_boxes(unaugSamp, augSamp)
    finally:
        # Restore config setting even if the test fails part-way, so later tests are not affected
        ds.config.set_seed(original_seed)
        ds.config.set_num_parallel_workers(original_num_parallel_workers)