def test_batch_11():
    """
    Test batch: batch_size=1 and dataset_size=1
    """
    logger.info("test_batch_11")
    # define parameters
    batch_size = 1
    parameters = {"params": {'batch_size': batch_size}}

    # apply dataset operations
    # Use schema file with 1 row
    schema_file = "../data/dataset/testTFTestAllTypes/datasetSchema1Row.json"
    data1 = ds.TFRecordDataset(DATA_DIR, schema_file)
    data1 = data1.batch(batch_size)

    assert sum([1 for _ in data1]) == 1

    filename = "batch_11_result.npz"
    save_and_check(data1, parameters, filename, generate_golden=GENERATE_GOLDEN)
def test_filter(predicate_func):
    transforms = [cde.Decode(), cde.Resize([64, 64])]
    dataset = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR,
                                 columns_list=["image", "label"], shuffle=False)
    dataset = dataset.map(input_columns=["image"], operations=transforms,
                          num_parallel_workers=1)
    dataset = dataset.filter(input_columns=["image", "label"],
                             predicate=predicate_func, num_parallel_workers=4)
    num_iter = 0
    label_list = []
    for data in dataset.create_dict_iterator():
        num_iter += 1
        label = data["label"]
        label_list.append(label)
    assert num_iter == 1
    assert label_list[0] == 3
def test_TFRecord_Padded():
    DATA_DIR = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
    SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"
    result_list = [[159109, 2], [192607, 3], [179251, 4], [1, 5]]
    verify_list = []
    shard_num = 4
    for i in range(shard_num):
        data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                                  shuffle=False, shard_equal_rows=True)

        padded_samples = [{'image': np.zeros(1, np.uint8)},
                          {'image': np.zeros(2, np.uint8)},
                          {'image': np.zeros(3, np.uint8)},
                          {'image': np.zeros(4, np.uint8)},
                          {'image': np.zeros(5, np.uint8)}]

        padded_ds = ds.PaddedDataset(padded_samples)
        concat_ds = data + padded_ds
        testsampler = ds.DistributedSampler(num_shards=shard_num, shard_id=i,
                                            shuffle=False, num_samples=None)
        concat_ds.use_sampler(testsampler)
        shard_list = []
        for item in concat_ds.create_dict_iterator(num_epochs=1, output_numpy=True):
            shard_list.append(len(item['image']))
        verify_list.append(shard_list)
    assert verify_list == result_list
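# A minimal sketch (not from the source) of the PaddedDataset pattern used in
# test_TFRecord_Padded: appended samples let every distributed shard see the
# same number of rows. It assumes ds/np are imported as in the tests here and
# that padded samples use the same column names as the base dataset.
def padded_dataset_sketch():
    samples = [{'data': np.zeros(i + 1, np.uint8)} for i in range(4)]
    padded = ds.PaddedDataset(samples)
    sampler = ds.DistributedSampler(num_shards=2, shard_id=0,
                                    shuffle=False, num_samples=None)
    padded.use_sampler(sampler)
    # With 4 samples and 2 shards, each shard yields 2 rows.
    return [item['data'].shape
            for item in padded.create_dict_iterator(num_epochs=1, output_numpy=True)]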
def test_random_crop_02_c():
    """
    Test RandomCrop op with c_transforms: size is a list/tuple with length 2, expected to pass
    """
    logger.info("test_random_crop_02_c")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    # Note: If size is a sequence of length 2, it should be (height, width).
    random_crop_op = c_vision.RandomCrop([512, 375])
    decode_op = c_vision.Decode()
    data = data.map(operations=decode_op, input_columns=["image"])
    data = data.map(operations=random_crop_op, input_columns=["image"])

    filename = "random_crop_02_c_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
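# config_get_set_seed / config_get_set_num_parallel_workers are test utilities
# whose definitions are not shown here. A hedged sketch of the assumed
# behavior: install the new value and return the old one so the test can
# restore the global config when it finishes.
def config_get_set_seed_sketch(new_seed):
    old_seed = ds.config.get_seed()  # assumed: save the current global seed
    ds.config.set_seed(new_seed)
    return old_seed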
def test_ten_crop_md5():
    """
    Tests TenCrop for giving the same results in multiple runs.
    Since TenCrop is a deterministic function, we expect it to return
    the same result for a specific input every time.
    """
    logger.info("test_ten_crop_md5")
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    transforms_2 = [
        vision.Decode(),
        vision.TenCrop((200, 100), use_vertical_flip=True),
        lambda images: np.stack([vision.ToTensor()(image) for image in images])  # 4D stack of 10 images
    ]
    transform_2 = vision.ComposeOp(transforms_2)
    data2 = data2.map(input_columns=["image"], operations=transform_2())

    # Compare with expected md5 from images
    filename = "ten_crop_01_result.npz"
    save_and_check_md5(data2, filename, generate_golden=GENERATE_GOLDEN)
def create_squad_dataset(batch_size=1, repeat_count=1, data_file_path=None,
                         schema_file_path=None, is_training=True, do_shuffle=True):
    """create finetune or evaluation dataset"""
    type_cast_op = C.TypeCast(mstype.int32)
    if is_training:
        data_set = ds.TFRecordDataset([data_file_path],
                                      schema_file_path if schema_file_path != "" else None,
                                      columns_list=["input_ids", "input_mask", "segment_ids",
                                                    "start_positions", "end_positions",
                                                    "unique_ids", "is_impossible"],
                                      shuffle=do_shuffle)
        data_set = data_set.map(operations=type_cast_op, input_columns="start_positions")
        data_set = data_set.map(operations=type_cast_op, input_columns="end_positions")
    else:
        data_set = ds.GeneratorDataset(generator_squad(data_file_path), shuffle=do_shuffle,
                                       column_names=["input_ids", "input_mask",
                                                     "segment_ids", "unique_ids"])
    data_set = data_set.map(operations=type_cast_op, input_columns="segment_ids")
    data_set = data_set.map(operations=type_cast_op, input_columns="input_mask")
    data_set = data_set.map(operations=type_cast_op, input_columns="input_ids")
    data_set = data_set.map(operations=type_cast_op, input_columns="unique_ids")
    data_set = data_set.repeat(repeat_count)
    # apply batch operations
    data_set = data_set.batch(batch_size, drop_remainder=True)
    return data_set
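# Hypothetical usage of create_squad_dataset (the path below is a placeholder,
# not from the source): build the finetune pipeline and read the step count.
def squad_train_pipeline_example(data_file="/path/to/squad_train.tf_record"):
    train_ds = create_squad_dataset(batch_size=32, data_file_path=data_file,
                                    schema_file_path="", is_training=True,
                                    do_shuffle=True)
    return train_ds.get_dataset_size()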
def test_random_crop_04_c():
    """
    Test RandomCrop op with c_transforms: input image size < crop size, expected to fail
    """
    logger.info("test_random_crop_04_c")

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    # Note: The size of the image is 4032*2268
    random_crop_op = c_vision.RandomCrop([2268, 4033])
    decode_op = c_vision.Decode()
    data = data.map(operations=decode_op, input_columns=["image"])
    data = data.map(operations=random_crop_op, input_columns=["image"])
    try:
        data.create_dict_iterator(num_epochs=1).__next__()
    except RuntimeError as e:
        logger.info("Got an exception in DE: {}".format(str(e)))
        assert "crop size is greater than the image dimensions or is zero" in str(e)
def test_to_pil_01():
    """
    Test ToPIL op with md5 comparison: input is already a PIL image
    Expected to pass
    """
    logger.info("test_to_pil_01")

    # Generate dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    transforms = [
        py_vision.Decode(),
        # The input is already a PIL image at this point.
        py_vision.ToPIL(),
        py_vision.CenterCrop(375),
        py_vision.ToTensor()
    ]
    transform = py_vision.ComposeOp(transforms)
    data1 = data1.map(input_columns=["image"], operations=transform())

    # Compare with expected md5 from images
    filename = "to_pil_01_result.npz"
    save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN)
def test_five_crop_error_msg():
    """
    Test FiveCrop error message.
    """
    logger.info("test_five_crop_error_msg")

    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    transforms = [
        vision.Decode(),
        vision.FiveCrop(200),
        vision.ToTensor()
    ]
    transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
    data = data.map(operations=transform, input_columns=["image"])

    with pytest.raises(RuntimeError) as info:
        for _ in data:
            pass
    error_msg = "TypeError: __call__() takes 2 positional arguments but 6 were given"
    # error msg comes from ToTensor()
    assert error_msg in str(info.value)
def test_case_2():
    """ Test PyFunc """
    print("Test n-1 PyFunc : (lambda x, y : x + y) ")
    col = ["col0", "col1"]

    # apply dataset operations
    ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
    ds1 = ds1.map(operations=(lambda x, y: x + y), input_columns=col,
                  output_columns="out")

    print("************** Output Tensor *****************")
    for data in ds1.create_dict_iterator(num_epochs=1, output_numpy=True):
        # each data is a dictionary; after the map, it has the single key "out"
        print(data["out"])
    print("************** Output Tensor *****************")
def skip_test_2ops_shuffle_repeat():
    """
    Test Shuffle then Repeat
    """
    logger.info("Test Shuffle then Repeat")
    # define parameters
    repeat_count = 2
    buffer_size = 5
    seed = 0
    parameters = {"params": {'repeat_count': repeat_count,
                             'buffer_size': buffer_size,
                             'reshuffle_each_iteration': False,
                             'seed': seed}}

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
    ds.config.set_seed(seed)
    data1 = data1.shuffle(buffer_size=buffer_size)
    data1 = data1.repeat(repeat_count)

    filename = "test_2ops_shuffle_repeat.npz"
    save_and_check(data1, parameters, filename, generate_golden=GENERATE_GOLDEN)
def test_case_1():
    """ Test PyFunc """
    print("Test 1-n PyFunc : (lambda x : (x , x + x)) ")
    col = "col0"

    # apply dataset operations
    ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
    ds1 = ds1.map(input_columns=col, output_columns=["out0", "out1"],
                  operations=(lambda x: (x, x + x)))

    print("************** Output Tensor *****************")
    for data in ds1.create_dict_iterator():
        # each data is a dictionary; after the map, it has keys "out0" and "out1"
        print("out0")
        print(data["out0"])
        print("out1")
        print(data["out1"])
    print("************** Output Tensor *****************")
def create_ner_dataset(batch_size=1, repeat_count=1, assessment_method="accuracy",
                       data_file_path=None, dataset_format="mindrecord",
                       schema_file_path=None, do_shuffle=True, drop_remainder=True):
    """create finetune or evaluation dataset"""
    type_cast_op = C.TypeCast(mstype.int32)
    if dataset_format == "mindrecord":
        dataset = ds.MindDataset([data_file_path],
                                 columns_list=["input_ids", "input_mask",
                                               "segment_ids", "label_ids"],
                                 shuffle=do_shuffle)
    else:
        dataset = ds.TFRecordDataset([data_file_path],
                                     schema_file_path if schema_file_path != "" else None,
                                     columns_list=["input_ids", "input_mask",
                                                   "segment_ids", "label_ids"],
                                     shuffle=do_shuffle)
    if assessment_method == "Spearman_correlation":
        type_cast_op_float = C.TypeCast(mstype.float32)
        dataset = dataset.map(operations=type_cast_op_float, input_columns="label_ids")
    else:
        dataset = dataset.map(operations=type_cast_op, input_columns="label_ids")
    dataset = dataset.map(operations=type_cast_op, input_columns="segment_ids")
    dataset = dataset.map(operations=type_cast_op, input_columns="input_mask")
    dataset = dataset.map(operations=type_cast_op, input_columns="input_ids")
    dataset = dataset.repeat(repeat_count)
    # apply batch operations
    dataset = dataset.batch(batch_size, drop_remainder=drop_remainder)
    return dataset
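# Hedged usage sketch for create_ner_dataset: the same helper serves both
# storage formats, with dataset_format selecting the reader. File paths are
# placeholders, not from the source.
def ner_pipeline_example(mindrecord_file="/path/to/ner.mindrecord",
                         tfrecord_file="/path/to/ner.tf_record"):
    mr_ds = create_ner_dataset(batch_size=16, data_file_path=mindrecord_file,
                               dataset_format="mindrecord")
    tf_ds = create_ner_dataset(batch_size=16, data_file_path=tfrecord_file,
                               dataset_format="tfrecord", schema_file_path="")
    return mr_ds, tf_ds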
def test_random_crop_03_c():
    """
    Test RandomCrop op with c_transforms: input image size == crop size, expected to pass
    """
    logger.info("test_random_crop_03_c")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    # Note: The size of the image is 4032*2268
    random_crop_op = c_vision.RandomCrop([2268, 4032])
    decode_op = c_vision.Decode()
    data = data.map(operations=decode_op, input_columns=["image"])
    data = data.map(operations=random_crop_op, input_columns=["image"])

    filename = "random_crop_03_c_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_case_2():
    """ Test PyFunc """
    logger.info("Test n-1 PyFunc : lambda x, y : x + y ")
    col = ["col0", "col1"]

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
    data1 = data1.map(input_columns=col, output_columns="out",
                      operations=(lambda x, y: x + y), columns_order=["out"])

    i = 0
    for item in data1.create_dict_iterator():  # each data is a dictionary
        # In this test, the dataset is 2x2 sequential tensors
        golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
        np.testing.assert_array_equal(item["out"], golden)
        i = i + 4
def test_repeat_count2():
    data1 = ds.TFRecordDataset(DATA_DIR_TF2, SCHEMA_DIR_TF2, shuffle=False)
    data1_size = data1.get_dataset_size()
    logger.info("dataset size is {}".format(data1_size))

    batch_size = 2
    repeat_count = 4
    resize_height, resize_width = 32, 32
    decode_op = vision.Decode()
    resize_op = vision.Resize((resize_height, resize_width),
                              interpolation=ds.transforms.vision.Inter.LINEAR)
    data1 = data1.map(input_columns=["image"], operations=decode_op)
    data1 = data1.map(input_columns=["image"], operations=resize_op)
    data1 = data1.batch(batch_size, drop_remainder=False)
    data1 = data1.repeat(repeat_count)
    dataset_size = data1.get_dataset_size()
    logger.info("dataset batch then repeat's size is {}".format(dataset_size))

    num1_iter = 0
    for _ in data1.create_dict_iterator():
        num1_iter += 1

    assert data1_size == 3
    assert dataset_size == num1_iter == 8
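# Size arithmetic behind test_repeat_count2, derived from its own asserts:
# 3 rows -> ceil(3 / 2) = 2 batches (drop_remainder=False keeps the partial
# batch), then 2 batches * repeat(4) = 8 iterations.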
def test_random_crop_04_py():
    """
    Test RandomCrop op with py_transforms: input image size < crop size, expected to fail
    """
    logger.info("test_random_crop_04_py")

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    # Note: The size of the image is 4032*2268
    transforms = [
        py_vision.Decode(),
        py_vision.RandomCrop([2268, 4033]),
        py_vision.ToTensor()
    ]
    transform = mindspore.dataset.transforms.py_transforms.Compose(transforms)
    data = data.map(operations=transform, input_columns=["image"])
    try:
        data.create_dict_iterator(num_epochs=1).__next__()
    except RuntimeError as e:
        logger.info("Got an exception in DE: {}".format(str(e)))
        assert "Crop size" in str(e)
def test_tf_repeat_03():
    """
    Test Repeat then Batch.
    """
    logger.info("Test Repeat then Batch")
    data1 = ds.TFRecordDataset(DATA_DIR_TF2, SCHEMA_DIR_TF2, shuffle=False)

    batch_size = 32
    resize_height, resize_width = 32, 32
    decode_op = vision.Decode()
    resize_op = vision.Resize((resize_height, resize_width),
                              interpolation=ds.transforms.vision.Inter.LINEAR)

    data1 = data1.map(input_columns=["image"], operations=decode_op)
    data1 = data1.map(input_columns=["image"], operations=resize_op)
    data1 = data1.repeat(22)
    data1 = data1.batch(batch_size, drop_remainder=True)

    num_iter = 0
    for _ in data1.create_dict_iterator():
        num_iter += 1
    logger.info("Number of tf data in data1: {}".format(num_iter))
    assert num_iter == 2
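# Size arithmetic behind test_tf_repeat_03: 3 rows * repeat(22) = 66 rows,
# then floor(66 / 32) = 2 batches because drop_remainder=True discards the rest.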
def get_dataset(batch_size=1, repeat_count=1, distribute_file=''):
    '''
    get dataset
    '''
    ds = de.TFRecordDataset([cfg.data_file], cfg.schema_file,
                            columns_list=["input_ids", "input_mask",
                                          "segment_ids", "label_ids"])
    type_cast_op = C.TypeCast(mstype.int32)
    ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
    ds = ds.map(input_columns="input_mask", operations=type_cast_op)
    ds = ds.map(input_columns="input_ids", operations=type_cast_op)
    ds = ds.map(input_columns="label_ids", operations=type_cast_op)
    ds = ds.repeat(repeat_count)

    # apply shuffle operation
    buffer_size = 960
    ds = ds.shuffle(buffer_size=buffer_size)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)
    return ds
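# Note on the op order in get_dataset (and get_squad_dataset below): repeat()
# is applied before shuffle(), so the shuffle buffer can mix rows from
# different epochs; shuffling before repeat would keep epochs separate.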
def test_case_7():
    """ Test PyFunc """
    logger.info("Test 1-1 PyFunc Multiprocess: lambda x : x + x")

    # apply dataset operations
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, shuffle=False)
    data1 = data1.map(operations=(lambda x: x + x), input_columns="col0",
                      output_columns="out", num_parallel_workers=4,
                      python_multiprocessing=True)

    i = 0
    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
        # each data is a dictionary; in this test, the dataset is 2x2 sequential tensors
        golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]])
        np.testing.assert_array_equal(item["out"], golden)
        i = i + 4
def test_random_horizontal_invalid_prob_c():
    """
    Test RandomHorizontalFlip op in c_transforms: invalid input, expect to raise error
    """
    logger.info("test_random_horizontal_invalid_prob_c")

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    decode_op = c_vision.Decode()
    try:
        # Note: Valid range of prob should be [0.0, 1.0]
        random_horizontal_op = c_vision.RandomHorizontalFlip(1.5)
        data = data.map(input_columns=["image"], operations=decode_op)
        data = data.map(input_columns=["image"], operations=random_horizontal_op)
    except ValueError as e:
        logger.info("Got an exception in DE: {}".format(str(e)))
        assert "Input prob is not within the required interval of (0.0 to 1.0)." in str(e)
def test_five_crop_md5():
    """
    Test FiveCrop with md5 check
    """
    logger.info("test_five_crop_md5")

    # First dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    transforms = [
        vision.Decode(),
        vision.FiveCrop(100),
        lambda images: np.stack([vision.ToTensor()(image) for image in images])  # 4D stack of 5 images
    ]
    transform = vision.ComposeOp(transforms)
    data = data.map(input_columns=["image"], operations=transform())

    # Compare with expected md5 from images
    filename = "five_crop_01_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)
def test_random_rotation_expand():
    """
    Test RandomRotation op with expand=True
    """
    logger.info("test_random_rotation_expand")

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    decode_op = c_vision.Decode()
    # expand is set to True so the output is enlarged to hold the rotated image
    random_rotation_op = c_vision.RandomRotation((0, 90), expand=True)
    data1 = data1.map(operations=decode_op, input_columns=["image"])
    data1 = data1.map(operations=random_rotation_op, input_columns=["image"])

    num_iter = 0
    for item in data1.create_dict_iterator(num_epochs=1):
        rotation = item["image"]
        logger.info("shape after rotate: {}".format(rotation.shape))
        num_iter += 1
def test_random_crop_01_c():
    """
    Test RandomCrop op with c_transforms: size is a single integer, expected to pass
    """
    logger.info("test_random_crop_01_c")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    # Note: If size is an int, a square crop of size (size, size) is returned.
    random_crop_op = c_vision.RandomCrop(512)
    decode_op = c_vision.Decode()
    data = data.map(operations=decode_op, input_columns=["image"])
    data = data.map(operations=random_crop_op, input_columns=["image"])

    filename = "random_crop_01_c_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_cache_nomap_basic4():
    """
    A TF reader dataset (a non-mappable dataset) with a map decode and cache after it.
    Since a global shuffle is used for the tf reader, it will inject a shuffle op over the tf.
    But, if there's a cache later, that shuffle becomes invalid and should be removed.

       Repeat
         |
       Cache
         |
     Map(decode)
         |
      TFReader
    """
    logger.info("Test cache nomap basic 4")

    # This dataset has only 3 records in it
    some_cache = ds.DatasetCache(session_id=1, size=0, spilling=True)

    # With shuffle not being set, TF defaults to a "global" shuffle when there is no cache
    # in the picture. This causes a shuffle-injection over the TF. For clarity, this test will
    # explicitly give the global option, even though it's the default in python.
    # But, when caching is added in the ascendant tree above TF, we do global shuffling
    # through the sampler over the cache, not by the shuffle op. In that case, tree prepare
    # will remove the shuffle op that got injected by the initial tree creation.
    ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"],
                             shuffle=ds.Shuffle.GLOBAL)
    decode_op = c_vision.Decode()
    ds1 = ds1.map(input_columns=["image"], operations=decode_op, cache=some_cache)
    ds1 = ds1.repeat(4)

    num_iter = 0
    for _ in ds1.create_dict_iterator():
        num_iter += 1

    logger.info("Number of data in ds1: {} ".format(num_iter))
    assert num_iter == 12
    logger.info("test_cache_nomap_basic4 Ended.\n")
def test_profiling_complex_pipeline():
    """
    Generator -> Map     ->
                         -> Zip
    TFReader  -> Shuffle ->
    """
    os.environ['PROFILING_MODE'] = 'true'
    os.environ['MINDDATA_PROFILING_DIR'] = '.'
    os.environ['DEVICE_ID'] = '1'

    source = [(np.array([x]),) for x in range(1024)]
    data1 = ds.GeneratorDataset(source, ["gen"])
    data1 = data1.map(operations=[(lambda x: x + 1)], input_columns=["gen"])

    pattern = DATASET_ROOT + "/test.data"
    data2 = ds.TFRecordDataset(pattern, SCHEMA_FILE, shuffle=ds.Shuffle.FILES)
    data2 = data2.shuffle(4)

    data3 = ds.zip((data1, data2))

    for _ in data3:
        pass

    with open(PIPELINE_FILE) as f:
        data = json.load(f)
    op_info = data["op_info"]
    assert len(op_info) == 5
    for i in range(5):
        assert "size" in op_info[i]["metrics"]["output_queue"]
        assert "length" in op_info[i]["metrics"]["output_queue"]
        assert "throughput" in op_info[i]["metrics"]["output_queue"]

    assert os.path.exists(PIPELINE_FILE) is True
    os.remove(PIPELINE_FILE)
    assert os.path.exists(DATASET_ITERATOR_FILE) is True
    os.remove(DATASET_ITERATOR_FILE)
    del os.environ['PROFILING_MODE']
    del os.environ['MINDDATA_PROFILING_DIR']
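# A hedged, illustrative helper (not part of the source) wrapping the
# environment toggles that test_profiling_complex_pipeline sets and clears by
# hand; the variable names are taken from the test itself.
import contextlib

@contextlib.contextmanager
def minddata_profiling(output_dir='.', device_id='0'):
    os.environ['PROFILING_MODE'] = 'true'
    os.environ['MINDDATA_PROFILING_DIR'] = output_dir
    os.environ['DEVICE_ID'] = device_id
    try:
        yield
    finally:
        del os.environ['PROFILING_MODE']
        del os.environ['MINDDATA_PROFILING_DIR']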
def test_random_crop_and_resize_05_c():
    """
    Test RandomCropAndResize with c_transforms: invalid range of ratio (max<min),
    expected to raise ValueError
    """
    logger.info("test_random_crop_and_resize_05_c")

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    decode_op = c_vision.Decode()
    try:
        random_crop_and_resize_op = c_vision.RandomResizedCrop((256, 512), (1, 1), (1, 0.5))
        # If the input range of ratio is not in the order of (min, max), ValueError will be raised.
        data = data.map(input_columns=["image"], operations=decode_op)
        data = data.map(input_columns=["image"], operations=random_crop_and_resize_op)
    except ValueError as e:
        logger.info("Got an exception in DE: {}".format(str(e)))
        assert "Input range is not valid" in str(e)
def get_squad_dataset(batch_size=1, repeat_count=1, distribute_file=''):
    '''
    get SQuAD dataset
    '''
    ds = de.TFRecordDataset([cfg.data_file], cfg.schema_file,
                            columns_list=["input_ids", "input_mask", "segment_ids",
                                          "start_positions", "end_positions",
                                          "unique_ids", "is_impossible"])
    type_cast_op = C.TypeCast(mstype.int32)
    ds = ds.map(input_columns="segment_ids", operations=type_cast_op)
    ds = ds.map(input_columns="input_ids", operations=type_cast_op)
    ds = ds.map(input_columns="input_mask", operations=type_cast_op)
    ds = ds.map(input_columns="start_positions", operations=type_cast_op)
    ds = ds.map(input_columns="end_positions", operations=type_cast_op)
    ds = ds.repeat(repeat_count)

    buffer_size = 960
    ds = ds.shuffle(buffer_size=buffer_size)
    ds = ds.batch(batch_size, drop_remainder=True)
    return ds
def test_decode_op():
    """
    Test Decode op
    """
    logger.info("Test Decode")

    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image", "label"],
                               num_parallel_workers=1, shuffle=False)

    # define map operations
    decode_op = c_vision.Decode()

    # apply map operations on images
    data1 = data1.map(operations=decode_op, input_columns=["image"])

    num_iter = 0
    for item in data1.create_dict_iterator(num_epochs=1):
        logger.info("Looping inside iterator {}".format(num_iter))
        _ = item["image"]
        num_iter += 1
def test_random_crop_08_c():
    """
    Test RandomCrop op with c_transforms: padding_mode is Border.EDGE, expected to pass
    """
    logger.info("test_random_crop_08_c")
    original_seed = config_get_set_seed(0)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    # Note: The padding_mode is Border.EDGE.
    random_crop_op = c_vision.RandomCrop(512, [200, 200, 200, 200],
                                         padding_mode=mode.Border.EDGE)
    decode_op = c_vision.Decode()
    data = data.map(operations=decode_op, input_columns=["image"])
    data = data.map(operations=random_crop_op, input_columns=["image"])

    filename = "random_crop_08_c_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)