def test_imagefolder():
    """Verify ImageFolderDataset sizing, class counting and class_indexing errors."""
    folder = "../data/dataset/testPK/data/"

    dataset = ds.ImageFolderDataset(folder)
    assert dataset.get_dataset_size() == 44
    assert dataset.num_classes() == 4

    dataset = dataset.shuffle(100)
    assert dataset.num_classes() == 4  # shuffling must not change the class count

    dataset = ds.ImageFolderDataset(folder, num_samples=10)
    assert dataset.get_dataset_size() == 10
    assert dataset.num_classes() == 4

    # An explicit class_indexing map restricts the class set to the mapped names.
    dataset = ds.ImageFolderDataset(folder, class_indexing={"class1": 1, "class2": 22})
    assert dataset.num_classes() == 2

    # Mapping a name with no matching folder raises RuntimeError at num_classes().
    dataset = ds.ImageFolderDataset(folder, class_indexing={"class1": 1, "wrong name": 22})
    message = ""
    try:
        dataset.num_classes()
    except RuntimeError as error:
        message = str(error)
    assert "wrong name doesn't exist" in message
def test_auto_contrast_invalid_cutoff_param_py():
    """
    Test AutoContrast python Op with invalid cutoff parameter
    """
    logger.info("Test AutoContrast python Op with invalid cutoff parameter")
    try:
        data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
        # cutoff below the valid (0, 100) interval must raise ValueError at construction
        data_set = data_set.map(operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
                                                                                                F.Resize((224, 224)),
                                                                                                F.AutoContrast(cutoff=-10.0),
                                                                                                F.ToTensor()])],
                                input_columns=["image"])
    except ValueError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
    try:
        data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
        # cutoff above the valid (0, 100) interval must also raise ValueError
        data_set = data_set.map(
            operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
                                                                            F.Resize((224, 224)),
                                                                            F.AutoContrast(cutoff=120.0),
                                                                            F.ToTensor()])],
            input_columns=["image"])
    except ValueError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
def test_sampler_list():
    """Check that a plain list sampler selects exactly indices 1, 3 and 5."""
    root = "../data/dataset/testPK/data"
    sampled = ds.ImageFolderDataset(root, sampler=[1, 3, 5])

    # Rebuild the same three samples via take(k).skip(k-1) on unshuffled reads,
    # which isolates sample k-1 of the sequential order.
    pieces = [
        ds.ImageFolderDataset(root, shuffle=False).take(2).skip(1),
        ds.ImageFolderDataset(root, shuffle=False).take(4).skip(3),
        ds.ImageFolderDataset(root, shuffle=False).take(6).skip(5),
    ]
    dataset_equal(sampled, pieces[0] + pieces[1] + pieces[2], 0)
def test_random_color_py(degrees=(0.1, 1.9), plot=False):
    """
    Test Python RandomColor
    """
    logger.info("Test RandomColor")

    # Original Images
    data = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)

    transforms_original = mindspore.dataset.transforms.py_transforms.Compose(
        [F.Decode(), F.Resize((224, 224)), F.ToTensor()])

    ds_original = data.map(operations=transforms_original, input_columns="image")
    # one oversized batch so the whole dataset arrives as a single tensor
    ds_original = ds_original.batch(512)

    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
            images_original = np.transpose(image.asnumpy(), (0, 2, 3, 1))  # NCHW -> NHWC
        else:
            images_original = np.append(images_original,
                                        np.transpose(image.asnumpy(), (0, 2, 3, 1)),
                                        axis=0)

    # Random Color Adjusted Images
    data = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)

    transforms_random_color = mindspore.dataset.transforms.py_transforms.Compose(
        [
            F.Decode(),
            F.Resize((224, 224)),
            F.RandomColor(degrees=degrees),
            F.ToTensor()
        ])

    ds_random_color = data.map(operations=transforms_random_color, input_columns="image")
    ds_random_color = ds_random_color.batch(512)

    for idx, (image, _) in enumerate(ds_random_color):
        if idx == 0:
            images_random_color = np.transpose(image.asnumpy(), (0, 2, 3, 1))
        else:
            images_random_color = np.append(images_random_color,
                                            np.transpose(image.asnumpy(), (0, 2, 3, 1)),
                                            axis=0)

    # Report the mean squared difference between adjusted and original images.
    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_random_color[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))

    if plot:
        visualize_list(images_original, images_random_color)
def test_auto_contrast_invalid_cutoff_param_c():
    """
    Test AutoContrast C Op with invalid cutoff parameter
    """
    logger.info("Test AutoContrast C Op with invalid cutoff parameter")
    try:
        data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
        data_set = data_set.map(operations=[C.Decode(),
                                            C.Resize((224, 224)),
                                            lambda img: np.array(img[:, :, 0])],
                                input_columns=["image"])
        # invalid cutoff: below the (0, 100) interval
        data_set = data_set.map(operations=C.AutoContrast(cutoff=-10.0), input_columns="image")
    except ValueError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
    try:
        data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
        data_set = data_set.map(operations=[C.Decode(),
                                            C.Resize((224, 224)),
                                            lambda img: np.array(img[:, :, 0])],
                                input_columns=["image"])
        # invalid cutoff: above the (0, 100) interval
        data_set = data_set.map(operations=C.AutoContrast(cutoff=120.0), input_columns="image")
    except ValueError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Input cutoff is not within the required interval of (0 to 100)." in str(error)
def test_auto_contrast_invalid_ignore_param_py():
    """
    Test AutoContrast python Op with invalid ignore parameter
    """
    logger.info("Test AutoContrast python Op with invalid ignore parameter")
    try:
        data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
        # invalid ignore: a float must raise TypeError
        data_set = data_set.map(operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
                                                                                                F.Resize((224, 224)),
                                                                                                F.AutoContrast(ignore=255.5),
                                                                                                F.ToTensor()])],
                                input_columns=["image"])
    except TypeError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Argument ignore with value 255.5 is not of type" in str(error)
    try:
        data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
        # invalid ignore: a tuple is rejected as well, per the asserted message
        data_set = data_set.map(operations=[mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
                                                                                                F.Resize((224, 224)),
                                                                                                F.AutoContrast(ignore=(10, 100)),
                                                                                                F.ToTensor()])],
                                input_columns=["image"])
    except TypeError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Argument ignore with value (10,100) is not of type" in str(error)
def test_auto_contrast_invalid_ignore_param_c():
    """
    Test AutoContrast C Op with invalid ignore parameter
    """
    logger.info("Test AutoContrast C Op with invalid ignore parameter")
    try:
        data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
        data_set = data_set.map(operations=[
            C.Decode(),
            C.Resize((224, 224)),
            lambda img: np.array(img[:, :, 0])
        ],
                                input_columns=["image"])
        # invalid ignore: a float must raise TypeError
        data_set = data_set.map(operations=C.AutoContrast(ignore=255.5), input_columns="image")
    except TypeError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Argument ignore with value 255.5 is not of type" in str(error)
    try:
        data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
        data_set = data_set.map(operations=[
            C.Decode(),
            C.Resize((224, 224)),
            lambda img: np.array(img[:, :, 0])
        ],
                                input_columns=["image"])
        # invalid ignore: a tuple is rejected as well, per the asserted message
        data_set = data_set.map(operations=C.AutoContrast(ignore=(10, 100)), input_columns="image")
    except TypeError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Argument ignore with value (10,100) is not of type" in str(error)
def test_imagefolder_zip():
    """Zip two 10-sample ImageFolder datasets and verify the iteration count.

    data1 is repeated twice, but zip stops at the end of the shorter input
    (data2 with 10 samples), so the zipped dataset still yields 10 rows.
    """
    logger.info("Test Case zip")
    # define parameters
    repeat_count = 2

    # apply dataset operations
    data1 = ds.ImageFolderDataset(DATA_DIR, num_samples=10)
    data2 = ds.ImageFolderDataset(DATA_DIR, num_samples=10)

    data1 = data1.repeat(repeat_count)
    # rename dataset2 for no conflict
    data2 = data2.rename(input_columns=["image", "label"], output_columns=["image1", "label1"])
    data3 = ds.zip((data1, data2))

    num_iter = 0
    for item in data3.create_dict_iterator(num_epochs=1):  # each data is a dictionary
        # in this example, each dictionary has keys "image" and "label"
        logger.info("image is {}".format(item["image"]))
        logger.info("label is {}".format(item["label"]))
        num_iter += 1
    # Fixed log message: num_iter counts rows of the zipped dataset (data3),
    # not data1 as the previous message claimed.
    logger.info("Number of data in data3: {}".format(num_iter))
    assert num_iter == 10
def test_sampler_list():
    """Exercise list/int samplers on ImageFolderDataset and invalid-sampler errors.

    NOTE(review): this redefines test_sampler_list declared earlier in the file;
    the earlier, shorter definition is shadowed — confirm the duplicate is intended.
    """
    data1 = ds.ImageFolderDataset("../data/dataset/testPK/data", sampler=[1, 3, 5])
    # Reference pipelines: take(k).skip(k-1) isolates sample k-1 of the unshuffled order.
    data21 = ds.ImageFolderDataset("../data/dataset/testPK/data", shuffle=False).take(2).skip(1)
    data22 = ds.ImageFolderDataset("../data/dataset/testPK/data", shuffle=False).take(4).skip(3)
    data23 = ds.ImageFolderDataset("../data/dataset/testPK/data", shuffle=False).take(6).skip(5)

    dataset_equal(data1, data21 + data22 + data23, 0)

    # A bare int is accepted as a single-index sampler.
    data3 = ds.ImageFolderDataset("../data/dataset/testPK/data", sampler=1)
    dataset_equal(data3, data21, 0)

    def bad_pipeline(sampler, msg):
        # Constructing/iterating with an unsupported sampler must raise with msg.
        with pytest.raises(Exception) as info:
            data1 = ds.ImageFolderDataset("../data/dataset/testPK/data", sampler=sampler)
            for _ in data1:
                pass
        assert msg in str(info.value)

    bad_pipeline(sampler=[1.5, 7],
                 msg="Type of indices element must be int, but got list[0]: 1.5, type: <class 'float'>")
    bad_pipeline(sampler=["a", "b"],
                 msg="Type of indices element must be int, but got list[0]: a, type: <class 'str'>.")
    bad_pipeline(sampler="a", msg="Unsupported sampler object of type (<class 'str'>)")
    bad_pipeline(sampler="", msg="Unsupported sampler object of type (<class 'str'>)")
    bad_pipeline(sampler=np.array([1, 2]),
                 msg="Type of indices element must be int, but got list[0]: 1, type: <class 'numpy.int64'>.")
def test_config(plot, file_name, op_list):
    """Run op_list against a decode-only baseline; optionally md5-check and plot.

    Args:
        plot: if truthy, visualize originals next to transformed images.
        file_name: golden-file name passed to save_and_check_md5.
        op_list: transform operation(s) applied to the "image" column.
    """
    data_dir = "../data/dataset/testImageNetData/train/"
    data1 = ds.ImageFolderDataset(dataset_dir=data_dir, shuffle=False)
    data1 = data1.map(operations=op_list, input_columns=["image"])
    data2 = ds.ImageFolderDataset(dataset_dir=data_dir, shuffle=False)
    data2 = data2.map(operations=c_vision.Decode(), input_columns=["image"])

    original_images = []
    transformed_images = []
    for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
        transformed_images.append(item["image"])
    for item in data2.create_dict_iterator(num_epochs=1, output_numpy=True):
        original_images.append(item["image"])

    # NOTE(review): run_golden and GENERATE_GOLDEN are read from module scope — confirm they are defined.
    if run_golden:
        # Compare with expected md5 from images
        save_and_check_md5(data1, file_name, generate_golden=GENERATE_GOLDEN)

    if plot:
        visualize_list(original_images, transformed_images)
def test_concat_14():
    """
    Test concat: Testing concat on two different source datasets with different dataset operations.
    """
    logger.info("test_concat_14")
    DATA_DIR = "../data/dataset/testPK/data"
    DATA_DIR2 = "../data/dataset/testImageNetData/train/"

    data1 = ds.ImageFolderDataset(DATA_DIR, num_samples=3)
    data2 = ds.ImageFolderDataset(DATA_DIR2, num_samples=2)

    transforms1 = mindspore.dataset.transforms.py_transforms.Compose(
        [F.Decode(), F.Resize((224, 224)), F.ToTensor()])

    data1 = data1.map(operations=transforms1, input_columns=["image"])
    data2 = data2.map(operations=transforms1, input_columns=["image"])
    data3 = data1 + data2

    # expected: samples from the two sources iterated in order; output: the concat.
    expected, output = [], []
    for d in data1.create_tuple_iterator(output_numpy=True):
        expected.append(d[0])
    for d in data2.create_tuple_iterator(output_numpy=True):
        expected.append(d[0])
    for d in data3.create_tuple_iterator(output_numpy=True):
        output.append(d[0])

    assert len(expected) == len(output)
    # Fixed: np.array_equal's result was previously discarded, so a content
    # mismatch could never fail the test. Assert it explicitly.
    assert np.array_equal(np.array(output), np.array(expected))

    # 3 + 2 samples in total, both by iteration and by reported size.
    assert sum([1 for _ in data3]) == 5
    assert data3.get_dataset_size() == 5
def test_auto_contrast_py(plot=False):
    """
    Test AutoContrast
    """
    logger.info("Test AutoContrast Python Op")

    # Original Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)

    transforms_original = mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
                                                                              F.Resize((224, 224)),
                                                                              F.ToTensor()])

    ds_original = data_set.map(operations=transforms_original, input_columns="image")
    # one oversized batch gathers the whole dataset into a single tensor
    ds_original = ds_original.batch(512)

    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
            images_original = np.transpose(image.asnumpy(), (0, 2, 3, 1))  # NCHW -> NHWC
        else:
            images_original = np.append(images_original,
                                        np.transpose(image.asnumpy(), (0, 2, 3, 1)),
                                        axis=0)

    # AutoContrast Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)

    transforms_auto_contrast = \
        mindspore.dataset.transforms.py_transforms.Compose([F.Decode(),
                                                            F.Resize((224, 224)),
                                                            F.AutoContrast(cutoff=10.0, ignore=[10, 20]),
                                                            F.ToTensor()])

    ds_auto_contrast = data_set.map(operations=transforms_auto_contrast, input_columns="image")
    ds_auto_contrast = ds_auto_contrast.batch(512)

    for idx, (image, _) in enumerate(ds_auto_contrast):
        if idx == 0:
            images_auto_contrast = np.transpose(image.asnumpy(), (0, 2, 3, 1))
        else:
            images_auto_contrast = np.append(images_auto_contrast,
                                             np.transpose(image.asnumpy(), (0, 2, 3, 1)),
                                             axis=0)

    # Report mean squared difference between contrasted and original images.
    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_auto_contrast[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))

    # Compare with expected md5 from images
    filename = "autocontrast_01_result_py.npz"
    save_and_check_md5(ds_auto_contrast, filename, generate_golden=GENERATE_GOLDEN)

    if plot:
        visualize_list(images_original, images_auto_contrast)
def test_equalize_py(plot=False):
    """
    Test Equalize py op
    """
    logger.info("Test Equalize")

    # Original Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)

    transforms_original = mindspore.dataset.transforms.py_transforms.Compose(
        [F.Decode(), F.Resize((224, 224)), F.ToTensor()])

    ds_original = data_set.map(operations=transforms_original, input_columns="image")
    # one oversized batch gathers the whole dataset into a single tensor
    ds_original = ds_original.batch(512)

    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
            images_original = np.transpose(image.asnumpy(), (0, 2, 3, 1))  # NCHW -> NHWC
        else:
            images_original = np.append(images_original,
                                        np.transpose(image.asnumpy(), (0, 2, 3, 1)),
                                        axis=0)

    # Color Equalized Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)

    transforms_equalize = mindspore.dataset.transforms.py_transforms.Compose(
        [F.Decode(), F.Resize((224, 224)), F.Equalize(), F.ToTensor()])

    ds_equalize = data_set.map(operations=transforms_equalize, input_columns="image")
    ds_equalize = ds_equalize.batch(512)

    for idx, (image, _) in enumerate(ds_equalize):
        if idx == 0:
            images_equalize = np.transpose(image.asnumpy(), (0, 2, 3, 1))
        else:
            images_equalize = np.append(images_equalize,
                                        np.transpose(image.asnumpy(), (0, 2, 3, 1)),
                                        axis=0)

    # Report mean squared difference between equalized and original images.
    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_equalize[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))

    if plot:
        visualize_list(images_original, images_equalize)
def test_cpp_uniform_augment(plot=False, num_ops=2):
    """
    Test UniformAugment
    """
    logger.info("Test CPP UniformAugment")

    # Original Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)

    transforms_original = [C.Decode(), C.Resize(size=[224, 224]),
                           F.ToTensor()]

    ds_original = data_set.map(operations=transforms_original, input_columns="image")
    ds_original = ds_original.batch(512)

    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
            images_original = np.transpose(image.asnumpy(), (0, 2, 3, 1))  # NCHW -> NHWC
        else:
            images_original = np.append(images_original,
                                        np.transpose(image.asnumpy(), (0, 2, 3, 1)),
                                        axis=0)

    # UniformAugment Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    # candidate pool from which UniformAugment draws num_ops transforms per image
    transforms_ua = [C.RandomCrop(size=[224, 224], padding=[32, 32, 32, 32]),
                     C.RandomHorizontalFlip(),
                     C.RandomVerticalFlip(),
                     C.RandomColorAdjust(),
                     C.RandomRotation(degrees=45)]

    uni_aug = C.UniformAugment(transforms=transforms_ua, num_ops=num_ops)

    transforms_all = [C.Decode(), C.Resize(size=[224, 224]),
                      uni_aug,
                      F.ToTensor()]

    # NOTE(review): single worker presumably for a reproducible processing order — confirm
    ds_ua = data_set.map(operations=transforms_all, input_columns="image", num_parallel_workers=1)
    ds_ua = ds_ua.batch(512)

    for idx, (image, _) in enumerate(ds_ua):
        if idx == 0:
            images_ua = np.transpose(image.asnumpy(), (0, 2, 3, 1))
        else:
            images_ua = np.append(images_ua,
                                  np.transpose(image.asnumpy(), (0, 2, 3, 1)),
                                  axis=0)

    if plot:
        visualize_list(images_original, images_ua)

    # Report mean squared difference between augmented and original images.
    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_ua[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
def test_equalize_py_c(plot=False):
    """
    Test Equalize Cpp op and python op
    """
    logger.info("Test Equalize cpp and python op")

    # equalize Images in cpp
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224))],
                            input_columns=["image"])

    ds_c_equalize = data_set.map(operations=C.Equalize(), input_columns="image")

    ds_c_equalize = ds_c_equalize.batch(512)

    for idx, (image, _) in enumerate(ds_c_equalize):
        if idx == 0:
            images_c_equalize = image.asnumpy()
        else:
            images_c_equalize = np.append(images_c_equalize,
                                          image.asnumpy(),
                                          axis=0)

    # Equalize images in python
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224))],
                            input_columns=["image"])

    transforms_p_equalize = mindspore.dataset.transforms.py_transforms.Compose(
        [lambda img: img.astype(np.uint8),  # cast back to uint8 before converting to a PIL image
         F.ToPIL(),
         F.Equalize(),
         np.array])

    ds_p_equalize = data_set.map(operations=transforms_p_equalize, input_columns="image")

    ds_p_equalize = ds_p_equalize.batch(512)

    for idx, (image, _) in enumerate(ds_p_equalize):
        if idx == 0:
            images_p_equalize = image.asnumpy()
        else:
            images_p_equalize = np.append(images_p_equalize,
                                          image.asnumpy(),
                                          axis=0)

    # Report mean squared difference between the two implementations.
    num_samples = images_c_equalize.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_p_equalize[i], images_c_equalize[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))

    if plot:
        visualize_list(images_c_equalize, images_p_equalize, visualize_mode=2)
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="GPU", dtype="fp16", device_num=1):
    """
    Create a train or eval image dataset from an ImageFolder tree.

    Args:
        dataset_path (str): root directory of the dataset.
        do_train (bool): True builds the augmented training pipeline,
            False the decode/resize/center-crop eval pipeline.
        repeat_num (int): repeat times of the dataset. Default: 1.
        batch_size (int): batch size. Default: 32.
        target (str): target device tag. Default: "GPU".  # currently unused in the body
        dtype (str): "fp16" switches Normalize to NormalizePad with a cast;
            "fp32" keeps Normalize and appends HWC2CHW.
        device_num (int): number of shards; >1 enables sharded reading via get_rank().

    Returns:
        the batched (and optionally repeated) dataset.
    """
    ds.config.set_numa_enable(True)
    if device_num == 1:
        data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=4, shuffle=True)
    else:
        data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=4, shuffle=True,
                                         num_shards=device_num, shard_id=get_rank())
    image_size = 224
    # per-channel statistics scaled to the 0-255 pixel range
    mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    std = [0.229 * 255, 0.224 * 255, 0.225 * 255]

    # define map operations
    normalize_op = C.Normalize(mean=mean, std=std)
    if dtype == "fp16":
        # NOTE(review): args_opt is read from module scope — confirm it is defined at import time
        if args_opt.eval:
            x_dtype = "float32"
        else:
            x_dtype = "float16"
        normalize_op = C.NormalizePad(mean=mean, std=std, dtype=x_dtype)
    if do_train:
        trans = [
            C.RandomCropDecodeResize(image_size, scale=(0.08, 1.0), ratio=(0.75, 1.333)),
            C.RandomHorizontalFlip(prob=0.5),
            normalize_op,
        ]
    else:
        trans = [
            C.Decode(),
            C.Resize(256),
            C.CenterCrop(image_size),
            normalize_op,
        ]
    if dtype == "fp32":
        # HWC->CHW transpose is only appended on the fp32 path — presumably the
        # fp16 NormalizePad output already has the layout consumers expect; TODO confirm
        trans.append(C.HWC2CHW())
    data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=8)
    # apply batch operations
    data_set = data_set.batch(batch_size, drop_remainder=True)
    # apply dataset repeat operation
    if repeat_num > 1:
        data_set = data_set.repeat(repeat_num)
    return data_set
def test_random_sharpness_c(degrees=(1.6, 1.6), plot=False):
    """
    Test RandomSharpness cpp op
    """
    print(degrees)
    logger.info("Test RandomSharpness cpp op")

    # Original Images
    data = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)

    transforms_original = [C.Decode(), C.Resize((224, 224))]

    ds_original = data.map(operations=transforms_original, input_columns="image")
    # one oversized batch gathers the whole dataset at once
    ds_original = ds_original.batch(512)

    for idx, (image, _) in enumerate(
            ds_original.create_tuple_iterator(output_numpy=True)):
        if idx == 0:
            images_original = image
        else:
            images_original = np.append(images_original,
                                        image,
                                        axis=0)

    # Random Sharpness Adjusted Images
    data = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)

    # default-constructed op unless the caller supplied a degrees range
    c_op = C.RandomSharpness()
    if degrees is not None:
        c_op = C.RandomSharpness(degrees)

    transforms_random_sharpness = [C.Decode(), C.Resize((224, 224)), c_op]

    ds_random_sharpness = data.map(operations=transforms_random_sharpness, input_columns="image")
    ds_random_sharpness = ds_random_sharpness.batch(512)

    for idx, (image, _) in enumerate(
            ds_random_sharpness.create_tuple_iterator(output_numpy=True)):
        if idx == 0:
            images_random_sharpness = image
        else:
            images_random_sharpness = np.append(images_random_sharpness,
                                                image,
                                                axis=0)

    # Report mean squared difference between sharpened and original images.
    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_random_sharpness[i], images_original[i])

    logger.info("MSE= {}".format(str(np.mean(mse))))

    if plot:
        visualize_list(images_original, images_random_sharpness)
def create_dataset(dataset_path, config, do_train, repeat_num=1):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        config(dict): config of dataset.
        do_train(bool): whether dataset is used for train or eval.
        repeat_num(int): the repeat times of dataset. Default: 1.

    Returns:
        dataset
    """
    rank = config.rank
    group_size = config.group_size
    if group_size == 1:
        data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=config.work_nums,
                                         shuffle=True)
    else:
        # sharded read: each of group_size workers gets its own slice
        data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=config.work_nums,
                                         shuffle=True, num_shards=group_size, shard_id=rank)
    # define map operations
    if do_train:
        trans = [
            C.RandomCropDecodeResize(config.image_size),
            C.RandomHorizontalFlip(prob=0.5),
            C.RandomColorAdjust(brightness=0.4, saturation=0.5)  # fast mode
        ]
    else:
        trans = [
            C.Decode(),
            C.Resize(int(config.image_size / 0.875)),  # resize to crop_size / 0.875 before center crop
            C.CenterCrop(config.image_size)
        ]
    trans += [
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
        C.HWC2CHW()
    ]
    type_cast_op = C2.TypeCast(mstype.int32)
    data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=config.work_nums)
    data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=config.work_nums)
    # apply batch operations
    data_set = data_set.batch(config.batch_size, drop_remainder=True)
    # apply dataset repeat operation
    data_set = data_set.repeat(repeat_num)
    return data_set
def test_auto_contrast_one_channel_c(plot=False):
    """
    Test AutoContrast C op with one channel
    """
    logger.info("Test AutoContrast C Op With One Channel Images")

    # AutoContrast Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224))],
                            input_columns=["image"])
    python_op = F.AutoContrast()
    c_op = C.AutoContrast()
    # not using F.ToTensor() since it converts to floats
    transforms_op = mindspore.dataset.transforms.py_transforms.Compose(
        [lambda img: (np.array(img)[:, :, 0]).astype(np.uint8),  # keep only channel 0 as uint8
         F.ToPIL(),
         python_op,
         np.array])

    ds_auto_contrast_py = data_set.map(operations=transforms_op, input_columns="image")

    ds_auto_contrast_py = ds_auto_contrast_py.batch(512)

    for idx, (image, _) in enumerate(ds_auto_contrast_py):
        if idx == 0:
            images_auto_contrast_py = image.asnumpy()
        else:
            images_auto_contrast_py = np.append(images_auto_contrast_py,
                                                image.asnumpy(),
                                                axis=0)

    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    # same single-channel extraction for the C pipeline
    data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224)), lambda img: np.array(img[:, :, 0])],
                            input_columns=["image"])

    ds_auto_contrast_c = data_set.map(operations=c_op, input_columns="image")

    ds_auto_contrast_c = ds_auto_contrast_c.batch(512)

    for idx, (image, _) in enumerate(ds_auto_contrast_c):
        if idx == 0:
            images_auto_contrast_c = image.asnumpy()
        else:
            images_auto_contrast_c = np.append(images_auto_contrast_c,
                                               image.asnumpy(),
                                               axis=0)

    num_samples = images_auto_contrast_c.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_auto_contrast_c[i], images_auto_contrast_py[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
    # C and Python implementations must produce identical results
    np.testing.assert_equal(np.mean(mse), 0.0)

    if plot:
        visualize_list(images_auto_contrast_c, images_auto_contrast_py, visualize_mode=2)
def test_auto_contrast_c(plot=False):
    """
    Test AutoContrast C Op
    """
    logger.info("Test AutoContrast C Op")

    # AutoContrast Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224))],
                            input_columns=["image"])
    # identical parameters for the Python and C implementations so outputs can match
    python_op = F.AutoContrast(cutoff=10.0, ignore=[10, 20])
    c_op = C.AutoContrast(cutoff=10.0, ignore=[10, 20])
    transforms_op = mindspore.dataset.transforms.py_transforms.Compose(
        [lambda img: F.ToPIL()(img.astype(np.uint8)),  # uint8 -> PIL for the Python op
         python_op,
         np.array])

    ds_auto_contrast_py = data_set.map(operations=transforms_op, input_columns="image")

    ds_auto_contrast_py = ds_auto_contrast_py.batch(512)

    for idx, (image, _) in enumerate(ds_auto_contrast_py):
        if idx == 0:
            images_auto_contrast_py = image.asnumpy()
        else:
            images_auto_contrast_py = np.append(images_auto_contrast_py,
                                                image.asnumpy(),
                                                axis=0)

    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224))],
                            input_columns=["image"])

    ds_auto_contrast_c = data_set.map(operations=c_op, input_columns="image")

    ds_auto_contrast_c = ds_auto_contrast_c.batch(512)

    for idx, (image, _) in enumerate(ds_auto_contrast_c):
        if idx == 0:
            images_auto_contrast_c = image.asnumpy()
        else:
            images_auto_contrast_c = np.append(images_auto_contrast_c,
                                               image.asnumpy(),
                                               axis=0)

    num_samples = images_auto_contrast_c.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_auto_contrast_c[i], images_auto_contrast_py[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
    # C and Python results must be identical
    np.testing.assert_equal(np.mean(mse), 0.0)

    # Compare with expected md5 from images
    filename = "autocontrast_01_result_c.npz"
    save_and_check_md5(ds_auto_contrast_c, filename, generate_golden=GENERATE_GOLDEN)

    if plot:
        visualize_list(images_auto_contrast_c, images_auto_contrast_py, visualize_mode=2)
def test_cutmix_batch_success3(plot=False):
    """
    Test CutMixBatch op with default values for alpha and prob on a batch of HWC images on ImageFolderDataset
    """
    logger.info("test_cutmix_batch_success3")

    # Original Images
    ds_original = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
    decode_op = vision.Decode()
    ds_original = ds_original.map(operations=[decode_op], input_columns=["image"])
    resize_op = vision.Resize([224, 224])
    ds_original = ds_original.map(operations=[resize_op], input_columns=["image"])
    ds_original = ds_original.batch(4, pad_info={}, drop_remainder=True)

    images_original = None
    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
            images_original = image.asnumpy()
        else:
            images_original = np.append(images_original, image.asnumpy(), axis=0)

    # CutMix Images
    data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)

    decode_op = vision.Decode()
    data1 = data1.map(operations=[decode_op], input_columns=["image"])

    resize_op = vision.Resize([224, 224])
    data1 = data1.map(operations=[resize_op], input_columns=["image"])

    # CutMixBatch needs one-hot encoded labels
    one_hot_op = data_trans.OneHot(num_classes=10)
    data1 = data1.map(operations=one_hot_op, input_columns=["label"])

    cutmix_batch_op = vision.CutMixBatch(mode.ImageBatchFormat.NHWC)
    # the op works on whole batches, so batch before mapping it
    data1 = data1.batch(4, pad_info={}, drop_remainder=True)
    data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"])

    images_cutmix = None
    for idx, (image, _) in enumerate(data1):
        if idx == 0:
            images_cutmix = image.asnumpy()
        else:
            images_cutmix = np.append(images_cutmix, image.asnumpy(), axis=0)
    if plot:
        visualize_list(images_original, images_cutmix)

    # Report mean squared difference between mixed and original images.
    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_cutmix[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
def test_imagefolder():
    """Smoke-test ImageFolderDataset size and class counts, with and without sampling.

    NOTE(review): a function of the same name appears earlier in the file; the
    later definition shadows the earlier one.
    """
    path = "../data/dataset/testPK/data/"

    full = ds.ImageFolderDataset(path)
    assert full.get_dataset_size() == 44
    assert full.num_classes() == 4

    shuffled = full.shuffle(100)
    assert shuffled.num_classes() == 4  # shuffling keeps the class count

    limited = ds.ImageFolderDataset(path, num_samples=10)
    assert limited.get_dataset_size() == 10
    assert limited.num_classes() == 4  # sampling keeps the class count too
def create_dataset(dataset_path, do_train, batch_size=16, device_num=1, rank=0):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        batch_size(int): the batch size of dataset. Default: 16.
        device_num (int): Number of shards that the dataset should be divided into (default=1).
        rank (int): The shard ID within num_shards (default=0).

    Returns:
        dataset
    """
    if device_num == 1:
        data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True)
    else:
        # sharded read: each device gets its own slice of the data
        data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True,
                                         num_shards=device_num, shard_id=rank)
    # define map operations
    if do_train:
        trans = [
            C.RandomCropDecodeResize(299),  # random-crop straight to the 299x299 network input
            C.RandomHorizontalFlip(prob=0.5),
            C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
        ]
    else:
        trans = [
            C.Decode(),
            C.Resize(320),
            C.CenterCrop(299)
        ]
    trans += [
        C.Normalize(mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5]),  # maps pixels to [-1, 1]
        C.HWC2CHW(),
        C2.TypeCast(mstype.float32)
    ]
    type_cast_op = C2.TypeCast(mstype.int32)
    data_set = data_set.map(input_columns="image", operations=trans, num_parallel_workers=8)
    data_set = data_set.map(input_columns="label", operations=type_cast_op, num_parallel_workers=8)
    # apply batch operations
    data_set = data_set.batch(batch_size, drop_remainder=True)
    return data_set
def test_imagenet_rawdata_dataset_size():
    """get_dataset_size must report correct per-shard sizes for the 6-image raw set."""
    whole = ds.ImageFolderDataset(IMAGENET_RAWDATA_DIR)
    assert whole.get_dataset_size() == 6

    # Shard 0 of an N-way split of 6 samples should see 6/N of them.
    for num_shards, expected in ((1, 6), (2, 3), (3, 2)):
        shard = ds.ImageFolderDataset(IMAGENET_RAWDATA_DIR,
                                      num_shards=num_shards, shard_id=0)
        assert shard.get_dataset_size() == expected
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
    """
    Create a train or eval dataset.

    Args:
        dataset_path (str): The path of dataset.
        do_train (bool): Whether dataset is used for train or eval.
        repeat_num (int): The repeat times of dataset. Default: 1.
        batch_size (int): The batch size of dataset. Default: 32.

    Returns:
        Dataset.
    """
    # NOTE(review): device_num, config and device_id are read from module scope — confirm they are defined.
    do_shuffle = bool(do_train)  # eval pipelines read in deterministic order
    if device_num == 1 or not do_train:
        ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=config.work_nums, shuffle=do_shuffle)
    else:
        ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=config.work_nums, shuffle=do_shuffle,
                                   num_shards=device_num, shard_id=device_id)
    image_length = 299
    if do_train:
        trans = [
            C.RandomCropDecodeResize(image_length, scale=(0.08, 1.0), ratio=(0.75, 1.333)),
            C.RandomHorizontalFlip(prob=0.5),
            C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
        ]
    else:
        trans = [
            C.Decode(),
            C.Resize(image_length),
            C.CenterCrop(image_length)
        ]
    trans += [
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        C.HWC2CHW()
    ]
    type_cast_op = C2.TypeCast(mstype.int32)
    ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=config.work_nums)
    ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=config.work_nums)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)
    return ds
def create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        rank (int): The shard ID within num_shards (default=None).
        group_size (int): Number of shards that the dataset should be divided into (default=None).
        repeat_num(int): the repeat times of dataset. Default: 1.

    Returns:
        dataset
    """
    if group_size == 1:
        data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=cfg.work_nums, shuffle=True)
    else:
        # sharded read across group_size workers
        data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=cfg.work_nums, shuffle=True,
                                         num_shards=group_size, shard_id=rank)
    # define map operations
    if do_train:
        trans = [
            C.RandomCropDecodeResize(224),
            C.RandomHorizontalFlip(prob=0.5),
            C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
        ]
    else:
        trans = [
            C.Decode(),
            C.Resize(256),
            C.CenterCrop(224)
        ]
    trans += [
        toBGR(),  # NOTE(review): toBGR comes from module scope; presumably reorders channels — confirm
        C.Rescale(1.0 / 255.0, 0.0),
        C.HWC2CHW(),
        C2.TypeCast(mstype.float32)
    ]
    type_cast_op = C2.TypeCast(mstype.int32)
    data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=cfg.work_nums)
    data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=cfg.work_nums)
    # apply batch operations
    data_set = data_set.batch(cfg.batch_size, drop_remainder=True)
    return data_set
def create_dataset_imagenet(dataset_path, do_train, cfg, repeat_num=1):
    """
    Create a train or eval dataset.

    Args:
        dataset_path (str): The path of dataset.
        do_train (bool): Whether dataset is used for train or eval.
        cfg (dict): The config for creating dataset.
        repeat_num (int): The repeat times of dataset. Default: 1.

    Returns:
        Dataset.
    """
    # Shard the dataset only when running with more than one device.
    if cfg.group_size == 1:
        data_set = ds.ImageFolderDataset(dataset_path,
                                         num_parallel_workers=cfg.work_nums,
                                         shuffle=True)
    else:
        data_set = ds.ImageFolderDataset(dataset_path,
                                         num_parallel_workers=cfg.work_nums,
                                         shuffle=True,
                                         num_shards=cfg.group_size,
                                         shard_id=cfg.rank)

    # define map operations
    if do_train:
        # Training path: randomized augmentation at Inception's 299x299 input size.
        image_trans = [
            C.RandomCropDecodeResize(299, scale=(0.08, 1.0), ratio=(0.75, 1.333)),
            C.RandomHorizontalFlip(prob=0.5),
            C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4),
        ]
    else:
        # Eval path: deterministic decode, resize and center crop.
        image_trans = [C.Decode(), C.Resize(299), C.CenterCrop(299)]
    # Shared tail: scale to [0, 1], normalize with ImageNet stats, channels-first.
    image_trans.extend([
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        C.HWC2CHW(),
    ])

    data_set = data_set.map(operations=image_trans, input_columns="image",
                            num_parallel_workers=cfg.work_nums)
    # Labels are cast to int32 for the loss computation.
    data_set = data_set.map(operations=C2.TypeCast(mstype.int32), input_columns="label",
                            num_parallel_workers=cfg.work_nums)

    # apply batch operations
    data_set = data_set.batch(cfg.batch_size, drop_remainder=True)

    # apply dataset repeat operation
    data_set = data_set.repeat(repeat_num)
    return data_set
def test_mixup_batch_success2(plot=False):
    """
    Test MixUpBatch op with specified alpha parameter on ImageFolderDataset
    """
    logger.info("test_mixup_batch_success2")

    def _collect_images(pipeline):
        # Gather every batch of the "image" column into one array; None if empty.
        batches = [image.asnumpy() for (image, _) in pipeline]
        return np.concatenate(batches, axis=0) if batches else None

    # Original Images
    ds_original = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
    ds_original = ds_original.map(operations=[vision.Decode()], input_columns=["image"])
    ds_original = ds_original.batch(4, pad_info={}, drop_remainder=True)
    images_original = _collect_images(ds_original)

    # MixUp Images
    data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False)
    data1 = data1.map(operations=[vision.Decode()], input_columns=["image"])
    # MixUpBatch needs one-hot labels and operates on whole batches.
    data1 = data1.map(operations=data_trans.OneHot(num_classes=10), input_columns=["label"])
    data1 = data1.batch(4, pad_info={}, drop_remainder=True)
    data1 = data1.map(operations=vision.MixUpBatch(2.0), input_columns=["image", "label"])
    images_mixup = _collect_images(data1)

    if plot:
        visualize_list(images_original, images_mixup)

    # Log the mean per-image MSE between mixed and original images.
    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_mixup[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
def test_equalize_c(plot=False):
    """
    Test Equalize Cpp op
    """
    logger.info("Test Equalize cpp op")

    def _collect_images(pipeline):
        # Gather every batch of the "image" column into one array.
        batches = [image.asnumpy() for (image, _) in pipeline]
        return np.concatenate(batches, axis=0)

    # Original Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    ds_original = data_set.map(operations=[C.Decode(), C.Resize(size=[224, 224])],
                               input_columns="image")
    ds_original = ds_original.batch(512)
    images_original = _collect_images(ds_original)

    # Equalize Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    ds_equalize = data_set.map(operations=[C.Decode(), C.Resize(size=[224, 224]), C.Equalize()],
                               input_columns="image")
    ds_equalize = ds_equalize.batch(512)
    images_equalize = _collect_images(ds_equalize)

    if plot:
        visualize_list(images_original, images_equalize)

    # Log the mean per-image MSE between equalized and original images.
    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_equalize[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
def test_imagefolder_exception():
    """Verify that a raising PyFunc in map() surfaces as a RuntimeError with file context."""
    logger.info("Test imagefolder exception")

    def exception_func(item):
        raise Exception("Error occur!")

    def exception_func2(image, label):
        raise Exception("Error occur!")

    def _expect_pyfunc_failure(dataset):
        # Consuming the pipeline must fail with the PyFunc map error message.
        try:
            for _ in dataset.__iter__():
                pass
        except RuntimeError as e:
            assert "map operation: [PyFunc] failed. The corresponding data files" in str(
                e)
            return
        assert False

    # Single-column PyFunc failure.
    data = ds.ImageFolderDataset(DATA_DIR)
    data = data.map(operations=exception_func, input_columns=["image"],
                    num_parallel_workers=1)
    _expect_pyfunc_failure(data)

    # Multi-column PyFunc failure with output/column reordering.
    data = ds.ImageFolderDataset(DATA_DIR)
    data = data.map(operations=exception_func2,
                    input_columns=["image", "label"],
                    output_columns=["image", "label", "label1"],
                    column_order=["image", "label", "label1"],
                    num_parallel_workers=1)
    _expect_pyfunc_failure(data)

    # PyFunc failure after a successful decode step.
    data = ds.ImageFolderDataset(DATA_DIR)
    data = data.map(operations=vision.Decode(), input_columns=["image"],
                    num_parallel_workers=1)
    data = data.map(operations=exception_func, input_columns=["image"],
                    num_parallel_workers=1)
    _expect_pyfunc_failure(data)