def test_auto_contrast_invalid_cutoff_param_py():
    """
    Test AutoContrast python Op with invalid cutoff parameter
    """
    logger.info("Test AutoContrast python Op with invalid cutoff parameter")
    # Both an under-range and an over-range cutoff must be rejected the same way.
    for bad_cutoff in (-10.0, 120.0):
        try:
            data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
            data_set = data_set.map(
                operations=[mindspore.dataset.transforms.py_transforms.Compose(
                    [F.Decode(),
                     F.Resize((224, 224)),
                     F.AutoContrast(cutoff=bad_cutoff),
                     F.ToTensor()])],
                input_columns=["image"])
        except ValueError as error:
            logger.info("Got an exception in DE: {}".format(str(error)))
            assert "Input cutoff is not within the required interval of [0, 50)." in str(error)
def test_random_color_py(degrees=(0.1, 1.9), plot=False):
    """
    Test Python RandomColor by comparing adjusted images against originals via MSE.
    """
    logger.info("Test RandomColor")

    def _stack_batches(dataset):
        # Gather every 512-image batch as NHWC numpy arrays and stack them.
        parts = [np.transpose(image.asnumpy(), (0, 2, 3, 1)) for image, _ in dataset]
        return np.concatenate(parts, axis=0)

    # Original Images: decode + resize only.
    data = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    transforms_original = mindspore.dataset.transforms.py_transforms.Compose(
        [F.Decode(), F.Resize((224, 224)), F.ToTensor()])
    ds_original = data.map(operations=transforms_original, input_columns="image").batch(512)
    images_original = _stack_batches(ds_original)

    # Random Color Adjusted Images: same pipeline plus RandomColor.
    data = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    transforms_random_color = mindspore.dataset.transforms.py_transforms.Compose(
        [F.Decode(), F.Resize((224, 224)), F.RandomColor(degrees=degrees), F.ToTensor()])
    ds_random_color = data.map(operations=transforms_random_color, input_columns="image").batch(512)
    images_random_color = _stack_batches(ds_random_color)

    # Per-image MSE between the adjusted and original images.
    mse = np.array([diff_mse(adjusted, reference)
                    for adjusted, reference in zip(images_random_color, images_original)])
    logger.info("MSE= {}".format(str(np.mean(mse))))
    if plot:
        visualize_list(images_original, images_random_color)
def test_auto_contrast_invalid_ignore_param_py():
    """
    Test AutoContrast python Op with invalid ignore parameter
    """
    logger.info("Test AutoContrast python Op with invalid ignore parameter")
    # Each case pairs an invalid `ignore` value with the expected error fragment.
    cases = [
        (255.5, "Argument ignore with value 255.5 is not of type"),
        ((10, 100), "Argument ignore with value (10,100) is not of type"),
    ]
    for bad_ignore, expected_fragment in cases:
        try:
            data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
            data_set = data_set.map(
                operations=[mindspore.dataset.transforms.py_transforms.Compose(
                    [F.Decode(),
                     F.Resize((224, 224)),
                     F.AutoContrast(ignore=bad_ignore),
                     F.ToTensor()])],
                input_columns=["image"])
        except TypeError as error:
            logger.info("Got an exception in DE: {}".format(str(error)))
            assert expected_fragment in str(error)
def test_auto_contrast_py(plot=False):
    """
    Test AutoContrast python op: MSE against originals plus md5 golden check.
    """
    logger.info("Test AutoContrast Python Op")

    def _stack_batches(dataset):
        # Collect all batches as NHWC numpy arrays.
        parts = [np.transpose(image.asnumpy(), (0, 2, 3, 1)) for image, _ in dataset]
        return np.concatenate(parts, axis=0)

    # Original Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    transforms_original = mindspore.dataset.transforms.py_transforms.Compose(
        [F.Decode(), F.Resize((224, 224)), F.ToTensor()])
    ds_original = data_set.map(operations=transforms_original, input_columns="image").batch(512)
    images_original = _stack_batches(ds_original)

    # AutoContrast Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    transforms_auto_contrast = mindspore.dataset.transforms.py_transforms.Compose(
        [F.Decode(), F.Resize((224, 224)),
         F.AutoContrast(cutoff=10.0, ignore=[10, 20]), F.ToTensor()])
    ds_auto_contrast = data_set.map(operations=transforms_auto_contrast,
                                    input_columns="image").batch(512)
    images_auto_contrast = _stack_batches(ds_auto_contrast)

    mse = np.array([diff_mse(contrasted, reference)
                    for contrasted, reference in zip(images_auto_contrast, images_original)])
    logger.info("MSE= {}".format(str(np.mean(mse))))

    # Compare with expected md5 from images
    filename = "autocontrast_01_result_py.npz"
    save_and_check_md5(ds_auto_contrast, filename, generate_golden=GENERATE_GOLDEN)

    if plot:
        visualize_list(images_original, images_auto_contrast)
def test_equalize_py(plot=False):
    """
    Test Equalize py op: compare equalized images against originals via MSE.
    """
    logger.info("Test Equalize")

    def _stack_batches(dataset):
        # Stack every batch as NHWC numpy arrays.
        parts = [np.transpose(image.asnumpy(), (0, 2, 3, 1)) for image, _ in dataset]
        return np.concatenate(parts, axis=0)

    # Original Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    transforms_original = mindspore.dataset.transforms.py_transforms.Compose(
        [F.Decode(), F.Resize((224, 224)), F.ToTensor()])
    ds_original = data_set.map(operations=transforms_original, input_columns="image").batch(512)
    images_original = _stack_batches(ds_original)

    # Color Equalized Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    transforms_equalize = mindspore.dataset.transforms.py_transforms.Compose(
        [F.Decode(), F.Resize((224, 224)), F.Equalize(), F.ToTensor()])
    ds_equalize = data_set.map(operations=transforms_equalize, input_columns="image").batch(512)
    images_equalize = _stack_batches(ds_equalize)

    mse = np.array([diff_mse(equalized, reference)
                    for equalized, reference in zip(images_equalize, images_original)])
    logger.info("MSE= {}".format(str(np.mean(mse))))
    if plot:
        visualize_list(images_original, images_equalize)
def test_invert_py(plot=False):
    """
    Test Invert python op: compare inverted images against originals via MSE.
    """
    logger.info("Test Invert Python op")

    def _stack_batches(dataset):
        # Stack every batch as NHWC numpy arrays.
        parts = [np.transpose(image.asnumpy(), (0, 2, 3, 1)) for image, _ in dataset]
        return np.concatenate(parts, axis=0)

    # Original Images (local renamed from `ds` to avoid shadowing the module alias).
    data = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    transforms_original = mindspore.dataset.transforms.py_transforms.Compose(
        [F.Decode(), F.Resize((224, 224)), F.ToTensor()])
    ds_original = data.map(operations=transforms_original, input_columns="image").batch(512)
    images_original = _stack_batches(ds_original)

    # Color Inverted Images
    data = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    transforms_invert = mindspore.dataset.transforms.py_transforms.Compose(
        [F.Decode(), F.Resize((224, 224)), F.Invert(), F.ToTensor()])
    ds_invert = data.map(operations=transforms_invert, input_columns="image").batch(512)
    images_invert = _stack_batches(ds_invert)

    # Manual per-image MSE (this test does not use the shared diff_mse helper).
    mse = np.array([np.mean((inverted - reference) ** 2)
                    for inverted, reference in zip(images_invert, images_original)])
    logger.info("MSE= {}".format(str(np.mean(mse))))
    if plot:
        visualize_list(images_original, images_invert)
def test_c_py_compose_vision_module(plot=False, run_golden=True):
    """
    Test combining Python and C++ vision transforms in a single map pipeline.
    """
    original_seed = config_get_set_seed(10)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    def test_config(plot, file_name, op_list):
        # Run op_list on one dataset and a plain C++ decode on another,
        # then optionally md5-check and/or visualize the results.
        data_dir = "../data/dataset/testImageNetData/train/"
        data1 = ds.ImageFolderDataset(dataset_dir=data_dir, shuffle=False)
        data1 = data1.map(operations=op_list, input_columns=["image"])
        data2 = ds.ImageFolderDataset(dataset_dir=data_dir, shuffle=False)
        data2 = data2.map(operations=c_vision.Decode(), input_columns=["image"])
        original_images = []
        transformed_images = []
        for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
            transformed_images.append(item["image"])
        for item in data2.create_dict_iterator(num_epochs=1, output_numpy=True):
            original_images.append(item["image"])
        if run_golden:
            # Compare with expected md5 from images
            save_and_check_md5(data1, file_name, generate_golden=GENERATE_GOLDEN)
        if plot:
            visualize_list(original_images, transformed_images)

    # Each entry: (golden file name, mixed C++/Python op list).
    cases = [
        ("compose_c_py_1.npz",
         [c_vision.Decode(), py_vision.ToPIL(), py_vision.Resize((224, 224)), np.array]),
        ("compose_c_py_2.npz",
         [c_vision.Decode(), c_vision.Resize((224, 244)), py_vision.ToPIL(),
          np.array, c_vision.Resize((24, 24))]),
        ("compose_c_py_3.npz",
         [py_vision.Decode(), py_vision.Resize((224, 224)), np.array, c_vision.RandomColor()]),
    ]
    for file_name, op_list in cases:
        test_config(plot, file_name, op_list)

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_concat_14():
    """
    Test concat: Testing concat on two different source datasets with different dataset operations.
    """
    logger.info("test_concat_14")
    DATA_DIR = "../data/dataset/testPK/data"
    DATA_DIR2 = "../data/dataset/testImageNetData/train/"

    data1 = ds.ImageFolderDataset(DATA_DIR, num_samples=3)
    data2 = ds.ImageFolderDataset(DATA_DIR2, num_samples=2)

    transforms1 = mindspore.dataset.transforms.py_transforms.Compose(
        [F.Decode(), F.Resize((224, 224)), F.ToTensor()])

    data1 = data1.map(operations=transforms1, input_columns=["image"])
    data2 = data2.map(operations=transforms1, input_columns=["image"])
    # Concatenate the two mapped datasets.
    data3 = data1 + data2

    # Expected: the images of data1 followed by the images of data2;
    # output: the images produced by the concatenated dataset.
    expected, output = [], []
    for d in data1.create_tuple_iterator(output_numpy=True):
        expected.append(d[0])
    for d in data2.create_tuple_iterator(output_numpy=True):
        expected.append(d[0])
    for d in data3.create_tuple_iterator(output_numpy=True):
        output.append(d[0])

    assert len(expected) == len(output)
    # BUG FIX: the result of np.array_equal was previously discarded, so a
    # content mismatch between the concatenated dataset and its sources went
    # undetected. Assert it so the comparison actually fails the test.
    assert np.array_equal(np.array(output), np.array(expected))

    # 3 samples from data1 + 2 samples from data2.
    assert sum([1 for _ in data3]) == 5
    assert data3.get_dataset_size() == 5
def skip_test_random_perspective_md5():
    """
    Test RandomPerspective with md5 comparison
    """
    logger.info("test_random_perspective_md5")
    original_seed = config_get_set_seed(5)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # define map operations
    transform = mindspore.dataset.transforms.py_transforms.Compose([
        py_vision.Decode(),
        py_vision.RandomPerspective(distortion_scale=0.3, prob=0.7,
                                    interpolation=Inter.BILINEAR),
        py_vision.Resize(1450),  # resize to a smaller size to prevent round-off error
        py_vision.ToTensor()
    ])

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data = data.map(operations=transform, input_columns=["image"])

    # check results with md5 comparison
    filename = "random_perspective_01_result.npz"
    save_and_check_md5(data, filename, generate_golden=GENERATE_GOLDEN)

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def data_generator(args):
    '''Build train dataloader.'''
    # Train-time transform: decode -> resize -> random flip -> tensor -> normalize.
    transform_img = F2.Compose([F.Decode(),
                                F.Resize((args.dst_w, args.dst_h)),
                                F.RandomHorizontalFlip(prob=0.5),
                                F.ToTensor(),
                                F.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    # Sharded MindRecord source; file suffix "0" selects the first shard file.
    dataset = de.MindDataset(args.mindrecord_path + "0",
                             columns_list=["image", "label"],
                             num_shards=args.world_size,
                             shard_id=args.local_rank)
    dataset = dataset.map(input_columns="image",
                          operations=transform_img,
                          num_parallel_workers=args.workers,
                          python_multiprocessing=True)
    dataset = dataset.batch(args.per_batch_size, drop_remainder=True)

    # Size must be read before repeat() so it reflects a single epoch.
    steps_per_epoch = dataset.get_dataset_size()
    dataset = dataset.repeat(args.max_epoch)
    dataloader = dataset.create_tuple_iterator(output_numpy=True)

    return dataloader, steps_per_epoch, args.attri_num
def test_random_apply_exception_random_crop_badinput():
    """
    Test RandomApply: test invalid input for one of the transform functions,
    expected to raise error
    """
    logger.info("test_random_apply_exception_random_crop_badinput")
    original_seed = config_get_set_seed(200)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # define map operations; the inner list holds a crop larger than the image
    transform = py_transforms.Compose([
        py_vision.Decode(),
        py_transforms.RandomApply([py_vision.Resize([32, 32]),
                                   py_vision.RandomCrop(100),  # crop size > image size
                                   py_vision.RandomRotation(30)],
                                  prob=0.6),
        py_vision.ToTensor()
    ])

    # Generate dataset
    data = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data = data.map(operations=transform, input_columns=["image"])
    try:
        _ = data.create_dict_iterator(num_epochs=1).get_next()
    except RuntimeError as e:
        logger.info("Got an exception in DE: {}".format(str(e)))
        assert "Crop size" in str(e)

    # Restore configuration
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def create_dataset_py(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        repeat_num(int): the repeat times of dataset. Default: 1
        batch_size(int): the batch size of dataset. Default: 32
        target(str): the device target. Default: Ascend

    Returns:
        dataset
    """
    # Determine sharding from the environment (Ascend) or from the
    # communication group (other targets).
    # NOTE(review): assumes RANK_SIZE/RANK_ID are set when target == "Ascend";
    # int(None) would raise TypeError otherwise -- confirm with launch scripts.
    if target == "Ascend":
        device_num = int(os.getenv("RANK_SIZE"))
        rank_id = int(os.getenv("RANK_ID"))
    else:
        init()
        rank_id = get_rank()
        device_num = get_group_size()
    if do_train:
        if device_num == 1:
            ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True)
        else:
            # Shard the training data across devices.
            ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True,
                                       num_shards=device_num, shard_id=rank_id)
    else:
        # Eval reads the full dataset in a fixed order (no sharding, no shuffle).
        ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=False)

    image_size = 224

    # define map operations
    decode_op = P.Decode()
    resize_crop_op = P.RandomResizedCrop(image_size, scale=(0.08, 1.0), ratio=(0.75, 1.333))
    horizontal_flip_op = P.RandomHorizontalFlip(prob=0.5)

    resize_op = P.Resize(256)
    center_crop = P.CenterCrop(image_size)
    to_tensor = P.ToTensor()
    normalize_op = P.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    # define map operations
    if do_train:
        # Train path: random crop + flip augmentation.
        trans = [decode_op, resize_crop_op, horizontal_flip_op, to_tensor, normalize_op]
    else:
        # Eval path: deterministic resize + center crop.
        trans = [decode_op, resize_op, center_crop, to_tensor, normalize_op]

    compose = P2.Compose(trans)
    ds = ds.map(operations=compose, input_columns="image",
                num_parallel_workers=8, python_multiprocessing=True)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
def test_rgb_hsv_pipeline():
    """RGB -> HSV -> RGB round trip should reproduce the plain-pipeline image."""
    # First dataset: plain decode/resize/tensor pipeline.
    transforms1 = mindspore.dataset.transforms.py_transforms.Compose(
        [vision.Decode(), vision.Resize([64, 64]), vision.ToTensor()])
    ds1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    ds1 = ds1.map(operations=transforms1, input_columns=["image"])

    # Second dataset: same pipeline plus an RGB->HSV->RGB round trip.
    transform2 = mindspore.dataset.transforms.py_transforms.Compose(
        [vision.Decode(), vision.Resize([64, 64]), vision.ToTensor(),
         vision.RgbToHsv(), vision.HsvToRgb()])
    ds2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    ds2 = ds2.map(operations=transform2, input_columns=["image"])

    num_iter = 0
    for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1),
                            ds2.create_dict_iterator(num_epochs=1)):
        num_iter += 1
        ori_img = data1["image"].asnumpy()
        cvt_img = data2["image"].asnumpy()
        # The round-tripped image must match the original within tolerance.
        assert_allclose(ori_img.flatten(), cvt_img.flatten(), rtol=1e-5, atol=0)
        assert ori_img.shape == cvt_img.shape
def create_dataset_val(batch_size=128, val_data_url='', workers=8, distributed=False,
                       input_size=224):
    """Create ImageNet validation dataset"""
    if not os.path.exists(val_data_url):
        raise ValueError('Path not exists')

    rank_id = get_rank() if distributed else 0
    rank_size = get_group_size() if distributed else 1
    dataset = ds.ImageFolderDataset(val_data_url, num_parallel_workers=workers,
                                    num_shards=rank_size, shard_id=rank_id)

    # Pre-crop resize target derived from the default crop percentage.
    if isinstance(input_size, tuple):
        assert len(input_size) == 2
        if input_size[-1] == input_size[-2]:
            # Square crop: a single integer resize suffices.
            scale_size = int(math.floor(input_size[0] / DEFAULT_CROP_PCT))
        else:
            # Rectangular crop: scale each dimension independently.
            scale_size = tuple([int(x / DEFAULT_CROP_PCT) for x in input_size])
    else:
        scale_size = int(math.floor(input_size / DEFAULT_CROP_PCT))

    type_cast_op = c_transforms.TypeCast(mstype.int32)
    image_ops = py_transforms.Compose(
        [py_vision.Decode(),
         py_vision.Resize(size=scale_size, interpolation=Inter.BICUBIC),
         py_vision.CenterCrop(size=input_size),
         py_vision.ToTensor(),
         py_vision.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD)])

    dataset = dataset.map(input_columns=["label"], operations=type_cast_op,
                          num_parallel_workers=workers)
    dataset = dataset.map(input_columns=["image"], operations=image_ops,
                          num_parallel_workers=workers)
    dataset = dataset.batch(batch_size, per_batch_map=split_imgs_and_labels,
                            input_columns=["image", "label"],
                            num_parallel_workers=2, drop_remainder=True)
    dataset = dataset.repeat(1)
    return dataset
def test_resize_op_ANTIALIAS():
    """
    Test resize_op
    """
    logger.info("Test resize for ANTIALIAS")
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)

    # apply decode, ANTIALIAS resize, and tensor conversion in a single map call
    ops = [py_vision.Decode(), py_vision.Resize(20, Inter.ANTIALIAS), py_vision.ToTensor()]
    data1 = data1.map(operations=ops, input_columns=["image"])

    # Drain the pipeline and count the processed images.
    num_iter = sum(1 for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True))
    logger.info("use Resize by Inter.ANTIALIAS process {} images.".format(num_iter))
def load_images(paths, batch_size=128):
    '''Load images.'''
    resize = V.Resize((96, 64))
    transform = T.Compose([V.ToTensor(),
                           V.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
    batch = []
    for path in paths:
        image = Image.open(path)
        # Resize, convert to array, then normalize; Compose returns a tuple,
        # so take its first element.
        tensor = transform(np.array(resize(image)))
        batch.append(tensor[0])
        if len(batch) == batch_size:
            yield np.stack(batch, axis=0)
            batch.clear()
    # Flush the final partial batch, if any.
    if batch:
        yield np.stack(batch, axis=0)
def data_generator_eval(args):
    '''Build eval dataloader.'''
    # Eval transform: decode -> resize -> tensor -> normalize (no augmentation).
    transform_img = F2.Compose([F.Decode(),
                                F.Resize((args.dst_w, args.dst_h)),
                                F.ToTensor(),
                                F.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    dataset = de.MindDataset(args.mindrecord_path + "0", columns_list=["image", "label"])
    dataset = dataset.map(input_columns="image",
                          operations=transform_img,
                          num_parallel_workers=args.workers,
                          python_multiprocessing=True)
    dataset = dataset.batch(1)  # eval uses batch size 1

    dataloader = dataset.create_tuple_iterator(output_numpy=True)
    steps_per_epoch = dataset.get_dataset_size()
    print("image number:{0}".format(steps_per_epoch))
    return dataloader, steps_per_epoch, args.attri_num
print("==> Loading data") # Data Loading code transform_train = Compose([ decode, py_trans.Pad(10), py_trans.RandomCrop((args.img_h, args.img_w)), py_trans.RandomHorizontalFlip(), py_trans.ToTensor(), py_trans.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) transform_test = Compose([ decode, py_trans.Resize((args.img_h, args.img_w)), py_trans.ToTensor(), py_trans.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) ifDebug_dic = {"yes": True, "no": False} if dataset_type == "SYSU": # train_set ifDebug = {} trainset_generator = SYSUDatasetGenerator(data_dir=data_path, ifDebug=ifDebug_dic.get( args.debug)) color_pos, thermal_pos = GenIdx(trainset_generator.train_color_label, trainset_generator.train_thermal_label)
def create_dataset_py(dataset_path, do_train, config, device_target, repeat_num=1, batch_size=32):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        repeat_num(int): the repeat times of dataset. Default: 1.
        batch_size(int): the batch size of dataset. Default: 32.

    Returns:
        dataset
    """
    # NOTE(review): `config` is accepted but never read in this function --
    # confirm whether callers rely on the parameter being present.
    if device_target == "Ascend":
        # NOTE(review): assumes RANK_SIZE/RANK_ID env vars are set;
        # int(None) would raise TypeError otherwise.
        rank_size = int(os.getenv("RANK_SIZE"))
        rank_id = int(os.getenv("RANK_ID"))
        if do_train:
            if rank_size == 1:
                data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=8,
                                                 shuffle=True)
            else:
                # Shard the training data across devices.
                data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=8,
                                                 shuffle=True, num_shards=rank_size,
                                                 shard_id=rank_id)
        else:
            # Eval reads the full dataset in a fixed order.
            data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=8,
                                             shuffle=False)
    else:
        raise ValueError("Unsupported device target.")

    resize_height = 224

    if do_train:
        buffer_size = 20480
        # apply shuffle operations
        data_set = data_set.shuffle(buffer_size=buffer_size)

    # define map operations
    decode_op = P.Decode()
    resize_crop_op = P.RandomResizedCrop(resize_height, scale=(0.08, 1.0), ratio=(0.75, 1.333))
    horizontal_flip_op = P.RandomHorizontalFlip(prob=0.5)

    resize_op = P.Resize(256)
    center_crop = P.CenterCrop(resize_height)
    to_tensor = P.ToTensor()
    normalize_op = P.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

    if do_train:
        # Train path: random crop + flip augmentation.
        trans = [
            decode_op,
            resize_crop_op,
            horizontal_flip_op,
            to_tensor,
            normalize_op
        ]
    else:
        # Eval path: deterministic resize + center crop.
        trans = [decode_op, resize_op, center_crop, to_tensor, normalize_op]

    compose = P2.Compose(trans)
    data_set = data_set.map(operations=compose, input_columns="image",
                            num_parallel_workers=8, python_multiprocessing=True)

    # apply batch operations
    data_set = data_set.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    data_set = data_set.repeat(repeat_num)

    return data_set
def create_dataset(dataset_path, do_train, config, platform, repeat_num=1, batch_size=100):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        repeat_num(int): the repeat times of dataset. Default: 1
        batch_size(int): the batch size of dataset. Default: 32

    Returns:
        dataset
    """
    if platform == "Ascend":
        # NOTE(review): assumes RANK_SIZE/RANK_ID env vars are set;
        # int(None) would raise TypeError otherwise.
        rank_size = int(os.getenv("RANK_SIZE"))
        rank_id = int(os.getenv("RANK_ID"))
        if rank_size == 1:
            data_set = ds.MindDataset(dataset_path, num_parallel_workers=8, shuffle=True)
        else:
            # Shard the data across Ascend devices.
            data_set = ds.MindDataset(dataset_path, num_parallel_workers=8, shuffle=True,
                                      num_shards=rank_size, shard_id=rank_id)
    elif platform == "GPU":
        if do_train:
            # Import lazily so GPU eval / other platforms avoid the dependency.
            from mindspore.communication.management import get_rank, get_group_size
            data_set = ds.MindDataset(dataset_path, num_parallel_workers=8, shuffle=True,
                                      num_shards=get_group_size(), shard_id=get_rank())
        else:
            data_set = ds.MindDataset(dataset_path, num_parallel_workers=8, shuffle=False)
    else:
        raise ValueError("Unsupported platform.")

    resize_height = config.image_height
    buffer_size = 1000

    # define map operations (C++ ops, used on the train path)
    resize_crop_op = C.RandomCropDecodeResize(resize_height, scale=(0.08, 1.0),
                                              ratio=(0.75, 1.333))
    horizontal_flip_op = C.RandomHorizontalFlip(prob=0.5)

    color_op = C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
    rescale_op = C.Rescale(1 / 255.0, 0)
    normalize_op = C.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    change_swap_op = C.HWC2CHW()

    # define python operations (composed pipeline, used on the eval path)
    decode_p = P.Decode()
    resize_p = P.Resize(256, interpolation=Inter.BILINEAR)
    center_crop_p = P.CenterCrop(224)
    totensor = P.ToTensor()
    normalize_p = P.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    composeop = P2.Compose(
        [decode_p, resize_p, center_crop_p, totensor, normalize_p])
    if do_train:
        trans = [
            resize_crop_op, horizontal_flip_op, color_op, rescale_op, normalize_op,
            change_swap_op
        ]
    else:
        trans = composeop
    type_cast_op = C2.TypeCast(mstype.int32)

    data_set = data_set.map(input_columns="image", operations=trans,
                            num_parallel_workers=8)
    data_set = data_set.map(input_columns="label_list", operations=type_cast_op,
                            num_parallel_workers=8)

    # apply shuffle operations
    data_set = data_set.shuffle(buffer_size=buffer_size)

    # apply batch operations
    data_set = data_set.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    data_set = data_set.repeat(repeat_num)

    return data_set
def test_uniform_augment(plot=False, num_ops=2):
    """
    Test UniformAugment: compare augmented images against originals via MSE.
    """
    logger.info("Test UniformAugment")

    def _stack_batches(dataset):
        # Stack every batch as NHWC numpy arrays.
        parts = [np.transpose(image.asnumpy(), (0, 2, 3, 1)) for image, _ in dataset]
        return np.concatenate(parts, axis=0)

    # Original Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    transforms_original = mindspore.dataset.transforms.py_transforms.Compose(
        [F.Decode(), F.Resize((224, 224)), F.ToTensor()])
    ds_original = data_set.map(operations=transforms_original, input_columns="image").batch(512)
    images_original = _stack_batches(ds_original)

    # UniformAugment Images: num_ops transforms chosen from transform_list per image.
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    transform_list = [F.RandomRotation(45),
                      F.RandomColor(),
                      F.RandomSharpness(),
                      F.Invert(),
                      F.AutoContrast(),
                      F.Equalize()]
    transforms_ua = mindspore.dataset.transforms.py_transforms.Compose(
        [F.Decode(), F.Resize((224, 224)),
         F.UniformAugment(transforms=transform_list, num_ops=num_ops),
         F.ToTensor()])
    ds_ua = data_set.map(operations=transforms_ua, input_columns="image").batch(512)
    images_ua = _stack_batches(ds_ua)

    mse = np.array([diff_mse(augmented, reference)
                    for augmented, reference in zip(images_ua, images_original)])
    logger.info("MSE= {}".format(str(np.mean(mse))))
    if plot:
        visualize_list(images_original, images_ua)
def test_random_sharpness_py(degrees=(0.7, 0.7), plot=False):
    """
    Test RandomSharpness python op: compare sharpened images against originals via MSE.
    """
    logger.info("Test RandomSharpness python op")

    def _stack_batches(dataset):
        # Stack every batch as NHWC numpy arrays (tuple iterator yields numpy directly).
        parts = [np.transpose(image, (0, 2, 3, 1))
                 for image, _ in dataset.create_tuple_iterator(output_numpy=True)]
        return np.concatenate(parts, axis=0)

    # Original Images
    data = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    transforms_original = mindspore.dataset.transforms.py_transforms.Compose(
        [F.Decode(), F.Resize((224, 224)), F.ToTensor()])
    ds_original = data.map(operations=transforms_original, input_columns="image").batch(512)
    images_original = _stack_batches(ds_original)

    # Random Sharpness Adjusted Images; default-construct the op when degrees is None.
    data = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    sharpness_op = F.RandomSharpness() if degrees is None else F.RandomSharpness(degrees)
    transforms_random_sharpness = mindspore.dataset.transforms.py_transforms.Compose(
        [F.Decode(), F.Resize((224, 224)), sharpness_op, F.ToTensor()])
    ds_random_sharpness = data.map(operations=transforms_random_sharpness,
                                   input_columns="image").batch(512)
    images_random_sharpness = _stack_batches(ds_random_sharpness)

    mse = np.array([diff_mse(sharpened, reference)
                    for sharpened, reference in zip(images_random_sharpness, images_original)])
    logger.info("MSE= {}".format(str(np.mean(mse))))
    if plot:
        visualize_list(images_original, images_random_sharpness)