def test_cut_out_md5():
    """
    Compare the C CutOut op and the py Cutout transform against stored
    md5 golden files, under a fixed seed and a single parallel worker
    (both are required for reproducible golden results).
    """
    logger.info("test_cut_out_md5")
    original_seed = config_get_set_seed(2)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    # Pipeline 1: C ops — decode then cut out a 100x100 patch.
    dataset_c = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    dataset_c = dataset_c.map(operations=c.Decode(), input_columns=["image"])
    dataset_c = dataset_c.map(operations=c.CutOut(100), input_columns=["image"])

    # Pipeline 2: py transforms composed into a single map op.
    dataset_py = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    composed = mindspore.dataset.transforms.py_transforms.Compose(
        [f.Decode(), f.ToTensor(), f.Cutout(100)])
    dataset_py = dataset_py.map(operations=composed, input_columns=["image"])

    # Each pipeline is checked against its own golden file.
    save_and_check_md5(dataset_c, "cut_out_01_c_result.npz", generate_golden=GENERATE_GOLDEN)
    save_and_check_md5(dataset_py, "cut_out_01_py_result.npz", generate_golden=GENERATE_GOLDEN)

    # Restore the global config mutated above so later tests are unaffected.
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
def test_cut_out_op_multicut(plot=False):
    """
    Test the C CutOut op with multiple patches (num_patches=10) by iterating
    it side by side with an un-augmented py pipeline and logging shapes/dtypes.

    Args:
        plot (bool): if True, visualize the two image lists side by side.
    """
    # Fixed: log message previously said "test_cut_out" (copy-paste from a
    # sibling test), which made log output misleading.
    logger.info("test_cut_out_op_multicut")

    # First dataset: py decode + ToTensor only (no augmentation), used as
    # the reference images.
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    transforms_1 = [
        f.Decode(),
        f.ToTensor(),
    ]
    transform_1 = mindspore.dataset.transforms.py_transforms.Compose(transforms_1)
    data1 = data1.map(operations=transform_1, input_columns=["image"])

    # Second dataset: C decode + CutOut with 10 patches of size 80.
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    decode_op = c.Decode()
    cut_out_op = c.CutOut(80, num_patches=10)
    transforms_2 = [decode_op, cut_out_op]
    data2 = data2.map(operations=transforms_2, input_columns=["image"])

    num_iter = 0
    image_list_1, image_list_2 = [], []
    for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
                            data2.create_dict_iterator(num_epochs=1, output_numpy=True)):
        num_iter += 1
        # py pipeline yields CHW floats in [0, 1]; convert back to HWC uint8.
        image_1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8)
        # C image doesn't require transpose
        image_2 = item2["image"]
        image_list_1.append(image_1)
        image_list_2.append(image_2)
        logger.info("shape of image_1: {}".format(image_1.shape))
        logger.info("shape of image_2: {}".format(image_2.shape))
        logger.info("dtype of image_1: {}".format(image_1.dtype))
        logger.info("dtype of image_2: {}".format(image_2.dtype))
    if plot:
        visualize_list(image_list_1, image_list_2)
def create_dataset_cifar(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"):
    """
    create a train or evaluate cifar10 dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        repeat_num(int): the repeat times of dataset. Default: 1
        batch_size(int): the batch size of dataset. Default: 32
        target(str): the device target. Default: Ascend

    Returns:
        dataset
    """
    if target == "Ascend":
        device_num, rank_id = _get_rank_info()
    elif target == "CPU":
        device_num = 1
        # Fixed: rank_id was previously left unassigned on the CPU path; it
        # is unused when device_num == 1, but define it so any future use of
        # rank_id cannot raise UnboundLocalError.
        rank_id = 0
    else:
        init()
        rank_id = get_rank()
        device_num = get_group_size()

    if device_num == 1:
        data_set = ds.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True)
    else:
        data_set = ds.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True,
                                     num_shards=device_num, shard_id=rank_id)

    # define map operations
    if do_train:
        # Training: random crop/flip/color jitter, then resize to the model's
        # 227x227 input, rescale to [0, 1], normalize, CutOut a 112x112 patch,
        # and convert HWC -> CHW.
        trans = [
            C.RandomCrop((32, 32), (4, 4, 4, 4)),
            C.RandomHorizontalFlip(prob=0.5),
            C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4),
            C.Resize((227, 227)),
            C.Rescale(1.0 / 255.0, 0.0),
            C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
            C.CutOut(112),
            C.HWC2CHW()
        ]
    else:
        # Evaluation: deterministic resize/rescale/normalize only.
        trans = [
            C.Resize((227, 227)),
            C.Rescale(1.0 / 255.0, 0.0),
            C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
            C.HWC2CHW()
        ]

    type_cast_op = C2.TypeCast(mstype.int32)

    data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
    data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=8)

    # apply batch operations
    data_set = data_set.batch(batch_size, drop_remainder=True)
    # apply dataset repeat operation
    data_set = data_set.repeat(repeat_num)

    return data_set
def create_dataset_imagenet(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"):
    """
    create a train or eval imagenet dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        repeat_num(int): the repeat times of dataset. Default: 1
        batch_size(int): the batch size of dataset. Default: 32
        target(str): the device target. Default: Ascend

    Returns:
        dataset
    """
    # Resolve shard layout for the chosen device target.
    if target == "Ascend":
        device_num, rank_id = _get_rank_info()
    else:
        init()
        rank_id = get_rank()
        device_num = get_group_size()

    if device_num == 1:
        data_set = ds.ImageFolderDataset(dataset_path, shuffle=True)
    else:
        data_set = ds.ImageFolderDataset(dataset_path, shuffle=True,
                                         num_shards=device_num, shard_id=rank_id)

    image_size = 227
    # Channel statistics scaled to the raw 0-255 pixel range.
    mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    std = [0.229 * 255, 0.224 * 255, 0.225 * 255]

    # define map operations
    if do_train:
        # Training: fused decode+random-crop+resize, flip, color jitter,
        # normalize, CutOut a 112x112 patch, then HWC -> CHW.
        trans = [
            C.RandomCropDecodeResize(image_size, scale=(0.08, 1.0), ratio=(0.75, 1.333)),
            C.RandomHorizontalFlip(prob=0.5),
            C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4),
            C.Normalize(mean=mean, std=std),
            C.CutOut(112),
            C.HWC2CHW()
        ]
    else:
        # Evaluation: deterministic decode, resize, center crop, normalize.
        trans = [
            C.Decode(),
            C.Resize((256, 256)),
            C.CenterCrop(image_size),
            C.Normalize(mean=mean, std=std),
            C.HWC2CHW()
        ]

    label_cast = C2.TypeCast(mstype.int32)
    data_set = data_set.map(operations=label_cast, input_columns="label")
    data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=10)

    # apply batch operations
    data_set = data_set.batch(batch_size, drop_remainder=True)
    # apply dataset repeat operation
    data_set = data_set.repeat(repeat_num)

    return data_set