Example #1
def test_deterministic_run_distribution():
    """
    Test a deterministic run by setting the seed used in a distribution
    """
    logger.info("test_deterministic_run_distribution")

    # Save original configuration values
    num_parallel_workers_original = ds.config.get_num_parallel_workers()
    seed_original = ds.config.get_seed()

    # When the seed is set, all random operations within our dataset are deterministic
    ds.config.set_seed(0)
    ds.config.set_num_parallel_workers(1)

    # First dataset
    data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    random_horizontal_flip_op = c_vision.RandomHorizontalFlip(0.1)
    decode_op = c_vision.Decode()
    data1 = data1.map(operations=decode_op, input_columns=["image"])
    data1 = data1.map(operations=random_horizontal_flip_op, input_columns=["image"])

    # Second dataset
    data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False)
    data2 = data2.map(operations=decode_op, input_columns=["image"])
    # Since the seed is set, both ops produce the same deterministic sequence
    random_horizontal_flip_op2 = c_vision.RandomHorizontalFlip(0.1)
    data2 = data2.map(operations=random_horizontal_flip_op2, input_columns=["image"])

    dataset_equal(data1, data2, 0)

    # Restore original configuration values
    ds.config.set_num_parallel_workers(num_parallel_workers_original)
    ds.config.set_seed(seed_original)
Example #2
def sync_random_Horizontal_Flip(input_images, target_images):
    """
    Randomly flip the input images and the target images.
    """
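    # Draw one seed, then reset the global RNG seed before each op so both
    # flips make the same random decision.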
    seed = np.random.randint(0, 2000000000)
    mindspore.set_seed(seed)
    op = C.RandomHorizontalFlip(prob=0.5)
    out_input = op(input_images)
    mindspore.set_seed(seed)
    op = C.RandomHorizontalFlip(prob=0.5)
    out_target = op(target_images)
    return out_input, out_target
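
A minimal usage sketch for the helper above (the dummy uint8 HWC images are hypothetical; numpy and the vision module are assumed to be imported as np and C, as in the snippet):

input_images = np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8)
target_images = np.random.randint(0, 255, (64, 64, 3), dtype=np.uint8)
out_input, out_target = sync_random_Horizontal_Flip(input_images, target_images)

# Under the same seed both ops make the same choice: both flipped, or neither.
flipped_in = np.array_equal(out_input, input_images[:, ::-1, :])
flipped_tgt = np.array_equal(out_target, target_images[:, ::-1, :])
assert flipped_in == flipped_tgt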
Example #3
def test_bounding_box_augment_op_coco_c(plot_vis=False):
    """
    Prints images and bboxes side by side, with and without BoundingBoxAugment op applied.
    Testing with the COCO dataset.
    """
    logger.info("test_bounding_box_augment_op_coco_c")

    dataCoco1 = ds.CocoDataset(DATA_DIR_2[0], annotation_file=DATA_DIR_2[1], task="Detection",
                               decode=True, shuffle=False)

    dataCoco2 = ds.CocoDataset(DATA_DIR_2[0], annotation_file=DATA_DIR_2[1], task="Detection",
                               decode=True, shuffle=False)

    test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 1)

    dataCoco2 = dataCoco2.map(operations=[test_op], input_columns=["image", "bbox"],
                              output_columns=["image", "bbox"],
                              column_order=["image", "bbox"])

    unaugSamp, augSamp = [], []

    for unAug, Aug in zip(dataCoco1.create_dict_iterator(num_epochs=1, output_numpy=True),
                          dataCoco2.create_dict_iterator(num_epochs=1, output_numpy=True)):
        unaugSamp.append(unAug)
        augSamp.append(Aug)

    if plot_vis:
        visualize_with_bounding_boxes(unaugSamp, augSamp, "bbox")
Example #4
def create_dataset(data_home, repeat_num=1, batch_size=32, do_train=True, device_target="CPU"):
    """
    create data for later use, such as training or inference
    """

    cifar_ds = ds.Cifar10Dataset(data_home, num_parallel_workers=8, shuffle=True)

    c_trans = []
    if do_train:
        c_trans += [
            C.RandomCrop((32, 32), (4, 4, 4, 4)),
            C.RandomHorizontalFlip(prob=0.5)
        ]

    c_trans += [
        C.Resize((224, 224)),
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
        C.HWC2CHW()
    ]

    type_cast_op = C2.TypeCast(mstype.int32)

    cifar_ds = cifar_ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
    cifar_ds = cifar_ds.map(operations=c_trans, input_columns="image", num_parallel_workers=8)

    cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True)
    cifar_ds = cifar_ds.repeat(repeat_num)

    return cifar_ds
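
A minimal usage sketch for the helper above (the CIFAR-10 path is hypothetical; the shapes follow from Resize((224, 224)), HWC2CHW and batch(32)):

train_ds = create_dataset("./cifar-10-batches-bin", batch_size=32, do_train=True)
for images, labels in train_ds.create_tuple_iterator(output_numpy=True):
    print(images.shape, labels.shape)  # (32, 3, 224, 224) (32,)
    break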
Example #5
def test_cpp_uniform_augment_random_crop_badinput(num_ops=1):
    """
    Test UniformAugment with a crop size larger than the image size
    """
    logger.info("Test CPP UniformAugment with random_crop bad input")
    batch_size = 2
    cifar10_dir = "../data/dataset/testCifar10Data"
    ds1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False)  # shape = [32,32,3]

    transforms_ua = [
        # Note: crop size [224, 224] > image size [32, 32]
        C.RandomCrop(size=[224, 224]),
        C.RandomHorizontalFlip()
    ]
    uni_aug = C.UniformAugment(transforms=transforms_ua, num_ops=num_ops)
    ds1 = ds1.map(operations=uni_aug, input_columns="image")

    # apply DatasetOps
    ds1 = ds1.batch(batch_size, drop_remainder=True, num_parallel_workers=1)
    num_batches = 0
    try:
        for _ in ds1.create_dict_iterator(num_epochs=1, output_numpy=True):
            num_batches += 1
    except Exception as e:
        assert "Crop size" in str(e)
Example #6
def test_bounding_box_augment_valid_edge_c(plot_vis=False):
    """
    Test BoundingBoxAugment op (valid edge case: a box covering the full image).
    Prints images with bboxes side by side, with and without the op applied, to compare and test
    """
    logger.info("test_bounding_box_augment_valid_edge_c")

    original_seed = config_get_set_seed(1)
    original_num_parallel_workers = config_get_set_num_parallel_workers(1)

    dataVoc1 = ds.VOCDataset(DATA_DIR,
                             task="Detection",
                             usage="train",
                             shuffle=False,
                             decode=True)
    dataVoc2 = ds.VOCDataset(DATA_DIR,
                             task="Detection",
                             usage="train",
                             shuffle=False,
                             decode=True)

    test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 1)

    # map to apply ops
    # Replace the "bbox" column with a single box covering the full image
    dataVoc1 = dataVoc1.map(
        operations=lambda img, bbox:
        (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(
            np.float32)),
        input_columns=["image", "bbox"],
        output_columns=["image", "bbox"],
        column_order=["image", "bbox"])
    dataVoc2 = dataVoc2.map(
        operations=lambda img, bbox:
        (img, np.array([[0, 0, img.shape[1], img.shape[0], 0, 0, 0]]).astype(
            np.float32)),
        input_columns=["image", "bbox"],
        output_columns=["image", "bbox"],
        column_order=["image", "bbox"])
    dataVoc2 = dataVoc2.map(operations=[test_op],
                            input_columns=["image", "bbox"],
                            output_columns=["image", "bbox"],
                            column_order=["image", "bbox"])
    filename = "bounding_box_augment_valid_edge_c_result.npz"
    save_and_check_md5(dataVoc2, filename, generate_golden=GENERATE_GOLDEN)

    unaugSamp, augSamp = [], []

    for unAug, Aug in zip(
            dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True),
            dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)):
        unaugSamp.append(unAug)
        augSamp.append(Aug)

    if plot_vis:
        visualize_with_bounding_boxes(unaugSamp, augSamp)

    # Restore config setting
    ds.config.set_seed(original_seed)
    ds.config.set_num_parallel_workers(original_num_parallel_workers)
Example #7
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="CPU"):
    """Create a train or eval CIFAR-10 dataset."""
    data_set = ds.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True)

    # define map operations
    trans = []
    if do_train:
        trans += [
            C.RandomCrop((32, 32), (4, 4, 4, 4)),
            C.RandomHorizontalFlip(prob=0.5)
        ]

    trans += [
        C.Resize((48, 48)),
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
        C.HWC2CHW()
    ]

    type_cast_op = C2.TypeCast(mstype.int32)

    data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
    data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=8)

    # apply shuffle and batch operations
    data_set = data_set.shuffle(buffer_size=10)
    data_set = data_set.batch(batch_size, drop_remainder=False)
    # apply dataset repeat operation
    data_set = data_set.repeat(repeat_num)

    return data_set
Example #8
File: main.py  Project: mindspore-ai/book
def create_dataset(data_path, batch_size=32, repeat_size=1, mode="train"):
    """
    create dataset for train or test
    """
    cifar_ds = ds.Cifar10Dataset(data_path)
    rescale = 1.0 / 255.0
    shift = 0.0

    resize_op = CV.Resize((cfg.image_height, cfg.image_width))
    rescale_op = CV.Rescale(rescale, shift)
    normalize_op = CV.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
    if mode == "train":
        random_crop_op = CV.RandomCrop([32, 32], [4, 4, 4, 4])
        random_horizontal_op = CV.RandomHorizontalFlip()
    channel_swap_op = CV.HWC2CHW()
    typecast_op = C.TypeCast(mstype.int32)
    cifar_ds = cifar_ds.map(operations=typecast_op, input_columns="label")
    if mode == "train":
        cifar_ds = cifar_ds.map(operations=random_crop_op, input_columns="image")
        cifar_ds = cifar_ds.map(operations=random_horizontal_op, input_columns="image")
    cifar_ds = cifar_ds.map(operations=resize_op, input_columns="image")
    cifar_ds = cifar_ds.map(operations=rescale_op, input_columns="image")
    cifar_ds = cifar_ds.map(operations=normalize_op, input_columns="image")
    cifar_ds = cifar_ds.map(operations=channel_swap_op, input_columns="image")

    cifar_ds = cifar_ds.shuffle(buffer_size=cfg.buffer_size)
    cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True)
    cifar_ds = cifar_ds.repeat(repeat_size)
    return cifar_ds
Example #9
def test_bounding_box_augment_invalid_ratio_c():
    """
    Test BoundingBoxAugment op with invalid input ratio
    """
    logger.info("test_bounding_box_augment_invalid_ratio_c")

    dataVoc2 = ds.VOCDataset(DATA_DIR,
                             task="Detection",
                             usage="train",
                             shuffle=False,
                             decode=True)

    try:
        # valid ratio range is 0 to 1, so 1.5 should raise ValueError
        test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1),
                                              1.5)
        # map to apply ops
        dataVoc2 = dataVoc2.map(operations=[test_op],
                                input_columns=["image", "bbox"],
                                output_columns=["image", "bbox"],
                                column_order=["image",
                                              "bbox"])  # Add column for "bbox"
    except ValueError as error:
        logger.info("Got an exception in DE: {}".format(str(error)))
        assert "Input ratio is not within the required interval of (0.0 to 1.0)." in str(
            error)
Example #10
def test_bounding_box_augment_invalid_bounds_c():
    """
    Test BoundingBoxAugment op with invalid bboxes.
    """
    logger.info("test_bounding_box_augment_invalid_bounds_c")

    test_op = c_vision.BoundingBoxAugment(c_vision.RandomHorizontalFlip(1), 1)

    dataVoc2 = ds.VOCDataset(DATA_DIR,
                             task="Detection",
                             usage="train",
                             shuffle=False,
                             decode=True)
    check_bad_bbox(dataVoc2, test_op, InvalidBBoxType.WidthOverflow,
                   "bounding boxes is out of bounds of the image")
    dataVoc2 = ds.VOCDataset(DATA_DIR,
                             task="Detection",
                             usage="train",
                             shuffle=False,
                             decode=True)
    check_bad_bbox(dataVoc2, test_op, InvalidBBoxType.HeightOverflow,
                   "bounding boxes is out of bounds of the image")
    dataVoc2 = ds.VOCDataset(DATA_DIR,
                             task="Detection",
                             usage="train",
                             shuffle=False,
                             decode=True)
    check_bad_bbox(dataVoc2, test_op, InvalidBBoxType.NegativeXY, "min_x")
    dataVoc2 = ds.VOCDataset(DATA_DIR,
                             task="Detection",
                             usage="train",
                             shuffle=False,
                             decode=True)
    check_bad_bbox(dataVoc2, test_op, InvalidBBoxType.WrongShape, "4 features")
Example #11
def test_serdes_uniform_augment(remove_json_files=True):
    """
    Test serdes on uniform augment.
    """
    data_dir = "../data/dataset/testPK/data"
    data = ds.ImageFolderDataset(dataset_dir=data_dir, shuffle=False)
    ds.config.set_seed(1)

    transforms_ua = [
        vision.RandomHorizontalFlip(),
        vision.RandomVerticalFlip(),
        vision.RandomColor(),
        vision.RandomSharpness(),
        vision.Invert(),
        vision.AutoContrast(),
        vision.Equalize()
    ]
    transforms_all = [
        vision.Decode(),
        vision.Resize(size=[224, 224]),
        vision.UniformAugment(transforms=transforms_ua, num_ops=5)
    ]
    data = data.map(operations=transforms_all,
                    input_columns="image",
                    num_parallel_workers=1)
    util_check_serialize_deserialize_file(data, "uniform_augment_pipeline",
                                          remove_json_files)
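
For reference, a round-trip with the public serialization API, which the helper above presumably wraps (the file name is illustrative):

ds.serialize(data, json_filepath="uniform_augment_pipeline.json")
data_restored = ds.deserialize(json_filepath="uniform_augment_pipeline.json")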
Example #12
def get_de_dataset(args):
    """Get de_dataset."""
    transform_label = [C.TypeCast(mstype.int32)]
    transform_img = [
        VC.Decode(),
        VC.Resize((96, 64)),
        VC.RandomColorAdjust(brightness=0.3,
                             contrast=0.3,
                             saturation=0.3,
                             hue=0),
        VC.RandomHorizontalFlip(),
        VC.Normalize((127.5, 127.5, 127.5), (127.5, 127.5, 127.5)),
        VC.HWC2CHW()
    ]

    de_dataset = de.ImageFolderDataset(dataset_dir=args.data_dir,
                                       num_shards=args.world_size,
                                       shard_id=args.local_rank,
                                       shuffle=True)
    de_dataset = de_dataset.map(input_columns="image",
                                operations=transform_img)
    de_dataset = de_dataset.map(input_columns="label",
                                operations=transform_label)
    de_dataset = de_dataset.project(columns=["image", "label"])
    de_dataset = de_dataset.batch(args.per_batch_size, drop_remainder=True)

    num_iter_per_gpu = de_dataset.get_dataset_size()
    de_dataset = de_dataset.repeat(args.max_epoch)
    num_classes = de_dataset.num_classes()

    return de_dataset, num_iter_per_gpu, num_classes
Example #13
def create_dataset_cifar10(data_path,
                           batch_size=32,
                           repeat_size=1,
                           status="train",
                           target="Ascend"):
    """
    create dataset for train or test
    """

    if target == "Ascend":
        device_num, rank_id = _get_rank_info()

    if target != "Ascend" or device_num == 1:
        cifar_ds = ds.Cifar10Dataset(data_path)
    else:
        cifar_ds = ds.Cifar10Dataset(data_path,
                                     num_parallel_workers=8,
                                     shuffle=True,
                                     num_shards=device_num,
                                     shard_id=rank_id)
    rescale = 1.0 / 255.0
    shift = 0.0
    cfg = alexnet_cifar10_cfg

    resize_op = CV.Resize((cfg.image_height, cfg.image_width))
    rescale_op = CV.Rescale(rescale, shift)
    normalize_op = CV.Normalize((0.4914, 0.4822, 0.4465),
                                (0.2023, 0.1994, 0.2010))
    if status == "train":
        random_crop_op = CV.RandomCrop([32, 32], [4, 4, 4, 4])
        random_horizontal_op = CV.RandomHorizontalFlip()
    channel_swap_op = CV.HWC2CHW()
    typecast_op = C.TypeCast(mstype.int32)
    cifar_ds = cifar_ds.map(input_columns="label",
                            operations=typecast_op,
                            num_parallel_workers=8)
    if status == "train":
        cifar_ds = cifar_ds.map(input_columns="image",
                                operations=random_crop_op,
                                num_parallel_workers=8)
        cifar_ds = cifar_ds.map(input_columns="image",
                                operations=random_horizontal_op,
                                num_parallel_workers=8)
    cifar_ds = cifar_ds.map(input_columns="image",
                            operations=resize_op,
                            num_parallel_workers=8)
    cifar_ds = cifar_ds.map(input_columns="image",
                            operations=rescale_op,
                            num_parallel_workers=8)
    cifar_ds = cifar_ds.map(input_columns="image",
                            operations=normalize_op,
                            num_parallel_workers=8)
    cifar_ds = cifar_ds.map(input_columns="image",
                            operations=channel_swap_op,
                            num_parallel_workers=8)

    cifar_ds = cifar_ds.shuffle(buffer_size=cfg.buffer_size)
    cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True)
    cifar_ds = cifar_ds.repeat(repeat_size)
    return cifar_ds
Example #14
def create_dataset(args, shuffle=True, max_dataset_size=float("inf")):
    """Create dataset"""
    dataroot = args.dataroot
    phase = args.phase
    batch_size = args.batch_size
    device_num = args.device_num
    rank = args.rank
    cores = multiprocessing.cpu_count()
    num_parallel_workers = min(8, int(cores / device_num))
    image_size = args.image_size
    mean = [0.5 * 255] * 3
    std = [0.5 * 255] * 3
    if phase == "train":
        dataset = UnalignedDataset(dataroot,
                                   phase,
                                   max_dataset_size=max_dataset_size)
        distributed_sampler = DistributedSampler(len(dataset),
                                                 device_num,
                                                 rank,
                                                 shuffle=shuffle)
        ds = de.GeneratorDataset(dataset,
                                 column_names=["image_A", "image_B"],
                                 sampler=distributed_sampler,
                                 num_parallel_workers=num_parallel_workers)
        trans = [
            C.RandomResizedCrop(image_size,
                                scale=(0.5, 1.0),
                                ratio=(0.75, 1.333)),
            C.RandomHorizontalFlip(prob=0.5),
            C.Normalize(mean=mean, std=std),
            C.HWC2CHW()
        ]
        ds = ds.map(operations=trans,
                    input_columns=["image_A"],
                    num_parallel_workers=num_parallel_workers)
        ds = ds.map(operations=trans,
                    input_columns=["image_B"],
                    num_parallel_workers=num_parallel_workers)
        ds = ds.batch(batch_size, drop_remainder=True)
        ds = ds.repeat(1)
    else:
        datadir = os.path.join(dataroot, args.data_dir)
        dataset = ImageFolderDataset(datadir,
                                     max_dataset_size=max_dataset_size)
        ds = de.GeneratorDataset(dataset,
                                 column_names=["image", "image_name"],
                                 num_parallel_workers=num_parallel_workers)
        trans = [
            C.Resize((image_size, image_size)),
            C.Normalize(mean=mean, std=std),
            C.HWC2CHW()
        ]
        ds = ds.map(operations=trans,
                    input_columns=["image"],
                    num_parallel_workers=num_parallel_workers)
        ds = ds.batch(1, drop_remainder=True)
        ds = ds.repeat(1)
    args.dataset_size = len(dataset)
    return ds
Example #15
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
    """
    Create a train or eval dataset.

    Args:
        dataset_path (str): The path of dataset.
        do_train (bool): Whether dataset is used for train or eval.
        repeat_num (int): The repeat times of dataset. Default: 1.
        batch_size (int): The batch size of dataset. Default: 32.

    Returns:
        Dataset.
    """
    if do_train:
        dataset_path = os.path.join(dataset_path, 'train')
        do_shuffle = True
    else:
        dataset_path = os.path.join(dataset_path, 'eval')
        do_shuffle = False

    if device_num == 1 or not do_train:
        ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=do_shuffle)
    else:
        ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=do_shuffle,
                               num_shards=device_num, shard_id=device_id)

    resize_height = 224
    resize_width = 224
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations
    random_crop_op = C.RandomCrop((32, 32), (4, 4, 4, 4))
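    # Note: the flip probability below depends on the global device_id
    # (0.0 on device 0, 0.5 on device 1, ...), so each shard flips differently.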
    random_horizontal_flip_op = C.RandomHorizontalFlip(device_id / (device_id + 1))

    resize_op = C.Resize((resize_height, resize_width))
    rescale_op = C.Rescale(rescale, shift)
    normalize_op = C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])

    change_swap_op = C.HWC2CHW()

    trans = []
    if do_train:
        trans += [random_crop_op, random_horizontal_flip_op]

    trans += [resize_op, rescale_op, normalize_op, change_swap_op]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
    ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
Example #16
def create_dataset(data_path, is_train=True, batch_size=32):
    # import
    import mindspore.common.dtype as mstype
    import mindspore.dataset.engine as de
    import mindspore.dataset.transforms.c_transforms as C2
    import mindspore.dataset.vision.c_transforms as C
    from mindspore.common import set_seed

    set_seed(1)

    # shard
    num_shards = shard_id = None
    rank_size = os.getenv("RANK_SIZE")
    rank_id = os.getenv("RANK_ID")
    if rank_size is not None and rank_id is not None:
        num_shards = int(rank_size)
        shard_id = int(rank_id)

    # define dataset
    data_path = os.path.join(
        data_path,
        "cifar-10-batches-bin" if is_train else "cifar-10-verify-bin")
    ds = de.Cifar10Dataset(data_path,
                           shuffle=True,
                           num_shards=num_shards,
                           shard_id=shard_id,
                           num_parallel_workers=8,
                           num_samples=None)

    # define ops
    comps_ops = list()

    # train or val
    if is_train:
        comps_ops.append(C.RandomCrop((32, 32), (4, 4, 4, 4)))
        comps_ops.append(C.RandomHorizontalFlip(prob=0.5))

    comps_ops.append(C.Resize((224, 224)))
    comps_ops.append(C.Rescale(1 / 255.0, 0.))
    comps_ops.append(
        C.Normalize(mean=[0.4914, 0.4822, 0.4465],
                    std=[0.2023, 0.1994, 0.2010]))
    comps_ops.append(C.HWC2CHW())

    # map ops
    ds = ds.map(input_columns=["image"],
                operations=comps_ops,
                num_parallel_workers=8)
    ds = ds.map(input_columns=["label"],
                operations=C2.TypeCast(mstype.int32),
                num_parallel_workers=8)

    # batch & repeat
    ds = ds.batch(batch_size=batch_size, drop_remainder=is_train)
    ds = ds.repeat(count=1)

    return ds
Example #17
def create_dataset(args,
                   training,
                   data_path,
                   batch_size=32,
                   repeat_size=1,
                   num_parallel_workers=1):
    # define dataset
    cifar_ds = ds.Cifar10Dataset(data_path, num_parallel_workers=num_parallel_workers)

    # define operation parameters
    resize_height, resize_width = 224, 224
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations
    random_crop_op = C.RandomCrop(
        (32, 32), (4, 4, 4, 4))  # padding_mode default CONSTANT
    if args.debug:
        print(f'Random crop op: {random_crop_op}')
    random_horizontal_op = C.RandomHorizontalFlip()
    resize_op = C.Resize(
        (resize_height, resize_width))  # interpolation default BILINEAR
    rescale_op = C.Rescale(rescale, shift)
    normalize_op = C.Normalize((0.4914, 0.4822, 0.4465),
                               (0.2023, 0.1994, 0.2010))
    if args.debug:
        print(f'Normalize operation: {normalize_op}')
    changeswap_op = C.HWC2CHW()
    type_cast_op = C2.TypeCast(mstype.int32)
    if args.debug:
        print(f'Type cast operation: {type_cast_op}')

    c_trans = []

    if training:
        c_trans = [random_crop_op, random_horizontal_op]

    c_trans += [resize_op, rescale_op, normalize_op, changeswap_op]
    if args.debug:
        print(f'C transform: {c_trans}')

    # apply map operations on images
    cifar_ds = cifar_ds.map(operations=type_cast_op, input_columns="label")
    cifar_ds = cifar_ds.map(operations=c_trans, input_columns="image")

    # apply shuffle ops
    cifar_ds = cifar_ds.shuffle(buffer_size=10)

    # apply batch ops
    cifar_ds = cifar_ds.batch(batch_size=batch_size, drop_remainder=True)

    # apply repeat operators
    cifar_ds = cifar_ds.repeat(repeat_size)

    return cifar_ds
Example #18
def create_dataset(dataset_path,
                   do_train,
                   repeat_num=1,
                   batch_size=32,
                   target="GPU",
                   dtype="fp16",
                   device_num=1):
    ds.config.set_numa_enable(True)
    if device_num == 1:
        data_set = ds.ImageFolderDataset(dataset_path,
                                         num_parallel_workers=4,
                                         shuffle=True)
    else:
        data_set = ds.ImageFolderDataset(dataset_path,
                                         num_parallel_workers=4,
                                         shuffle=True,
                                         num_shards=device_num,
                                         shard_id=get_rank())
    image_size = 224
    mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    std = [0.229 * 255, 0.224 * 255, 0.225 * 255]

    # define map operations
    normalize_op = C.Normalize(mean=mean, std=std)
    if dtype == "fp16":
        if args_opt.eval:
            x_dtype = "float32"
        else:
            x_dtype = "float16"
        normalize_op = C.NormalizePad(mean=mean, std=std, dtype=x_dtype)
    if do_train:
        trans = [
            C.RandomCropDecodeResize(image_size,
                                     scale=(0.08, 1.0),
                                     ratio=(0.75, 1.333)),
            C.RandomHorizontalFlip(prob=0.5),
            normalize_op,
        ]
    else:
        trans = [
            C.Decode(),
            C.Resize(256),
            C.CenterCrop(image_size),
            normalize_op,
        ]
    if dtype == "fp32":
        trans.append(C.HWC2CHW())
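    # Note: HWC2CHW is appended only on the fp32 path; with NormalizePad the
    # output presumably stays in (H, W, 4) layout for the network to handle.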
    data_set = data_set.map(operations=trans,
                            input_columns="image",
                            num_parallel_workers=8)
    # apply batch operations
    data_set = data_set.batch(batch_size, drop_remainder=True)
    # apply dataset repeat operation
    if repeat_num > 1:
        data_set = data_set.repeat(repeat_num)

    return data_set
Example #19
def test_cpp_uniform_augment(plot=False, num_ops=2):
    """
    Test UniformAugment
    """
    logger.info("Test CPP UniformAugment")

    # Original Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)

    transforms_original = [C.Decode(), C.Resize(size=[224, 224]),
                           F.ToTensor()]

    ds_original = data_set.map(operations=transforms_original, input_columns="image")

    ds_original = ds_original.batch(512)

    for idx, (image, _) in enumerate(ds_original):
        if idx == 0:
            images_original = np.transpose(image.asnumpy(), (0, 2, 3, 1))
        else:
            images_original = np.append(images_original,
                                        np.transpose(image.asnumpy(), (0, 2, 3, 1)),
                                        axis=0)

    # UniformAugment Images
    data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
    transforms_ua = [C.RandomCrop(size=[224, 224], padding=[32, 32, 32, 32]),
                     C.RandomHorizontalFlip(),
                     C.RandomVerticalFlip(),
                     C.RandomColorAdjust(),
                     C.RandomRotation(degrees=45)]

    uni_aug = C.UniformAugment(transforms=transforms_ua, num_ops=num_ops)

    transforms_all = [C.Decode(), C.Resize(size=[224, 224]),
                      uni_aug,
                      F.ToTensor()]

    ds_ua = data_set.map(operations=transforms_all, input_columns="image", num_parallel_workers=1)

    ds_ua = ds_ua.batch(512)

    for idx, (image, _) in enumerate(ds_ua):
        if idx == 0:
            images_ua = np.transpose(image.asnumpy(), (0, 2, 3, 1))
        else:
            images_ua = np.append(images_ua,
                                  np.transpose(image.asnumpy(), (0, 2, 3, 1)),
                                  axis=0)
    if plot:
        visualize_list(images_original, images_ua)

    num_samples = images_original.shape[0]
    mse = np.zeros(num_samples)
    for i in range(num_samples):
        mse[i] = diff_mse(images_ua[i], images_original[i])
    logger.info("MSE= {}".format(str(np.mean(mse))))
Example #20
def create_deeptext_dataset(mindrecord_file, batch_size=2, repeat_num=12, device_num=1, rank_id=0,
                            is_training=True, num_parallel_workers=4):
    """Creatr deeptext dataset with MindDataset."""
    ds = de.MindDataset(mindrecord_file, columns_list=["image", "annotation"], num_shards=device_num, shard_id=rank_id,
                        num_parallel_workers=1, shuffle=is_training)
    decode = C.Decode()
    ds = ds.map(operations=decode, input_columns=["image"], num_parallel_workers=1)
    compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training))

    hwc_to_chw = C.HWC2CHW()
    normalize_op = C.Normalize((123.675, 116.28, 103.53), (58.395, 57.12, 57.375))
    horizontally_op = C.RandomHorizontalFlip(1)
    type_cast0 = CC.TypeCast(mstype.float32)
    type_cast1 = CC.TypeCast(mstype.float32)
    type_cast2 = CC.TypeCast(mstype.int32)
    type_cast3 = CC.TypeCast(mstype.bool_)

    if is_training:
        ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
                    output_columns=["image", "image_shape", "box", "label", "valid_num"],
                    column_order=["image", "image_shape", "box", "label", "valid_num"],
                    num_parallel_workers=num_parallel_workers)

        flip = (np.random.rand() < config.flip_ratio)
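        # Note: this flip decision is made once when the pipeline is built,
        # so the whole run is either flipped or not; it is not per-sample.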
        if flip:
            ds = ds.map(operations=[normalize_op, type_cast0, horizontally_op], input_columns=["image"],
                        num_parallel_workers=12)
            ds = ds.map(operations=flipped_generation,
                        input_columns=["image", "image_shape", "box", "label", "valid_num"],
                        num_parallel_workers=num_parallel_workers)
        else:
            ds = ds.map(operations=[normalize_op, type_cast0], input_columns=["image"],
                        num_parallel_workers=12)
        ds = ds.map(operations=[hwc_to_chw, type_cast1], input_columns=["image"],
                    num_parallel_workers=12)

    else:
        ds = ds.map(operations=compose_map_func,
                    input_columns=["image", "annotation"],
                    output_columns=["image", "image_shape", "box", "label", "valid_num"],
                    column_order=["image", "image_shape", "box", "label", "valid_num"],
                    num_parallel_workers=num_parallel_workers)

        ds = ds.map(operations=[normalize_op, hwc_to_chw, type_cast1], input_columns=["image"],
                    num_parallel_workers=24)

    # type-cast the remaining columns
    ds = ds.map(operations=[type_cast1], input_columns=["image_shape"])
    ds = ds.map(operations=[type_cast1], input_columns=["box"])
    ds = ds.map(operations=[type_cast2], input_columns=["label"])
    ds = ds.map(operations=[type_cast3], input_columns=["valid_num"])
    ds = ds.batch(batch_size, drop_remainder=True)
    ds = ds.repeat(repeat_num)

    return ds
Example #21
def create_dataset(dataset_path, config, do_train, repeat_num=1):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        config(dict): config of dataset.
        do_train(bool): whether dataset is used for train or eval.
        repeat_num(int): the repeat times of dataset. Default: 1.

    Returns:
        dataset
    """
    rank = config.rank
    group_size = config.group_size
    if group_size == 1:
        ds = de.ImageFolderDataset(dataset_path,
                                   num_parallel_workers=config.work_nums,
                                   shuffle=True)
    else:
        ds = de.ImageFolderDataset(dataset_path,
                                   num_parallel_workers=config.work_nums,
                                   shuffle=True,
                                   num_shards=group_size,
                                   shard_id=rank)
    # define map operations
    if do_train:
        trans = [
            C.RandomCropDecodeResize(config.image_size),
            C.RandomHorizontalFlip(prob=0.5),
            C.RandomColorAdjust(brightness=0.4, saturation=0.5)  # fast mode
        ]
    else:
        trans = [
            C.Decode(),
            C.Resize(int(config.image_size / 0.875)),
            C.CenterCrop(config.image_size)
        ]
    trans += [
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
        C.HWC2CHW()
    ]
    type_cast_op = C2.TypeCast(mstype.int32)
    ds = ds.map(operations=trans,
                input_columns="image",
                num_parallel_workers=config.work_nums)
    ds = ds.map(operations=type_cast_op,
                input_columns="label",
                num_parallel_workers=config.work_nums)
    # apply batch operations
    ds = ds.batch(config.batch_size, drop_remainder=True)
    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)
    return ds
Example #22
def create_dataset(data_home, do_train, batch_size=32, repeat_num=1):
    """
    create a train or evaluate cifar10 dataset for resnet50
    Args:
        data_home(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        batch_size(int): the batch size of dataset. Default: 32
        repeat_num(int): the repeat times of dataset. Default: 1

    Returns:
        dataset
    """
    # define dataset
    cifar_ds = ds.Cifar10Dataset(data_home)

    resize_height = 224
    resize_width = 224
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations
    random_crop_op = C.RandomCrop(
        (32, 32), (4, 4, 4, 4))  # padding_mode default CONSTANT
    random_horizontal_op = C.RandomHorizontalFlip()
    resize_op = C.Resize(
        (resize_height, resize_width))  # interpolation default BILINEAR
    rescale_op = C.Rescale(rescale, shift)
    normalize_op = C.Normalize((0.4914, 0.4822, 0.4465),
                               (0.2023, 0.1994, 0.2010))
    changeswap_op = C.HWC2CHW()
    type_cast_op = C2.TypeCast(mstype.int32)

    c_trans = []
    if do_train:
        c_trans = [random_crop_op, random_horizontal_op]
    c_trans += [resize_op, rescale_op, normalize_op, changeswap_op]

    # apply map operations on images
    cifar_ds = cifar_ds.map(operations=type_cast_op, input_columns="label")
    cifar_ds = cifar_ds.map(operations=c_trans, input_columns="image")

    # apply shuffle operations
    cifar_ds = cifar_ds.shuffle(buffer_size=10)

    # apply batch operations
    cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True)

    # apply repeat operations
    cifar_ds = cifar_ds.repeat(repeat_num)

    return cifar_ds
Example #23
def create_dataset(repeat_num=1,
                   training=True,
                   batch_size=32,
                   rank_id=0,
                   rank_size=1,
                   enable_hccl=False):
    data_dir = data_home + "/cifar-10-batches-bin"
    if not training:
        data_dir = data_home + "/cifar-10-verify-bin"
    if enable_hccl:
        data_set = ds.Cifar10Dataset(data_dir,
                                     num_shards=rank_size,
                                     shard_id=rank_id)
    else:
        data_set = ds.Cifar10Dataset(data_dir)

    resize_height = 224
    resize_width = 224
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations
    random_crop_op = vision.RandomCrop(
        (32, 32), (4, 4, 4, 4))  # padding_mode default CONSTANT
    random_horizontal_op = vision.RandomHorizontalFlip()
    # interpolation default BILINEAR
    resize_op = vision.Resize((resize_height, resize_width))
    rescale_op = vision.Rescale(rescale, shift)
    normalize_op = vision.Normalize((0.4465, 0.4822, 0.4914),
                                    (0.2010, 0.1994, 0.2023))
    changeswap_op = vision.HWC2CHW()
    type_cast_op = C.TypeCast(mstype.int32)

    c_trans = []
    if training:
        c_trans = [random_crop_op, random_horizontal_op]
    c_trans += [resize_op, rescale_op, normalize_op, changeswap_op]

    # apply map operations on images
    data_set = data_set.map(operations=type_cast_op, input_columns="label")
    data_set = data_set.map(operations=c_trans, input_columns="image")

    # apply shuffle operations
    data_set = data_set.shuffle(buffer_size=1000)

    # apply batch operations
    data_set = data_set.batch(batch_size=batch_size, drop_remainder=True)

    # apply repeat operations
    data_set = data_set.repeat(repeat_num)

    return data_set
Example #24
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
    """
    Create a train or eval dataset.

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        repeat_num(int): the repeat times of dataset. Default: 1
        batch_size(int): the batch size of dataset. Default: 32

    Returns:
        dataset
    """

    device_num = int(os.getenv("RANK_SIZE"))
    rank_id = int(os.getenv("RANK_ID"))
    if device_num == 1:
        ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True)
    else:
        ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True,
                                   num_shards=device_num, shard_id=rank_id)

    image_size = 224
    mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    std = [0.229 * 255, 0.224 * 255, 0.225 * 255]

    # define map operations
    if do_train:
        trans = [
            C.RandomCropDecodeResize(image_size, scale=(0.08, 1.0), ratio=(0.75, 1.333)),
            C.RandomHorizontalFlip(prob=0.5),
            C.Normalize(mean=mean, std=std),
            C.HWC2CHW()
        ]
    else:
        trans = [
            C.Decode(),
            C.Resize((256, 256)),
            C.CenterCrop(image_size),
            C.Normalize(mean=mean, std=std),
            C.HWC2CHW()
        ]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)
    ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)
    return ds
Example #25
def create_dataset_cifar10(data_home,
                           repeat_num=1,
                           training=True,
                           cifar_cfg=None):
    """Data operations."""
    data_dir = os.path.join(data_home, "cifar-10-batches-bin")
    if not training:
        data_dir = os.path.join(data_home, "cifar-10-verify-bin")

    rank_size, rank_id = _get_rank_info()
    if training:
        data_set = ds.Cifar10Dataset(data_dir,
                                     num_shards=rank_size,
                                     shard_id=rank_id,
                                     shuffle=True)
    else:
        data_set = ds.Cifar10Dataset(data_dir,
                                     num_shards=rank_size,
                                     shard_id=rank_id,
                                     shuffle=False)

    resize_height = cifar_cfg.image_height
    resize_width = cifar_cfg.image_width

    # define map operations
    random_crop_op = vision.RandomCrop(
        (32, 32), (4, 4, 4, 4))  # padding_mode default CONSTANT
    random_horizontal_op = vision.RandomHorizontalFlip()
    resize_op = vision.Resize(
        (resize_height, resize_width))  # interpolation default BILINEAR
    rescale_op = vision.Rescale(1.0 / 255.0, 0.0)
    normalize_op = vision.Normalize((0.4914, 0.4822, 0.4465),
                                    (0.24703233, 0.24348505, 0.26158768))
    changeswap_op = vision.HWC2CHW()
    type_cast_op = c_transforms.TypeCast(mstype.int32)

    c_trans = []
    if training:
        c_trans = [random_crop_op, random_horizontal_op]
    c_trans += [resize_op, rescale_op, normalize_op, changeswap_op]

    # apply map operations on images
    data_set = data_set.map(operations=type_cast_op, input_columns="label")
    data_set = data_set.map(operations=c_trans, input_columns="image")

    # apply batch operations
    data_set = data_set.batch(batch_size=cifar_cfg.batch_size,
                              drop_remainder=True)

    # apply repeat operations
    data_set = data_set.repeat(repeat_num)

    return data_set
Example #26
def create_dataset(dataset_path,
                   do_train,
                   batch_size=16,
                   device_num=1,
                   rank=0):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        batch_size(int): the batch size of dataset. Default: 16.
        device_num (int): Number of shards that the dataset should be divided into (default=1).
        rank (int): The shard ID within num_shards (default=0).

    Returns:
        dataset
    """
    if device_num == 1:
        ds = de.ImageFolderDataset(dataset_path,
                                   num_parallel_workers=8,
                                   shuffle=True)
    else:
        ds = de.ImageFolderDataset(dataset_path,
                                   num_parallel_workers=8,
                                   shuffle=True,
                                   num_shards=device_num,
                                   shard_id=rank)
    # define map operations
    if do_train:
        trans = [
            C.RandomCropDecodeResize(299),
            C.RandomHorizontalFlip(prob=0.5),
            C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
        ]
    else:
        trans = [C.Decode(), C.Resize(320), C.CenterCrop(299)]
    trans += [
        C.Normalize(mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5]),
        C.HWC2CHW(),
        C2.TypeCast(mstype.float32)
    ]

    type_cast_op = C2.TypeCast(mstype.int32)
    ds = ds.map(input_columns="image",
                operations=trans,
                num_parallel_workers=8)
    ds = ds.map(input_columns="label",
                operations=type_cast_op,
                num_parallel_workers=8)
    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)
    return ds
Example #27
def create_dataset4(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"):
    """
    create a train or eval imagenet2012 dataset for se-resnet50

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        repeat_num(int): the repeat times of dataset. Default: 1
        batch_size(int): the batch size of dataset. Default: 32
        target(str): the device target. Default: Ascend

    Returns:
        dataset
    """
    device_num, rank_id = 1, 0
    if target == "Ascend":
        device_num, rank_id = _get_rank_info()
    if device_num == 1:
        ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=12, shuffle=True)
    else:
        ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=12, shuffle=True,
                                   num_shards=device_num, shard_id=rank_id)
    image_size = 224
    mean = [123.68, 116.78, 103.94]
    std = [1.0, 1.0, 1.0]

    # define map operations
    if do_train:
        trans = [
            C.RandomCropDecodeResize(image_size, scale=(0.08, 1.0), ratio=(0.75, 1.333)),
            C.RandomHorizontalFlip(prob=0.5),
            C.Normalize(mean=mean, std=std),
            C.HWC2CHW()
        ]
    else:
        trans = [
            C.Decode(),
            C.Resize(292),
            C.CenterCrop(256),
            C.Normalize(mean=mean, std=std),
            C.HWC2CHW()
        ]

    type_cast_op = C2.TypeCast(mstype.int32)
    ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=12)
    ds = ds.map(operations=type_cast_op, input_columns="label", num_parallel_workers=12)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
Example #28
def create_dataset(dataset_path, do_train, rank, group_size, repeat_num=1):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        rank (int): The shard ID within group_size.
        group_size (int): Number of shards that the dataset should be divided into.
        repeat_num(int): the repeat times of dataset. Default: 1.

    Returns:
        dataset
    """
    if group_size == 1:
        ds = de.ImageFolderDataset(dataset_path,
                                   num_parallel_workers=cfg.work_nums,
                                   shuffle=True)
    else:
        ds = de.ImageFolderDataset(dataset_path,
                                   num_parallel_workers=cfg.work_nums,
                                   shuffle=True,
                                   num_shards=group_size,
                                   shard_id=rank)
    # define map operations
    if do_train:
        trans = [
            C.RandomCropDecodeResize(299,
                                     scale=(0.08, 1.0),
                                     ratio=(0.75, 1.333)),
            C.RandomHorizontalFlip(prob=0.5),
            C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
        ]
    else:
        trans = [C.Decode(), C.Resize(299), C.CenterCrop(299)]
    trans += [
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        C.HWC2CHW()
    ]
    type_cast_op = C2.TypeCast(mstype.int32)
    ds = ds.map(operations=trans,
                input_columns="image",
                num_parallel_workers=cfg.work_nums)
    ds = ds.map(operations=type_cast_op,
                input_columns="label",
                num_parallel_workers=cfg.work_nums)
    # apply batch operations
    ds = ds.batch(cfg.batch_size, drop_remainder=True)
    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)
    return ds
Example #29
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
    """
    Create a train or eval dataset.

    Args:
        dataset_path (str): The path of dataset.
        do_train (bool): Whether dataset is used for train or eval.
        repeat_num (int): The repeat times of dataset. Default: 1.
        batch_size (int): The batch size of dataset. Default: 32.

    Returns:
        Dataset.
    """

    do_shuffle = bool(do_train)

    if device_num == 1 or not do_train:
        ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=config.work_nums, shuffle=do_shuffle)
    else:
        ds = de.ImageFolderDataset(dataset_path, num_parallel_workers=config.work_nums,
                                   shuffle=do_shuffle, num_shards=device_num, shard_id=device_id)

    image_length = 299
    if do_train:
        trans = [
            C.RandomCropDecodeResize(image_length, scale=(0.08, 1.0), ratio=(0.75, 1.333)),
            C.RandomHorizontalFlip(prob=0.5),
            C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
            ]
    else:
        trans = [
            C.Decode(),
            C.Resize(image_length),
            C.CenterCrop(image_length)
            ]
    trans += [
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        C.HWC2CHW()
    ]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=config.work_nums)
    ds = ds.map(input_columns="image", operations=trans, num_parallel_workers=config.work_nums)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)
    return ds
Example #30
def create_dataset_cifar10(dataset_path, do_train, cfg, repeat_num=1):
    """
    create a train or eval dataset

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        cfg (dict): the config for creating dataset.
        repeat_num(int): the repeat times of dataset. Default: 1.

    Returns:
        dataset
    """
    dataset_path = os.path.join(
        dataset_path,
        "cifar-10-batches-bin" if do_train else "cifar-10-verify-bin")
    if cfg.group_size == 1:
        data_set = ds.Cifar10Dataset(dataset_path,
                                     num_parallel_workers=cfg.work_nums,
                                     shuffle=True)
    else:
        data_set = ds.Cifar10Dataset(dataset_path,
                                     num_parallel_workers=cfg.work_nums,
                                     shuffle=True,
                                     num_shards=cfg.group_size,
                                     shard_id=cfg.rank)

    # define map operations
    trans = []
    if do_train:
        trans.append(C.RandomCrop((32, 32), (4, 4, 4, 4)))
        trans.append(C.RandomHorizontalFlip(prob=0.5))

    trans.append(C.Resize((299, 299)))
    trans.append(C.Rescale(1.0 / 255.0, 0.0))
    trans.append(
        C.Normalize(mean=[0.4914, 0.4822, 0.4465],
                    std=[0.2023, 0.1994, 0.2010]))
    trans.append(C.HWC2CHW())

    type_cast_op = C2.TypeCast(mstype.int32)
    data_set = data_set.map(operations=trans,
                            input_columns="image",
                            num_parallel_workers=cfg.work_nums)
    data_set = data_set.map(operations=type_cast_op,
                            input_columns="label",
                            num_parallel_workers=cfg.work_nums)
    # apply batch operations
    data_set = data_set.batch(cfg.batch_size, drop_remainder=do_train)
    # apply dataset repeat operation
    data_set = data_set.repeat(repeat_num)
    return data_set
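
Finally, a minimal eager-mode sketch of RandomHorizontalFlip itself (the dummy image is hypothetical; it relies on C vision ops being callable on numpy arrays, as Example #2 above already does):

import numpy as np
import mindspore.dataset.vision.c_transforms as c_vision

img = np.random.randint(0, 255, (32, 32, 3), dtype=np.uint8)
flip_op = c_vision.RandomHorizontalFlip(prob=1.0)  # prob=1.0 flips every call
assert np.array_equal(flip_op(img), img[:, ::-1, :])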