def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
    """
    Create a train or eval dataset.

    Args:
        dataset_path (str): The path of dataset; 'train' or 'eval' is appended.
        do_train (bool): Whether dataset is used for train or eval.
        repeat_num (int): The repeat times of dataset. Default: 1.
        batch_size (int): The batch size of dataset. Default: 32.

    Returns:
        Dataset.
    """
    if do_train:
        dataset_path = os.path.join(dataset_path, 'train')
        do_shuffle = True
    else:
        dataset_path = os.path.join(dataset_path, 'eval')
        do_shuffle = False

    # NOTE(review): device_num and device_id are read from the enclosing
    # scope — confirm they are defined at module level before this is called.
    if device_num == 1 or not do_train:
        ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=do_shuffle)
    else:
        # Shard the training set across devices.
        ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=do_shuffle,
                               num_shards=device_num, shard_id=device_id)

    resize_height = 224
    resize_width = 224
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations
    random_crop_op = C.RandomCrop((32, 32), (4, 4, 4, 4))
    # BUG FIX: flip probability was device_id / (device_id + 1), which is 0.0
    # on device 0 (images never flipped) and varies per device. Use the
    # conventional 0.5 so augmentation is uniform across devices.
    random_horizontal_flip_op = C.RandomHorizontalFlip(prob=0.5)

    resize_op = C.Resize((resize_height, resize_width))
    rescale_op = C.Rescale(rescale, shift)
    normalize_op = C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
    change_swap_op = C.HWC2CHW()

    trans = []
    if do_train:
        # Random crop/flip augmentation only during training.
        trans += [random_crop_op, random_horizontal_flip_op]
    trans += [resize_op, rescale_op, normalize_op, change_swap_op]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
    ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32):
    """
    Create a train or eval dataset.

    Args:
        dataset_path (str): The path of dataset.
        do_train (bool): Whether dataset is used for train or eval.
        repeat_num (int): The repeat times of dataset. Default: 1.
        batch_size (int): The batch size of dataset. Default: 32.

    Returns:
        Dataset.
    """
    # ROBUSTNESS FIX: int(os.getenv(...)) raised TypeError when the variables
    # were unset. Default to a single-device, rank-0 configuration instead;
    # behavior is unchanged when DEVICE_NUM / RANK_ID are provided.
    device_num = int(os.getenv("DEVICE_NUM", "1"))
    rank_id = int(os.getenv("RANK_ID", "0"))

    if device_num == 1:
        ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True)
    else:
        # Shard the dataset across devices.
        ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True,
                               num_shards=device_num, shard_id=rank_id)

    # define map operations
    trans = []
    if do_train:
        # Random crop/flip augmentation only during training.
        trans += [
            C.RandomCrop((32, 32), (4, 4, 4, 4)),
            C.RandomHorizontalFlip(prob=0.5)
        ]
    trans += [
        C.Resize((config.image_height, config.image_width)),
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
        C.HWC2CHW()
    ]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="label", num_parallel_workers=8, operations=type_cast_op)
    ds = ds.map(input_columns="image", num_parallel_workers=8, operations=trans)

    # apply batch operations
    ds = ds.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
def create_dataset1(dataset_path, do_train, repeat_num=1, batch_size=32, target="Ascend"):
    """
    Create a train or evaluate cifar10 dataset for resnet50.

    Args:
        dataset_path (str): The path of dataset.
        do_train (bool): Whether dataset is used for train or eval.
        repeat_num (int): The repeat times of dataset. Default: 1.
        batch_size (int): The batch size of dataset. Default: 32.
        target (str): The device target. Default: Ascend.

    Returns:
        Dataset.
    """
    # Resolve the distributed topology for the current device target.
    if target == "Ascend":
        device_num, rank_id = _get_rank_info()
    else:
        init()
        rank_id = get_rank()
        device_num = get_group_size()

    if device_num == 1:
        ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True)
    else:
        # Shard the dataset across devices.
        ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True,
                               num_shards=device_num, shard_id=rank_id)

    # Augmentations applied only during training.
    train_only_ops = [
        C.RandomCrop((32, 32), (4, 4, 4, 4)),
        C.RandomHorizontalFlip(prob=0.5)
    ]
    # Preprocessing shared by train and eval.
    common_ops = [
        C.Resize((224, 224)),
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
        C.HWC2CHW()
    ]
    trans = (train_only_ops if do_train else []) + common_ops

    label_cast = C2.TypeCast(mstype.int32)

    ds = ds.map(operations=label_cast, input_columns="label", num_parallel_workers=8)
    ds = ds.map(operations=trans, input_columns="image", num_parallel_workers=8)

    # Batch, then repeat.
    ds = ds.batch(batch_size, drop_remainder=True)
    return ds.repeat(repeat_num)
def test_cpp_uniform_augment_random_crop_badinput(num_ops=1):
    """
    Test UniformAugment with greater crop size.

    The pipeline is expected to fail, since the requested crop size
    [224, 224] exceeds the CIFAR-10 image size [32, 32].
    """
    logger.info("Test CPP UniformAugment with random_crop bad input")
    batch_size = 2
    cifar10_dir = "../data/dataset/testCifar10Data"
    ds1 = de.Cifar10Dataset(cifar10_dir, shuffle=False)  # shape = [32,32,3]

    transforms_ua = [
        # Note: crop size [224, 224] > image size [32, 32]
        C.RandomCrop(size=[224, 224]),
        C.RandomHorizontalFlip()
    ]
    uni_aug = C.UniformAugment(operations=transforms_ua, num_ops=num_ops)
    ds1 = ds1.map(input_columns="image", operations=uni_aug)

    # apply DatasetOps
    ds1 = ds1.batch(batch_size, drop_remainder=True, num_parallel_workers=1)
    num_batches = 0
    # BUG FIX: the original try/except allowed the test to pass silently if
    # no exception was raised at all. Record that the expected failure
    # actually happened and assert on it afterwards.
    raised = False
    try:
        for _ in ds1.create_dict_iterator():
            num_batches += 1
    except Exception as e:
        raised = True
        assert "Crop size" in str(e)
    assert raised, "Pipeline should have failed: crop size exceeds image size"
def create_dataset(data_path, batch_size):
    """Build a CIFAR-10 pipeline: resize, rescale, normalize, NCHW, batch.

    No shuffling and no random augmentation are applied.
    """
    ds = de.Cifar10Dataset(data_path, num_parallel_workers=8, shuffle=False)

    # Deterministic image preprocessing chain.
    image_ops = [
        C.Resize((224, 224)),
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
        C.HWC2CHW()
    ]
    label_cast = C2.TypeCast(mstype.int32)

    ds = ds.map(operations=label_cast, input_columns="label", num_parallel_workers=8)
    ds = ds.map(operations=image_ops, input_columns="image", num_parallel_workers=8)

    # Batch, dropping the final partial batch.
    return ds.batch(batch_size, drop_remainder=True)
def create_dataset(args, data_path, batch_size):
    """Build a CIFAR-10 pipeline, sharded across a KungFu cluster when training.

    Note: unlike sibling pipelines, no Resize is applied here — images stay
    at their native size.
    """
    # Shard only for distributed training runs.
    if args.mode == 'train' and args.use_kungfu:
        rank = kfops.kungfu_current_rank()
        size = kfops.kungfu_current_cluster_size()
        ds = de.Cifar10Dataset(data_path, num_parallel_workers=8, shuffle=False,
                               num_shards=size, shard_id=rank)
        print('using shard %d of %d' % (rank, size))
    else:
        ds = de.Cifar10Dataset(data_path, num_parallel_workers=8, shuffle=False)

    # Deterministic image preprocessing chain (no augmentation, no resize).
    image_ops = [
        C.Rescale(1.0 / 255.0, 0.0),
        C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
        C.HWC2CHW()
    ]
    label_cast = C2.TypeCast(mstype.int32)

    ds = ds.map(operations=label_cast, input_columns="label", num_parallel_workers=8)
    ds = ds.map(operations=image_ops, input_columns="image", num_parallel_workers=8)

    # Batch, dropping the final partial batch.
    return ds.batch(batch_size, drop_remainder=True)
def create_dataset(data_path, is_train=True, batch_size=32):
    """Build a CIFAR-10 dataset, augmented and sharded when training."""
    # Imports kept function-local, as in the original.
    import mindspore.common.dtype as mstype
    import mindspore.dataset.engine as de
    import mindspore.dataset.transforms.c_transforms as C2
    import mindspore.dataset.vision.c_transforms as C
    from mindspore.common import set_seed

    set_seed(1)

    # Shard across devices only when both env vars are present.
    num_shards = shard_id = None
    size_env = os.getenv("RANK_SIZE")
    id_env = os.getenv("RANK_ID")
    if size_env is not None and id_env is not None:
        num_shards, shard_id = int(size_env), int(id_env)

    # Train and verify splits live in different subdirectories.
    subdir = "cifar-10-batches-bin" if is_train else "cifar-10-verify-bin"
    data_path = os.path.join(data_path, subdir)
    ds = de.Cifar10Dataset(data_path,
                           shuffle=True,
                           num_shards=num_shards,
                           shard_id=shard_id,
                           num_parallel_workers=8,
                           num_samples=None)

    # Random crop/flip augmentation only during training, then the shared
    # resize/rescale/normalize/NCHW chain.
    ops = []
    if is_train:
        ops += [C.RandomCrop((32, 32), (4, 4, 4, 4)),
                C.RandomHorizontalFlip(prob=0.5)]
    ops += [
        C.Resize((224, 224)),
        C.Rescale(1 / 255.0, 0.),
        C.Normalize(mean=[0.4914, 0.4822, 0.4465],
                    std=[0.2023, 0.1994, 0.2010]),
        C.HWC2CHW()
    ]

    ds = ds.map(input_columns=["image"], operations=ops, num_parallel_workers=8)
    ds = ds.map(input_columns=["label"], operations=C2.TypeCast(mstype.int32),
                num_parallel_workers=8)

    # Drop the final partial batch only when training.
    ds = ds.batch(batch_size=batch_size, drop_remainder=is_train)
    return ds.repeat(count=1)
def create_dataset(dataset_path, do_train, config, repeat_num=1):
    """
    Create a train or eval dataset.

    Args:
        dataset_path (str): The path of dataset; 'train' or 'eval' is appended.
        do_train (bool): Whether dataset is used for train or eval.
        config: Configuration object providing platform, run_distribute,
            image_height, image_width and batch_size.
        repeat_num (int): The repeat times of dataset. Default: 1.

    Returns:
        Dataset.
    """
    if do_train:
        dataset_path = os.path.join(dataset_path, 'train')
        do_shuffle = True
    else:
        dataset_path = os.path.join(dataset_path, 'eval')
        do_shuffle = False

    # Resolve distributed topology; defaults are single-device.
    device_id = 0
    device_num = 1
    if config.platform == "GPU":
        if config.run_distribute:
            from mindspore.communication.management import get_rank, get_group_size
            device_id = get_rank()
            device_num = get_group_size()
    elif config.platform == "Ascend":
        device_id = int(os.getenv('DEVICE_ID'))
        device_num = int(os.getenv('RANK_SIZE'))

    if device_num == 1 or not do_train:
        ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=4, shuffle=do_shuffle)
    else:
        # Shard the training set across devices.
        ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=4, shuffle=do_shuffle,
                               num_shards=device_num, shard_id=device_id)

    resize_height = config.image_height
    resize_width = config.image_width
    buffer_size = 100
    rescale = 1.0 / 255.0
    shift = 0.0

    # define map operations
    random_crop_op = C.RandomCrop((32, 32), (4, 4, 4, 4))
    # BUG FIX: flip probability was device_id / (device_id + 1), which is 0.0
    # on device 0 (images never flipped) and varies per device. Use the
    # conventional 0.5 so augmentation is uniform across devices.
    random_horizontal_flip_op = C.RandomHorizontalFlip(prob=0.5)

    resize_op = C.Resize((resize_height, resize_width))
    rescale_op = C.Rescale(rescale, shift)
    normalize_op = C.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
    change_swap_op = C.HWC2CHW()

    trans = []
    if do_train:
        # Random crop/flip augmentation only during training.
        trans += [random_crop_op, random_horizontal_flip_op]
    trans += [resize_op, rescale_op, normalize_op, change_swap_op]

    type_cast_op = C2.TypeCast(mstype.int32)

    ds = ds.map(input_columns="label", operations=type_cast_op)
    ds = ds.map(input_columns="image", operations=trans)

    # BUG FIX: the shuffle was applied unconditionally, re-shuffling eval data
    # even though do_shuffle was explicitly set to False above. Gate it.
    if do_shuffle:
        ds = ds.shuffle(buffer_size=buffer_size)

    # apply batch operations
    ds = ds.batch(config.batch_size, drop_remainder=True)

    # apply dataset repeat operation
    ds = ds.repeat(repeat_num)

    return ds
from mindspore import context from mindspore.train.model import Model from mindspore.nn.loss.loss import _Loss from mindspore.ops import operations as P from mindspore.ops import functional as F from mindspore.common.tensor import Tensor from mindspore.train.callback import Callback from mindspore.nn.optim.momentum import Momentum from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits from mindspore.train.serialization import load_checkpoint, load_param_into_net from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor context.set_context(mode=context.GRAPH_MODE, enable_auto_mixed_precision=False, device_target="Ascend") train_path = "./datasets/cifar10/train" ds = de.Cifar10Dataset(train_path, num_parallel_workers=8, shuffle=True) print("the cifar dataset size is :", ds.get_dataset_size()) dict1 = ds.create_dict_iterator() datas = dict1.get_next() image = datas["image"].asnumpy() print("the tensor of image is:", image.shape) plt.imshow(np.array(image)) plt.show() def create_dataset(dataset_path, do_train, repeat_num=10, batch_size=32): ds = de.Cifar10Dataset(dataset_path, num_parallel_workers=8, shuffle=True) # define map operations trans = [] if do_train: