def lean_mnist():
    """
    tfaug classification example.

    Writes MNIST into tfrecords, builds an augmented training dataset and a
    plain validation dataset with DatasetCreator, then fits and evaluates a
    small dense classifier.

    Returns
    -------
    None.

    """
    os.makedirs(DATADIR + 'mnist', exist_ok=True)
    train_record = DATADIR + 'mnist/train.tfrecord'
    test_record = DATADIR + 'mnist/test.tfrecord'

    # fetch the MNIST arrays through keras
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

    # store both splits as tfrecords
    for images, targets, record in ((x_train, y_train, train_record),
                                    (x_test, y_test, test_record)):
        TfrecordConverter().from_ary_label(images, targets, record)

    batch_size = 25
    shuffle_buffer = 25

    # parameters shared by the training and validation pipelines
    common = dict(shuffle_buffer=shuffle_buffer,
                  batch_size=batch_size,
                  repeat=True)

    # training pipeline: augmentation enabled
    train_creator = DatasetCreator(**common,
                                   random_zoom=[0.1, 0.1],
                                   random_rotation=20,
                                   random_shear=[10, 10],
                                   random_blur=10,
                                   training=True)
    ds_train, train_cnt = train_creator.from_tfrecords([train_record])

    # validation pipeline: no augmentation parameters
    valid_creator = DatasetCreator(**common, training=False)
    ds_valid, valid_cnt = valid_creator.from_tfrecords([test_record])

    layers = tf.keras.layers
    model = tf.keras.models.Sequential([
        layers.Flatten(input_shape=(28, 28)),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(10),
    ])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(0.002),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['sparse_categorical_accuracy'])

    train_steps = train_cnt // batch_size
    valid_steps = valid_cnt // batch_size

    # learn model
    model.fit(ds_train,
              epochs=10,
              validation_data=ds_valid,
              steps_per_epoch=train_steps,
              validation_steps=valid_steps)

    # evaluation result
    model.evaluate(ds_valid, steps=valid_steps, verbose=2)
def toy_example():
    """
    Toy sample: tfrecord -> augmented dataset -> MobileNetV2 training.

    NOTE(review): only one image path per sample is written to the record
    here, while ``concat_inputs`` reads both ``image_in0`` and ``image_in1``
    -- confirm the intended inputs against the tfaug README.
    """
    # learning parameters
    batch_size, shuffle_buffer = 2, 10

    # inputs and class labels
    filepaths = [DATADIR + 'Lenna.png'] * 10
    class_labels = np.random.randint(0, 10, 10)

    # destination of the generated tfrecord
    path_record = DATADIR + 'multi_input.tfrecord'

    # a single call converts paths and labels to a tfrecord
    TfrecordConverter().from_path_label(filepaths, class_labels, path_record)

    # augmentation parameters
    aug_parms = dict(
        random_rotation=5,
        random_flip_left_right=True,
        random_shear=[5, 5],
        random_brightness=0.2,
        random_crop=None,
        random_blur=[0.5, 1.5],
    )

    # hand augmentation and learning parameters to the dataset creator
    creator = DatasetCreator(shuffle_buffer,
                             batch_size,
                             **aug_parms,
                             repeat=True,
                             training=True)

    # dataset and the number of samples in it
    ds, imgcnt = creator.from_tfrecords(path_record)

    def concat_inputs(inputs, label):
        # multiple inputs are keyed 'image_in0', 'image_in1', ... in `inputs`;
        # bring the second one to 512x512 and stack it on the channel axis
        second = tf.image.resize(inputs['image_in1'], (512, 512))
        stacked = tf.concat([inputs['image_in0'], second], axis=-1)
        return stacked, label

    ds = ds.map(concat_inputs)

    # define the model
    mbnet = tf.keras.applications.MobileNetV2(input_shape=[512, 512, 6],
                                              include_top=True,
                                              weights=None)
    mbnet.compile(optimizer="adam", loss="mse", metrics=["mae"])

    # learn the model
    steps = imgcnt // batch_size
    mbnet.fit(ds, epochs=10, steps_per_epoch=steps)
def learn_ade20k():
    """
    tfaug segmentation example on ADE20k.

    Downloads / converts the dataset, builds cropped training and validation
    datasets with DatasetCreator and trains the U-Net from ``def_unet``.
    """
    crop_size = [256, 256]  # cropped input image size
    batch_size = 5

    # download and convert to tfrecords
    overlap_buffer = 256 // 4
    download_and_convert_ADE20k(crop_size, overlap_buffer)

    record_dir = DATADIR + 'ADE20k/ADEChallengeData2016/tfrecord/'

    # parameters shared by the training and validation pipelines
    shared = dict(shuffle_buffer=batch_size,
                  batch_size=batch_size,
                  repeat=True,
                  standardize=True)

    # training dataset: geometric augmentation plus random crop
    tfrecords_train = glob(record_dir + 'training_*.tfrecords')
    train_creator = DatasetCreator(**shared,
                                   random_zoom=[0.1, 0.1],
                                   random_rotation=10,
                                   random_shear=[10, 10],
                                   random_crop=crop_size,
                                   dtype=tf.float16,
                                   training=True)
    ds_train, train_cnt = train_creator.from_tfrecords(tfrecords_train)

    # validation dataset: crop only
    tfrecords_valid = glob(record_dir + 'validation_*.tfrecords')
    valid_creator = DatasetCreator(**shared,
                                   random_crop=crop_size,
                                   dtype=tf.float16,
                                   training=False)
    ds_valid, valid_cnt = valid_creator.from_tfrecords(tfrecords_valid)

    # define model: 150 classes + padding area
    model = def_unet((*crop_size, 3), 151)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(0.002),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['sparse_categorical_accuracy'])

    train_steps = train_cnt // batch_size
    valid_steps = valid_cnt // batch_size

    model.fit(ds_train,
              epochs=10,
              validation_data=ds_valid,
              steps_per_epoch=train_steps,
              validation_steps=valid_steps)

    model.evaluate(ds_valid, steps=valid_steps, verbose=2)
def test_from_path(self):
    """
    Check that tfrecords written by ``from_path_label`` are read back intact.

    Three cases are exercised with the same source image: a segmentation
    label, multiple inputs with multiple segmentation labels, and a class
    label. No augmentation parameters are set, so every decoded image must
    equal the original.
    """
    BATCH_SIZE = 2
    flist_imgs = [DATADIR + 'Lenna.png'] * 10
    flist_seglabels = flist_imgs.copy()
    img_org = np.array(Image.open(flist_imgs[0]))
    clslabels = list(range(10))

    path_tfrecord = DATADIR + 'test_from_path.tfrecord'
    TfrecordConverter().from_path_label(flist_imgs, flist_seglabels,
                                        path_tfrecord)

    with self.subTest('check segmentation label'):
        # check segmentation label
        dc = DatasetCreator(1, BATCH_SIZE, training=True)
        ds, imgcnt = dc.from_tfrecords([path_tfrecord])

        for img, label in ds:
            assert (img == img_org).numpy().all(), 'image is changed'
            assert (label == img_org).numpy().all(), 'labels is changed'

    with self.subTest('check multiple inputs and labels'):
        # check multiple inputs and multiple segmentation labels
        dc = DatasetCreator(1, BATCH_SIZE, training=True)
        path_tfrecord = DATADIR + 'test_from_path_multi.tfrecord'
        TfrecordConverter().from_path_label(
            list(zip(flist_imgs, flist_imgs)),
            list(zip(flist_imgs, flist_imgs)), path_tfrecord)
        ds, imgcnt = dc.from_tfrecords([path_tfrecord])

        for features in ds:
            assert (features['image_in0'] == img_org
                    ).numpy().all(), 'image is changed'
            assert (features['image_in1'] == img_org
                    ).numpy().all(), 'image1 is changed'
            assert (features['label_in0'] == img_org
                    ).numpy().all(), 'labels is changed'
            assert (features['label_in1'] == img_org
                    ).numpy().all(), 'labels is changed'

    with self.subTest('check class label'):
        # check class label
        path_tfrecord = DATADIR + 'test_from_path.tfrecord'
        TfrecordConverter().from_path_label(flist_imgs, clslabels,
                                            path_tfrecord)
        # first argument False: labels are compared in file order below
        dc = DatasetCreator(False, BATCH_SIZE, training=True)
        ds, datacnt = dc.from_tfrecords([path_tfrecord])

        for i, (img, label) in enumerate(ds):
            expected = clslabels[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
            assert (img == img_org).numpy().all(), 'image is changed'
            # the message literal was left unterminated in the original
            assert all(label.numpy() == expected), 'label is changed'
def test_tf_function(self):
    """Fetch one augmented batch inside a ``tf.function`` and plot it."""
    BATCH_SIZE = 5
    num_samples = 10 * BATCH_SIZE

    # data augmentation configurations:
    DATAGEN_CONF = dict(
        standardize=False,
        resize=[100, 100],
        random_flip_left_right=True,
        random_flip_up_down=False,
        random_zoom=[0.2, 0.2],
        random_contrast=[0.6, 1.4],
        random_crop=None,  # what to set random_crop
        random_noise=5,
        input_shape=[BATCH_SIZE, 512, 512, 3],
        num_transforms=10,
    )

    flist = [DATADIR + 'Lenna.png'] * num_samples
    labels = [0] * num_samples

    creator = DatasetCreator(BATCH_SIZE,
                             BATCH_SIZE,
                             **DATAGEN_CONF,
                             training=True)
    ds = creator.from_path(flist, labels)
    taked = iter(ds.take(10))

    @tf.function
    def one_step():
        # pull the next batch from the python iterator captured above
        batch_img, batch_lbl = next(taked)
        return batch_img, batch_lbl

    img, lbl = one_step()
    assert img.shape[0] == BATCH_SIZE, 'invalid batch size'

    # the returned tensors must be iterable sample by sample
    piyo0, piyo1 = next(zip(img, lbl))

    out_path = DATADIR + 'test_tf_function.png'
    tool.plot_dsresult(((img, lbl), ), BATCH_SIZE, 1, out_path)
def test_set_inputs_shapes(self):
    """Check the shapes reported by ``DatasetCreator._get_inputs_shapes``."""
    BATCH_SIZE = 2
    large_paths = [DATADIR + 'Lenna.png'] * 10
    small_paths = [DATADIR + 'Lenna_crop.png'] * 10
    clslabels = list(range(10))

    dc = DatasetCreator(False, BATCH_SIZE, training=True)

    ds_large = DatasetCreator(1, BATCH_SIZE).from_path(large_paths)
    ds_small = DatasetCreator(1, BATCH_SIZE).from_path(small_paths)
    ds_seg_label = DatasetCreator(1, BATCH_SIZE).from_path(large_paths)
    ds_cls_label = tf.data.Dataset.from_tensor_slices(clslabels)
    ds_cls_label = ds_cls_label.batch(BATCH_SIZE)

    full_shape = (BATCH_SIZE, 512, 512, 3)

    # segmentation: one input, image-shaped label
    seg_ds = tf.data.Dataset.zip((ds_large, ds_seg_label))
    shapes_in, shapes_lbl = dc._get_inputs_shapes(seg_ds, 'segmentation', 1)
    assert shapes_in == [full_shape], 'invalid shape'
    assert shapes_lbl == [full_shape], 'invalid label shape'

    # classification: two inputs of different size, class label
    cls_ds = tf.data.Dataset.zip((ds_large, ds_small, ds_cls_label))
    shapes_in, shapes_lbl = dc._get_inputs_shapes(cls_ds, 'class', 2)
    assert shapes_in[0] == full_shape
    assert shapes_in[1] == (BATCH_SIZE, 256, 512, 3)
    # NOTE(review): (BATCH_SIZE) is a plain int, not a 1-tuple -- confirm
    assert shapes_lbl == [(BATCH_SIZE)]
def prepare_ds(train):
    """
    Turn ``train`` into an augmented two-input segmentation dataset.

    Each element's image and mask are resized to RESIZE and duplicated, the
    result is batched and passed through DatasetCreator.from_dataset, and
    the output dictionary is regrouped into (inputs dict, concatenated
    labels).
    """

    def _resize_and_duplicate(sample):
        image = tf.image.resize(sample['image'], RESIZE)
        mask = tf.image.resize(sample['segmentation_mask'], RESIZE)
        return (image, image, mask, mask)

    def _regroup(features):
        model_inputs = {
            'in1': features['image_in0'],
            'in2': features['image_in1'],
        }
        merged_labels = tf.concat(
            [features['label_in0'], features['label_in1']], axis=-1)
        return (model_inputs, merged_labels)

    batched = train.map(_resize_and_duplicate).batch(BATCH_SIZE)
    creator = DatasetCreator(10, BATCH_SIZE, **augprm._asdict())
    augmented = creator.from_dataset(batched, 'segmentation', 2)
    return augmented.map(_regroup)
def quick_toy_sample():
    """Minimal tfaug sample: image paths -> dataset -> MobileNetV2 fit."""
    # source image and labels
    imgpaths = ['testdata/tfaug/Lenna.png'] * 10
    labels = np.random.randint(0, 255, 10)

    # configure the creator; augmentation params would be added here
    creator = DatasetCreator(shuffle_buffer=10,
                             batch_size=2,
                             repeat=True,
                             standardize=True,
                             training=True)
    dataset = creator.from_path(imgpaths, labels)

    # define and compile the model
    mbnet = tf.keras.applications.MobileNetV2(include_top=True, weights=None)
    mbnet.compile(optimizer="adam", loss="mse", metrics=["mae"])

    # learn the model
    mbnet.fit(dataset, epochs=10, steps_per_epoch=10)
def test_sharded_from_path(self):
    """Check sharded tfrecord output for segmentation and class labels."""
    flist_imgs = [DATADIR + 'Lenna.png'] * 10
    flist_seglabels = flist_imgs.copy()
    img_org = np.array(Image.open(flist_imgs[0]))
    clslabels = list(range(10))

    # segmentation labels: 10 images, 3 per shard -> 4 shards
    seg_record = DATADIR + 'test_shards_from_path.tfrecord'
    TfrecordConverter().from_path_label(flist_imgs,
                                        flist_seglabels,
                                        seg_record,
                                        image_per_shard=3)
    seg_shards = glob(DATADIR + 'test_shards_from_path_?.tfrecord')
    assert len(seg_shards) == 4, 'num of shards is invalid'

    # check segmentation label
    seg_creator = DatasetCreator(1, 1, training=True)
    ds, imgcnt = seg_creator.from_tfrecords(seg_shards)
    for img, label in ds:
        assert (img == img_org).numpy().all(), 'image is changed'
        assert (label == img_org).numpy().all(), 'labels is changed'

    # class labels: 10 images, 2 per shard -> 5 shards
    cls_record = DATADIR + 'test_shards_from_path_seg.tfrecord'
    TfrecordConverter().from_path_label(flist_imgs,
                                        clslabels,
                                        cls_record,
                                        image_per_shard=2)
    cls_shards = glob(DATADIR + 'test_shards_from_path_seg_?.tfrecord')
    assert len(cls_shards) == 5, 'num of shards is invalid'

    # check class label
    cls_creator = DatasetCreator(False, 1, training=True)
    ds, datacnt = cls_creator.from_tfrecords(cls_shards)

    list_label = []
    for img, label in ds:
        list_label.append(label.numpy())
        assert (img == img_org).numpy().all(), 'image was changed'

    # glob order of the shards is not fixed, so compare sorted labels
    label_all = np.concatenate(sorted(list_label))
    assert all(label_all == clslabels), 'label was changed'
def download_and_convert_ADE20k(input_size, overlap_buffer):
    """
    Download the ADE20k dataset and convert it into tfrecord format.

    Each stage is skipped when its output already exists: download of the
    zip archive, extraction, splitting of images / annotations into patches
    and conversion of the patches into sharded tfrecords. Grayscale source
    images are rewritten as RGB in place.

    Parameters
    ----------
    input_size : passed to ``TfrecordConverter.split_to_patch`` as the
        patch size.
    overlap_buffer : scalar overlap; passed to ``split_to_patch`` as
        ``[overlap_buffer, overlap_buffer]``.

    Raises
    ------
    requests.HTTPError
        If the server answers the download request with an error status.
    AssertionError
        If the number of downloaded bytes does not match the announced
        content length (or no content length was announced).
    """
    link = r'http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip'
    dstdir = DATADIR + 'ADE20k/'
    os.makedirs(dstdir, exist_ok=True)
    zip_path = dstdir + 'ADEChallengeData2016.zip'

    if not os.path.isfile(zip_path):
        print('start donloading ADE20k...', flush=True)
        with requests.get(link, stream=True) as response:
            # do not store an HTTP error page as the archive
            response.raise_for_status()
            total_size_in_bytes = int(
                response.headers.get('content-length', 0))
            block_size = 1024  # 1 Kilobyte
            downloaded = 0
            progress_bar = tqdm(total=total_size_in_bytes,
                                unit='iB',
                                unit_scale=True)
            try:
                with open(zip_path, 'wb') as f:
                    for data in response.iter_content(block_size):
                        progress_bar.update(len(data))
                        downloaded += len(data)
                        f.write(data)
            except BaseException:
                # An interrupted download must not leave a partial archive,
                # otherwise the next run would skip the download step.
                if os.path.isfile(zip_path):
                    os.remove(zip_path)
                raise
            finally:
                progress_bar.close()

        if total_size_in_bytes == 0 or downloaded != total_size_in_bytes:
            os.remove(zip_path)
            # raised explicitly so the check survives ``python -O``
            raise AssertionError("download ADE20k failed")

    if len(glob(dstdir +
                'ADEChallengeData2016/images/validation/ADE_*.jpg')) != 2000:
        print('unzipping ADE20k...')
        from zipfile import ZipFile
        with ZipFile(zip_path, 'r') as zipObj:
            # Extract all the contents of the zip file into dstdir
            zipObj.extractall(dstdir)

    dstdir += 'ADEChallengeData2016/'

    print('convert grayscale images to RGB:', 'test')
    for dirname in ['training', 'validation']:
        for img_path in glob(f'{dstdir}images/{dirname}/ADE_*.jpg'):
            # close every handle right away: thousands of files are scanned
            with Image.open(img_path) as im:
                if len(im.getbands()) >= 3:
                    continue
                rgb = im.convert('RGB')
            rgb.save(img_path)
            print('converted L to RGB:', img_path)

    # plot random label sample
    print('start check ADE20k_label', 'test')
    check_ADE20k_label()

    converter = TfrecordConverter()

    patchdir = dstdir + 'patch/'
    if len(glob(patchdir + 'images/*/ADE_*_no*.jpg')) < 6e4:
        print('splitting imgs to patch...', flush=True)

        # split images into patch
        overlap = [overlap_buffer, overlap_buffer]
        for dirname in ['training', 'validation']:
            print('convert', dirname, 'into patch')
            os.makedirs(f'{patchdir}images/{dirname}', exist_ok=True)
            os.makedirs(f'{patchdir}annotations/{dirname}', exist_ok=True)
            srcimgs = glob(f'{dstdir}/images/{dirname}/ADE_*.jpg')
            for path in tqdm(srcimgs):
                src = Path(path)
                basename = src.stem
                # annotation lives beside 'images' with the same stem
                label_path = os.sep.join(src.parts[:-3] +
                                         ('annotations', dirname,
                                          basename + '.png'))
                with Image.open(path) as im_file:
                    im = np.array(im_file)
                with Image.open(label_path) as lb_file:
                    lb = np.array(lb_file)

                img_patches = converter.split_to_patch(im,
                                                       input_size,
                                                       overlap,
                                                       dtype=np.uint8)
                lbl_patches = converter.split_to_patch(lb,
                                                       input_size,
                                                       overlap,
                                                       dtype=np.uint8)

                for no, (img_patch, lbl_patch) in enumerate(
                        zip(img_patches, lbl_patches)):
                    Image.fromarray(img_patch).save(
                        f'{patchdir}images/{dirname}/{basename}_no{no}.jpg')
                    Image.fromarray(lbl_patch).save(
                        f'{patchdir}annotations/{dirname}/{basename}_no{no}.png'
                    )

    image_per_shards = 1000
    if len(glob(dstdir + 'tfrecord/*_*.tfrecords')) != 101:
        print('convert ADE20k to tfrecord', flush=True)
        os.makedirs(dstdir + 'tfrecord', exist_ok=True)

        for dirname in ['training', 'validation']:
            imgs = glob(f'{patchdir}/images/{dirname}/ADE_*.jpg')
            # shuffle image order
            random.shuffle(imgs)

            path_labels = [
                os.sep.join(
                    Path(path).parts[:-3] +
                    ('annotations', dirname, Path(path).stem + '.png'))
                for path in imgs
            ]

            converter.from_path_label(imgs, path_labels,
                                      dstdir + f'tfrecord/{dirname}.tfrecords',
                                      image_per_shards)

    path_tfrecord = DATADIR + 'ADE20k/ADEChallengeData2016/tfrecord/validation_1.tfrecords'
    # check converted tfrecord
    dc = DatasetCreator(False, 10, training=True)
    ds, datacnt = dc.from_tfrecords([path_tfrecord])
    piyo = next(iter(ds.take(1)))
    plt.imshow(piyo[0][5])
def test_from_ary_label(self):
    """
    Check datasets built from tfrecords written by ``from_ary_label``.

    A 4-channel image array is stored with class labels, with image labels
    and without labels; each time the batch count and the output shapes
    after ``preproc`` and random crop are verified.
    """
    random_crop_size = [100, 254]
    BATCH_SIZE = 2

    # data augmentation configurations:
    DATAGEN_CONF = dict(
        standardize=True,
        resize=None,
        random_rotation=5,
        random_flip_left_right=True,
        random_flip_up_down=False,
        random_shift=[.1, .1],
        random_zoom=[0.2, 0.2],
        random_shear=[5, 5],
        random_brightness=0.2,
        random_contrast=[0.6, 1.4],
        random_crop=random_crop_size,
        random_noise=100,
        num_transforms=10,
    )

    with Image.open(DATADIR + 'Lenna.png').convert('RGB') as img:
        image = np.asarray(img)
    image = np.tile(image, (10 * BATCH_SIZE, 1, 1, 1))

    # add channel 4 (all zeros)
    extra_channel = np.zeros(image.shape[:3], dtype=np.uint8)
    image = np.concatenate([image, extra_channel[:, :, :, np.newaxis]],
                           axis=3)

    labels = list(range(10)) * BATCH_SIZE
    path_tfrecord = DATADIR + 'ds_from_tfrecord.tfrecord'
    height_width = image.shape[1:3]

    with self.subTest('classification'):
        # test for classification
        TfrecordConverter().from_ary_label(image, labels, path_tfrecord)

        # preproc drops the 4th channel, so declare a 3-channel input
        DATAGEN_CONF['input_shape'] = [BATCH_SIZE, *height_width, 3]

        def drop_input_channel(img, lbl):
            return (img[:, :, :, :3], lbl)

        creator = DatasetCreator(BATCH_SIZE * 10,
                                 BATCH_SIZE,
                                 preproc=drop_input_channel,
                                 **DATAGEN_CONF,
                                 training=True)
        ds, cnt = creator.from_tfrecords([path_tfrecord])

        test = next(iter(ds))
        rep_cnt = 0
        for img, label in iter(ds):
            rep_cnt += 1

        assert rep_cnt == 10, "repetition count is invalid"
        assert img.shape[1:3] == random_crop_size, "crop size is invalid"
        assert img.shape[3] == 3, "data shape is invalid"

    with self.subTest('segmentation'):
        # test for segmentation
        TfrecordConverter().from_ary_label(image, image, path_tfrecord)

        def drop_label_channel(img, lbl):
            return (img, lbl[:, :, :, :3])

        DATAGEN_CONF['input_shape'] = [BATCH_SIZE, *image.shape[1:]]
        DATAGEN_CONF['input_label_shape'] = [BATCH_SIZE, *height_width, 3]

        creator = DatasetCreator(BATCH_SIZE * 10,
                                 BATCH_SIZE,
                                 preproc=drop_label_channel,
                                 **DATAGEN_CONF,
                                 training=True)
        ds, cnt = creator.from_tfrecords([path_tfrecord])

        rep_cnt = 0
        for img, label in iter(ds):
            rep_cnt += 1

        assert rep_cnt == 10, "repetition count is invalid"
        assert img.shape[1:3] == random_crop_size, "crop size is invalid"
        assert img.shape[3] == 4, "data shape is invalid"
        assert label.shape[3] == 3, "data shape is invalid"

    with self.subTest('no label'):
        # test for no labels
        DATAGEN_CONF['input_shape'] = [BATCH_SIZE, *image.shape[1:]]
        TfrecordConverter().from_ary_label(image, None, path_tfrecord)

        creator = DatasetCreator(BATCH_SIZE * 10,
                                 BATCH_SIZE,
                                 **DATAGEN_CONF,
                                 training=True)
        ds, cnt = creator.from_tfrecords([path_tfrecord])

        rep_cnt = 0
        for img in iter(ds):
            rep_cnt += 1

        assert rep_cnt == 10, "repetition count is invalid"
        assert img.shape == [BATCH_SIZE, *random_crop_size,
                             4], "crop size is invalid"
def test_private_functions_in_DatasetCreator(self):
    """
    Walk through DatasetCreator's private pipeline steps by hand.

    A tfrecord with three inputs per sample (png, single-channel float tif,
    png) and a class label is written, then the dataset is assembled step by
    step with ``_get_ds_tfrecord``, ``_gen_example``, ``_decoder_creator``,
    ``_get_inputs_shapes``, ``_apply_aug`` and ``_ds_to_dict``. The feature
    dtypes and the final batch shapes are asserted.
    """
    BATCH_SIZE = 2

    img_org = Image.open(DATADIR + 'Lenna.png')
    shape = list(np.array(img_org).shape)

    # float32 version of the image; only its first channel is saved as tif
    fp32 = np.array(Image.open(DATADIR + 'Lenna.png')).astype(
        np.float32) // 256
    Image.fromarray(fp32[:, :, 0]).save(DATADIR + 'Lenna.tif')

    clslabels = list(range(10))

    # every sample has three input images
    flist_imgs = [(DATADIR + 'Lenna.png', DATADIR + 'Lenna.tif',
                   DATADIR + 'Lenna.png') for i in range(10)]

    path_tfrecord = DATADIR + 'test_3_inimgs.tfrecord'
    TfrecordConverter().from_path_label(flist_imgs, clslabels,
                                        path_tfrecord)

    dc = DatasetCreator(1, BATCH_SIZE, training=True)

    # the same record is listed twice
    path_tfrecords = [path_tfrecord, path_tfrecord]
    (ds, num_img, label_type, imgs_dtype, imgs_shape, labels_shape,
     labels_dtype) = dc._get_ds_tfrecord(1, path_tfrecords)

    # test the generated example formats and decoders
    example_formats = dc._gen_example(label_type, labels_dtype, imgs_dtype,
                                      imgs_shape)
    decoders = dc._decoder_creator(label_type, labels_dtype, labels_shape,
                                   imgs_dtype, imgs_shape)

    # images are stored as strings, the class label as int64
    assert example_formats['image_in0'].dtype == tf.string
    assert example_formats['image_in1'].dtype == tf.string
    assert example_formats['image_in2'].dtype == tf.string
    assert example_formats['label_in0'].dtype == tf.int64

    # batch, parse the serialized examples, then decode them
    ds_decoded = (ds.batch(BATCH_SIZE).apply(
        tf.data.experimental.parse_example_dataset(example_formats)).map(
            decoders))

    # define augmentation
    datagen_confs = {'random_rotation': 5, 'num_transforms': 5}

    inputs_shape, input_label_shape = dc._get_inputs_shapes(
        ds_decoded, label_type, len(imgs_dtype))

    # all AugmentImg instances below receive the same seeds array
    seeds = np.random.uniform(0, 2**32, (int(1e6)))

    if len(imgs_dtype) > 1:
        # multiple input: one augmentation function per input
        aug_funs = []
        for shape in inputs_shape:
            datagen_confs['input_shape'] = shape
            aug_funs.append(AugmentImg(**datagen_confs, seeds=seeds))
        if label_type == 'segmentation':
            # image labels get their own augmentation function
            datagen_confs['input_shape'] = input_label_shape
            aug_funs.append(AugmentImg(**datagen_confs, seeds=seeds))
        elif label_type == 'class':
            # class labels pass through unchanged
            aug_funs.append(lambda x: x)

        aug_fun = dc._apply_aug(aug_funs)

    ds_aug = ds_decoded.map(aug_fun)

    # name the outputs with the example-format keys
    ds_out = ds_aug.map(dc._ds_to_dict(example_formats.keys()))
    test_ret = next(iter(ds_out))

    assert test_ret['image_in0'].shape == [BATCH_SIZE, *imgs_shape[0]
                                           ], "invalid image 0 size"
    assert test_ret['image_in1'].shape == [BATCH_SIZE, *imgs_shape[1]
                                           ], "invalid image 1 size"
    assert test_ret['image_in2'].shape == [BATCH_SIZE, *imgs_shape[2]
                                           ], "invalid image 2 size"
    assert test_ret['label_in0'].shape == BATCH_SIZE, "invalid label size"
def test_multi_inputs_labels(self):
    """
    Check tfrecords holding three inputs per sample with various labels.

    Covered cases: three class labels per sample, one class label, one
    segmentation label with random rotation, and three segmentation labels.
    Without augmentation the decoded inputs must equal the sources; with
    rotation, inputs and label must receive the same transformation.
    """
    BATCH_SIZE = 2
    NUM_DATA = 10
    img_org = np.array(Image.open(DATADIR + 'Lenna.png'))
    clslabels = list(range(NUM_DATA))

    # test uint8 and float32 tiff
    # save as float32 tiff
    fp32 = np.array(Image.open(DATADIR + 'Lenna.png')).astype(
        np.float32) // 256
    Image.fromarray(fp32[:, :, 0]).save(DATADIR + 'Lenna.tif')

    flist_imgs = [(DATADIR + 'Lenna.png', DATADIR + 'Lenna.tif',
                   DATADIR + 'Lenna.png') for _ in range(NUM_DATA)]

    with self.subTest('3 inputs 3 labels classification'):
        # test 3 images in tfrecord with 3 class labels per sample
        path_tfrecord = DATADIR + 'test_3_inimgs_seg.tfrecord'
        TfrecordConverter().from_path_label(flist_imgs,
                                            [list(range(3))] * NUM_DATA,
                                            path_tfrecord)

        dc = DatasetCreator(False,
                            BATCH_SIZE,
                            num_transforms=20,
                            training=True)
        ds, datacnt = dc.from_tfrecords([path_tfrecord])

        for inputs in ds:
            assert (inputs['image_in0'] == img_org
                    ).numpy().all(), 'in_image0 is changed'
            assert (inputs['image_in1'] == fp32
                    ).numpy().all(), 'in_image1 is changed'
            assert (inputs['image_in2'] == img_org
                    ).numpy().all(), 'in_image2 is changed'
            # label k of every sample is k; report the key that failed
            for k in range(3):
                assert (inputs[f'label_in{k}'] == [k] * BATCH_SIZE
                        ).numpy().all(), f'label_in{k} is changed'

    with self.subTest('3 inputs classification'):
        # test 3 images in tfrecord and classification
        path_tfrecord = DATADIR + 'test_3_inimgs.tfrecord'
        TfrecordConverter().from_path_label(flist_imgs, clslabels,
                                            path_tfrecord)

        dc = DatasetCreator(False,
                            BATCH_SIZE,
                            num_transforms=20,
                            training=True)
        ds, datacnt = dc.from_tfrecords([path_tfrecord])

        for i, inputs in enumerate(ds):
            expected = clslabels[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
            assert (inputs['image_in0'] == img_org
                    ).numpy().all(), 'in_image0 is changed'
            assert (inputs['image_in1'] == fp32
                    ).numpy().all(), 'in_image1 is changed'
            assert (inputs['image_in2'] == img_org
                    ).numpy().all(), 'in_image2 is changed'
            assert all(inputs['label_in0'].numpy() == expected
                       ), 'label is changed'

    with self.subTest('3 inputs segmentation and augmentation'):
        # test 3 images in tfrecord and segmentation
        path_tfrecord = DATADIR + 'test_3_inimgs_seg.tfrecord'
        TfrecordConverter().from_path_label(flist_imgs,
                                            [DATADIR + 'Lenna.png'] *
                                            NUM_DATA, path_tfrecord)

        dc = DatasetCreator(False,
                            BATCH_SIZE,
                            random_rotation=20,
                            num_transforms=20,
                            training=True)
        ds, datacnt = dc.from_tfrecords([path_tfrecord])

        for inputs in ds:
            # image_in0 is rotated, so it is compared with the other
            # outputs instead of with the original image
            assert (inputs['image_in1'] == fp32
                    ).numpy().all(), 'in_image1 is changed'
            assert (inputs['image_in2'] == inputs['image_in0']).numpy(
            ).all(
            ), 'in_image0 and in_image2 do not have same transformation'
            assert (inputs['label_in0'] == inputs['image_in0']).numpy(
            ).all(
            ), 'label_in0 and in_image0 do not have same transformation'

    with self.subTest('3 inputs 3 labels segmentation'):
        # test 3 images in tfrecord and segmentation
        path_tfrecord = DATADIR + 'test_3_inimgs_seg.tfrecord'
        TfrecordConverter().from_path_label(flist_imgs, flist_imgs,
                                            path_tfrecord)

        dc = DatasetCreator(False,
                            BATCH_SIZE,
                            num_transforms=20,
                            training=True)
        ds, datacnt = dc.from_tfrecords([path_tfrecord])

        for inputs in ds:
            assert (inputs['image_in0'] == img_org
                    ).numpy().all(), 'in_image0 is changed'
            assert (inputs['image_in1'] == fp32
                    ).numpy().all(), 'in_image1 is changed'
            assert (inputs['image_in2'] == img_org
                    ).numpy().all(), 'in_image2 is changed'
            assert (inputs['label_in0'] == img_org).numpy().all(), \
                'label_in0 is changed'
            assert (inputs['label_in1'] == fp32).numpy().all(), \
                'label_in1 is changed'
            assert (inputs['label_in2'] == img_org).numpy().all(), \
                'label_in2 is changed'
def test_from_tfrecord_sample_ratio(self):
    """
    Check ``ratio_samples`` when mixing two tfrecord groups.

    One record holds only label 0 and the other only label 1; the sampled
    labels are inspected for a heavily skewed ratio, an even ratio and a
    1:10 ratio. The checks are statistical and may fail occasionally.
    """
    random_crop_size = [100, 254]
    BATCH_SIZE = 5
    num_samples = 10 * BATCH_SIZE

    # data augmentation configurations:
    DATAGEN_CONF = dict(
        standardize=True,
        resize=None,
        random_rotation=5,
        random_flip_left_right=True,
        random_flip_up_down=False,
        random_shift=[.1, .1],
        random_zoom=[0.2, 0.2],
        random_shear=[5, 5],
        random_brightness=0.2,
        random_hue=0.01,
        random_contrast=[0.6, 1.4],
        random_crop=random_crop_size,
        random_noise=100,
        random_saturation=[0.5, 2],
        num_transforms=10,
    )

    flist = [DATADIR + 'Lenna.png'] * num_samples

    # one record per label value
    path_tfrecord_0 = DATADIR + 'ds_from_tfrecord_0.tfrecord'
    TfrecordConverter().from_path_label(flist, [0] * num_samples,
                                        path_tfrecord_0)
    path_tfrecord_1 = DATADIR + 'ds_from_tfrecord_1.tfrecord'
    TfrecordConverter().from_path_label(flist, [1] * num_samples,
                                        path_tfrecord_1)

    record_groups = [[path_tfrecord_0], [path_tfrecord_1]]

    # heavily skewed ratio: the first batch should hold only label 1
    creator = DatasetCreator(5,
                             10,
                             repeat=False,
                             **DATAGEN_CONF,
                             training=True)
    skewed = np.array([0.1, 1000], dtype=np.float32)
    ds, cnt = creator.from_tfrecords(record_groups, ratio_samples=skewed)
    img, label = next(iter(ds.take(1)))
    assert img.shape[1:3] == random_crop_size, "crop size is invalid"
    assert all(
        label == 1), "sampled label is invalid this sometimes happen"

    # even ratio with batch size 50: two batches holding both labels
    creator = DatasetCreator(5,
                             50,
                             repeat=False,
                             **DATAGEN_CONF,
                             training=True)
    even = np.array([1, 1], dtype=np.float32)
    ds, cnt = creator.from_tfrecords(record_groups, ratio_samples=even)
    rep_cnt = 0
    for img, label in iter(ds):
        rep_cnt += 1
    assert rep_cnt == 2, "repetition count is invalid"
    assert any(label == 1) and any(label == 0), "sampled label is invalid"

    # check for sampling ratio
    creator = DatasetCreator(5,
                             10,
                             repeat=True,
                             **DATAGEN_CONF,
                             training=True)
    one_to_ten = np.array([1, 10], dtype=np.float32)
    ds, cnt = creator.from_tfrecords(record_groups,
                                     ratio_samples=one_to_ten)
    ds = ds.take(200)

    cnt_1, cnt_0 = 0, 0
    for img, label in ds:
        values = label.numpy()
        cnt_0 += (values == 0).sum()
        cnt_1 += (values == 1).sum()

    assert 1/10 - 1/100 < cnt_0 / cnt_1 < 1/10 + 1/100,\
        "sampling ratio is invalid. this happen randomely. please retry:"\
        + str(cnt_0/cnt_1)
def test_from_tfrecord(self):
    """
    Check augmented datasets read from tfrecords.

    Classification and segmentation records are each read back with the
    full augmentation configuration; batch count and crop size are asserted
    and the first batches are plotted.
    """
    random_crop_size = [100, 254]
    BATCH_SIZE = 2
    num_samples = 10 * BATCH_SIZE

    # data augmentation configurations:
    DATAGEN_CONF = dict(
        standardize=True,
        resize=None,
        random_rotation=5,
        random_flip_left_right=True,
        random_flip_up_down=False,
        random_shift=[.1, .1],
        random_zoom=[0.2, 0.2],
        random_shear=[5, 5],
        random_brightness=0.2,
        random_hue=0.01,
        random_contrast=[0.6, 1.4],
        random_crop=random_crop_size,
        random_noise=100,
        random_saturation=[0.5, 2],
        num_transforms=10,
    )

    flist = [DATADIR + 'Lenna.png'] * num_samples
    labels = [0] * num_samples
    plot_path = DATADIR + 'test_ds_from_tfrecord.png'

    # test for classification
    with self.subTest('for classification'):
        path_tfrecord_0 = DATADIR + 'ds_from_tfrecord_0.tfrecord'
        TfrecordConverter().from_path_label(flist, labels, path_tfrecord_0)

        creator = DatasetCreator(BATCH_SIZE * 10,
                                 BATCH_SIZE,
                                 **DATAGEN_CONF,
                                 training=True)
        ds, cnt = creator.from_tfrecords([path_tfrecord_0])

        rep_cnt = 0
        for img, label in iter(ds):
            rep_cnt += 1

        assert rep_cnt == 10, "repetition count is invalid"
        assert img.shape[1:3] == random_crop_size, "crop size is invalid"

        tool.plot_dsresult(ds.take(10), BATCH_SIZE, 10, plot_path)

    # test for segmentation
    with self.subTest('for segmentation'):
        path_tfrecord = DATADIR + 'ds_from_tfrecord.tfrecord'
        TfrecordConverter().from_path_label(flist, flist, path_tfrecord)

        creator = DatasetCreator(BATCH_SIZE * 10,
                                 BATCH_SIZE,
                                 **DATAGEN_CONF,
                                 training=True)
        ds, cnt = creator.from_tfrecords([path_tfrecord])

        rep_cnt = 0
        for img, label in iter(ds):
            rep_cnt += 1

        assert rep_cnt == 10, "repetition count is invalid"
        assert img.shape[1:3] == random_crop_size, "crop size is invalid"
        assert label.shape[1:3] == random_crop_size, "crop size is invalid"

        tool.plot_dsresult(ds.take(10), BATCH_SIZE, 10, plot_path)
def test_from_path(self):
    """
    Check ``DatasetCreator.from_path`` without any augmentation.

    Covers a single input with a class label, two inputs with two class
    labels and two inputs with two segmentation labels; the first batch
    must reproduce the source images and labels.
    """
    # data augmentation configurations:
    DATAGEN_CONF = dict(
        resize=None,
        random_crop=None,  # what to set random_crop
        num_transforms=10,
    )

    BATCH_SIZE = 2
    num_samples = 10 * BATCH_SIZE

    flist = [DATADIR + 'Lenna.png'] * num_samples
    flist_crop = [DATADIR + 'Lenna_crop.png'] * num_samples
    img_org = np.array(Image.open(flist[0]))
    img_crop_org = np.array(Image.open(flist_crop[0]))
    labels1 = [0] * num_samples
    labels2 = [5] * num_samples

    paired_inputs = list(zip(flist, flist_crop))

    with self.subTest('single input and output'):
        creator = DatasetCreator(BATCH_SIZE * 10,
                                 BATCH_SIZE,
                                 **DATAGEN_CONF,
                                 training=True)
        ds = creator.from_path(flist, labels1)

        img, label = next(iter(ds))
        assert (img[0] == img_org).numpy().all(), "Image was changed"
        assert (label == 0).numpy().all(), "Labels were changed"

    with self.subTest(
            'multipe segmentation inputs and multiple class outputs'):
        creator = DatasetCreator(BATCH_SIZE * 10,
                                 BATCH_SIZE,
                                 **DATAGEN_CONF,
                                 training=True)
        ds = creator.from_path(paired_inputs, list(zip(labels1, labels2)))

        ret = next(iter(ds))
        assert (ret['image_in0'] == img_org
                ).numpy().all(), "Image was changed"
        assert (ret['image_in1'] == img_crop_org
                ).numpy().all(), "Image was changed"
        assert (ret['label_in0'] == 0).numpy().all(), "Labels were changed"
        assert (ret['label_in1'] == 5).numpy().all(), "Labels were changed"

    with self.subTest(
            'multipe segmentation inputs and multiple seg outputs'):
        creator = DatasetCreator(BATCH_SIZE * 10,
                                 BATCH_SIZE,
                                 **DATAGEN_CONF,
                                 training=True)
        # labels are the inputs in swapped order
        ds = creator.from_path(paired_inputs, list(zip(flist_crop, flist)))

        ret = next(iter(ds))
        assert (ret['image_in0'] == img_org
                ).numpy().all(), "Image was changed"
        assert (ret['image_in1'] == img_crop_org
                ).numpy().all(), "Image was changed"
        assert (ret['label_in0'] == img_crop_org
                ).numpy().all(), "Labels were changed"
        assert (ret['label_in1'] == img_org
                ).numpy().all(), "Labels were changed"