def augmentation(self, images, labels):
    """Augment images and their segmentation maps in background workers.

    The inputs are sliced into ``self.number_batches_augmentation`` chunks of
    ``ceil(self.batch_size / self.number_batches_augmentation)`` items; the
    final chunk ends at index ``self.batch_size``. Each chunk becomes one
    ``UnnormalizedBatch`` handed to ``self.augmenting_pipeline``.

    Returns:
        A pair ``(images_aug, labels_aug)`` of flat lists collected from all
        augmented batches, in batch order.
    """
    n_chunks = self.number_batches_augmentation
    chunk_len = math.ceil(self.batch_size / n_chunks)

    # All chunks except the last span a full ``chunk_len``; the last one is
    # cut off at ``self.batch_size``.
    bounds = [(k * chunk_len, (k + 1) * chunk_len)
              for k in range(n_chunks - 1)]
    bounds.append(((n_chunks - 1) * chunk_len, self.batch_size))

    batches = [
        UnnormalizedBatch(images=images[lo:hi],
                          segmentation_maps=labels[lo:hi])
        for lo, hi in bounds
    ]

    augmented = list(
        self.augmenting_pipeline.augment_batches(batches, background=True))

    images_aug = []
    labels_aug = []
    for batch in augmented:
        images_aug.extend(batch.images_aug)
        labels_aug.extend(batch.segmentation_maps_aug)
    return images_aug, labels_aug
def test_with_seed_start(self, mock_ia_seed):
    """``_Pool_worker`` derives both seeds from ``seed_start + batch_idx``.

    ``mock_ia_seed`` is the patched seeding function injected by the test
    decorator; it must receive the expected global seed, while the
    augmenter's ``seed_`` must receive the expected local seed.
    """
    seed_start = 10
    batch_idx = 1

    augseq = mock.MagicMock()
    augseq.augment_batch_.return_value = "augmented_batch_"
    batch = UnnormalizedBatch(images=[np.zeros((1, 1, 3), dtype=np.uint8)])

    multicore.Pool._WORKER_AUGSEQ = augseq
    multicore.Pool._WORKER_SEED_START = seed_start
    result = multicore._Pool_worker(batch_idx, batch)

    # Seeds the worker is expected to have used.
    seed = seed_start + batch_idx
    seed_span = iarandom.SEED_MAX_VALUE - iarandom.SEED_MIN_VALUE
    seed_global_expected = (
        iarandom.SEED_MIN_VALUE + (seed - 10**9) % seed_span)
    seed_local_expected = iarandom.SEED_MIN_VALUE + seed % seed_span

    assert result == "augmented_batch_"
    assert augseq.augment_batch_.call_count == 1
    augseq.augment_batch_.assert_called_once_with(batch)
    mock_ia_seed.assert_called_once_with(seed_global_expected)
    augseq.seed_.assert_called_once_with(seed_local_expected)
def example_multicore_augmentation():
    """Show background (multicore) augmentation of several image batches.

    Builds ten batches of 32 copies of a 64x64 astronaut image, augments
    them with ``augment_batches(..., background=True)`` and displays each
    augmented batch as an image grid.
    """
    print("Example: Multicore Augmentation")
    import skimage.data
    import imgaug as ia
    import imgaug.augmenters as iaa
    from imgaug.augmentables.batches import UnnormalizedBatch

    # Number of batches and batch size for this example
    nb_batches = 10
    batch_size = 32

    # Example augmentation sequence to run in the background
    flip = iaa.Fliplr(0.5)
    dropout = iaa.CoarseDropout(p=0.1, size_percent=0.1)
    augseq = iaa.Sequential([flip, dropout])

    # For simplicity, the same image is reused many times
    astronaut = ia.imresize_single_image(skimage.data.astronaut(), (64, 64))

    # 10 batches, each holding the example image 32 times
    batches = [
        UnnormalizedBatch(images=[astronaut] * batch_size)
        for _ in range(nb_batches)
    ]

    # augment_batches() returns a generator; show every batch as it arrives.
    for batch_aug in augseq.augment_batches(batches, background=True):
        grid = ia.draw_grid(batch_aug.images_aug, cols=8)
        ia.imshow(grid)
def add_noise(seq, data, batched=True):
    """Apply the augmenter ``seq`` to ``data`` and return the stacked result.

    Args:
        seq: imgaug augmenter.
        data: array that can be reshaped using ``config.height`` and
            ``config.width`` (and, when batched, ``config.noise_batch`` /
            ``config.minibatch_size``).
        batched: when true, split the data into ``config.noise_batch``
            batches that are augmented in background workers; otherwise
            augment all images in one call.

    Returns:
        Array of shape ``(config.minibatch_size, 1, height, width)`` in
        batched mode; otherwise a stack of ``(-1, height, width)`` arrays,
        one per augmented image.
    """
    if not batched:
        flat = data.reshape(-1, config.height, config.width)
        augmented = seq.augment_images(flat)
        return np.stack([
            img.reshape(-1, config.height, config.width) for img in augmented
        ])

    grouped = data.reshape(config.noise_batch,
                           config.minibatch_size // config.noise_batch,
                           config.height, config.width)
    # One plain list of 2-D images per batch.
    per_batch_images = []
    for b in range(config.noise_batch):
        per_batch_images.append([
            grouped[b][k]
            for k in range(config.minibatch_size // config.noise_batch)
        ])

    img_batches = [
        UnnormalizedBatch(images=per_batch_images[b])
        for b in range(config.noise_batch)
    ]
    img_batches_aug = list(seq.augment_batches(img_batches, background=True))

    stacked = np.stack([
        np.stack(img_batches_aug[b].images_aug)
        for b in range(config.noise_batch)
    ])
    return stacked.reshape(config.minibatch_size, 1, config.height,
                           config.width)
def gen_batches(files, scale_bs=10, aug_bs=5, crop_size=600, scale=4):
    """Yield one ``UnnormalizedBatch`` per annotation file that loads cleanly.

    For every XML file the referenced image is opened, EXIF-transposed and
    passed through ``resize_fix_shape``. The image is replicated ``scale_bs``
    times and run through ``seq_scale``; every scaled result is then
    repeated ``aug_bs`` times inside the yielded batch.

    Files that raise during loading or scaling are reported and skipped.
    Previously the ``yield`` still ran after a failure, which re-yielded the
    preceding batch (or raised ``UnboundLocalError`` when the very first file
    failed).

    Args:
        files: iterable of XML annotation file paths.
        scale_bs: number of copies fed to ``seq_scale``.
        aug_bs: number of repetitions of each scaled image in the batch.
        crop_size: forwarded to ``resize_fix_shape``.
        scale: unused; kept for backward compatibility.

    Yields:
        ``UnnormalizedBatch`` with ``scale_bs * aug_bs`` images and matching
        bounding boxes.
    """
    from imgaug.augmentables.batches import UnnormalizedBatch

    skip = 0
    for xml_file in files:
        tree = ET.parse(xml_file)
        root = tree.getroot()
        img = root.find('path').text
        try:
            raw_img = Image.open(img)
            clean(raw_img)
            raw_img = ImageOps.exif_transpose(raw_img)
            img_array = resize_fix_shape(raw_img, root, crop_size)
            images = [img_array for _ in range(scale_bs)]
            # member[4] is indexed as four coordinate children
            # (presumably the <bndbox> element - verify against the XML).
            bbs = [
                ia.BoundingBox(int(member[4][0].text), int(member[4][1].text),
                               int(member[4][2].text), int(member[4][3].text))
                for member in root.findall('object')
            ]
            images_scale, bbs_scale = seq_scale(
                images=images,
                bounding_boxes=[bbs for _ in range(scale_bs)])
            imgs = [im for im in images_scale for _ in range(aug_bs)]
            batche = UnnormalizedBatch(
                images=imgs,
                bounding_boxes=[
                    bbss for bbss in bbs_scale for _ in range(aug_bs)
                ])
        except Exception as e:
            # Best-effort processing: report the failure and move on to the
            # next file instead of yielding a stale or undefined batch.
            skip += 1
            print(repr(e), f" skip {skip}")
            continue
        yield batche
def test_simple_call(self, mock_worker):
    """``_Pool_starworker`` unpacks its tuple and forwards it to the worker."""
    mock_worker.return_value = "returned_batch"
    batch_idx = 1
    batch = UnnormalizedBatch(images=[np.zeros((1, 1, 3), dtype=np.uint8)])

    packed_args = (batch_idx, batch)
    result = multicore._Pool_starworker(packed_args)

    assert result == "returned_batch"
    mock_worker.assert_called_once_with(batch_idx, batch)
def generate_data_I(self, ds, save_to_dir=None, prefix='test'):
    """Write augmented versions of every image in ``ds`` to disk.

    Each batch from ``ds.as_numpy_iterator()`` is scaled by 255, cast to
    uint8 and augmented five times with the module-level ``seq``. Every
    augmented image is saved as ``<save_to_dir>/<prefix>_<round>_<rand>.png``
    where ``<rand>`` is a random integer in ``[0, 1000]``.
    """
    create_dir(save_to_dir)
    for imgs_batch, _ in ds.as_numpy_iterator():
        batches = UnnormalizedBatch(images=(imgs_batch * 255).astype(np.uint8))

        # Five independent augmentation rounds over the same batch.
        images_aug = []
        for _round in range(5):
            batch_aug = next(seq.augment_batches(batches, background=True))
            images_aug.append(batch_aug.images_aug)

        # NOTE(review): the random suffix is not guaranteed to be unique, so
        # files can overwrite each other.
        for i, images in enumerate(images_aug):
            for ia_j in images:
                target = "%s/%s_%d_%d.png" % (
                    str(save_to_dir),
                    prefix,
                    i,
                    random.randint(0, 1000),
                )
                imageio.imwrite(target, ia_j)
def test_without_seed_start(self):
    """``_Pool_worker`` augments the batch with the worker's augmenter."""
    batch = UnnormalizedBatch(images=[np.zeros((1, 1, 3), dtype=np.uint8)])
    augseq = mock.MagicMock()
    augseq.augment_batch_.return_value = "augmented_batch_"
    multicore.Pool._WORKER_AUGSEQ = augseq

    batch_idx = 1
    result = multicore._Pool_worker(batch_idx, batch)

    assert result == "augmented_batch_"
    assert augseq.augment_batch_.call_count == 1
    augseq.augment_batch_.assert_called_once_with(batch)
def gen_batches(files, bs=5, scale=4.5, must_rotate=True):
    """Build one ``UnnormalizedBatch`` per annotation file.

    For every XML file the referenced image is opened, EXIF-transposed,
    optionally shrunk via ``resize_image`` and optionally rotated via
    ``aug_by_value_list``. Each resulting image (with its bounding boxes) is
    repeated ``bs`` times inside the batch.

    The image path stored in the XML may use Windows separators. The
    original code computed ``os.path.join(*img.split("\\"))`` but discarded
    the result, so such paths were never normalised; backslashes are now
    replaced with ``os.sep`` (a no-op on Windows).

    Args:
        files: iterable of XML annotation file paths.
        bs: number of copies of each image placed in its batch.
        scale: shrink factor forwarded to ``resize_image`` when greater
            than 1.
        must_rotate: whether to apply the ``MUST_ROTATE`` rotations.

    Returns:
        ``(batches, trees)``: the batches and the parsed XML trees, in the
        same order as ``files``.
    """
    from imgaug.augmentables.batches import UnnormalizedBatch

    batches = []
    trees = []
    for xml_file in files:
        tree = ET.parse(xml_file)
        root = tree.getroot()
        img = root.find('path').text.replace("\\", os.sep)
        raw_img = Image.open(img)
        clean(raw_img)
        raw_img = ImageOps.exif_transpose(raw_img)

        # Reduce image size. The return value is not used, so resize_image
        # presumably works in place on the image and the XML root -
        # TODO confirm.
        if scale > 1:
            resize_image(raw_img, root, scale)
        img_array = np.array(raw_img)
        bbs = [
            ia.BoundingBox(int(member[4][0].text), int(member[4][1].text),
                           int(member[4][2].text), int(member[4][3].text))
            for member in root.findall('object')
        ]

        # Rotated variants (or just the original when rotation is disabled).
        if must_rotate:
            img_aug, bbs_aug = aug_by_value_list([img_array], [bbs],
                                                 fit_output=True,
                                                 rotate=MUST_ROTATE)
        else:
            img_aug, bbs_aug = [img_array], [bbs]

        images = [
            img_aug_array for img_aug_array in img_aug for _ in range(bs)
        ]
        bounding_boxes = [
            bbs_aug_array for bbs_aug_array in bbs_aug for _ in range(bs)
        ]
        batches.append(
            UnnormalizedBatch(images=images, bounding_boxes=bounding_boxes))
        trees.append(tree)
    return batches, trees
def dictCropMultiples(baseImageListFunc, baseMaskListFunc, fullImageListFunc, segmapListFunc):
    """Append ``CropToMultiplesOf``-augmented copies to the running lists.

    Four augmented copies (height multiple 2 x width multiple 2) of the base
    images and masks are produced, either through background batches when
    ``PARALLEL_PROCESSING`` is set or through one direct augmenter call on
    an expanded list.

    Returns:
        ``(fullImageListFunc, segmapListFunc)``, both extended in place.
    """
    height_multiple = 2
    width_multiple = 2
    copies = height_multiple * width_multiple
    cropper = iaa.CropToMultiplesOf(height_multiple=height_multiple,
                                    width_multiple=width_multiple)

    if not PARALLEL_PROCESSING:
        images, masks = expandList(baseImageListFunc, baseMaskListFunc, copies)
        images, masks = cropper(images=images, segmentation_maps=masks)
        fullImageListFunc.extend(images)
        segmapListFunc.extend(masks)
        return fullImageListFunc, segmapListFunc

    batches = []
    for _ in range(copies):
        batches.append(
            UnnormalizedBatch(images=baseImageListFunc,
                              segmentation_maps=baseMaskListFunc))
    # Collect every result first so nothing is appended if augmentation fails.
    augmented = list(cropper.augment_batches(batches, background=True))
    for batch_aug in augmented:
        fullImageListFunc.extend(batch_aug.images_aug)
        segmapListFunc.extend(batch_aug.segmentation_maps_aug)
    return fullImageListFunc, segmapListFunc
def dictShotNoise(baseImageListFunc, baseMaskListFunc, fullImageListFunc, segmapListFunc):
    """Append shot-noise-augmented copies to the running lists.

    Five augmented copies of the base images and masks are produced with
    ``iaa.imgcorruptlike.ShotNoise(severity=1)``, either through background
    batches when ``PARALLEL_PROCESSING`` is set or through one direct
    augmenter call on an expanded list.

    Returns:
        ``(fullImageListFunc, segmapListFunc)``, both extended in place.
    """
    print('Shot noise, starting number of images:', len(segmapListFunc))
    copies = 5
    noise = iaa.imgcorruptlike.ShotNoise(severity=1)

    if not PARALLEL_PROCESSING:
        images, masks = expandList(baseImageListFunc, baseMaskListFunc, copies)
        images, masks = noise(images=images, segmentation_maps=masks)
        fullImageListFunc.extend(images)
        segmapListFunc.extend(masks)
        return fullImageListFunc, segmapListFunc

    batches = []
    for _ in range(copies):
        batches.append(
            UnnormalizedBatch(images=baseImageListFunc,
                              segmentation_maps=baseMaskListFunc))
    # Collect every result first so nothing is appended if augmentation fails.
    augmented = list(noise.augment_batches(batches, background=True))
    for batch_aug in augmented:
        fullImageListFunc.extend(batch_aug.images_aug)
        segmapListFunc.extend(batch_aug.segmentation_maps_aug)
    return fullImageListFunc, segmapListFunc
def main():
    """Augment every YOLO-labelled image of a directory from the command line.

    Steps, in order:
      1. Seed imgaug and parse the command-line arguments.
      2. Read ``class.names`` from the input directory, collect every
         ``.txt``/``.jpg`` pair and (unless previewing or skipping
         originals) copy the originals to the output directory.
      3. Load all images and bounding boxes into ``UnnormalizedBatch``
         objects of at most ``MAX_BATCH_SIZE`` items.
      4. Run every operation from ``get_augmentation_operations()`` for its
         number of repetitions, then either preview each augmented batch or
         write the augmented images and label data.
    """
    # Configure imgaug
    ia.seed(1)

    #########################################################
    # Parse arguments
    #########################################################
    parser = argparse.ArgumentParser(
        description=
        'Applies a set of augmentations to every image in the input directory.'
    )
    parser.add_argument('--input_directory',
                        '-i',
                        type=str,
                        help='e.g. "./downloads/"')
    parser.add_argument('--output_directory',
                        '-o',
                        type=str,
                        help='e.g. "./downloads/"')
    parser.add_argument(
        '--single_threaded',
        '-s',
        action='store_true',
        help='Process images in one thread instead of multithreading')
    parser.add_argument('--preview_only',
                        '-p',
                        action='store_true',
                        help='Show previews instead of writing to disk')
    parser.add_argument(
        '--skip_originals',
        action='store_true',
        help=
        'Prevent original images from being copied into the destination folder'
    )
    args = parser.parse_args()

    input_directory = args.input_directory
    output_directory = args.output_directory
    user_requested_preview_only = args.preview_only
    single_threaded = args.single_threaded
    skip_originals = args.skip_originals

    #########################################################
    # Process all data files in the input directory, either
    # copying them over or queueing them to be augmented in
    # the next step.
    #########################################################

    # Load YOLO region class names from file
    class_names = []
    with open(os.path.join(input_directory, "class.names")) as class_file:
        class_names = [
            line.rstrip() for line in class_file if line.rstrip() != ""
        ]

    if not user_requested_preview_only:
        # Copy MyAugments.py to the output directory
        copyfile(os.path.join(os.getcwd(), "MyAugments.py"),
                 os.path.join(output_directory, "MyAugments.py"))

        # Copy the YOLO region class names file to the output directory
        copyfile(os.path.join(input_directory, "class.names"),
                 os.path.join(output_directory, "class.names"))

    augment_files = []
    filenames = os.listdir(input_directory)
    # Sorted so that files are processed in a stable order.
    filenames.sort()
    for filename in filenames:
        # Work only on YOLO .txt files.
        if not filename.endswith(".txt"):
            continue

        base_filename = os.path.splitext(filename)[0]
        data_path = os.path.join(input_directory, base_filename + ".txt")
        # Every label file is assumed to have a same-named .jpg next to it.
        image_path = os.path.join(input_directory, base_filename + ".jpg")
        augment_files.append(DataPair(data_path, image_path))

        if not skip_originals and not user_requested_preview_only:
            # Copy the original data file to the output directory.
            copyfile(os.path.join(input_directory, base_filename + ".txt"),
                     os.path.join(output_directory, base_filename + ".txt"))

            # Copy the original image to the output directory.
            copyfile(os.path.join(input_directory, base_filename + ".jpg"),
                     os.path.join(output_directory, base_filename + ".jpg"))

    #########################################################
    # From the list of data/image pairs to augment, create
    # batches for imgaug to process.
    #########################################################
    batch = []
    batches = []
    MAX_BATCH_SIZE = 10 if user_requested_preview_only else 16
    for i, item in enumerate(augment_files):
        # Load image into memory
        image = imageio.imread(item.image_path)

        # Get imgaug representation of bounding boxes
        image_data = ImageData.from_yolo_data(item.data_path, class_names)
        bbs = image_data.to_imgaug(image.shape)

        batch.append((image, bbs, item))

        # If we're at the max batch size or the end of the file list,
        # finalize the batch and add it to the batch list.
        if len(batch) == MAX_BATCH_SIZE or i == len(augment_files) - 1:
            # zip(*batch) transposes the (image, bbs, item) triples into three
            # tuples, so clearing ``batch`` afterwards does not affect them.
            images, bounding_boxes, data = list(zip(*batch))
            batches.append(
                UnnormalizedBatch(images=images,
                                  bounding_boxes=bounding_boxes,
                                  data=data))
            batch.clear()

    #########################################################
    # Apply each operation in MyAugments.py to each image
    #########################################################
    # Previews always run in the foreground.
    should_multithread = not single_threaded and not user_requested_preview_only
    ops = get_augmentation_operations()
    total_ops_per_image = sum([op.num_repetitions for op in ops])
    input_image_count = sum([len(b.data) for b in batches])
    generated_image_count = total_ops_per_image * input_image_count
    print(f"{generated_image_count} new images will be created.")
    progress_bar = tqdm(total=generated_image_count)
    for op in ops:
        for i in range(op.num_repetitions):
            # Produce augmentations
            for batches_aug in op.operation.augment_batches(
                    batches, background=should_multithread):

                if user_requested_preview_only:
                    # Preview output one batch at a time.
                    # Blocks execution until the window is closed.
                    # Closing a window will cause the next batch to appear.
                    # Close the Python instance in the dock to stop execution.
                    images_with_labels = [
                        bb.draw_on_image(image)
                        for image, bb in zip(batches_aug.images_aug,
                                             batches_aug.bounding_boxes_aug)
                    ]
                    grid_image = ia.draw_grid(images_with_labels,
                                              cols=None,
                                              rows=None)
                    title = f"{op.name}\nRep {i}\n"
                    # title += ", ".join([item.image_filename for item in batches_aug.data])
                    # Draw image filenames
                    grid_image = ia.draw_text(grid_image,
                                              8,
                                              8,
                                              title,
                                              color=(255, 0, 0),
                                              size=50)
                    ia.imshow(grid_image, backend='matplotlib')
                    # Nothing is written (and the progress bar is not advanced)
                    # in preview mode.
                    continue

                for image, bbs, data in zip(batches_aug.images_aug,
                                            batches_aug.bounding_boxes_aug,
                                            batches_aug.data):
                    # Write image and matching data file to output folder

                    # Determine base name for image and matching data file
                    image_filename_no_extension, image_extension = os.path.splitext(
                        Path(data.image_path).name)
                    base_filename = ""
                    if op.num_repetitions == 1:
                        base_filename = f"{image_filename_no_extension}_{op.name}"
                    else:
                        base_filename = f"{image_filename_no_extension}_{op.name}_rep{i}"

                    # Write image to output folder
                    output_image_path = os.path.join(
                        output_directory, f"{base_filename}{image_extension}")
                    imageio.imwrite(output_image_path, image)

                    # Write modified imgaug bounding boxes as YOLO format in output folder
                    image_height, image_width, _ = image.shape
                    output_data = ImageData.from_imagaug(
                        image_width, image_height, bbs)
                    # NOTE(review): ``data.data_path`` is the label path inside the
                    # INPUT directory, not a path derived from ``base_filename``
                    # in the output directory. If ``write_yolo`` writes to the
                    # path it is given, this overwrites the source labels -
                    # confirm against ``ImageData.write_yolo``.
                    output_data.write_yolo(data.data_path, class_names)

                    # Update progress bar
                    progress_bar.update(1)
    progress_bar.close()
def batch_op_sub_chip_augmentation(x_chips_total,
                                   y_chips_total,
                                   amt=1000,
                                   test=True,
                                   test_size=256,
                                   ISZ=256,
                                   random_seed=42):
    """Build ``amt`` augmented image/mask chips of size ``ISZ`` x ``ISZ``.

    ``amt`` (or ``test_size`` in test mode) is rounded down to a multiple of
    128. When fewer than 50 chips are passed in, chips are loaded from the
    data sets found under the module-level ``inDir``: each data set is
    windowed, verified, shuffled and appended to the running totals (only
    the first data set in test mode). The first 128 chips of the shuffled
    totals then form one batch that is repeated ``amt / 128`` times and
    augmented with ``aug_seq()`` in the foreground.

    Returns:
        ``(img, msk)`` reshaped to ``(amt, ISZ, ISZ, 3)`` and
        ``(amt, ISZ, ISZ, 1)``; the mask is clipped to a maximum of 1.
    """
    if test:
        # ``time_start`` only exists in test mode; it is read again below
        # under the same condition.
        time_start = time.time()
        amt = test_size
        amt = int(amt // 128 * 128)
        print('test selected, amt:', amt)
    else:
        amt = int(amt // 128 * 128)
        print(amt, 'non-test amt here')
    dataset_table = find_datasets(inDir)
    if len(x_chips_total) < 50:
        for index in range(len(dataset_table[0])):
            selected_x = dataset_table[0][index]
            selected_y = dataset_table[1][index]
            # seed = 15
            # seed = np.random.randint(1,5000)
            print(selected_x, selected_y)
            chips_x = np.load(inDir + '/x_set/' + selected_x)
            chips_y = np.load(inDir + '/y_set/' + selected_y)
            x_windows, y_windows, dim_Max = window_creation(
                chips_x, chips_y, ISZ)
            # Free the raw arrays as soon as the windows exist.
            del chips_x, chips_y
            # print(dim_Max)
            x_chips_verified, y_chips_verified = chip_verification(
                x_windows, y_windows, dim_Max, ISZ)
            del x_windows, y_windows
            # if test:
            #     print(x_chips_verified.shape, y_chips_verified.shape)
            # np.random.seed(42)
            # np.random.shuffle(dataset_table[0])
            # np.random.seed(42)
            # np.random.shuffle(dataset_table[1])
            # Re-seeding before each shuffle applies the same permutation to
            # images and masks, keeping the pairs aligned.
            np.random.seed(random_seed)
            np.random.shuffle(x_chips_verified)
            np.random.seed(random_seed)
            np.random.shuffle(y_chips_verified)
            print(x_chips_verified.shape, y_chips_verified.shape)
            if len(x_chips_verified) < 500:
                # presumably pads small data sets with extra chips - confirm
                # against pad_valid
                x_chips_verified, y_chips_verified = pad_valid(
                    x_chips_verified, y_chips_verified, 100)
            # x_chips_verified = x_chips_verified[:500]
            # y_chips_verified = y_chips_verified[:500]
            if index == 0:
                x_chips_total = x_chips_verified
                y_chips_total = y_chips_verified
            if index > 0:
                x_chips_total = np.append(x_chips_total,
                                          x_chips_verified,
                                          axis=0)
                y_chips_total = np.append(y_chips_total,
                                          y_chips_verified,
                                          axis=0)
            del x_chips_verified, y_chips_verified
            print(x_chips_total.shape, y_chips_total.shape)
            # Test mode only loads the first data set.
            if test:
                break
    # NOTE(review): this shuffle is placed at function level, so it also
    # shuffles caller-provided arrays in place; the original nesting is
    # ambiguous - confirm it was not meant to sit inside the ``if`` above.
    np.random.seed(random_seed * 2)
    np.random.shuffle(x_chips_total)
    np.random.seed(random_seed * 2)
    np.random.shuffle(y_chips_total)
    print(x_chips_total.shape, y_chips_total.shape)
    # x_chips_padded = x_chips_total#[:amt]
    # y_chips_padded = y_chips_total#[:amt]
    # del x_chips_total, y_chips_total
    # print(x_chips_padded.shape, y_chips_padded.shape)
    BATCH_SIZE = 128
    NB_BATCHES = int(amt / 128)
    # Only the first BATCH_SIZE chips are used; every batch below holds the
    # same images, and variety comes from the augmenter.
    images_batch = [x_chips_total[_] for _ in range(BATCH_SIZE)]
    segmentation_maps_batch = [y_chips_total[_] for _ in range(BATCH_SIZE)]
    # images_batch = [x_chips_padded[_] for _ in range(BATCH_SIZE)]
    # segmentation_maps_batch = [y_chips_padded[_] for _ in range(BATCH_SIZE)]
    # del x_chips_padded, y_chips_padded
    del x_chips_total, y_chips_total
    print('images_batch & segmentation_maps_batch loaded')
    batches = [
        UnnormalizedBatch(images=images_batch,
                          segmentation_maps=segmentation_maps_batch)
        for _ in range(NB_BATCHES)
    ]
    # seq = simple_seq()
    seq = aug_seq()
    print('seq loaded')
    batches_aug = list(seq.augment_batches(batches, background=False))
    print('augmentation finished')
    if test:
        time_end = time.time()
        print("Complete load & augmentation pipeline done in %.2fs" %
              (time_end - time_start, ))
    # print("Resizing & returning augmented x_trn, y_trn datasets")
    print('resizing img & msk')
    img = np.array([batches_aug[a].images_aug
                    for a in range(NB_BATCHES)]).reshape(amt, ISZ, ISZ, 3)
    msk = (np.array([
        batches_aug[a].segmentation_maps_aug for a in range(NB_BATCHES)
    ]).reshape(amt, ISZ, ISZ, 1))
    print(img.shape, msk.shape)
    img = np.array(img).reshape(amt, ISZ, ISZ, 3)
    # img = [stretch_n(i) for i in img]
    # print(img.shape)
    img = np.array(img).reshape(amt, ISZ, ISZ, 3)
    print(img.dtype)
    print(img.shape)
    # img = img / 255.
    # img = img.astype('uint8')
    print(msk.dtype)
    print(msk.shape)
    # Cap mask values at 1.
    msk = msk.clip(max=1)
    print(msk.dtype)
    return img, msk
def test_data_load2(self):
    """Manual smoke test: load the data, augment it in a pool, show samples.

    Loads ``../data/train.csv`` through ``data_load.data_load``, shows one
    randomly chosen 28x28 training picture, augments nine copies of the
    whole training set in a 32-process imgaug pool, shows the same picture
    from every augmented copy and finally concatenates the original and
    augmented data, printing the resulting sizes.

    The random index is drawn with ``random.randrange``.
    ``random.randint(0, n)`` includes ``n`` and could index one past the end
    of the data set.
    """
    input_file = '../data/train.csv'
    image_size = 28
    (training_dataset, testing_dataset, validating_dataset, training_labels,
     testing_labels, validating_labels) = data_load.data_load(
         input_file, image_size)

    from matplotlib import pyplot as plt
    import random

    random_picture = random.randrange(len(training_dataset))
    print("random int : ", random_picture)
    plt.imshow(training_dataset[random_picture].reshape(28, 28),
               interpolation='nearest')
    plt.show()
    print(training_labels[random_picture])

    import imgaug.augmenters as iaa
    import time
    import numpy as np
    from imgaug.augmentables.batches import UnnormalizedBatch

    def sometimes(aug):
        # Apply ``aug`` to roughly half of the images.
        return iaa.Sometimes(0.5, aug)

    NB_BATCHES = 9
    # Every batch holds the complete training set (labels travel in ``data``).
    batches = [
        UnnormalizedBatch(images=training_dataset, data=training_labels)
        for _ in range(NB_BATCHES)
    ]

    aug = iaa.Sequential([
        sometimes(iaa.Affine(translate_px={
            "x": (-3, 3),
            "y": (-3, 3)
        })),
        sometimes(iaa.Affine(scale=(0.80, 1.20))),
        sometimes(iaa.Affine(rotate=(-35, 35))),
        sometimes(iaa.GaussianBlur(sigma=(0.1, 0.5)))
    ])

    time_start = time.time()
    with aug.pool(processes=32, maxtasksperchild=200, seed=1) as pool:
        batches_aug = pool.map_batches(batches)

    for batch_aug in batches_aug:
        plt.imshow(batch_aug.images_aug[random_picture].reshape(28, 28),
                   interpolation='nearest')
        plt.show()
        print(batch_aug.data[random_picture])

    # The measured time includes the interactive plots above, as before.
    time_end = time.time()
    print("Augmentation done in %.2fs" % (time_end - time_start, ))

    all_training_dataset = training_dataset
    all_training_labels = training_labels
    for batch_aug in batches_aug:
        all_training_dataset = np.concatenate(
            (all_training_dataset, batch_aug.images_aug))
        all_training_labels = np.concatenate(
            (all_training_labels, batch_aug.data))

    print(" len of all_training_dataset : ", len(all_training_dataset))
    print(" len of all_training_labels : ", len(all_training_labels))
    print(" end ")
def augment_data(images_dataset, labels_dataset, multiplier):
    """Return the data set extended by ``multiplier`` augmented copies.

    The data is split into pieces of roughly 30 pictures; every piece is
    submitted ``multiplier`` times to a 32-process imgaug pool. Each
    augmentation (translation by +-3 px, scaling 80-120 %, rotation by
    +-35 deg, Gaussian blur) is applied with a probability of 50 %.

    Args:
        images_dataset: array of images.
        labels_dataset: array of labels aligned with ``images_dataset``.
        multiplier: number of augmented copies to generate. Values below 1
            return the original data unchanged; previously this raised
            ``ValueError`` from ``np.concatenate`` on an empty list.

    Returns:
        ``(all_images, all_labels)``: the originals followed by the
        augmented data.
    """
    if multiplier < 1:
        # Nothing to augment; skip the process pool entirely.
        return np.asarray(images_dataset), np.asarray(labels_dataset)

    # Size (in pictures) of one job sent to a child process.
    max_size_of_one_job = 30

    time_start = time.time()

    def sometimes(aug):
        # Apply ``aug`` to roughly half of the images.
        return iaa.Sometimes(0.5, aug)

    aug = iaa.Sequential([
        sometimes(iaa.Affine(translate_px={
            "x": (-3, 3),
            "y": (-3, 3)
        })),
        sometimes(iaa.Affine(scale=(0.80, 1.20))),
        sometimes(iaa.Affine(rotate=(-35, 35))),
        sometimes(iaa.GaussianBlur(sigma=(0.1, 0.5)))
    ])

    # Number of pieces the training data is split into (at least one).
    pieces = len(images_dataset) // max_size_of_one_job
    if pieces == 0:
        pieces = 1

    split_training_dataset = np.array_split(images_dataset, pieces)
    split_training_labels = np.array_split(labels_dataset, pieces)

    # ``multiplier`` batches per piece, keeping pieces in order.
    batches = []
    for piece_images, piece_labels in zip(split_training_dataset,
                                          split_training_labels):
        batches.extend(
            UnnormalizedBatch(images=piece_images, data=piece_labels)
            for _ in range(multiplier))

    # Run the jobs in 32 child processes.
    with aug.pool(processes=32, maxtasksperchild=200, seed=1) as pool:
        print("sending for augmentation batches : ", len(batches))
        batches_aug = pool.map_batches(batches)

    # Concatenate all data back together.
    all_images_dataset = np.concatenate(
        (images_dataset,
         np.concatenate([b.images_aug[:] for b in batches_aug])))
    all_labels_dataset = np.concatenate(
        (labels_dataset, np.concatenate([b.data[:] for b in batches_aug])))

    time_end = time.time()
    print("Augmentation done in %.2fs" % (time_end - time_start, ))

    print(" len of all_images_dataset : ", len(all_images_dataset))
    print(" len of all_labels_dataset : ", len(all_labels_dataset))
    print(" end ")

    return all_images_dataset, all_labels_dataset
def get_image_batches(images, nb_batches=NB_BATCHES):
    """Return ``nb_batches`` batches that all wrap the same ``images``."""
    batches = []
    for _ in range(nb_batches):
        batches.append(UnnormalizedBatch(images=images))
    return batches
def generate_plate_multicore(self,
                             plate_numbers,
                             bg_color,
                             is_double,
                             enhance=False):
    """
    Generate license plates with the given numbers and colour, then augment
    them as a single imgaug batch in background workers.

    :param plate_numbers: list of plate numbers
    :param bg_color: background colour
    :param is_double: whether the plate has two rows
    :param enhance: image enhancement (random erode/dilate of each glyph)
    :return: result of ``ImageAugmentation.light_change_right_trap`` applied
             to the augmented batch
    """
    # Plate size by type. ``width`` is only assigned for motor/dishu/macau
    # plates and only read again for those same types below.
    if 'motor' in bg_color:
        height = 140
        width = 220
    elif 'dishu' in bg_color:
        height = 165
        width = 300
    elif 'macau' in bg_color:
        height = 120
        width = 520
    else:
        height = 220 if is_double else 140

    plate_images = list()
    for plate_number in plate_numbers:
        print('车牌号是:{}'.format(plate_number), "\n",
              '车牌高度是:{}'.format(height), "\n",
              '车牌底颜色是:{}'.format(bg_color), "\n",
              '是否双行:{}'.format(is_double))
        # Glyph positions; "army" plates use a dedicated layout helper.
        if "army" in bg_color:
            number_xy = self.get_location_multi_WJ(plate_number, height)
        else:
            number_xy = self.get_location_multi(plate_number, height)
        print(number_xy)
        # Background template, looked up by colour and height.
        img_plate_model = cv2.imread(
            os.path.join(self.adr_plate_model,
                         '{}_{}.PNG'.format(bg_color, height)))
        print(img_plate_model.shape)
        if "motor" in bg_color or 'dishu' in bg_color or 'macau' in bg_color:
            img_plate_model = cv2.resize(img_plate_model, (width, height))
        else:
            img_plate_model = cv2.resize(
                img_plate_model,
                (480 if 'green' in bg_color else 440, height))
        print(img_plate_model.shape)
        for i in range(len(plate_number)):
            # --- choose the glyph image for character i ---
            if len(plate_number) == 8 and 'green' in bg_color:
                font_img = self.font_imgs['green_{}'.format(
                    plate_number[i])]
            # low-speed vehicle plates
            elif 'dishu' in bg_color:
                if 'farm' in bg_color:
                    if i == 0:
                        font_img = self.font_imgs['motor_up_{}'.format(
                            plate_number[i])]
                    elif i == 1 or i == 2:
                        font_img = self.font_imgs['140_{}'.format(
                            plate_number[i])]
                        font_img = cv2.resize(font_img, (30, 45))
                    else:
                        font_img = self.font_imgs['140_{}'.format(
                            plate_number[i])]
                else:
                    if i == 0:
                        font_img = self.font_imgs['220_{}'.format(
                            plate_number[i])]
                        font_img = cv2.resize(font_img, (55, 45))
                    elif i == 1:
                        font_img = self.font_imgs['220_up_{}'.format(
                            plate_number[i])]
                        font_img = cv2.resize(font_img, (55, 45))
                    else:
                        font_img = self.font_imgs['140_{}'.format(
                            plate_number[i])]
            # Macau plates
            elif 'macau' in bg_color:
                font_img = self.font_imgs['140_{}'.format(plate_number[i])]
                font_img = cv2.resize(font_img, (65, 100))
            # armed-police single-row local plates
            elif len(plate_number) == 8 and ('army' in bg_color
                                             and is_double == False):
                if i == 1:
                    font_img = self.font_imgs['{}_{}'.format(
                        height, plate_number[i])]
                    font_img = cv2.resize(font_img, (30, 90))
                elif i == 2:
                    font_img = self.font_imgs['{}_{}'.format(
                        height, plate_number[i])]
                    font_img = cv2.resize(font_img, (40, 90))
                else:
                    font_img = self.font_imgs['{}_{}'.format(
                        height, plate_number[i])]
            # armed-police double-row large-vehicle plates
            elif 'army' in bg_color and is_double:
                if i < 3:
                    if i == 0:
                        font_img = self.font_imgs['220_up_{}'.format(
                            plate_number[i])]
                        font_img = cv2.resize(font_img, (55, 50))
                    if i == 1:
                        font_img = self.font_imgs['140_{}'.format(
                            plate_number[i])]
                        font_img = cv2.resize(font_img, (25, 50))
                    if i == 2:
                        font_img = self.font_imgs['220_{}'.format(
                            plate_number[i])]
                        font_img = cv2.resize(font_img, (80, 50))
                else:
                    if plate_number[i] in digits:
                        font_img = self.font_imgs['220_{}'.format(
                            plate_number[i])]
                    else:
                        font_img = self.font_imgs['220_down_{}'.format(
                            plate_number[i])]
            else:
                if '{}_{}'.format(height, plate_number[i]) in self.font_imgs:
                    # change the size of the "J" in "WJ" (armed-police
                    # small-car plates)
                    if 'army' in bg_color and i == 1:
                        font_img = self.font_imgs['{}_{}'.format(
                            height, plate_number[i])]
                        font_img = cv2.resize(font_img, (30, 90))
                    else:
                        font_img = self.font_imgs['{}_{}'.format(
                            height, plate_number[i])]
                # motorcycles
                elif '{}_{}'.format(bg_color,
                                    plate_number[i]) in self.font_imgs:
                    if len(plate_number) == 7:
                        if i < 2:
                            font_img = self.font_imgs['motor_up_{}'.format(
                                plate_number[i])]
                        else:
                            font_img = self.font_imgs['140_{}'.format(
                                plate_number[i])]
                    else:
                        if i < 1:
                            font_img = self.font_imgs['motor_up_{}'.format(
                                plate_number[i])]
                            font_img = cv2.resize(font_img, (80, 50))
                        else:
                            font_img = self.font_imgs['140_{}'.format(
                                plate_number[i])]
                else:
                    if i < 2:
                        font_img = self.font_imgs['220_up_{}'.format(
                            plate_number[i])]
                    else:
                        font_img = self.font_imgs['220_down_{}'.format(
                            plate_number[i])]

            # --- decide whether character i is drawn in red ---
            if plate_number[i] in [
                    '警', '使', '领'
            ] or ('army' in bg_color and i == 0) or ('army' in bg_color
                                                     and i == 1):
                is_red = True
            # armed-police rule for red characters
            elif plate_number[i] in provinces and 'army' in bg_color:
                is_red = True
            # military-plate rule for red characters
            elif 'jun' in bg_color and (i == 0 or i == 1):
                is_red = True
            elif plate_number[i] in letters and (
                    i == 6 or i == 7) and 'army' in bg_color:
                is_red = True
            else:
                is_red = False

            if enhance:
                # Randomly thin (erode) or thicken (dilate) the glyph with a
                # k x k kernel, k in [1, 5].
                k = np.random.randint(1, 6)
                kernel = np.ones((k, k), np.uint8)
                if np.random.random(1) > 0.5:
                    font_img = np.copy(
                        cv2.erode(font_img, kernel, iterations=1))
                else:
                    font_img = np.copy(
                        cv2.dilate(font_img, kernel, iterations=1))

            # NOTE(review): every character is composited onto the same
            # ``img_plate_model``; this only accumulates glyphs if
            # ``copy_to_image_multi`` draws into that array in place - confirm.
            img_plate = copy_to_image_multi(img_plate_model, font_img,
                                            number_xy[i, :], bg_color,
                                            is_red)
        img_plate = cv2.blur(img_plate, (3, 3))
        plate_images.append(img_plate)

    # multi-core data augmentation
    import time
    time_start = time.time()
    augment = ImageAugmentation()
    # All generated plates go into one batch.
    batches = [UnnormalizedBatch(images=plate_images)]
    # define your own augmentation methods here
    aug = iaa.Sequential([
        iaa.imgcorruptlike.Snow(severity=3),
        iaa.GaussianBlur(0.5),
        iaa.CropAndPad(px=(-10, 10)),
        #iaa.Lambda(func_images=light_change_right_trap_multi)
    ])
    batches_aug = list(aug.augment_batches(batches, background=True))
    images = augment.light_change_right_trap(batches_aug[0], flag='x')
    time_end = time.time()
    print("Augmentation done in %.2fs" % (time_end - time_start, ))
    #img_plate = augment.gaussian_noise_iaa(img_plate)
    #img_plate = augment.add_smudge(img_plate)
    return images
def test_data_load4(self):
    """Manual smoke test: augment the training set piecewise in a pool.

    Loads ``../data/train.csv``, shows a random 28x28 training picture,
    splits the training data into pieces of about 300 pictures, submits
    every piece 200 times to a 32-process imgaug pool, joins originals and
    augmented data and shows a few pictures of the result.

    Fixes over the previous version:
      * random indices are drawn with ``random.randrange``;
        ``random.randint(0, n)`` includes ``n`` and could index past the
        end;
      * the piece count no longer divides by zero when the data set holds
        fewer pictures than one job.
    """
    input_file = '../data/train.csv'
    image_size = 28
    (training_dataset, testing_dataset, validating_dataset, training_labels,
     testing_labels, validating_labels) = data_load.data_load(
         input_file, image_size)

    from matplotlib import pyplot as plt
    import random

    random_picture = random.randrange(len(training_dataset))
    print("random int : ", random_picture)
    plt.imshow(training_dataset[random_picture].reshape(28, 28),
               interpolation='nearest')
    plt.show()
    print(training_labels[random_picture])

    import imgaug.augmenters as iaa
    import time
    import numpy as np
    from imgaug.augmentables.batches import UnnormalizedBatch

    def sometimes(aug):
        # Apply ``aug`` to roughly half of the images.
        return iaa.Sometimes(0.5, aug)

    NB_BATCHES = 200
    max_size_of_one_job = 300

    aug = iaa.Sequential([
        sometimes(iaa.Affine(translate_px={
            "x": (-3, 3),
            "y": (-3, 3)
        })),
        sometimes(iaa.Affine(scale=(0.80, 1.20))),
        sometimes(iaa.Affine(rotate=(-35, 35))),
        sometimes(iaa.GaussianBlur(sigma=(0.1, 0.5)))
    ])

    time_start = time.time()

    # Number of pieces: ceil(len / max_size_of_one_job), at least one. The
    # ``pieces == 0`` test short-circuits before the modulo so that a small
    # data set cannot trigger a division by zero.
    pieces = len(training_dataset) // max_size_of_one_job
    print("before if check ", pieces)
    if pieces == 0 or len(training_dataset) % (pieces *
                                               max_size_of_one_job) != 0:
        pieces = pieces + 1
        print("in if check ", pieces)

    # Split the training data into pieces.
    split_training_dataset = np.array_split(training_dataset, pieces)
    split_training_labels = np.array_split(training_labels, pieces)

    # NB_BATCHES batches per piece, keeping pieces in order.
    batches = []
    for piece_images, piece_labels in zip(split_training_dataset,
                                          split_training_labels):
        batches.extend(
            UnnormalizedBatch(images=piece_images, data=piece_labels)
            for _ in range(NB_BATCHES))

    with aug.pool(processes=32, maxtasksperchild=200, seed=1) as pool:
        # Send the tasks to the child processes.
        print("sending for augmentation batches : ", len(batches))
        batches_aug = pool.map_batches(batches)

    # Join all augmented sets with the original pictures.
    print("joining all the batches with original pictures")
    all_training_dataset = np.concatenate(
        (training_dataset,
         np.concatenate([b.images_aug[:] for b in batches_aug])))
    all_training_labels = np.concatenate(
        (training_labels, np.concatenate([b.data[:] for b in batches_aug])))

    print("final size of the all_training_dataset : ",
          len(all_training_dataset))

    # Show two fixed pictures, then ten random ones.
    for fixed_index in (79, 81):
        plt.imshow(all_training_dataset[fixed_index].reshape(28, 28),
                   interpolation='nearest')
        plt.show()
        print(all_training_labels[fixed_index])

    print("----------------")
    for _ in range(10):
        random_picture = random.randrange(len(all_training_dataset))
        plt.imshow(all_training_dataset[random_picture].reshape(28, 28),
                   interpolation='nearest')
        plt.show()
        print(all_training_labels[random_picture])

    # The measured time includes the interactive plots above, as before.
    time_end = time.time()
    print("Augmentation done in %.2fs" % (time_end - time_start, ))

    print(" len of all_training_dataset : ", len(all_training_dataset))
    print(" len of all_training_labels : ", len(all_training_labels))
    print(" end ")