def __init__(self, path, input_size, batch_size, augmentations, mode='train'):
    """Collect the image files and pick the augmentation pipeline.

    Args:
        path (str): Path to the dataset directory; every ``*.JPG`` file
            directly inside it is picked up.
        input_size (int): Height/width the images are resized to.
        batch_size (int): Batch size to be used.
        augmentations (str): ``"train"`` selects the training pipeline,
            any other value selects the resize-only test pipeline.
        mode (str): Accepted for interface compatibility; not read here.

    Returns:
        None
    """
    random.seed(42)

    # Every discovered file is assigned the single class index 0.
    file_to_label = {name: 0 for name in glob.glob(path + '/*.JPG')}

    self.path = path
    self.names = list(file_to_label)
    self.labels = list(file_to_label.values())
    self.input_size = input_size
    self.batch_size = batch_size

    train_pipeline = Compose([
        HorizontalFlip(p=0.5),
        RandomContrast(limit=0.2, p=0.5),
        RandomGamma(gamma_limit=(80, 120), p=0.5),
        RandomBrightness(limit=0.2, p=0.5),
        HueSaturationValue(hue_shift_limit=5,
                           sat_shift_limit=20,
                           val_shift_limit=10,
                           p=.9),
        RandomRotate90(),
        Resize(input_size, input_size),
        ToFloat(max_value=255),
    ])
    test_pipeline = Compose([
        Resize(input_size, input_size),
        ToFloat(max_value=255),
    ])

    if augmentations == 'train':
        self.augment = train_pipeline
    else:
        self.augment = test_pipeline
def Auger(self):
    """Build the albumentations pipeline for this dataset.

    When ``self.is_train`` is true, colour jitter, random flips, float
    conversion, a small shift/scale/rotate and padding to ``self.padshape``
    come first. A random crop to ``self.shape`` followed by tensor
    conversion is always appended last.

    Returns:
        Compose: the assembled transform pipeline.
    """
    steps = []
    if self.is_train:
        steps.extend([
            HueSaturationValue(10, 10, 10, p=0.3),
            # The probability must be passed by keyword: in albumentations
            # releases whose first positional parameter is ``always_apply``,
            # ``HorizontalFlip(0.3)`` sets a truthy ``always_apply`` and
            # flips every image instead of 30% of them.
            HorizontalFlip(p=0.3),
            VerticalFlip(p=0.3),
            # Original author's note: may not work as intended; expected to
            # rescale [0, 255] -> [0.0, 1.0].
            ToFloat(always_apply=True),
            ShiftScaleRotate(
                shift_limit=0.1,
                scale_limit=0.1,
                rotate_limit=3,
                p=0.5,
                border_mode=cv2.BORDER_REFLECT),
            PadIfNeeded(self.padshape, self.padshape),
        ])
    steps.extend([
        # Normalisation is currently disabled. Previously used statistics:
        # mean=(0.397657144, 0.351649219, 0.305031406),
        # std=(0.12110683835022196, 0.1308642819666743, 0.14265566800591103)
        RandomCrop(self.shape, self.shape),
        ToTensor(),
    ])
    return Compose(steps)
def light_aug(p=1):
    """Return a mild augmentation pipeline that ends in ``uint16`` output.

    The image is converted to float, lightly augmented, then converted back
    with ``FromFloat(dtype='uint16', max_value=65535.0)``. An additional
    target named ``image1`` is registered with the same handling as
    ``image``.

    Args:
        p: Probability passed to the outer ``Compose``.

    Returns:
        Compose: the assembled pipeline.
    """
    steps = [
        # albumentations works on uint8 or float32 input; float32 values are
        # expected in [0.0, 1.0]. ``ToFloat()`` without ``max_value`` derives
        # the divisor from the input dtype. ``ToFloat(max_value=65535.0)``
        # would fix the divisor explicitly instead.
        ToFloat(),
        RandomBrightness(limit=(0, 0.2), p=0.2),
        HorizontalFlip(p=0.1),
        ShiftScaleRotate(shift_limit=1 / 14,
                         scale_limit=0.1,
                         rotate_limit=15,
                         p=0.9),
        OpticalDistortion(distort_limit=0.1, shift_limit=0.05, p=0.5),
        # Convert back to the original integer type; with ``max_value`` all
        # values are multiplied by it before the cast.
        FromFloat(dtype='uint16', max_value=65535.0),
    ]
    return Compose(steps, p=p, additional_targets={"image1": "image"})
def make_transforms(data_shape,
                    resize=None,
                    windows=('soft_tissue', ),
                    windows_force_rgb=True,
                    max_value=1.0,
                    apply_crop=True,
                    **kwargs):
    """Build the deterministic (non-augmenting) preprocessing pipeline.

    Args:
        data_shape: Two-element shape; index 0 is used as crop width and
            index 1 as crop height.
        resize: ``None``/falsy for no resize, ``'auto'`` to resize to
            ``data_shape``, otherwise arguments unpacked into ``Resize``.
        windows: Window names forwarded to ``ChannelWindowing``.
        windows_force_rgb: Forwarded to ``ChannelWindowing`` as ``force_rgb``.
        max_value: Divisor forwarded to ``ToFloat``.
        apply_crop: Whether to add a centre crop to ``data_shape``.
        **kwargs: Accepted and ignored.

    Returns:
        Compose: the assembled pipeline.
    """
    # NOTE(review): data_shape is indexed as (width, height) for the crop
    # but unpacked positionally into Resize for 'auto' - confirm the axis
    # order is intended for non-square shapes.
    target_size = data_shape if resize == 'auto' else resize

    pipeline = []
    if target_size:
        pipeline.append(Resize(*target_size))
    if apply_crop:
        pipeline.append(
            CenterCrop(height=data_shape[1], width=data_shape[0]))
    pipeline.append(
        ChannelWindowing(windows=windows, force_rgb=windows_force_rgb))
    pipeline.append(ToFloat(max_value=max_value))
    return Compose(pipeline)
def strong_aug(p=1):
    """Return a heavy augmentation pipeline that ends in ``uint16`` output.

    Args:
        p: Probability passed to the outer ``Compose``.

    Returns:
        Compose: float conversion, flips/rotations, optional noise, blur,
        affine jitter and distortion, then conversion back to ``uint16``.
    """
    noise = OneOf([
        IAAAdditiveGaussianNoise(),
        GaussNoise(),
    ], p=0.2)
    blur = OneOf([
        MotionBlur(p=0.2),
        MedianBlur(blur_limit=3, p=0.1),
        Blur(blur_limit=3, p=0.1),
    ], p=0.2)
    affine = ShiftScaleRotate(shift_limit=0.0625,
                              scale_limit=0.2,
                              rotate_limit=45,
                              p=0.2)
    distortion = OneOf([
        OpticalDistortion(p=0.3),
        GridDistortion(p=0.1),
        IAAPiecewiseAffine(p=0.3),
    ], p=0.2)

    steps = [
        ToFloat(),
        RandomRotate90(),
        Flip(),
        Transpose(),
        noise,
        blur,
        affine,
        distortion,
        FromFloat(dtype='uint16', max_value=65535.0),
    ]
    return Compose(steps, p=p)
def test_transform(from_dicom: bool) -> Compose:
    """Return the inference-time pipeline: float conversion then tensor.

    Args:
        from_dicom: When true, ``stack_channels_for_rgb`` is applied to the
            image before anything else.

    Returns:
        Compose: the assembled pipeline.
    """
    leading = [Lambda(image=stack_channels_for_rgb)] if from_dicom else []
    return Compose(leading + [ToFloat(), ToTensorV2()])
def standarize_image(image):
    """Scale an image by 1/255 and normalise it per channel.

    ``ToFloat(max_value=255.0)`` is applied first, then ``Normalize`` with
    the fixed mean/std below and ``max_pixel_value=1.0``.

    Args:
        image: Image passed to the albumentations transforms.

    Returns:
        The transformed image.
    """
    channel_mean = (0.485, 0.456, 0.406)
    channel_std = (0.229, 0.224, 0.225)

    scaled = ToFloat(max_value=255.0)(image=image)['image']
    normalizer = Normalize(mean=channel_mean,
                           std=channel_std,
                           max_pixel_value=1.0)
    return normalizer(image=scaled)['image']
def evaluate_model(model, statedict_PATH, num_images, img_size=(56, 56)):
    """Run the model on ``data/test.csv``, plot a preview and write a submission.

    Args:
        model: Network whose weights are loaded from ``statedict_PATH``.
        statedict_PATH: Path of the saved ``state_dict`` to load.
        num_images: Number of test images to show in the preview grid.
        img_size: ``(height, width)`` unpacked into ``Resize``.

    Side effects:
        Reads ``data/test.csv`` and ``data/sample_submission.csv``, draws a
        matplotlib figure, and (re)writes ``data/sample_submission_temp.csv``.
        Relies on a module-level ``device``.
    """
    # Load from the path the caller supplied (previously an unrelated
    # global ``PATH`` was used and the parameter was ignored).
    model.load_state_dict(torch.load(statedict_PATH))
    model.eval()

    batch_size = 256
    test_augmentations = Compose(
        [Resize(*img_size), ToFloat(max_value=255), ToTensor()], p=1)
    test_df = pd.read_csv("data/test.csv")
    test_df = test_df.reset_index()
    test_dataset = test_digitdataset(data=test_df, transform=test_augmentations)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    test_tq = tq(test_loader, total=int(len(test_loader)))

    preds, labels = [], []
    # First ``num_images`` inputs, kept so each previewed image lines up
    # with its own prediction (the loader is not shuffled).
    preview_images = []
    with torch.no_grad():
        for images, label in test_tq:
            images = images["image"].to(device, dtype=torch.float)
            outputs = model(images)
            preds.extend(outputs.cpu().numpy())
            labels.extend(label.cpu().numpy() + 1)
            missing = num_images - len(preview_images)
            if missing > 0:
                preview_images.extend(images[:missing].cpu())

    preds = np.argmax(np.array(preds), axis=1).reshape(-1)

    # squeeze=False keeps ``axes`` two-dimensional even for a single row.
    _, axes = plt.subplots(nrows=num_images // 4 + 1,
                           ncols=4,
                           figsize=(64, 64),
                           sharex=True,
                           sharey=True,
                           squeeze=False)
    for ax, image, pred in zip(axes.flat, preview_images, preds):
        ax.imshow(image.squeeze().detach().permute(1, 2, 0).cpu().numpy())
        ax.set_title(f"pred = {pred}")

    test_preds = pd.read_csv("data/sample_submission.csv")
    test_preds.ImageId = labels
    test_preds.Label = preds
    save_file = "data/sample_submission_temp.csv"
    if os.path.exists(save_file):
        os.remove(save_file)
    test_preds.to_csv(save_file, index=False)
    print("Submission file created successfully")
def __init__(self,
             time_series_metadata: Dict,
             time_series_frames: List[TimeseriesMetadataFrameKey],
             batch_size=32,
             dim=(NETWORK_DEMS, NETWORK_DEMS),
             time_steps=TIME_STEPS,
             min_time_steps=MIN_TIME_STEPS,
             n_channels=2,
             output_dim=(NETWORK_DEMS, NETWORK_DEMS),
             output_channels=1,
             n_classes=7,
             shuffle=True,
             dataset_directory="",
             clip_range: Optional[Tuple[float, float]] = None,
             training=True,
             augmentations=Compose([
                 ToFloat(max_value=255, p=0.0),
             ]),
             min_samples=20000):
    """Store the generator configuration and prepare the frame list.

    Args:
        time_series_metadata: Stored as ``self.list_IDs``.
        time_series_frames: Frame keys; each entry's first two items are
            passed to ``CDLFrameData`` together with ``dataset_directory``.
        batch_size, dim, time_steps, min_time_steps, n_channels,
        output_dim, output_channels, n_classes, shuffle, clip_range:
            Stored unchanged on the instance.
        dataset_directory: Directory forwarded to ``CDLFrameData``.
        training: When true, frames are filtered by ``__meet_min_steps``
            and then oversampled up to ``min_samples`` entries.
        augmentations: Pipeline stored as ``self.augment``.
        min_samples: Minimum number of frames to reach by oversampling.
    """
    self.class_mode = 'categorical'
    self.list_IDs = time_series_metadata
    # Private copy: the oversampling below extends this list in place and
    # must not grow the list owned by the caller.
    self.frame_data = list(time_series_frames)
    self.dataset_directory = dataset_directory
    self.batch_size = batch_size
    self.dim = dim
    self.training = training
    self.time_steps = time_steps
    self.min_time_steps = min_time_steps
    self.n_channels = n_channels
    self.output_dim = output_dim
    self.output_channels = output_channels
    self.n_classes = n_classes
    self.shuffle = shuffle
    self.clip_range = clip_range
    self.augment = augmentations
    self.metadata = []
    self.init = False

    if training:
        self.__meet_min_steps()
        # Duplicate random frames until ``min_samples`` is reached. With no
        # frames there is nothing to duplicate, so stop instead of spinning
        # forever on an empty sample.
        while self.frame_data and len(self.frame_data) < min_samples:
            shortfall = min_samples - len(self.frame_data)
            self.frame_data.extend(
                random.sample(self.frame_data,
                              min(shortfall, len(self.frame_data))))

    self.frame_data = [
        CDLFrameData(sample[0], sample[1], dataset_directory)
        for sample in self.frame_data
    ]
    self.on_epoch_end()
def __init__(self,
             image_filenames,
             labels,
             root_directory='',
             batch_size=128,
             mix=False,
             shuffle=True,
             augment=True):
    """Store the generator settings and build the image pipeline.

    Args:
        image_filenames: Stored as ``self.image_filenames``.
        labels: Stored as ``self.labels``.
        root_directory: Stored as ``self.root_directory``.
        batch_size: Stored as ``self.batch_size``.
        mix: Stored as ``self.is_mix``.
        shuffle: When true, ``on_epoch_end`` is called once here.
        augment: When true, the random augmentations are placed in front
            of the final ``ToFloat`` step.
    """
    self.image_filenames = image_filenames
    self.labels = labels
    self.root_directory = root_directory
    self.batch_size = batch_size
    self.is_mix = mix
    self.is_augment = augment
    self.shuffle = shuffle
    if self.shuffle:
        self.on_epoch_end()

    pipeline = []
    if self.is_augment:
        pipeline.extend([
            Blur(),
            Flip(),
            Transpose(),
            ShiftScaleRotate(),
            RandomBrightnessContrast(),
            HueSaturationValue(),
            CLAHE(),
            GridDistortion(),
            ElasticTransform(),
            CoarseDropout(),
        ])
    # Both variants end by dividing by 255.
    pipeline.append(ToFloat(max_value=255.0, p=1.0))
    self.generator = Compose(pipeline, p=1.0)
def get_transform(opt, method=cv2.INTER_LINEAR):
    """Build a pipeline from the options mapping.

    Args:
        opt: Options container. ``'resize'`` inside ``opt['preprocess']``
            adds a ``Resize`` to ``opt['input_size'][0:2]``; ``opt['tofloat']``
            equal to ``True`` adds ``ToFloat``.
        method: Interpolation flag passed to ``Resize``.

    Returns:
        Compose: the assembled (possibly empty) pipeline.
    """
    steps = []

    wants_resize = 'preprocess' in opt and 'resize' in opt['preprocess']
    if wants_resize:
        target = opt['input_size']
        steps.append(Resize(target[0], target[1], method))

    # Equality (not truthiness) is kept on purpose so that only values
    # equal to True enable the conversion.
    wants_float = 'tofloat' in opt and opt['tofloat'] == True  # noqa: E712
    if wants_float:
        steps.append(ToFloat())

    return Compose(steps)
def __init__(self,
             mean: Tuple[float, float, float] = (0.485, 0.456, 0.406),
             std: Tuple[float, float, float] = (0.229, 0.224, 0.225)):
    """Configure a two-step pipeline: divide by 255, then normalise.

    Args:
        mean: Per-channel mean passed to ``Normalize``.
        std: Per-channel standard deviation passed to ``Normalize``.
    """
    to_unit_range = ToFloat(max_value=255.0)
    # max_pixel_value=1.0 because the preceding step already rescaled.
    standardise = Normalize(mean=mean, std=std, max_pixel_value=1.0)
    super().__init__(
        transformations=[to_unit_range, standardise],
        global_application_probab=1.0,
    )
def get_transforms():
    """Return the ``(train, validation)`` augmentation pipelines.

    The training pipeline flips horizontally, optionally applies one
    photometric and one geometric transform, then ``ToFloat(max_value=1)``.
    The validation pipeline is the ``ToFloat`` step alone.
    """
    photometric = OneOf([
        RandomContrast(),
        RandomGamma(),
        RandomBrightness(),
    ], p=0.3)
    geometric = OneOf([
        ElasticTransform(alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03),
        GridDistortion(),
        OpticalDistortion(distort_limit=2, shift_limit=0.5),
    ], p=0.3)

    train_steps = [
        HorizontalFlip(p=0.5),
        photometric,
        geometric,
        ToFloat(max_value=1),
    ]
    val_steps = [ToFloat(max_value=1)]
    return Compose(train_steps, p=1), Compose(val_steps, p=1)
def strong_aug(p=1.0):
    """Return a heavy augmentation pipeline driven by the global ``args``.

    A random sized crop and the final ``ToFloat(max_value=255)`` are part of
    the outer pipeline; everything in between lives in an inner ``Compose``
    applied with probability ``p``.

    Args:
        p: Probability of the inner augmentation group.

    Returns:
        Compose: the assembled pipeline.
    """
    # min_max_height is the crop height range before resizing (endpoints
    # included); crop width is crop height * w2h_ratio. The crop is then
    # resized to HEIGHT x WIDTH.
    crop = RandomSizedCrop((100, HEIGHT), HEIGHT, WIDTH, w2h_ratio=1.0, p=1.0)

    noise = OneOf([IAAAdditiveGaussianNoise(), GaussNoise()], p=0.2)
    blur = OneOf([MedianBlur(blur_limit=3), Blur(blur_limit=3), MotionBlur()])
    affine = ShiftScaleRotate(args.shift, args.scale, args.rotate)
    warp = OneOf([
        GridDistortion(p=0.5),
        ElasticTransform(p=0.5),
        IAAPerspective(),
        IAAPiecewiseAffine(),
    ])
    colour = OneOf([
        RGBShift(args.r_shift, args.g_shift, args.b_shift),
        HueSaturationValue(args.hue_shift, args.sat_shift, args.val_shift),
        CLAHE(args.clip),
        RandomBrightnessContrast(args.brightness, args.contrast),
        RandomGamma(gamma_limit=(80, 120)),
        ImageCompression(quality_lower=75, quality_upper=100),
    ])

    augment_group = Compose(
        [Flip(), RandomRotate90(), Transpose(), noise, blur, affine, warp,
         colour],
        p=p,
    )
    return Compose([crop, augment_group, ToFloat(max_value=255)])
def __init__(self,
             normalization: Optional[str] = None,
             whitespace_normalization: bool = True,
             reorder: bool = True,
             im_transforms: Callable[[Any], torch.Tensor] = transforms.Compose([]),
             preload: bool = True,
             augmentation: bool = False) -> None:
    """Initialise an empty dataset with its text and image transforms.

    Args:
        normalization: If set, a text normalisation step with this form is
            added to the text transforms.
        whitespace_normalization: Add the whitespace normalisation step.
        reorder: Add the text reordering step.
        im_transforms: Image pipeline exposing a ``transforms`` sequence;
            its first two entries become ``head_transforms`` and the rest
            ``tail_transforms``.
        preload: Stored as ``self.preload``.
        augmentation: When true, an albumentations pipeline is created and
            stored in ``self.aug``.
    """
    self._images = []  # type: Union[List[Image], List[torch.Tensor]]
    self._gt = []  # type: List[str]
    self.alphabet = Counter()  # type: Counter

    # Text transformations, applied in the order they are added.
    text_steps = []  # type: List[Callable[[str], str]]
    if normalization:
        text_steps.append(
            partial(F_t.text_normalize, normalization=normalization))
    if whitespace_normalization:
        text_steps.append(F_t.text_whitespace_normalize)
    if reorder:
        text_steps.append(F_t.text_reorder)
    self.text_transforms = text_steps

    # Split the image pipeline in two: the head yields the final PIL image,
    # the tail performs the conversion to a tensor.
    pipeline = im_transforms.transforms
    self.head_transforms = transforms.Compose(pipeline[:2])
    self.tail_transforms = transforms.Compose(pipeline[2:])
    self.transforms = im_transforms

    self.preload = preload
    self.seg_type = 'baselines'

    self.aug = None
    if augmentation:
        from albumentations import (
            Compose, ToFloat, FromFloat, Flip, OneOf, MotionBlur, MedianBlur,
            Blur, ShiftScaleRotate, OpticalDistortion, ElasticTransform,
            RandomBrightnessContrast,
        )

        blur = OneOf([
            MotionBlur(p=0.2),
            MedianBlur(blur_limit=3, p=0.1),
            Blur(blur_limit=3, p=0.1),
        ], p=0.2)
        affine = ShiftScaleRotate(shift_limit=0.0625,
                                  scale_limit=0.2,
                                  rotate_limit=3,
                                  p=0.2)
        distortion = OneOf([
            OpticalDistortion(p=0.3),
            ElasticTransform(p=0.1),
        ], p=0.2)
        self.aug = Compose([ToFloat(), blur, affine, distortion], p=0.5)

    self.im_mode = '1'
def train_transform(
        from_dicom: bool,
        longest_max_size: int,
        additional_transforms: List[BasicTransform] = None
) -> Compose:
    """Return the training pipeline with bounding-box support.

    Args:
        from_dicom: When true, ``stack_channels_for_rgb`` is applied first.
        longest_max_size: Argument passed to ``LongestMaxSize``.
        additional_transforms: Extra transforms placed between the initial
            resize and the final float/tensor conversion; ``None`` means
            none.

    Returns:
        Compose: pipeline using ``pascal_voc`` boxes labelled via the
        ``labels`` field.
    """
    extra = [] if additional_transforms is None else additional_transforms

    head = [LongestMaxSize(longest_max_size)]
    if from_dicom:
        head = [Lambda(image=stack_channels_for_rgb)] + head
    tail = [ToFloat(), ToTensorV2()]

    box_params = BboxParams(format='pascal_voc', label_fields=['labels'])
    return Compose(head + extra + tail, bbox_params=box_params)
def get_augmentations_train():
    """Return the training augmentation pipeline.

    Uses the module-level ``h`` and ``w`` as the output size of the random
    sized crop.
    """
    photometric = OneOf([
        RandomContrast(),
        RandomGamma(),
        RandomBrightness(),
    ], p=0.3)
    geometric = OneOf([
        ElasticTransform(alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03),
        GridDistortion(),
        OpticalDistortion(distort_limit=2, shift_limit=0.5),
    ], p=0.3)
    crop = RandomSizedCrop(min_max_height=(176, 256), height=h, width=w, p=0.25)

    steps = [
        HorizontalFlip(p=0.5),
        photometric,
        geometric,
        crop,
        ToFloat(max_value=1),
    ]
    return Compose(steps, p=1)
def strong_tiff_aug(p=.5):
    """Return a heavy augmentation pipeline that ends in ``uint16`` output.

    Args:
        p: Probability passed to the outer ``Compose``.

    Returns:
        Compose: float conversion, flips/rotations, optional blur, affine
        jitter and distortion, then conversion back to ``uint16``.
    """
    blur = OneOf([
        MotionBlur(p=0.2),
        MedianBlur(blur_limit=3, p=0.1),
        Blur(blur_limit=3, p=0.1),
    ], p=0.2)
    affine = ShiftScaleRotate(shift_limit=0.0625,
                              scale_limit=0.2,
                              rotate_limit=45,
                              p=0.2)
    distortion = OneOf([
        OpticalDistortion(p=0.3),
        GridDistortion(p=0.1),
    ], p=0.2)

    steps = [
        # albumentations works on uint8 or float32 input; float32 values are
        # expected in [0.0, 1.0]. ``ToFloat()`` without ``max_value`` derives
        # the divisor from the input dtype. ``ToFloat(max_value=65535.0)``
        # would fix the divisor explicitly instead.
        ToFloat(),
        RandomRotate90(),
        Flip(),
        blur,
        affine,
        distortion,
        # Convert back to the original integer type; with ``max_value`` all
        # values are multiplied by it before the cast.
        FromFloat(dtype='uint16', max_value=65535.0),
    ]
    return Compose(steps, p=p)
def load_timeseries_frame_data(self,
                               data_paths,
                               dims,
                               n_channels,
                               timesteps,
                               augmentations=Compose([
                                   ToFloat(max_value=255, p=0.0),
                               ])):
    """Load one augmented time-series sample and its mask.

    Args:
        data_paths: Candidate frame paths to sample from.
        dims: Spatial dimensions; also used to reshape the mask.
        n_channels: Channel count passed to the sample loader.
        timesteps: Number of time steps wanted in the sample.
        augmentations: Pipeline handed to the augmentation helper.

    Returns:
        tuple: ``(x_stack, mask)`` where the mask is reshaped to
        ``(*dims, 1)``.
    """
    # Pick the time steps at random, topping up with further picks when the
    # series is shorter than requested.
    chosen_paths = self.__random_frame_sample(data_paths, timesteps)
    if len(chosen_paths) < timesteps:
        chosen_paths = self.__extend_sample_timesteps(chosen_paths, timesteps)

    # The model is trained on chronologically ordered data.
    chosen_paths = self.__sort_data(chosen_paths)
    sample = self.__create_timeseries_sample(chosen_paths, dims, n_channels,
                                             timesteps)

    # Remembered for the model-test file metadata.
    self.file_paths = chosen_paths

    mask = self.__get_mask(self.sub_dataset, self.frame_index_key)
    x_stack, mask = self.__augment_data(sample, mask, augmentations)
    return x_stack, mask.reshape((*dims, 1))
# Training pipeline: horizontal flip, at most one photometric change, at most
# one geometric distortion, an optional random sized crop resized to (h, w),
# and a final ToFloat step. `h` and `w` are defined elsewhere in the file.
AUGMENTATIONS_TRAIN = Compose([
    HorizontalFlip(p=0.5),
    OneOf([
        RandomContrast(),
        RandomGamma(),
        RandomBrightness(),
    ], p=0.3),
    OneOf([
        ElasticTransform(alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03),
        GridDistortion(),
        OpticalDistortion(distort_limit=2, shift_limit=0.5),
    ], p=0.3),
    RandomSizedCrop(min_max_height=(128, 256), height=h, width=w, p=0.5),
    ToFloat(max_value=1)
], p=1)

# Test pipeline: the ToFloat step only.
AUGMENTATIONS_TEST = Compose([ToFloat(max_value=1)], p=1)

# Fetch the first batch of 64 unshuffled samples and lay them out on a
# 16-column grid of axes, one axis per (image, mask) pair.
a = DataGenerator(batch_size=64, shuffle=False)
images, masks = a.__getitem__(0)
max_images = 64
grid_width = 16
grid_height = int(max_images / grid_width)
fig, axs = plt.subplots(grid_height, grid_width, figsize=(grid_width, grid_height))
for i, (im, mask) in enumerate(zip(images, masks)):
    # Row-major position of sample i in the grid.
    ax = axs[int(i / grid_width), i % grid_width]
AUGMENTATIONS_TRAIN = Compose([ HorizontalFlip(p=0.5), OneOf([ RandomContrast(), RandomGamma(), RandomBrightness(), ], p=0.3), OneOf([ ElasticTransform(alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03), GridDistortion(), OpticalDistortion(distort_limit=2, shift_limit=0.5), ], p=0.3), RandomSizedCrop(min_max_height=(156, 256), height=h, width=w, p=0.25), ToFloat(max_value=1) ], p=1) AUGMENTATIONS_TEST = Compose([ToFloat(max_value=1)], p=1) # # Train Set Images with Masks # In[9]: a = DataGenerator(batch_size=64, shuffle=False) images, masks = a.__getitem__(0) max_images = 64 grid_width = 16 grid_height = int(max_images / grid_width) fig, axs = plt.subplots(grid_height,
def apply_clahe():
    """Return a pipeline that always applies CLAHE and then divides by 255."""
    steps = [
        CLAHE(p=1.0, clip_limit=2.0),
        ToFloat(max_value=255, p=1),
    ]
    return Compose(steps, p=1)
'Train mean/std {:.3f}/{:.3f}\n'.format(train_mean, train_std) + \ 'Test mean/std {:.3f}/{:.3f}\n'.format(test_mean, test_std) +\ 'Train num/sample {:d}'.format(len(TRAIN_FILES)) + ' '.join(TRAIN_FILES[:2]) + \ '\nValid num/sample {:d}'.format(len(VALID_FILES)) + ' '.join(VALID_FILES[:2])+'\n' print2file(sout, LOG_FILE) ######################################################################## # Augmentations augment_train = Compose( [ Flip(p=0.5), # Flip vertically or horizontally or both ShiftScaleRotate(rotate_limit=10, p=0.3), RandomBrightnessContrast(p=0.3), Normalize(mean=(train_mean, train_mean, train_mean), std=(train_std, train_std, train_std)), ToFloat(max_value=1.) ], p=1) # validation augment_valid = Compose([ Normalize(mean=(train_mean, train_mean, train_mean), std=(train_std, train_std, train_std)), ToFloat(max_value=1.) ], p=1) # normal prediction augment_test = Compose([ Normalize(mean=(test_mean, test_mean, test_mean), std=(test_std, test_std, test_std)),
def make_augmentation(data_shape,
                      resize=None,
                      hflip=0,
                      vflip=0,
                      scale=None,
                      rotate=None,
                      color=None,
                      deform=None,
                      rand_crop=None,
                      windows=('soft_tissue', ),
                      windows_force_rgb=True,
                      max_value=1.0):
    """Build the training augmentation pipeline from simple options.

    Args:
        data_shape: Two-element shape; index 0 is used as width and index 1
            as height for padding and cropping.
        resize: Falsy for no resize, ``'auto'`` to resize to ``data_shape``,
            otherwise arguments unpacked into ``Resize``.
        hflip: Probability of a horizontal flip; falsy disables it.
        vflip: Probability of a vertical flip; falsy disables it.
        scale: ``RandomScale`` keyword dict, or a bare ``scale_limit``.
        rotate: ``Rotate`` keyword dict, or a bare ``limit``.
        color: Dict with optional ``p``, ``contrast``, ``gamma`` and
            ``brightness`` entries (each a keyword dict); one of the
            configured transforms is applied with probability ``p``.
        deform: Dict with optional ``p``, ``elastic``, ``grid`` and
            ``optical`` entries, handled like ``color``.
        rand_crop: ``RandomCrop`` keyword dict, or a bare probability.
        windows: Window names forwarded to ``ChannelWindowing``.
        windows_force_rgb: Forwarded to ``ChannelWindowing`` as ``force_rgb``.
        max_value: Divisor forwarded to ``ToFloat``.

    Returns:
        Compose: the assembled pipeline.
    """
    transforms = []

    if resize == 'auto':
        resize = data_shape
    if resize:
        transforms.append(Resize(*resize))

    if hflip:
        transforms.append(HorizontalFlip(p=hflip))
    if vflip:
        transforms.append(VerticalFlip(p=vflip))

    if scale:
        if not isinstance(scale, dict):
            scale = {'scale_limit': scale}
        transforms.append(RandomScale(**scale))

    if rotate:
        if not isinstance(rotate, dict):
            rotate = {'limit': rotate}
        transforms.append(Rotate(**rotate))

    if deform:
        oneof = []
        deform_p = deform.get('p', .3)
        elastic = deform.get('elastic', None)
        grid = deform.get('grid', None)
        optical = deform.get('optical', None)
        if elastic:
            oneof.append(ElasticTransform(**elastic))
        if grid:
            oneof.append(GridDistortion(**grid))
        if optical:
            oneof.append(OpticalDistortion(**optical))
        transforms.append(OneOf(oneof, p=deform_p))

    # Pad first so the crops below never ask for more pixels than exist.
    transforms.append(
        PadIfNeeded(min_height=data_shape[1], min_width=data_shape[0]))

    if rand_crop:
        if isinstance(rand_crop, dict):
            # Copy before filling in defaults so the caller's dict is not
            # modified.
            rand_crop = dict(rand_crop)
        else:
            rand_crop = {'p': rand_crop}
        rand_crop.setdefault('p', 1.0)
        transforms.append(
            RandomCrop(height=data_shape[1], width=data_shape[0], **rand_crop))

    # The centre crop fixes the final size whether or not the random crop
    # was applied.
    transforms.append(
        PadIfNeeded(min_height=data_shape[1], min_width=data_shape[0]))
    transforms.append(
        CenterCrop(height=data_shape[1], width=data_shape[0]))

    if color:
        oneof = []
        color_p = color.get('p', .3)
        contrast = color.get('contrast', None)
        gamma = color.get('gamma', None)
        brightness = color.get('brightness', None)
        if contrast:
            oneof.append(RandomContrast(**contrast))
        if gamma:
            oneof.append(RandomGamma(**gamma))
        if brightness:
            oneof.append(RandomBrightness(**brightness))
        transforms.append(OneOf(oneof, p=color_p))

    transforms.append(
        ChannelWindowing(
            windows=windows,
            force_rgb=windows_force_rgb,
        ))
    transforms.append(ToFloat(max_value=max_value))
    return Compose(transforms)
#limiting brightness to 255 new_image = np.vectorize(lambda x: x if x < 255 else 255)(new_image) return new_image augmentations = Compose([ ShiftScaleRotate(shift_limit=0.07, scale_limit=0.07, rotate_limit=50, border_mode=cv2.BORDER_CONSTANT, value=0, p=0.95), Lambda(image=randomBlotch, p=0.7), ToFloat(max_value=255) # normalizes the data to [0,1] in float ]) class AugmentedDataSequence(Sequence): def __init__(self, x_set, y_set, batch_size=32): self.x = x_set self.y = y_set self.batch_size = batch_size def __len__(self): return int(np.ceil((len(self.x) + 0.0) / self.batch_size)) def __getitem__(self, index): x_batch = self.x[index * self.batch_size:(index + 1) * self.batch_size] y_batch = self.y[index * self.batch_size:(index + 1) * self.batch_size]
Compose, HorizontalFlip, CLAHE, HueSaturationValue, RandomBrightness, RandomContrast, RandomGamma,OneOf, ToFloat, ShiftScaleRotate,GridDistortion, ElasticTransform, JpegCompression, HueSaturationValue, RGBShift, RandomBrightness, RandomContrast, Blur, MotionBlur, MedianBlur, GaussNoise,CenterCrop, IAAAdditiveGaussianNoise,GaussNoise,OpticalDistortion,RandomSizedCrop ) size = 512 AUGMENTATIONS_TRAIN = Compose([ HorizontalFlip(p=0.5), OneOf([ RandomContrast(), RandomGamma(), RandomBrightness() ], p=0.3), OneOf([ ElasticTransform(alpha = 120, sigma=120*0.05,alpha_affine = 12*0.03), GridDistortion(), OpticalDistortion(distort_limit = 2, shift_limit = 0.5) ], p=0.3), RandomSizedCrop(min_max_height=(512,1024),height = size, width =size,p=1), ToFloat(max_value=1) ], p =1) AUGMENTATIONS_TEST = Compose([ RandomSizedCrop(min_max_height=(512,1024),height = size, width =size,p=1), ToFloat(max_value=1) ],p=1) AUGMENTATIONS_TEST2 = Compose([ ToFloat(max_value=1) ],p=1)
random_state=seed, stratify=train.iloc[:, 0]) train_df = train_df.reset_index(drop=True) val_df = val_df.reset_index(drop=True) train_augmentations = Compose([ albumentations.OneOf([ GridMask(num_grid=3, mode=0, rotate=15), GridMask(num_grid=3, mode=2, rotate=15), ], p=0.7), RandomAugMix(severity=4, width=3, alpha=1.0, p=0.7), Resize(*img_size), ShiftScaleRotate(shift_limit=0, scale_limit=0, rotate_limit=10, p=0.5), ToFloat(max_value=255), ToTensor() ], p=1) val_augmentations = Compose( [Resize(*img_size), ToFloat(max_value=255), ToTensor()], p=1) train_dataset = digitdataset(data=train_df, transform=train_augmentations) val_dataset = digitdataset(data=val_df, transform=val_augmentations) # ============================================================================= # image , label = train_dataset.__getitem__(15) # # plt.imshow(image["image"].permute(1 , 2 , 0).numpy(), cmap = 'gray') # plt.title(str(label))
def __init__(self,
             split: Callable[[str], str] = lambda x: path.splitext(x)[0],
             suffix: str = '.gt.txt',
             normalization: Optional[str] = None,
             whitespace_normalization: bool = True,
             reorder: bool = True,
             im_transforms: Callable[[Any], torch.Tensor] = transforms.Compose([]),
             preload: bool = True,
             augmentation: bool = False) -> None:
    """
    Reads a list of image-text pairs and creates a ground truth set.

    Args:
        split (func): Function for generating the base name without
                      extensions from paths
        suffix (str): Suffix to attach to image base name for text
                      retrieval
        normalization (str): Unicode normalization for gt
        whitespace_normalization (bool): Normalizes unicode whitespace and
                                         strips whitespace.
        reorder (bool): Whether to rearrange code points in "display"/LTR
                        order
        im_transforms (func): Function taking an PIL.Image and returning a
                              tensor suitable for forward passes. Must
                              expose a `transforms` sequence, which is
                              split after its second entry.
        preload (bool): Enables preloading and preprocessing of image files.
        augmentation (bool): Enables the albumentations augmentation
                             pipeline stored in `self.aug`.
    """
    self.suffix = suffix
    self.split = lambda x: split(x) + self.suffix
    self._images = []  # type: Union[List[Image], List[torch.Tensor]]
    self._gt = []  # type: List[str]
    self.alphabet = Counter()  # type: Counter
    self.text_transforms = []  # type: List[Callable[[str], str]]
    # split image transforms into two. one part giving the final PIL image
    # before conversion to a tensor and the actual tensor conversion part.
    self.head_transforms = transforms.Compose(im_transforms.transforms[:2])
    self.tail_transforms = transforms.Compose(im_transforms.transforms[2:])
    self.aug = None

    self.preload = preload
    self.seg_type = 'bbox'
    # built text transformations
    if normalization:
        self.text_transforms.append(
            lambda x: unicodedata.normalize(cast(str, normalization), x))
    if whitespace_normalization:
        # Raw string: '\s' in a normal literal is an invalid escape
        # sequence; the pattern itself is unchanged.
        self.text_transforms.append(
            lambda x: regex.sub(r'\s', ' ', x).strip())
    if reorder:
        self.text_transforms.append(bd.get_display)
    if augmentation:
        from albumentations import (
            Compose, ToFloat, FromFloat, Flip, OneOf, MotionBlur, MedianBlur,
            Blur, ShiftScaleRotate, OpticalDistortion, ElasticTransform,
            RandomBrightnessContrast,
        )

        self.aug = Compose([
            ToFloat(),
            OneOf([
                MotionBlur(p=0.2),
                MedianBlur(blur_limit=3, p=0.1),
                Blur(blur_limit=3, p=0.1),
            ], p=0.2),
            ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2,
                             rotate_limit=45, p=0.2),
            OneOf([
                OpticalDistortion(p=0.3),
                ElasticTransform(p=0.1),
            ], p=0.2),
        ], p=0.5)

    self.im_mode = '1'
def __init__(self,
             imgs: Sequence[str] = None,
             suffix: str = '.path',
             line_width: int = 4,
             im_transforms: Callable[[Any], torch.Tensor] = transforms.Compose([]),
             mode: str = 'path',
             augmentation: bool = False,
             valid_baselines: Sequence[str] = None,
             merge_baselines: Dict[str, Sequence[str]] = None,
             valid_regions: Sequence[str] = None,
             merge_regions: Dict[str, Sequence[str]] = None):
    """
    Reads a list of image-json pairs and creates a data set.

    Args:
        imgs (list): Input files. In alto, page, and xml mode each entry
                     is handed to the matching parser and replaced by the
                     image path the parser reports; in path mode the
                     sequence is stored unchanged.
        suffix (str): Suffix to attach to image base name to load JSON
                      files from. Not read by this initializer.
        line_width (int): Height of the baseline in the scaled input.
        im_transforms (func): Image pipeline exposing a `transforms`
                              sequence; it is split after its second
                              entry into head and tail transforms.
        mode (str): Either path, alto, page, xml, or None. In alto, page,
                    and xml mode the baseline paths and image data is
                    retrieved from an ALTO/PageXML file. In `None` mode
                    data is iteratively added through the `add` method.
        augmentation (bool): Enable/disable augmentation.
        valid_baselines (list): Sequence of valid baseline identifiers. If
                                `None` all are valid.
        merge_baselines (dict): Sequence of baseline identifiers to merge.
                                Note that merging occurs after entities not
                                in valid_* have been discarded.
        valid_regions (list): Sequence of valid region identifiers. If
                              `None` all are valid.
        merge_regions (dict): Sequence of region identifiers to merge.
                              Note that merging occurs after entities not
                              in valid_* have been discarded.

    Raises:
        Exception: If `mode` is not one of the supported values.
    """
    super().__init__()
    self.mode = mode
    self.im_mode = '1'
    self.aug = None
    self.targets = []
    # n-th entry contains semantic of n-th class
    self.class_mapping = {
        'aux': {
            '_start_separator': 0,
            '_end_separator': 1
        },
        'baselines': {},
        'regions': {}
    }
    self.class_stats = {
        'baselines': defaultdict(int),
        'regions': defaultdict(int)
    }
    # Indices 0 and 1 are taken by the two auxiliary separator classes.
    self.num_classes = 2
    self.mbl_dict = merge_baselines if merge_baselines is not None else {}
    self.mreg_dict = merge_regions if merge_regions is not None else {}
    self.valid_baselines = valid_baselines
    self.valid_regions = valid_regions
    if mode in ['alto', 'page', 'xml']:
        if mode == 'alto':
            fn = parse_alto
        elif mode == 'page':
            fn = parse_page
        elif mode == 'xml':
            fn = parse_xml
        im_paths = []
        self.targets = []
        for img in imgs:
            try:
                data = fn(img)
                im_paths.append(data['image'])
                # Group baselines by (possibly merged) script identifier,
                # keeping only the valid ones, and count them per class.
                lines = defaultdict(list)
                for line in data['lines']:
                    if valid_baselines is None or line['script'] in valid_baselines:
                        lines[self.mbl_dict.get(line['script'], line['script'])].append(line['baseline'])
                        self.class_stats['baselines'][self.mbl_dict.get(line['script'], line['script'])] += 1
                # Same grouping and counting for regions.
                regions = defaultdict(list)
                for k, v in data['regions'].items():
                    if valid_regions is None or k in valid_regions:
                        regions[self.mreg_dict.get(k, k)].extend(v)
                        self.class_stats['regions'][self.mreg_dict.get(k, k)] += len(v)
                data['regions'] = regions
                self.targets.append({
                    'baselines': lines,
                    'regions': data['regions']
                })
            except KrakenInputException as e:
                # Files the parser rejects are logged and skipped.
                logger.warning(e)
                continue
        # get line types
        imgs = im_paths
        # calculate class mapping
        line_types = set()
        region_types = set()
        for page in self.targets:
            for line_type in page['baselines'].keys():
                line_types.add(line_type)
            for reg_type in page['regions'].keys():
                region_types.add(reg_type)
        # idx starts at -1 so that an empty set adds 0 to num_classes.
        idx = -1
        for idx, line_type in enumerate(line_types):
            self.class_mapping['baselines'][line_type] = idx + self.num_classes
        self.num_classes += idx + 1
        idx = -1
        for idx, reg_type in enumerate(region_types):
            self.class_mapping['regions'][reg_type] = idx + self.num_classes
        self.num_classes += idx + 1
    elif mode == 'path':
        pass
    elif mode is None:
        imgs = []
    else:
        raise Exception('invalid dataset mode')
    if augmentation:
        from albumentations import (
            Compose, ToFloat, FromFloat, RandomRotate90, Flip, OneOf, MotionBlur, MedianBlur, Blur,
            ShiftScaleRotate, OpticalDistortion, ElasticTransform, RandomBrightnessContrast,
            HueSaturationValue,
        )

        self.aug = Compose([
            ToFloat(),
            RandomRotate90(),
            Flip(),
            OneOf([
                MotionBlur(p=0.2),
                MedianBlur(blur_limit=3, p=0.1),
                Blur(blur_limit=3, p=0.1),
            ], p=0.2),
            ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2, rotate_limit=45, p=0.2),
            OneOf([
                OpticalDistortion(p=0.3),
                ElasticTransform(p=0.1),
            ], p=0.2),
            HueSaturationValue(hue_shift_limit=20, sat_shift_limit=0.1, val_shift_limit=0.1, p=0.3),
        ], p=0.5)
    self.imgs = imgs
    self.line_width = line_width
    # split image transforms into two. one part giving the final PIL image
    # before conversion to a tensor and the actual tensor conversion part.
    self.head_transforms = transforms.Compose(im_transforms.transforms[:2])
    self.tail_transforms = transforms.Compose(im_transforms.transforms[2:])
    self.seg_type = None
# Training pipeline: unconditional horizontal flip, photometric jitter,
# affine jitter with reflected borders, then division by 255.
AUGMENTATIONS_TRAIN = Compose([
    HorizontalFlip(p=1),
    RandomContrast(limit=0.2, p=0.5),
    RandomGamma(gamma_limit=(80, 120), p=0.5),
    RandomBrightness(limit=0.2, p=0.5),
    HueSaturationValue(hue_shift_limit=5,
                       sat_shift_limit=20,
                       val_shift_limit=10,
                       p=.9),
    # CLAHE(p=1.0, clip_limit=2.0),
    ShiftScaleRotate(shift_limit=0.0625,
                     scale_limit=0.1,
                     rotate_limit=15,
                     border_mode=cv2.BORDER_REFLECT_101,
                     p=0.8),
    ToFloat(max_value=255)
])

# Stand-alone flip kept for quick visual experiments (see the disabled
# line below).
aug = HorizontalFlip(p=1)

from matplotlib import pyplot as plt

# Raw string: the Windows separators would otherwise be read as (invalid)
# escape sequences. The resulting path value is unchanged.
image_path = r'D:\Data\iris_pattern\Multi_output2_test40_train160/train/defect_lacuna_normal_normal/defect_lacuna_normal_normal6.png'
image = cv2.imread(image_path)
if image is None:
    # cv2.imread signals an unreadable or missing file by returning None;
    # fail here with the path instead of with an opaque cvtColor error.
    raise FileNotFoundError('Could not read image: ' + image_path)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# image = aug(image=image)['image']

plt.figure(figsize=(10, 10))
plt.imshow(image)
plt.show()