def setup(self, stage=None):
    """Create train/val datasets for the 'fit' stage (or when stage is None).

    Builds SKU datasets with albumentations pipelines, then takes fixed,
    reproducible subsets via a seeded random_split: 5000 training and
    500 validation samples. The split remainders are discarded.
    """
    if stage == 'fit' or stage is None:
        self.train_set = Sku(
            csv_file=self.data_dir + '/annotations/annotations_train.csv',
            root_dir=self.data_dir + '/images',
            transform=A.Compose([
                A.HorizontalFlip(p=0.5),
                A.ShiftScaleRotate(p=0.5),
                A.RandomBrightnessContrast(p=0.2),
                A.RGBShift(p=0.2),
                A.RandomSizedBBoxSafeCrop(width=1333, height=800),
            ], bbox_params=A.BboxParams(
                format='pascal_voc',
                label_fields=['class_labels'])))
        # Fixed seed keeps the subset identical across runs. The unused
        # remainder was previously bound to a misleading `test_set` name;
        # it is discarded, so bind it to `_`.
        self.train_set, _ = torch.utils.data.random_split(
            self.train_set, [5000, len(self.train_set) - 5000],
            generator=torch.Generator().manual_seed(42))
        self.val_set = Sku(
            csv_file=self.data_dir + '/annotations/annotations_val.csv',
            root_dir=self.data_dir + '/images',
            transform=A.Compose([
                A.RandomSizedBBoxSafeCrop(width=1333, height=800),
            ], bbox_params=A.BboxParams(
                format='pascal_voc',
                label_fields=['class_labels'])))
        self.val_set, _ = torch.utils.data.random_split(
            self.val_set, [500, len(self.val_set) - 500],
            generator=torch.Generator().manual_seed(42))
def train_albumentations_tfms_pets():
    """Composes a pipeline of Albumentations transforms for the PETS
    dataset at the train stage.

    Returns:
        AlbumentationsTransform: Pipeline of Albumentations transforms

    Examples::

        >>> train_tfms = train_albumentations_tfms_pets()
        >>> train_ds = Dataset(train_records, train_tfms)

    [[https://albumentations.readthedocs.io/en/latest/_modules/albumentations/augmentations/transforms.html/|Albumentations Transforms ]]
    """
    import albumentations as A

    # ImageNet stats
    imagenet_mean, imagenet_std = IMAGENET_STATS

    return AlbuTransform([
        A.LongestMaxSize(384),
        # Positional args are (height, width); square here so order is moot.
        A.RandomSizedBBoxSafeCrop(320, 320, p=0.3),
        A.HorizontalFlip(),
        A.ShiftScaleRotate(rotate_limit=20),
        A.RGBShift(always_apply=True),
        A.RandomBrightnessContrast(),
        A.Blur(blur_limit=(1, 3)),
        A.Normalize(mean=imagenet_mean, std=imagenet_std),
    ])
def get_train_transform():
    """Training augmentation pipeline (pascal_voc boxes, 'labels' field)."""
    return A.Compose([
        A.Flip(p=0.5),
        A.Rotate(20, p=0.9),
        # NOTE(review): the original passed 0.2 positionally, which binds to
        # `erosion_rate` (signature: height, width, erosion_rate, ...), NOT
        # to `p`. The keyword form below preserves that behavior while
        # making the binding explicit — confirm whether p=0.2 was intended.
        A.RandomSizedBBoxSafeCrop(1024, 1024, erosion_rate=0.2),
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})
def get_train_transforms():
    """Training pipeline: bbox-safe crop, rare grayscale, horizontal flip,
    resize to 512x512, Cutout, normalization and tensor conversion.

    Boxes are pascal_voc with a 'labels' label field.
    """
    return A.Compose(
        [
            # A.RandomSizedCrop(min_max_height=(380, 676), height=1024, width=1024, p=0.5),
            A.RandomSizedBBoxSafeCrop(512, 512, erosion_rate=0.0, interpolation=1, p=0.5),
            # A.OneOf([
            #     A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit= 0.2,
            #                          val_shift_limit=0.2, p=0.9),
            #     A.RandomBrightnessContrast(brightness_limit=0.2,
            #                                contrast_limit=0.2, p=0.9),
            # ], p=0.9),
            A.ToGray(p=0.01),
            A.HorizontalFlip(p=0.5),
            # A.VerticalFlip(p=0.5),
            A.Resize(height=512, width=512, p=1),
            # NOTE: A.Cutout is deprecated in newer albumentations releases
            # in favour of CoarseDropout — kept as-is for compatibility.
            A.Cutout(num_holes=4, max_h_size=64, max_w_size=64, fill_value=0, p=0.5),
            A.Normalize(p=1),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(
            format='pascal_voc',
            min_area=0,
            min_visibility=0,
            label_fields=['labels']
        )
    )
def get_strong_train_transform():
    """Aggressive training pipeline: optional bbox-safe crop, flip,
    shift/scale/rotate, one degradation op, one photometric op, CLAHE,
    normalization and tensor conversion.

    Boxes: pascal_voc, dropped below 5 px area or 0.1 visibility.
    Relies on a module-level IMG_SIZE constant.
    """
    return A.Compose(
        [
            #A.Resize(height=IMG_SIZE, width=IMG_SIZE, p=1),
            # Positional args are (height, width); square, so order is moot.
            A.RandomSizedBBoxSafeCrop(
                IMG_SIZE, IMG_SIZE, interpolation=1, p=0.33),
            A.HorizontalFlip(),
            A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1,
                               rotate_limit=10, interpolation=1, p=0.5),
            # Exactly one degradation op: blur / median blur / JPEG artifacts
            A.OneOf([
                A.Blur(blur_limit=(1, 3), p=0.33),
                A.MedianBlur(blur_limit=3, p=0.33),
                A.ImageCompression(quality_lower=50, p=0.33),
            ], p=0.5),
            # Exactly one photometric op
            A.OneOf([
                A.RandomGamma(gamma_limit=(85, 115), p=0.33),
                A.RandomBrightnessContrast(brightness_limit=0.2, p=0.33),
                A.HueSaturationValue(hue_shift_limit=25,
                                     sat_shift_limit=25,
                                     val_shift_limit=30,
                                     p=0.5)
            ], p=0.34),
            A.CLAHE(clip_limit=2.5, p=0.5),
            A.Normalize(always_apply=True, p=1.0),
            ToTensorV2(p=1.0)
        ],
        bbox_params=A.BboxParams(format='pascal_voc',
                                 min_area=5,
                                 min_visibility=0.1,
                                 label_fields=['labels']))
def get_torch_dataset(self):
    """Build the train/valid TransformDatasets from self.cfg.

    Optional augmentations (flip, brightness, bbox-safe random crop) are
    prepended to the common transforms for the training split only; the
    validation split uses the common transforms alone.
    """
    cfg = self.cfg

    optional = []
    if cfg.aug_flip:
        optional.append(albu.HorizontalFlip(p=1.0))
    if cfg.aug_brightness:
        optional.append(albu.RandomBrightness(limit=0.2, p=1.0))
    if cfg.aug_randomcrop:
        optional.append(
            albu.RandomSizedBBoxSafeCrop(cfg.img_height, cfg.img_width))

    shared = self.get_common_transforms()

    def build(split, pipeline):
        # Each split wraps the raw dataset in its own Compose pipeline.
        return TransformDataset(
            AIEdgeDataset(cfg.data_dir, split, cfg.seed),
            albu.Compose(
                transforms=pipeline,
                bbox_params=AIEdgeDataset.bbox_params,
            ),
        )

    train_dataset = build("train", [*optional, *shared])
    valid_dataset = build("valid", shared)
    return train_dataset, valid_dataset
def albu_train():
    """Training pipeline: mandatory 512x512 bbox-safe crop, flips, one
    photometric op, median blur, coarse dropout, inversion and tensor
    conversion. Boxes: pascal_voc with a 'labels' field.
    """
    return A.Compose(
        [
            A.RandomSizedBBoxSafeCrop(512, 512, p=1.0),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.OneOf([
                A.RandomGamma(),
                A.RandomBrightnessContrast(brightness_limit=0.2,
                                           contrast_limit=0.2),
                A.HueSaturationValue(hue_shift_limit=0.2,
                                     sat_shift_limit=0.2,
                                     val_shift_limit=0.2)
            ], p=0.9),
            #A.CLAHE(p=1.0),  # CLAHE only supports uint8
            A.MedianBlur(blur_limit=7, p=0.5),
            A.CoarseDropout(max_height=64, max_width=64, fill_value=0,
                            min_holes=2, min_height=8, min_width=8, p=0.5),
            A.InvertImg(p=0.5),
            ToTensor()
        ],
        p=1.0,
        bbox_params={
            'format': 'pascal_voc',
            'min_area': 0,
            'min_visibility': 0,
            'label_fields': ['labels']
        })
def __init__(self, root, year, input_size, pooler_size):
    """COCO-style training dataset.

    Args:
        root: dataset root containing 'train{year}/' images and
            'annotations/instances_train{year}.json'.
        year: COCO release year used to build the paths above.
        input_size: (height, width) target for the bbox-safe crop.
        pooler_size: stored for later use by the pooling stage.
    """
    super(TrainDataset, self).__init__(root=os.path.join(root, f'train{year}'),
                                       annFile=os.path.join(
                                           root, 'annotations',
                                           f'instances_train{year}.json'))
    self.h, self.w = input_size
    self.pooler_size = pooler_size
    # Category-id <-> label mapping derived from the COCO annotations.
    self.cat_idx_list, self.cat_to_label_map, _ = tools.get_cat_label_map(
        self.coco, tools.COCO_CLASSES)
    # Geometric + photometric augmentation; boxes are COCO-format
    # (x, y, w, h) with per-box 'class_labels'.
    self.img_transform = alb.Compose([
        alb.RandomSizedBBoxSafeCrop(width=self.w, height=self.h),
        alb.HorizontalFlip(p=0.5),
        alb.ColorJitter(
            brightness=0.4, contrast=0.4, saturation=0.4, hue=0., p=0.8),
    ], bbox_params=alb.BboxParams(
        format='coco', label_fields=['class_labels']))
    # Anchor points and regression ranges precomputed for the input size
    # (presumably FCOS-style targets — confirm against `tools`).
    self.points, self.regress_ranges = tools.encode_points_and_regress_ranges(
        self.h, self.w)
def __init__(self, csv_path, data_path, train=True):
    """Dataset over a CSV of annotations.

    Args:
        csv_path: path to the annotation CSV.
        data_path: root directory of the images.
        train: stored mode flag. Both modes read the same CSV, so the
            original identical if/else branches were collapsed into a
            single read.
    """
    self.data = pd.read_csv(csv_path)
    self.transforms = transforms.ToTensor()
    self.data_path = data_path
    self.train = train
    # NOTE(review): `rotate_crop` is composed but never included in
    # `self.aug` below — dead code kept for reference; confirm whether
    # it was meant to be part of the pipeline.
    rotate_crop = albu.Compose([
        albu.Rotate(limit=10, p=1.),
        albu.RandomSizedCrop((185, 202), height=224, width=224, p=1.)
    ], p=0.5)
    color = albu.OneOf([
        albu.RandomBrightnessContrast(),
        albu.Blur(blur_limit=3),
        albu.GaussNoise()
    ], p=0.3)
    # Main pipeline: flip, bbox-safe crop to 224x224, one colour op.
    self.aug = albu.Compose([
        albu.HorizontalFlip(),
        albu.RandomSizedBBoxSafeCrop(
            height=224, width=224, erosion_rate=0.6, p=0.45),
        color
    ], bbox_params={
        'format': 'pascal_voc',
        'label_fields': ['category_id']
    }, p=1.)
def main():
    """Visual smoke test: iterate augmented VOC samples, decode the
    model output boxes and display them with OpenCV."""
    aug = get_aug([
        A.HorizontalFlip(p=.5),
        A.RandomSizedBBoxSafeCrop(width=448, height=448, erosion_rate=0,
                                  interpolation=cv2.INTER_CUBIC),
        A.RGBShift(p=.5),
        A.Blur(blur_limit=5, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.CLAHE(p=0.5),
    ])
    voc = VOCDataset(cfg.DATASET_PATH, classes_list=cfg.CLASSES,
                     image_set='train', transforms=aug)
    for i in range(1000):
        image, out = voc[i]
        boxes = post_processing(out)
        # Boxes appear to be normalized (cx, cy, w, h); the crop above
        # makes the image square, so a single side length scales both
        # axes — presumably intentional, confirm against post_processing.
        im_size = image.shape[0]
        for det in boxes:
            pt1 = (det[0] - det[2] / 2, det[1] - det[3] / 2)
            pt2 = (det[0] + det[2] / 2, det[1] + det[3] / 2)
            pt1 = (int(pt1[0] * im_size), int(pt1[1] * im_size))
            pt2 = (int(pt2[0] * im_size), int(pt2[1] * im_size))
            cv2.rectangle(image, pt1, pt2, (255, 33, 44))
        # Dataset yields RGB; OpenCV display expects BGR.
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        cv2.imshow("test", image)
        cv2.waitKeyEx(-1)
def get_train_data_loader(path, classes_list, batch_size, shuffle=True,
                          num_workers=4):
    """Create a DataLoader over the VOC train split with training-time
    augmentations, normalization and tensor conversion applied."""
    pipeline = get_aug([
        A.HorizontalFlip(p=.5),
        A.RandomSizedBBoxSafeCrop(width=448, height=448, erosion_rate=0,
                                  interpolation=cv2.INTER_CUBIC),
        A.RGBShift(p=.5),
        A.Blur(blur_limit=5, p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.CLAHE(p=0.5),
        A.Normalize(),
        ToTensorV2()
    ])
    dataset = VOCDataset(path, classes_list=classes_list, image_set='train',
                         transforms=pipeline)
    return torch.utils.data.DataLoader(dataset, batch_size, shuffle=shuffle,
                                       num_workers=num_workers)
def __init__(self, size, mean, std, bbox_format='albumentations'):
    """Augmentation pipeline wrapper.

    Args:
        size: indexed as size[1]=height, size[0]=width below, i.e. it is
            assumed to be (width, height) — TODO confirm against callers.
        mean, std: normalization statistics.
        bbox_format: incoming bbox format; boxes are converted to
            'albumentations' format at the end of the pipeline.
    """
    super(Pipeline, self).__init__([
        A.RandomBrightnessContrast(),
        A.RandomGamma(),
        A.HueSaturationValue(sat_shift_limit=50, p=0.6),
        A.CLAHE(),
        A.HorizontalFlip(),
        # A.Cutout is deprecated upstream in favour of CoarseDropout.
        A.Cutout(p=0.5),
        A.RandomSizedBBoxSafeCrop(size[1], size[0], p=0.8),  # (height, width)
        A.Resize(size[1], size[0]),
        A.Normalize(mean=mean, std=std),
        BboxFormatConvert(bbox_format, 'albumentations'),
        ToTensor()
    ], bbox_format)
def get_train_transform(img_size):
    """Square training pipeline at `img_size`: resize, optional bbox-safe
    crop, flips, shift/scale/rotate, one degradation op, one photometric
    op, CLAHE, normalization, tensor conversion.

    Boxes: pascal_voc; dropped below 64 px area or 0.1 visibility.
    """
    return A.Compose(
        [
            A.Resize(height=img_size, width=img_size, p=1),
            A.RandomSizedBBoxSafeCrop(
                img_size, img_size, interpolation=1, p=0.33
            ),
            A.Flip(),
            A.ShiftScaleRotate(
                shift_limit=0.05,
                scale_limit=0.1,
                rotate_limit=10,
                interpolation=1,
                border_mode=0,
                value=0,
                p=0.5,
            ),
            # Exactly one degradation op
            A.OneOf(
                [
                    A.Blur(blur_limit=(1, 3), p=0.33),
                    A.MedianBlur(blur_limit=3, p=0.33),
                    A.ImageCompression(quality_lower=50, p=0.33),
                ],
                p=0.33,
            ),
            # Exactly one photometric op
            A.OneOf(
                [
                    A.RandomGamma(gamma_limit=(90, 110), p=0.2),
                    A.RandomBrightnessContrast(brightness_limit=0.2, p=0.4),
                    A.HueSaturationValue(
                        hue_shift_limit=25,
                        sat_shift_limit=25,
                        val_shift_limit=30,
                        p=0.5,
                    ),
                ],
                p=0.4,
            ),
            A.CLAHE(clip_limit=2, p=0.2),
            A.Normalize(always_apply=True, p=1.0),
            ToTensorV2(p=1.0),
        ],
        bbox_params=A.BboxParams(
            format="pascal_voc",
            min_area=64,
            min_visibility=0.1,
            label_fields=["labels"],
        ),
    )
def bboxes_augmentation(cfg):
    '''Build a bbox-aware augmentation Compose from a config dict.

    Expected keys: 'smallest', 'height', 'width', 'bbox',
    'min_visibility', 'min_area'; optional: 'random_crop', 'safe_crop',
    'min_max_height', 'flip'.
    '''
    transforms = []
    if cfg['smallest'] > 0:
        transforms += [
            albumentations.SmallestMaxSize(max_size=cfg['smallest'], p=1.0)
        ]
    # NOTE(review): the crop choice is nested under 'random_crop', so no
    # crop of either kind is added when random_crop is falsy — confirm
    # this is intentional.
    if cfg.get('random_crop', 0):
        # transforms += [OneOf([albumentations.RandomCrop(height=1024, width=1024, p=0.8),
        #                       albumentations.RandomCrop(height=720, width=720, p=0.8),], p=1.),]
        if cfg.get('safe_crop', 0):
            transforms += [
                albumentations.RandomSizedBBoxSafeCrop(height=cfg['height'],
                                                       width=cfg['width'],
                                                       p=1.)
            ]
        else:
            transforms += [
                albumentations.RandomSizedCrop(cfg['min_max_height'],
                                               height=cfg['height'],
                                               width=cfg['width'],
                                               p=1.0)
            ]
    if cfg.get('flip', 0):
        transforms += [albumentations.HorizontalFlip(p=0.5)]
    # Photometric / noise augmentations applied unconditionally.
    transforms += [
        albumentations.RandomBrightness(limit=0.2, p=0.3),
        albumentations.RandomContrast(limit=0.2, p=0.3),
        albumentations.Blur(blur_limit=5, p=0.2),
        albumentations.GaussNoise(var_limit=(5, 20), p=0.2),
        albumentations.ChannelShuffle(p=0.2),
    ]
    # Empty bbox_params disables box handling when cfg['bbox'] is falsy.
    bbox_params = {
        'format': 'pascal_voc',
        'min_visibility': cfg['min_visibility'],
        'label_fields': ['labels'],
        'min_area': cfg['min_area']
    } if cfg['bbox'] else {}
    return Compose(transforms, bbox_params=bbox_params, p=1.)
def get_dataset(dataset, args):
    """Return (train_dataset, val_dataset, val_metric) for `dataset`.

    Only 'voc' is implemented; the COCO branch is kept commented out.
    Also clamps args.num_samples to the train set size when negative.
    """
    width = height = args.data_shape
    train_aug = [
        A.HorizontalFlip(),
        # Positional args are (height, width); harmless here because the
        # target shape is square.
        A.RandomSizedBBoxSafeCrop(width, height),
        A.RGBShift(),
        A.Blur(blur_limit=11),
        A.RandomBrightness(),
        A.CLAHE(),
        A.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
        ToTensor(),
    ]
    val_aug = [
        A.Resize(width, height),
        A.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
        ToTensor(),
    ]
    if dataset.lower() == 'voc':
        # NOTE(review): dataset root is hard-coded to a user home dir.
        train_dataset = dlcv.data.VOCDetection(
            root='/home/vismarty/.datasets/VOCdevkit',
            splits=[(2007, 'trainval'), (2012, 'trainval')],
            transform=get_transform(train_aug))
        val_dataset = dlcv.data.VOCDetection(
            root='/home/vismarty/.datasets/VOCdevkit',
            splits=[(2007, 'test')],
            transform=get_transform(val_aug))
        # val_metric = dlcv.utils.metrics.voc_detection.VOC07MApMetric(iou_thresh=0.5, class_names=val_dataset.classes)
        val_metric = None
    # elif dataset.lower() == 'coco':
    #     train_dataset = gdata.COCODetection(root=args.dataset_root + "/coco", splits='instances_train2017')
    #     val_dataset = gdata.COCODetection(root=args.dataset_root + "/coco", splits='instances_val2017', skip_empty=False)
    #     val_metric = COCODetectionMetric(
    #         val_dataset, args.save_prefix + '_eval', cleanup=True,
    #         data_shape=(args.data_shape, args.data_shape), post_affine=get_post_transform)
    #     # coco validation is slow, consider increase the validation interval
    #     if args.val_interval == 1:
    #         args.val_interval = 10
    else:
        raise NotImplementedError(
            'Dataset: {} not implemented.'.format(dataset))
    if args.num_samples < 0:
        args.num_samples = len(train_dataset)
    return train_dataset, val_dataset, val_metric
def __init__(self, root, input_size):
    """Training dataset over a COCO-style 'train' folder + 'train.json'.

    Args:
        root: directory containing 'train/' images and 'train.json'.
        input_size: (height, width) target for the bbox-safe crop.
    """
    super(TrainDataset, self).__init__(root=os.path.join(root, 'train'),
                                       annFile=os.path.join(root, 'train.json'))
    self.h, self.w = input_size
    # Crop/flip/jitter, then normalize and convert to tensor; boxes are
    # COCO-format (x, y, w, h) with per-box 'class_labels'.
    self.transform = alb.Compose([
        alb.RandomSizedBBoxSafeCrop(width=self.w, height=self.h),
        alb.HorizontalFlip(p=0.5),
        alb.ColorJitter(
            brightness=0.3, contrast=0.3, saturation=0.3, hue=0., p=0.8),
        alb.Normalize(),
        ToTensorV2()
    ], bbox_params=alb.BboxParams(
        format='coco', label_fields=['class_labels']))
def train_dataloader(self):
    """Build the training DataLoader, with optional augmentations and a
    DistributedSampler when more than one GPU is configured."""
    hparams = self.hparams
    augs = []
    if hparams.aug_flip:
        augs.append(albu.HorizontalFlip(p=1.0))
    if hparams.aug_brightness:
        augs.append(albu.RandomBrightness(limit=0.2, p=1.0))
    if hparams.aug_randomcrop:
        aug = albu.RandomSizedBBoxSafeCrop(hparams.img_height,
                                           hparams.img_width)
        augs.append(aug)
    dataset = TransformDataset(
        self.train_dataset,
        albu.Compose(
            [
                *augs,
                # Quarter resolution — presumably matches the network
                # stride; confirm against CollateFn/self.net.
                albu.Resize(hparams.img_height // 4, hparams.img_width // 4),
                albu.ToFloat(255),
                ToTensorV2(),
            ],
            bbox_params=AIEdgeDataset.bbox_params,
        ),
    )
    if self.hparams.gpus > 1:
        sampler = data.distributed.DistributedSampler(dataset, shuffle=True)
    else:
        sampler = None
    # Shuffle only when no sampler is set (DistributedSampler shuffles).
    return data.DataLoader(
        dataset,
        batch_size=hparams.batch_size,
        collate_fn=CollateFn(self.net.stride, hparams, "train"),
        num_workers=hparams.num_workers,
        shuffle=(sampler is None),
        sampler=sampler,
        pin_memory=(hparams.gpus > 0),
        drop_last=True,
    )
def get_transforms(model='ssd'):
    """Return the augmentation pipeline for the given detector.

    Args:
        model: 'ssd' (300x300 crop) or 'yolo' (320x320 crop).

    Raises:
        ValueError: if `model` is neither 'ssd' nor 'yolo'. (The original
            used `assert`, which is stripped under ``python -O``.)
    """
    if model not in ('ssd', 'yolo'):
        raise ValueError(f"model must be 'ssd' or 'yolo', got {model!r}")
    if model == 'ssd':
        height, width = 300, 300
    else:
        height, width = 320, 320
    return aug.Compose(
        [
            aug.RandomSizedBBoxSafeCrop(height=height, width=width, p=0.5),
            aug.HueSaturationValue(),
            aug.GaussianBlur(blur_limit=(1, 3)),
            # aug.MotionBlur(blur_limit=5),
            aug.RandomBrightnessContrast(),
            # aug.RandomFog(),
            # aug.Rotate(limit=5)  # apply rotation with a small angle
        ],
        bbox_params=aug.BboxParams(format='pascal_voc',
                                   min_visibility=0.0,
                                   label_fields=['class_labels']))
def train_transform(width=512, height=512, min_area=0.0, min_visibility=0.0,
                    lamda_norm=False):
    """Training pipeline: one geometric op, one brightness op, one
    weather/colour op, then normalization (or a custom lambda) and a
    final resize.

    Args:
        width, height: output size for the crop and final resize.
        min_area, min_visibility: bbox filtering thresholds.
        lamda_norm: if True, use the project `lamda_norm_tran` lambda
            instead of ImageNet normalization.
    """
    list_transforms = []
    augment = albu.Compose([
        # Exactly one geometric transform (NoOp keeps identity possible).
        albu.OneOf(
            [
                albu.RandomSizedBBoxSafeCrop(p=1.0, height=height, width=width),
                albu.HorizontalFlip(p=1.0),
                albu.VerticalFlip(p=1.0),
                albu.RandomRotate90(p=1.0),
                albu.NoOp(p=1.0)
            ]
        ),
        # Exactly one brightness/gamma transform.
        albu.OneOf(
            [
                albu.RandomBrightnessContrast(p=1.0),
                albu.RandomGamma(p=1.0),
                albu.NoOp(p=1.0)
            ]
        ),
        # Exactly one blur/weather/colour transform.
        albu.OneOf(
            [
                albu.MotionBlur(p=1.0),
                albu.RandomFog(p=1.0),
                albu.RandomRain(p=1.0),
                albu.CLAHE(p=1.0),
                albu.ToGray(p=1.0),
                albu.NoOp(p=1.0)
            ]
        )
    ])
    list_transforms.extend([augment])
    if lamda_norm:
        list_transforms.extend([albu.Lambda(image=lamda_norm_tran)])
    else:
        list_transforms.extend([albu.Normalize(mean=(0.485, 0.456, 0.406),
                                               std=(0.229, 0.224, 0.225),
                                               max_pixel_value=255.,
                                               p=1.0)])
    # Final resize happens after normalization.
    list_transforms.extend([albu.Resize(height=height, width=width, p=1.0)])
    return albu.Compose(list_transforms,
                        bbox_params=albu.BboxParams(
                            format='pascal_voc',
                            min_area=min_area,
                            min_visibility=min_visibility,
                            label_fields=['label']))
def _aug(config):
    """Return the module-level augmentation pipeline, building it once.

    The pipeline (COCO-format boxes, 'labels' label field) is cached in
    the `_augmentor` global, so repeated calls reuse the same Compose.
    """
    global _augmentor
    if _augmentor is not None:
        return _augmentor
    _augmentor = A.Compose(
        [
            A.VerticalFlip(p=0.4),
            A.RandomSizedBBoxSafeCrop(width=config.IMAGE_WIDTH,
                                      height=config.IMAGE_HEIGHT,
                                      erosion_rate=0.0,
                                      p=0.4),
            A.RGBShift(p=0.4),
            A.Blur(blur_limit=7, p=0.4),
            A.RandomBrightness(p=0.4),
        ],
        bbox_params={
            'format': 'coco',
            'min_area': 0,
            'min_visibility': 0,
            'label_fields': ['labels']
        },
        p=0.4)
    return _augmentor
def hard_transforms():
    """Heavy augmentation pipeline for 64x64 crops.

    Rotation, bbox-safe crop, inversion, colour shift, one noise op, one
    blur op, one contrast/sharpen op, JPEG artifacts and Cutout.
    Boxes are pascal_voc without label fields.

    NOTE: IAA* transforms, JpegCompression and Cutout were removed or
    renamed in albumentations >= 1.0 — this code targets an older release.
    """
    return albu.Compose([
        albu.Rotate(limit=30,
                    interpolation=cv2.INTER_LINEAR,
                    border_mode=cv2.BORDER_CONSTANT,
                    value=(0, 0, 0)),
        albu.RandomSizedBBoxSafeCrop(width=64, height=64, erosion_rate=0.2),
        albu.InvertImg(p=0.3),
        albu.HueSaturationValue(p=0.3),
        # Exactly one noise op
        albu.OneOf([
            albu.IAAAdditiveGaussianNoise(),
            albu.GaussNoise(),
            albu.MultiplicativeNoise(
                multiplier=[0.5, 1.5], per_channel=True, p=1)
        ], p=0.3),
        # Exactly one blur op
        albu.OneOf([
            albu.MotionBlur(p=0.2),
            albu.MedianBlur(blur_limit=3, p=0.1),
            albu.Blur(blur_limit=3, p=0.1),
        ], p=0.2),
        # Exactly one contrast/sharpen op
        albu.OneOf([
            albu.CLAHE(clip_limit=2),
            albu.IAASharpen(),
            albu.IAAEmboss(),
            albu.RandomBrightnessContrast(
                brightness_limit=0.2, contrast_limit=0.2, p=0.3),
            albu.RandomGamma(gamma_limit=(85, 115), p=0.3),
        ], p=0.3),
        albu.JpegCompression(quality_lower=30, quality_upper=100, p=0.5),
        albu.Cutout(
            num_holes=10, max_h_size=5, max_w_size=5, fill_value=0, p=0.5),
    ], p=1, bbox_params=albu.BboxParams(format='pascal_voc'))
def get_train_transforms():
    """Training pipeline: mandatory 608x608 bbox-safe crop, rare
    grayscale, horizontal flip, Cutout and tensor conversion (no
    normalization). Boxes: pascal_voc without label fields.
    """
    return A.Compose(
        [
            # A.RandomSizedCrop(min_max_height=(380, 676), height=1024, width=1024, p=0.5),
            A.RandomSizedBBoxSafeCrop(
                608, 608, erosion_rate=0.0, interpolation=1, p=1),
            A.ToGray(p=0.01),
            A.HorizontalFlip(p=0.5),
            # A.VerticalFlip(p=0.5),
            # A.Resize(height=608, width=608, p=1),
            # NOTE: A.Cutout is deprecated upstream (CoarseDropout).
            A.Cutout(num_holes=4, max_h_size=64, max_w_size=64,
                     fill_value=0, p=0.5),
            # A.Normalize(p=1),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(
            format='pascal_voc',
            min_area=0,
            min_visibility=0,
        ))
def get_transform(train):
    """Return the albumentations pipeline for train or eval mode.

    Both modes share normalization, resize to 512x512, tensor conversion
    and identical bbox params; the train pipeline prepends the
    stochastic augmentations. (Deduplicated from two near-identical
    Compose literals.)

    Args:
        train: truthy for the training pipeline.
    """
    bbox_params = A.BboxParams(
        format='pascal_voc',
        min_area=0,
        min_visibility=0,
        label_fields=['class_labels'])
    # Tail shared by both pipelines.
    common = [
        A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
        A.Resize(height=512, width=512, p=1.0),
        ToTensorV2()
    ]
    if train:
        return A.Compose([
            A.RandomSizedBBoxSafeCrop(width=525, height=600),
            A.HorizontalFlip(p=0.5),
            # Exactly one photometric op
            A.OneOf([
                A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=30,
                                     val_shift_limit=20, p=1),
                A.RandomBrightnessContrast(
                    brightness_limit=0.2, contrast_limit=0.2, p=1),
            ], p=1),
            *common,
        ], bbox_params=bbox_params)
    return A.Compose(list(common), bbox_params=bbox_params)
import albumentations as A
from albumentations import Compose as AlbCompose

# Augmentation list: one of (bbox-safe crop / plain resize) to 1280x960,
# followed by colour and flip augmentations.
augs = [
    A.OneOf([
        A.RandomSizedBBoxSafeCrop(width=1280, height=960, erosion_rate=0.2,
                                  p=1.0),
        A.Resize(width=1280, height=960, p=1.0)
        # RandomSizedBBoxSafeCrop(width=1280, height=960, erosion_rate=0.2, p=1.0),
        # RandomSizedBBoxSafeCrop(width=1280, height=1280, erosion_rate=0.2, p=1.0),
        # Resize(width=1200, height=800, p=1.0)
    ], p=1),
    A.ToGray(p=0.25),
    A.ChannelShuffle(p=0.25),
    A.RGBShift(r_shift_limit=50, g_shift_limit=50, b_shift_limit=50, p=0.5),
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2,
                               brightness_by_max=True, p=0.5),
    A.RandomGamma(p=0.5),
    A.HorizontalFlip(p=0.25),
    A.VerticalFlip(p=0.25)
    # RandomRotate90(p=0.25),
    # Transpose(p=0.25)
]

# Final pipeline; boxes under 20% visibility after cropping are dropped.
comp_aug = AlbCompose(
    augs,
    bbox_params=A.BboxParams(
        format='pascal_voc',
        min_visibility=0.2,
        label_fields=['labels']
    )
)
A.SmallestMaxSize(config.presize), A.CenterCrop(config.crop, config.crop), A.Normalize(), albumentations.pytorch.ToTensorV2() ]) tta_transform_cls = A.Compose([ A.SmallestMaxSize(config.presize), A.Normalize(), albumentations.pytorch.ToTensorV2() ]) # TRANSFORMS FOR LOCALIZATION ENGINE train_transform_loc = A.Compose([ A.RandomSizedBBoxSafeCrop(config.crop, config.crop), A.Rotate(limit=20), A.Normalize(), A.HorizontalFlip(), A.Cutout(), albumentations.pytorch.ToTensorV2() ], bbox_params=A.BboxParams( format='albumentations', min_area=256, min_visibility=0.1)) valid_transform_loc = A.Compose([ A.RandomSizedBBoxSafeCrop(config.crop, config.crop), A.Normalize(), albumentations.pytorch.ToTensorV2()
def get_augmentation(version):
    """Return {'train', 'valid'} albumentations pipelines for a version.

    "v1" fits images to 448x448 via a safe-crop/resize OneOf; "v2"
    letterboxes with LongestMaxSize + PadIfNeeded. The photometric/flip
    augmentations and ImageNet normalization, previously duplicated in
    both branches, are factored out.

    Raises:
        ValueError: for an unknown version string (subclass of the
            original bare Exception, so existing handlers still match).
    """
    # YOLOv2 input size (shared by both versions)
    size = 448
    # ImageNet Normalization (shared)
    normalization = A.Normalize(mean=(0.485, 0.456, 0.406),
                                std=(0.229, 0.224, 0.225), p=1)
    # Flip / photometric augmentations common to both train pipelines
    common_train = [
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.RandomRotate90(p=0.5),
        A.RandomBrightnessContrast(brightness_limit=0.3,
                                   contrast_limit=0.3, p=0.25),
        A.Blur(blur_limit=4, always_apply=False, p=0.25),
        A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2,
                      hue=0.2, always_apply=False, p=0.25),
    ]
    if version == "v1":
        return {
            "train": A.Compose(common_train + [
                A.OneOf([
                    A.RandomSizedBBoxSafeCrop(size, size, erosion_rate=0.0,
                                              interpolation=1,
                                              always_apply=False, p=0.5),
                    A.Resize(size, size, interpolation=1,
                             always_apply=False, p=0.5),
                ], p=1),
                normalization
            ]),
            "valid": A.Compose([
                A.Resize(size, size, interpolation=1, always_apply=False, p=1),
                normalization
            ]),
        }
    if version == "v2":
        # Aspect-preserving letterbox shared by train and valid.
        letterbox = [
            A.LongestMaxSize(max_size=size, interpolation=1,
                             always_apply=False, p=1),
            A.PadIfNeeded(min_height=size, min_width=size,
                          pad_height_divisor=None, pad_width_divisor=None,
                          border_mode=1, value=None, mask_value=None,
                          always_apply=False, p=1),
        ]
        return {
            "train": A.Compose(common_train + letterbox + [normalization]),
            "valid": A.Compose(letterbox + [normalization]),
        }
    raise ValueError(f"Augmentation version '{version}' is unknown!")
def main():
    """Augment a single test image plus its Pascal-VOC XML annotation.

    First applies every (currently all commented-out) single transform
    from `augmentation_list`, then generates 100 randomly augmented
    640x640 crops, saving each image with a regenerated VOC XML file.
    """
    bbox_params = A.BboxParams(format='pascal_voc',
                               min_area=1,
                               min_visibility=0.5,
                               label_fields=['field_id'])

    # Test image from folder: OPF_rat
    folder_path = './images/OPF_rat'
    image_name = 'image'
    image_ext = '.jpg'
    image_width = 1920
    image_height = 1080
    image_path = folder_path + '/' + image_name + image_ext
    xml_path = folder_path + '/' + image_name + '.xml'
    augmented_path = folder_path + '/augmented'
    os.makedirs(augmented_path, exist_ok=True)
    image = cv2.imread(image_path)
    animal_id_bbox = read_bbox_from_xml(xml_path)
    # show_image(image, bbox=animal_id_bbox)

    augmentation_list = {
        # 'Blur': A.Blur(blur_limit=(8, 12), always_apply=True, p=1.0),
        # 'VerticalFlip': A.VerticalFlip(always_apply=True, p=1.0),
        # 'HorizontalFlip': A.HorizontalFlip(always_apply=True, p=1.0),
        # 'Flip': A.Flip(always_apply=True, p=1.0),  # fine when it does both
        #     horizontal and vertical; otherwise it duplicates the entries above
        # 'Transpose': A.Transpose(always_apply=True, p=1.0),
        # 'RandomGamma': A.RandomGamma(gamma_limit=(40, 200), eps=None, always_apply=True, p=1.0),
        # 'RandomRotate90': A.RandomRotate90(always_apply=True, p=1.0),
        # 'Rotate': A.Rotate(limit=(-360, 360), interpolation=1, border_mode=2, value=None, mask_value=None,
        #                    always_apply=True, p=1.0),
        # 'HueSaturationValue': A.HueSaturationValue(hue_shift_limit=(-15, 15), sat_shift_limit=(-15, 15),
        #                                            val_shift_limit=(-15, 15), always_apply=True, p=1.0),
        # 'RGBShift': A.RGBShift(r_shift_limit=5, g_shift_limit=5, b_shift_limit=5, always_apply=True, p=1.0),
        # 'RandomBrightness': A.RandomBrightness(limit=0.2, always_apply=True, p=1.0),
        # 'RandomContrast': A.RandomContrast(limit=0.2, always_apply=True, p=1.0),
        # 'MotionBlur': A.MotionBlur(blur_limit=(8, 12), always_apply=True, p=1.0),
        # 'MedianBlur': A.MedianBlur(blur_limit=7, always_apply=True, p=1.0),
        # 'GaussianBlur': A.GaussianBlur(blur_limit=7, always_apply=True, p=1.0),
        # 'GaussNoise': A.GaussNoise(var_limit=(10.0, 250.0), mean=0, always_apply=True, p=1.0),
        # 'CLAHE': A.CLAHE(clip_limit=4.0, tile_grid_size=(8, 8), always_apply=True, p=1.0),
        # 'ToGray': A.ToGray(always_apply=True, p=1.0),
        # 'RandomBrightnessContrast': A.RandomBrightnessContrast(brightness_limit=0.2,
        #                                                        contrast_limit=0.2, brightness_by_max=True,
        #                                                        always_apply=True, p=1.0),
        # 'ISONoise': A.ISONoise(color_shift=(0.0, 0.5), intensity=(0.1, 0.5), always_apply=True, p=1.0),
        # 'MultiplicativeNoise': A.MultiplicativeNoise(multiplier=(0.9, 1.1), per_channel=True,
        #                                              elementwise=True, always_apply=True, p=1.0)
    }

    for k, v in augmentation_list.items():
        augmentation = A.Compose([v], bbox_params=bbox_params)
        augmented = augmentation(image=image,
                                 bboxes=[animal_id_bbox],
                                 field_id=['1'])
        augmented_image_path = (augmented_path + "/" + image_name + '_' + k +
                                image_ext)
        cv2.imwrite(augmented_image_path, augmented['image'])
        # These single transforms do not resize, so the original
        # dimensions still describe the output image.
        writer = Writer(augmented_path, image_width, image_height)
        bbox_coords = augmented['bboxes'][0]
        bbox_class = read_class_from_xml(xml_path)
        writer.addObject(bbox_class, bbox_coords[0], bbox_coords[1],
                         bbox_coords[2], bbox_coords[3])
        writer.save(augmented_image_path.replace(image_ext, '.xml'))

    for i in range(100):
        augmentation = A.Compose(
            [
                A.RandomSizedBBoxSafeCrop(640, 640, erosion_rate=0.0,
                                          interpolation=1,
                                          always_apply=True, p=1.0),
                A.VerticalFlip(always_apply=False, p=0.5),
                A.HorizontalFlip(always_apply=False, p=0.5),
                A.Flip(always_apply=False, p=0.5),
                A.Transpose(always_apply=False, p=0.5),
                #A.RandomGamma(gamma_limit=(40, 150), eps=None, always_apply=False, p=0.25),
                A.RandomRotate90(always_apply=False, p=0.5),
                A.Rotate(limit=(-360, 360), interpolation=1, border_mode=1,
                         value=None, mask_value=None, always_apply=False,
                         p=0.5),
                #A.GaussNoise(var_limit=(10.0, 150.0), mean=0, always_apply=False, p=0.25),
                #A.ISONoise(color_shift=(0.0, 0.5), intensity=(0.1, 0.5), always_apply=False, p=0.25),
                #A.MultiplicativeNoise(multiplier=(0.9, 1.1), per_channel=True,
                #                      elementwise=True, always_apply=False, p=0.25),
                #A.Blur(blur_limit=3, always_apply=False, p=0.15),
                #A.ToGray(always_apply=False, p=0.15),
                #A.OneOf([
                #    A.HueSaturationValue(hue_shift_limit=(-15, 15), sat_shift_limit=(-15, 15),
                #                         val_shift_limit=(-15, 15), always_apply=False, p=0.5),
                #    A.RGBShift(r_shift_limit=3, g_shift_limit=5, b_shift_limit=5, always_apply=False, p=0.5)
                #], p=0.25),
                #A.OneOf([
                #    A.RandomBrightness(limit=0.2, always_apply=False, p=0.5),
                #    A.RandomContrast(limit=0.2, always_apply=False, p=0.5)
                #], p=0.5)
            ],
            bbox_params=bbox_params)
        augmented = augmentation(image=image,
                                 bboxes=[animal_id_bbox],
                                 field_id=['1'])
        # NOTE(review): bbox_params drops boxes under 50% visibility, so
        # `augmented['bboxes']` can be empty and the [0] below raise
        # IndexError — the batch variant of this script retries instead.
        augmented_image_path = augmented_path + "/" + image_name + '_' + str(
            i) + image_ext
        cv2.imwrite(augmented_image_path, augmented['image'])
        # FIX: the crop above produces 640x640 images, but the XML was
        # previously written with the original 1920x1080 size,
        # mislabeling every augmented annotation.
        writer = Writer(augmented_path, 640, 640)
        bbox_coords = augmented['bboxes'][0]
        bbox_class = read_class_from_xml(xml_path)
        writer.addObject(bbox_class, bbox_coords[0], bbox_coords[1],
                         bbox_coords[2], bbox_coords[3])
        writer.save(augmented_image_path.replace(image_ext, '.xml'))

    print('\nSuccessfully augmentated images.')
plt.show() # has to be list of lists because there is for loop for the bboxes bboxes = [[x1, y1, x2, y2]] # fetch image class label = [img_data.label] # mapping used to display the class category_id_to_name = {1: 'cat', 0: 'dog'} # WITH TRANSFORMS transform = A.Compose([ # A.SmallestMaxSize(presize), # A.LongestMaxSize(presize), A.RandomSizedBBoxSafeCrop(config.presize, config.presize), # A.crops.transforms.CropAndPad(presize), # A.RandomCrop(crop, crop), # A.Normalize(), A.Rotate(limit=30), A.HorizontalFlip(p=0.5), A.Cutout(p=1.0), # albumentations.pytorch.ToTensorV2(), ], # bbox_params=A.BboxParams(format='coco'), bbox_params=A.BboxParams(format='coco', label_fields=['label']), ) # transformed = transform(image=image, bboxes=bboxes) transformed = transform(image=image, bboxes=bboxes,
def main(directory_list):
    """Batch-augment every annotated image under data/<dir> for each
    directory in `directory_list`.

    For each image two outputs are produced: a plain 640x640 bbox-safe
    crop (k == 0) and a randomly flipped/rotated crop (k == 1), each
    saved with a regenerated Pascal-VOC XML. Non-ASCII (e.g. Cyrillic)
    paths are handled via imdecode/tofile because cv2.imread/imwrite
    cannot open them directly.
    """
    bbox_params = A.BboxParams(format='pascal_voc',
                               min_area=1,
                               min_visibility=0.5,
                               label_fields=['field_id'])

    dataset_dir = '../train/tensorflow-object-detection-api/data/augmented/data'
    augmented_dir = 'data/augmented'
    os.makedirs(dataset_dir, exist_ok=True)
    os.makedirs(augmented_dir, exist_ok=True)

    for images_dir in directory_list:
        print("\nMain folder: " + images_dir)
        images_path = os.path.join(os.getcwd(), 'data/{}'.format(images_dir))

        # Walk all subfolders and collect images (their XMLs share the name)
        images_list = []
        for i in os.walk(images_path):
            if i[2]:
                #print("Folder: ")
                #print(i[0])
                #print("with files:")
                #print(i[2])
                #print()
                path = i[0]
                path_core = path[path.find(images_path):None] + "/"
                for image_name in i[2]:
                    if image_name.lower().endswith(('.png', '.jpg', '.jpeg')):
                        images_list.append(
                            path_core.replace('\\', '/') + image_name)

        images_count = len(images_list)
        i = 0
        is_cyrillic_path = False
        for image_path in images_list:
            print(image_path)
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread fails on non-ASCII paths; read raw bytes and
                # decode in memory instead.
                f = open(image_path, "rb")
                chunk = f.read()
                chunk_arr = np.frombuffer(chunk, dtype=np.uint8)
                image = cv2.imdecode(chunk_arr, cv2.IMREAD_COLOR)
                is_cyrillic_path = True
            image_name = os.path.basename(image_path)
            image_ext = os.path.splitext(image_path)[1]
            xml_path = image_path.replace(image_ext, '.xml')
            animal_id_bbox = read_bbox_from_xml(xml_path)
            # show_image(image, bbox=animal_id_bbox)

            # Augment the image
            augmentation_orig = A.Compose([
                A.RandomSizedBBoxSafeCrop(640, 640, erosion_rate=0.0,
                                          interpolation=1,
                                          always_apply=True, p=1.0),
            ], bbox_params=bbox_params)
            augmentation = A.Compose(
                [
                    A.RandomSizedBBoxSafeCrop(640, 640, erosion_rate=0.0,
                                              interpolation=1,
                                              always_apply=True, p=1.0),
                    A.VerticalFlip(always_apply=False, p=0.5),
                    A.HorizontalFlip(always_apply=False, p=0.5),
                    A.Flip(always_apply=False, p=0.5),
                    A.Transpose(always_apply=False, p=0.5),
                    #A.RandomGamma(gamma_limit=(120, 150), eps=None, always_apply=False, p=0.75),
                    A.RandomRotate90(always_apply=False, p=0.5),
                    A.Rotate(limit=(-360, 360), interpolation=1,
                             border_mode=1, value=None, mask_value=None,
                             always_apply=False, p=0.5),
                    #A.GaussNoise(var_limit=(10.0, 150.0), mean=0, always_apply=False, p=0.25),
                    #A.ISONoise(color_shift=(0.0, 0.5), intensity=(0.1, 0.5), always_apply=False, p=0.25),
                    #A.MultiplicativeNoise(multiplier=(0.9, 1.1), per_channel=True,
                    #                      elementwise=True, always_apply=False, p=0.25),
                    #A.Blur(blur_limit=3, always_apply=False, p=0.15),
                    #A.ToGray(always_apply=False, p=0.15),
                    #A.OneOf([
                    #    A.HueSaturationValue(hue_shift_limit=(-15, 15), sat_shift_limit=(-15, 15),
                    #                         val_shift_limit=(-15, 15), always_apply=False, p=0.5),
                    #    A.RGBShift(r_shift_limit=3, g_shift_limit=5, b_shift_limit=5, always_apply=False, p=0.5)
                    #], p=0.75),
                    #A.OneOf([
                    #    A.RandomBrightness(limit=0.1, always_apply=False, p=0.5),
                    #    A.RandomContrast(limit=0.1, always_apply=False, p=0.5),
                    #    A.RandomBrightnessContrast(brightness_limit=0.1,
                    #                               contrast_limit=0.1, brightness_by_max=False,
                    #                               always_apply=False, p=0.5),
                    #], p=0.75)
                ],
                bbox_params=bbox_params)

            for k in range(2):
                # Retry until the box survives the 50%-visibility filter.
                # NOTE(review): can spin for a long time on unlucky geometry.
                while True:
                    if k == 0:
                        augmented = augmentation_orig(image=image,
                                                      bboxes=[animal_id_bbox],
                                                      field_id=['1'])
                    else:
                        augmented = augmentation(image=image,
                                                 bboxes=[animal_id_bbox],
                                                 field_id=['1'])
                    if augmented['bboxes']:
                        break
                # show_image(augmented['image'], augmented['bboxes'][0])
                image_path_core = image_path[
                    image_path.find('data/{}'.format(images_dir)):None]
                image_path_core = image_path_core.replace(
                    'data/{}'.format(images_dir), images_dir)
                image_path_core = image_path_core.replace(
                    image_ext, '_' + str(k) + image_ext)
                augmented_path = augmented_dir + '/' + dataset_dir + '/' + image_path_core.replace(
                    '\\', '/')
                os.makedirs(os.path.split(augmented_path)[0], exist_ok=True)
                if not is_cyrillic_path:
                    cv2.imwrite(augmented_path, augmented['image'])
                else:
                    # Mirror of the read fallback: encode in memory, then
                    # write raw bytes so non-ASCII paths work.
                    is_success, im_buf_arr = cv2.imencode(
                        image_ext, augmented['image'])
                    im_buf_arr.tofile(augmented_path)
                # Output images are always 640x640 after the safe crop.
                writer = Writer(augmented_path, 640, 640)
                bbox_coords = augmented['bboxes'][0]
                bbox_class = read_class_from_xml(xml_path)
                writer.addObject(bbox_class, bbox_coords[0], bbox_coords[1],
                                 bbox_coords[2], bbox_coords[3])
                print(augmented_path.replace(image_ext, '.xml'))
                writer.save(augmented_path.replace(image_ext, '.xml'))

            progress = (i * 100) / images_count
            print("Progress: " + "%.2f" % progress + "%")
            i += 1

    print('\nSuccessfully augmentated images.')
bboxes = [[x1, y1, x2, y2]] # because there is for loop for the bboxes # print(f'bboxes: {bboxes}') label = [img_data.label] # print(f'label: {label}') category_id_to_name = {1: 'cat', 0: 'dog'} # print(f'category_id_to_name: {category_id_to_name[label[0]]}') # WITH TRANSFORMS presize = 256 crop = 256 transform = A.Compose( [ # A.SmallestMaxSize(presize), A.RandomSizedBBoxSafeCrop(crop, crop), A.RandomCrop(crop, crop), # A.Normalize(), A.Rotate(limit=30), A.HorizontalFlip(p=0.5), A.Cutout(p=1.0), # albumentations.pytorch.ToTensorV2(), ], # bbox_params=A.BboxParams(format='coco'), bbox_params=A.BboxParams(format='coco', label_fields=['label']), ) # transformed = transform(image=image, bboxes=bboxes) transformed = transform(image=image, bboxes=bboxes, label=label) visualize(transformed['image'], transformed['bboxes'], transformed['label'],