def get_train_augmentations(image_size: int = 224, mean: tuple = (0, 0, 0), std: tuple = (1, 1, 1)):
    return A.Compose(
        [
            # A.RandomBrightnessContrast(brightness_limit=32, contrast_limit=(0.5, 1.5)),
            # A.HueSaturationValue(hue_shift_limit=18, sat_shift_limit=(1, 2)),
            # A.CoarseDropout(20),
            A.Rotate(10),
            A.Resize(image_size, image_size),
            # A.RandomCrop(image_size, image_size, p=0.5),
            A.LongestMaxSize(image_size),
            # A.Equalize(mode='cv', by_channels=True, mask=None, always_apply=False, p=0.5),
            # A.Normalize(mean=mean, std=std),
            MinMaxScale(),
            A.HorizontalFlip(),
            A.PadIfNeeded(image_size, image_size, 0),
            # A.Transpose(),
            ToTensor(),
        ]
    )
def _preprocess(self, x: np.ndarray, mask: Optional[np.ndarray]):
    x = albu.LongestMaxSize()(image=x)['image']
    x, _ = self.normalize_fn(x, x)
    if mask is None:
        mask = np.ones_like(x, dtype=np.float32)
    else:
        mask = np.round(mask.astype('float32') / 255)

    h, w, _ = x.shape
    block_size = 32
    min_height = (h // block_size + 1) * block_size
    min_width = (w // block_size + 1) * block_size
    pad_params = {
        'mode': 'constant',
        'constant_values': 0,
        'pad_width': ((0, min_height - h), (0, min_width - w), (0, 0)),
    }
    x = np.pad(x, **pad_params)
    mask = np.pad(mask, **pad_params)
    return map(self._array_to_batch, (x, mask)), h, w
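The manual pad-to-multiple-of-32 above can also be expressed with `A.PadIfNeeded`'s divisor arguments. A minimal sketch, assuming albumentations >= 0.5 (where `pad_height_divisor`/`pad_width_divisor` exist); note two behavioral differences called out in the comments:

import cv2
import albumentations as albu

# Roughly equivalent to the np.pad logic above, with two differences:
# 1) PadIfNeeded pads around the center by default, while the code above
#    pads only bottom/right;
# 2) PadIfNeeded is a no-op when a side is already divisible by 32, while
#    the code above always pads to the *next* multiple.
pad_to_32 = albu.PadIfNeeded(
    min_height=None,
    min_width=None,
    pad_height_divisor=32,
    pad_width_divisor=32,
    border_mode=cv2.BORDER_CONSTANT,
    value=0,
    p=1,
)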
def get_train_transform(image_size, augmentation=None, preprocessing=None, crop_black=True):
    if augmentation is None:
        augmentation = 'none'

    artificial = augmentation.endswith('-art')
    if artificial:
        augmentation = augmentation.replace('-art', '')
        print('Using artificial disease signs generation')

    LEVELS = {
        'none': get_none_augmentations,
        'light': get_light_augmentations,
        'medium': get_medium_augmentations,
        'hard': get_hard_augmentations,
        'hard2': get_hard_augmentations_v2,
    }

    assert augmentation in LEVELS.keys()
    augmentation = LEVELS[augmentation](image_size)

    longest_size = max(image_size[0], image_size[1])
    return A.Compose([
        CropBlackRegions(tolerance=5) if crop_black else A.NoOp(always_apply=True),
        A.LongestMaxSize(longest_size, interpolation=cv2.INTER_CUBIC),

        # Artificial disease sign generation
        A.Compose([AddMicroaneurisms(), AddCottonWools()], p=float(artificial)),

        A.PadIfNeeded(image_size[0], image_size[1],
                      border_mode=cv2.BORDER_CONSTANT, value=0),
        augmentation,
        get_preprocessing_transform(preprocessing),
        A.Normalize()
    ])
def valid_albumentations_tfms_pets():
    """Composes a pipeline of Albumentations transforms for the PETS dataset at the validation stage

    Returns:
        AlbumentationsTransform: Pipeline of Albumentations Transforms

    Examples::
        >>> valid_tfms = valid_albumentations_tfms_pets()
        >>> valid_ds = Dataset(valid_records, valid_tfms)

    [[https://albumentations.readthedocs.io/en/latest/_modules/albumentations/augmentations/transforms.html|Albumentations Transforms]]
    """
    import albumentations as A

    # ImageNet stats
    imagenet_mean, imagenet_std = IMAGENET_STATS

    return AlbuTransform([
        A.LongestMaxSize(384),
        A.Normalize(mean=imagenet_mean, std=imagenet_std)
    ])
def __init__(self, opt):
    super(Dataset3D, self).__init__()
    self.opt = opt
    self.augs = A.Compose([
        A.LongestMaxSize(max(self.opt.input_h, self.opt.input_w), always_apply=True),
        A.PadIfNeeded(self.opt.input_h, self.opt.input_w,
                      border_mode=cv2.BORDER_CONSTANT, value=[0, 0, 0]),
        A.Blur(blur_limit=(4, 8), p=0.1),
        # A.ShiftScaleRotate(shift_limit=0.2, scale_limit=(-0.4, 0.2), rotate_limit=0,
        #                    border_mode=cv2.BORDER_CONSTANT, value=(0, 0, 0), p=0.8),
        A.OneOf([
            A.RandomBrightnessContrast(always_apply=True),
            A.RandomGamma(gamma_limit=(60, 140), always_apply=True),
            # A.CLAHE(always_apply=True)
        ], p=0.5),
        A.OneOf([
            A.RGBShift(),
            A.HueSaturationValue(),
            A.ToGray()
        ], p=0.1)
    ], keypoint_params=A.KeypointParams(format='xy', remove_invisible=False))
def transform(self):
    return Compose([
        albu.LongestMaxSize(np.max(self.cfg.INSIGHTFACE.PREPROCESS.IMAGE_SIZE),
                            interpolation=cv2.INTER_LINEAR,
                            always_apply=False,
                            p=1),
        albu.PadIfNeeded(min_height=self.cfg.INSIGHTFACE.PREPROCESS.IMAGE_SIZE[0],
                         min_width=self.cfg.INSIGHTFACE.PREPROCESS.IMAGE_SIZE[1],
                         border_mode=cv2.BORDER_CONSTANT,
                         value=0,
                         mask_value=0,
                         always_apply=False,
                         p=1.0),
        ToTensor(num_classes=1,
                 sigmoid=False,
                 normalize={'mean': self.cfg.INSIGHTFACE.PREPROCESS.MEAN,
                            'std': self.cfg.INSIGHTFACE.PREPROCESS.STD})
    ])
def offline_da_fn(height, width, augment=True):
    da_transform = []
    if augment:
        da_transform += [
            # A.HorizontalFlip(p=0.5),
            A.ShiftScaleRotate(scale_limit=0.05, rotate_limit=7, shift_limit=0.05,
                               border_mode=cv2.BORDER_CONSTANT, p=1.0),
            A.Perspective(scale=(0.015, 0.025), p=0.3),
            A.RandomResizedCrop(height=height, width=width, scale=(0.95, 1.0), p=0.3),
            # A.OneOf(
            #     [
            #         A.CLAHE(p=1),
            #         A.RandomBrightness(p=1),
            #         A.RandomGamma(p=1),
            #         A.RandomContrast(limit=0.1, p=1.0),
            #     ],
            #     p=0.5,
            # ),
            #
            # A.OneOf(
            #     [
            #         A.Sharpen(alpha=(0.2, 0.5), lightness=(0.5, 1.0), p=1.0),
            #         A.Blur(blur_limit=[2, 3], p=1.0),
            #         A.GaussNoise(var_limit=(5, 25), p=1.0),
            #         # A.MotionBlur(blur_limit=3, p=1.0),
            #     ],
            #     p=0.5,
            # ),
            #
            # A.Lambda(image=_da_negative, p=0.2),
        ]

    da_transform += [
        A.LongestMaxSize(max_size=max(height, width),
                         interpolation=cv2.INTER_LANCZOS4,
                         always_apply=True),
        A.PadIfNeeded(min_height=height, min_width=width,
                      border_mode=cv2.BORDER_CONSTANT, always_apply=True),
    ]
    return A.Compose(da_transform)
def default_transform(self, mode="train"):
    if mode == "train":
        transform = A.Compose([
            A.Flip(0.5),
            A.ShiftScaleRotate(scale_limit=0.1, rotate_limit=45, p=0.25),
            A.LongestMaxSize(max_size=800, p=1.0),
            A.Normalize(mean=(0, 0, 0), std=(1, 1, 1), max_pixel_value=255.0, p=1.0),
            ToTensorV2(p=1.0)
        ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})
    elif mode == 'val':
        transform = A.Compose([
            A.Normalize(mean=(0, 0, 0), std=(1, 1, 1), max_pixel_value=255.0, p=1.0),
            ToTensorV2(p=1.0)
        ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})
    else:
        transform = A.Compose([
            A.Normalize(mean=(0, 0, 0), std=(1, 1, 1), max_pixel_value=255.0, p=1.0),
            ToTensorV2(p=1.0)
        ])
    return transform
def test_fisheye_undistortion(image_fname):
    # cv2.imread returns BGR, so convert to RGB
    image = cv2.cvtColor(cv2.imread(image_fname), cv2.COLOR_BGR2RGB)
    transform = A.Compose([
        CropBlackRegions(),
        A.LongestMaxSize(512),
        A.PadIfNeeded(512, 512, border_mode=cv2.BORDER_CONSTANT)
    ])
    image = transform(image=image)['image']

    def update(*args, **kwargs):
        fx = cv2.getTrackbarPos('fx', 'Test')
        k = cv2.getTrackbarPos('k', 'Test')
        k_real = (k - 200) / 400
        print(fx, k_real)
        K = np.array([[fx, 0, 256],
                      [0, fx, 256],
                      [0, 0, 1]], dtype=np.float32)
        # D = np.array([-2.57614020e-01, 8.77086999e-02, -2.56970803e-04, -5.93390389e-04])
        D = np.array([[k_real], [k_real], [0], [0]], dtype=np.float32)
        und = removeFisheyeLensDist(image, K, D, DIM=(768, 768))
        cv2.imshow('Test', und)
        # cv2.waitKey(1)

    cv2.namedWindow('Test')
    cv2.createTrackbar('fx', 'Test', 400, 1024, update)
    cv2.createTrackbar('k', 'Test', 200, 400, update)
    update()

    # waitKey returns an int key code, so compare against ord('q'),
    # not the string 'q' (an int never equals a str in Python 3)
    while cv2.waitKey(30) != ord('q'):
        pass
def run(json_path, savedir, size, ignore_small=True):
    data = get_data(json_path)
    transform = A.Compose([A.LongestMaxSize(size)])
    if not os.path.exists(savedir):
        os.makedirs(savedir)

    sizes = []
    for info in tqdm(data):
        filename = info['file_name']
        identifier = os.path.basename(filename).split("_")[0]
        height = info['height']
        width = info['width']
        image = cv2.imread(filename)
        for i, a in enumerate(info['annotations']):
            x, y, u, v = make_square(a['bbox'], height, width)
            # TODO: maybe save segmentation? and blur background
            selected_img = image[y:v, x:u].copy()
            if np.min(selected_img.shape[:2]) >= size:
                selected_img = transform(image=selected_img)['image']
            else:
                if ignore_small:
                    continue
                sizes.append(np.min(selected_img.shape[:2]))
            savepath = os.path.join(savedir, f"{identifier}_{i}.png")
            # coords = []
            # for points in a['segmentation']:
            #     xx = [k - x for k in points[::2]]
            #     yy = [k - y for k in points[1::2]]
            #     p = np.stack([xx, yy], -1).astype('int32').reshape((1, -1, 2))
            #     cv2.fillPoly(selected_img, p, (255, 255, 255), 8)
            cv2.imwrite(savepath, selected_img)
    print(Counter(sizes))
def __init__(self, max_size: int = 960, device: str = "cpu") -> None:
    self.model = RetinaFace(
        name="Resnet50",
        pretrained=False,
        return_layers={"layer2": 1, "layer3": 2, "layer4": 3},
        in_channels=256,
        out_channels=256,
    ).to(device)
    self.device = device
    self.transform = A.Compose(
        [A.LongestMaxSize(max_size=max_size, p=1), A.Normalize(p=1)])
    self.max_size = max_size
    self.prior_box = priorbox(
        min_sizes=[[16, 32], [64, 128], [256, 512]],
        steps=[8, 16, 32],
        clip=False,
        image_size=(self.max_size, self.max_size),
    ).to(device)
    self.variance = [0.1, 0.2]
def get_training_albumentations():
    train_transform = [
        albu.LongestMaxSize(244),
        albu.HorizontalFlip(),
        albu.ShiftScaleRotate(scale_limit=0.5, rotate_limit=0, shift_limit=0.1,
                              p=1, border_mode=0),
        albu.PadIfNeeded(min_height=224, min_width=224, always_apply=True, border_mode=0),
        albu.RandomCrop(height=224, width=224, always_apply=True),
        albu.IAAAdditiveGaussianNoise(p=0.2),
        albu.IAAPerspective(p=0.5),
        albu.OneOf(
            [
                albu.CLAHE(p=1),
                albu.RandomBrightness(p=1),
                albu.RandomGamma(p=1),
            ],
            p=0.9,
        ),
        albu.OneOf(
            [
                albu.IAASharpen(p=1),
                albu.Blur(blur_limit=3, p=1),
                albu.MotionBlur(blur_limit=3, p=1),
            ],
            p=0.9,
        ),
        albu.OneOf(
            [
                albu.RandomContrast(p=1),
                albu.HueSaturationValue(p=1),
            ],
            p=0.9,
        ),
    ]
    return albu.Compose(train_transform)
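The IAA*-prefixed transforms above come from the imgaug backend that albumentations removed in 1.0. A hedged compatibility sketch with rough modern equivalents (close in effect, not drop-in identical):

import albumentations as albu

# Approximate 1.x replacements for the removed imgaug-backed transforms
modern_equivalents = albu.Compose([
    albu.GaussNoise(p=0.2),    # ~ albu.IAAAdditiveGaussianNoise
    albu.Perspective(p=0.5),   # ~ albu.IAAPerspective
    albu.Sharpen(p=1),         # ~ albu.IAASharpen
    albu.RandomBrightnessContrast(brightness_limit=0,
                                  contrast_limit=0.2, p=1),  # ~ albu.RandomContrast
])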
def resize_transforms(image_size=224):
    BORDER_CONSTANT = 0
    pre_size = int(image_size * 1.5)

    random_crop = albu.Compose([
        albu.SmallestMaxSize(pre_size, p=1),
        albu.RandomCrop(image_size, image_size, p=1),
    ])

    rescale = albu.Compose([albu.Resize(image_size, image_size, p=1)])

    random_crop_big = albu.Compose([
        albu.LongestMaxSize(pre_size, p=1),
        albu.RandomCrop(image_size, image_size, p=1),
    ])

    # Converts the image to a square of size image_size x image_size
    result = [albu.OneOf([random_crop, rescale, random_crop_big], p=1)]

    return result
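One subtlety worth noting about the random_crop_big branch: LongestMaxSize scales the *longest* side to pre_size, so on a non-square image the shorter side can end up smaller than image_size and the following RandomCrop will raise. A minimal sketch of the shapes involved (assumes only numpy and albumentations):

import numpy as np
import albumentations as albu

image = np.zeros((300, 600, 3), dtype=np.uint8)  # H=300, W=600
# pre_size = int(224 * 1.5) = 336; the longest side (600) is scaled to 336
resized = albu.LongestMaxSize(336, p=1)(image=image)["image"]
print(resized.shape)  # (168, 336, 3): height 168 < 224, so RandomCrop(224, 224) fails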
def __getitem__(self, idx: int) -> Dict[str, Any]:
    image_path = self.file_names[idx]

    image = load_rgb(image_path)
    height, width = image.shape[:2]

    # Resize
    resizer = albu.Compose([albu.LongestMaxSize(max_size=768, p=1)], p=1)
    image = resizer(image=image)["image"]

    # Pad
    image, pads = pad(image, factor=768)

    # Apply augmentations
    image = self.transform(image=image)["image"]

    return {
        "image_id": image_path.stem,
        "features": tensor_from_rgb_image(image),
        "pads": np.array(pads).T,
        "height": height,
        "width": width,
    }
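For completeness, the stored pads can be undone on the model output. A hedged sketch, assuming pads is (x_min_pad, y_min_pad, x_max_pad, y_max_pad) as in the iglovikov_helper_functions pad helper (an assumption; the helper's definition is not shown here):

import numpy as np

def unpad(image: np.ndarray, pads) -> np.ndarray:
    # pads assumed to be (x_min_pad, y_min_pad, x_max_pad, y_max_pad)
    x_min_pad, y_min_pad, x_max_pad, y_max_pad = pads
    height, width = image.shape[:2]
    return image[y_min_pad:height - y_max_pad, x_min_pad:width - x_max_pad]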
def extract(json_path, datadir, savedir, size):
    if not os.path.exists(savedir):
        os.makedirs(savedir)

    transform = A.Compose([A.LongestMaxSize(size)])

    df = pd.read_json(json_path, lines=True)
    for i, annotations in tqdm(enumerate(df['annotation'].values), total=df.shape[0]):
        image_path = os.path.join(datadir, f"image_{i}.png")
        image = cv2.imread(image_path)
        for k, annotation in enumerate(annotations):
            savepath = os.path.join(savedir, f"image_{i}_{k}.png")
            height = annotation['imageHeight']
            width = annotation['imageWidth']
            assert image.shape[0] == height and image.shape[1] == width
            if 'Face' not in annotation['label']:
                print(i, annotation)
                continue
            points = annotation['points']
            assert len(points) == 2
            start, end = points
            x, y = start['x'], start['y']
            u, v = end['x'], end['y']
            x, y, u, v = make_square((x, y, u, v), height, width)
            selected_img = image[y:v, x:u]
            if np.min(selected_img.shape[:2]) > size:
                selected_img = transform(image=selected_img)['image']
            else:
                continue
            cv2.imwrite(savepath, selected_img)
def get_train_aug(RESOLUTION=300):
    return A.Compose([
        A.LongestMaxSize(max_size=RESOLUTION * 2,
                         interpolation=cv2.INTER_CUBIC, always_apply=True),
        A.PadIfNeeded(min_height=RESOLUTION * 2, min_width=RESOLUTION * 2,
                      always_apply=True, border_mode=cv2.BORDER_CONSTANT),
        A.RandomResizedCrop(RESOLUTION, RESOLUTION, scale=(0.7, 1),
                            interpolation=cv2.INTER_CUBIC),
        A.Resize(RESOLUTION, RESOLUTION, p=1.0, interpolation=cv2.INTER_CUBIC),
        A.FancyPCA(p=0.8, alpha=0.5),
        # A.Transpose(p=0.7),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        A.ShiftScaleRotate(p=0.4, rotate_limit=12),
        A.HueSaturationValue(p=0.3,
                             hue_shift_limit=(-20, 20),
                             sat_shift_limit=(-30, 30),
                             val_shift_limit=(-20, 20)),
        # A.HueSaturationValue(hue_shift_limit=0.4, sat_shift_limit=0.4,
        #                      val_shift_limit=0.4, p=0.7),
        A.RandomBrightnessContrast(brightness_limit=(-0.5, 0.5),
                                   contrast_limit=(-0.4, 0.4)),
        A.CoarseDropout(p=0.8, max_holes=30),
        # A.Cutout(p=0.8, max_h_size=40, max_w_size=40),
        A.Cutout(p=1, max_h_size=60, max_w_size=30, num_holes=6,
                 fill_value=[106, 87, 55]),
        A.Cutout(p=1, max_h_size=30, max_w_size=60, num_holes=6,
                 fill_value=[106, 87, 55]),
        A.OneOf([
            A.OpticalDistortion(p=1.0, distort_limit=(-0.66, 0.68),
                                shift_limit=(-0.67, 0.46), interpolation=0,
                                border_mode=0, value=(0, 0, 0), mask_value=None),
            # A.OpticalDistortion(p=0.5, distort_limit=0.15, shift_limit=0.15),
            # A.GridDistortion(p=0.5, distort_limit=0.5),
            A.GridDistortion(p=1.0, num_steps=6, distort_limit=(-0.46, 0.5),
                             interpolation=0, border_mode=0, value=(0, 0, 0),
                             mask_value=None),
            # A.IAAPiecewiseAffine(p=0.5, scale=(0.1, 0.14)),
        ], p=0.6),
        A.Sharpen(p=1.0, alpha=(0.1, 0.3), lightness=(0.3, 0.9)),
        A.GaussNoise(var_limit=(300.0, 500.0), p=0.4),
        A.ISONoise(p=0.4, intensity=(0.1, 1.4), color_shift=(0.01, 0.4)),
        A.OneOf([
            A.Equalize(p=1.0, mode='cv', by_channels=True),
            A.Solarize(p=1.0, threshold=(67, 120)),
            # A.IAAAdditiveGaussianNoise(p=1.0),
            A.GaussNoise(p=1.0),
            A.MotionBlur(p=1.0, blur_limit=(5, 20)),
        ], p=0.5),
    ], p=1.0)
def get_augmentation(version):
    if version == "v1":
        # YOLOv2 size
        size = 448
        # ImageNet normalization
        normalization = A.Normalize(mean=(0.485, 0.456, 0.406),
                                    std=(0.229, 0.224, 0.225), p=1)
        augmentation = {
            "train": A.Compose([
                A.HorizontalFlip(p=0.5),
                A.VerticalFlip(p=0.5),
                A.RandomRotate90(p=0.5),
                A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.25),
                A.Blur(blur_limit=4, p=0.25),
                A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2, p=0.25),
                A.OneOf([
                    A.RandomSizedBBoxSafeCrop(size, size, erosion_rate=0.0,
                                              interpolation=1, p=0.5),
                    A.Resize(size, size, interpolation=1, p=0.5),
                ], p=1),
                normalization,
            ]),
            "valid": A.Compose([
                A.Resize(size, size, interpolation=1, p=1),
                normalization,
            ]),
        }
    elif version == "v2":
        # YOLOv2 size
        size = 448
        # ImageNet normalization
        normalization = A.Normalize(mean=(0.485, 0.456, 0.406),
                                    std=(0.229, 0.224, 0.225), p=1)
        augmentation = {
            "train": A.Compose([
                A.HorizontalFlip(p=0.5),
                A.VerticalFlip(p=0.5),
                A.RandomRotate90(p=0.5),
                A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.25),
                A.Blur(blur_limit=4, p=0.25),
                A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2, p=0.25),
                A.LongestMaxSize(max_size=size, interpolation=1, p=1),
                A.PadIfNeeded(min_height=size, min_width=size, border_mode=1,
                              value=None, mask_value=None, p=1),
                normalization,
            ]),
            "valid": A.Compose([
                A.LongestMaxSize(max_size=size, interpolation=1, p=1),
                A.PadIfNeeded(min_height=size, min_width=size, border_mode=1,
                              value=None, mask_value=None, p=1),
                normalization,
            ]),
        }
    else:
        raise Exception(f"Augmentation version '{version}' is unknown!")

    return augmentation
Scale = [IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8]  # 13, 26, 52

CHECKPOINT_FILE = "checkpoint.pth.tar"
LOAD_MODEL = False

# Anchors rescaled to [0, 1], calculated using k-means on the COCO dataset
# TODO: Recalculate for COCO
anchors = [
    [(0.28, 0.22), (0.38, 0.48), (0.90, 0.78)],
    [(0.07, 0.15), (0.15, 0.11), (0.14, 0.29)],
    [(0.02, 0.03), (0.04, 0.07), (0.08, 0.06)],
]

train_transforms = A.Compose(
    [
        A.LongestMaxSize(max_size=int(IMAGE_SIZE)),
        A.PadIfNeeded(min_height=int(IMAGE_SIZE), min_width=int(IMAGE_SIZE),
                      border_mode=cv2.BORDER_CONSTANT),
        A.RandomCrop(width=IMAGE_SIZE, height=IMAGE_SIZE),
        A.ColorJitter(brightness=0.6, contrast=0.6, saturation=0.6, hue=0.6, p=0.1),
        A.ShiftScaleRotate(rotate_limit=10, p=0.2, border_mode=cv2.BORDER_CONSTANT),
        A.HorizontalFlip(p=0.5),
        # A.Blur(p=0.2),
        # A.CLAHE(p=0.2),
        # A.Posterize(p=0.2),
        A.ToGray(p=0.1),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225],
                    max_pixel_value=255.0),
        ToTensorV2(),
    ],
    bbox_params=A.BboxParams(format="yolo", min_visibility=0.4, label_fields=[]),
)
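A minimal usage sketch for the pipeline above, assuming IMAGE_SIZE = 416 (as the 13/26/52 grid comment implies): LongestMaxSize plus PadIfNeeded letterbox the image, while BboxParams(format="yolo") remaps the normalized boxes into the letterboxed frame.

import numpy as np

sample = train_transforms(
    image=np.zeros((480, 640, 3), dtype=np.uint8),
    bboxes=[(0.5, 0.5, 0.2, 0.3)],  # one YOLO-format (x_c, y_c, w, h) box
)
print(sample["image"].shape)  # torch.Size([3, 416, 416]) after ToTensorV2
print(sample["bboxes"])       # remapped box; may be dropped if min_visibility is violated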
def __init__(self, file_paths: List[Path], max_size: int, transform: albu.Compose) -> None:
    self.file_paths = file_paths
    self.transform = transform
    self.max_size = max_size
    self.resize = albu.LongestMaxSize(max_size=max_size, p=1)
LOAD_MODEL = True
SAVE_MODEL = True
CHECKPOINT_FILE = "checkpoint.pth.tar"
IMG_DIR = DATASET + "/images/"
LABEL_DIR = DATASET + "/labels/"

# Note these have been rescaled to be between [0, 1]
ANCHORS = [
    [(0.28, 0.22), (0.38, 0.48), (0.9, 0.78)],
    [(0.07, 0.15), (0.15, 0.11), (0.14, 0.29)],
    [(0.02, 0.03), (0.04, 0.07), (0.08, 0.06)],
]

scale = 1.1
train_transforms = A.Compose(
    [
        A.LongestMaxSize(max_size=int(IMAGE_SIZE * scale)),
        A.PadIfNeeded(
            min_height=int(IMAGE_SIZE * scale),
            min_width=int(IMAGE_SIZE * scale),
            border_mode=cv2.BORDER_CONSTANT,
        ),
        A.RandomCrop(width=IMAGE_SIZE, height=IMAGE_SIZE),
        A.ColorJitter(brightness=0.6, contrast=0.6, saturation=0.6, hue=0.6, p=0.4),
        A.OneOf(
            [
                A.ShiftScaleRotate(rotate_limit=10, p=0.4,
                                   border_mode=cv2.BORDER_CONSTANT),
                A.IAAAffine(shear=10, p=0.4, mode="constant"),
            ],
            p=1.0,
        ),
        # ...
def __init__(
        self,
        crop_height: typing.Union[int, typing.AnyStr] = 320,
        input_height: int = 224,
        gaussian_blur: bool = True,
        jitter_strength: float = 1.,
        seed_wrap_augments: bool = False,
        use_hflip_augment: bool = False,
        drop_orig_image: bool = True,
        crop_scale: typing.Tuple[float, float] = (0.2, 1.0),
        crop_ratio: typing.Tuple[float, float] = (1.0, 1.0),
        shared_transform=True,
        augmentation=True,  # used to disable augmentation at inference/validation time
        crop_strategy="centroid",
        sync_hflip=False,
        same_crop=False) -> None:
    self.crop_height = crop_height
    self.input_height = input_height
    self.gaussian_blur = gaussian_blur
    self.jitter_strength = jitter_strength
    self.seed_wrap_augments = seed_wrap_augments
    self.use_hflip_augment = use_hflip_augment
    self.drop_orig_image = drop_orig_image
    self.shared_transform = shared_transform
    self.enable_augmentation = augmentation
    self.crop_strategy = crop_strategy
    self.sync_hflip = sync_hflip
    self.crop_scale = crop_scale
    self.crop_ratio = crop_ratio
    self.same_crop = same_crop

    bbox_transforms = [
        albumentations.LongestMaxSize(max_size=224),
        albumentations.PadIfNeeded(
            min_height=224,
            min_width=224,
            border_mode=0,
        )
    ]

    assert self.crop_strategy in ["centroid", "bbox", "bbox_same_crop"]
    if self.enable_augmentation:
        augment_transforms = [
            albumentations.RandomResizedCrop(
                height=self.input_height,
                width=self.input_height,
                scale=self.crop_scale,
                ratio=self.crop_ratio,
            ),
        ]
        if self.crop_strategy in ["bbox", "bbox_same_crop"]:
            augment_transforms = bbox_transforms + augment_transforms
        if self.use_hflip_augment:
            augment_transforms.append(albumentations.HorizontalFlip(p=0.5))
        augment_transforms.extend([
            albumentations.ColorJitter(
                brightness=0.4 * self.jitter_strength,
                contrast=0.4 * self.jitter_strength,
                saturation=0.4 * self.jitter_strength,
                hue=0.1 * self.jitter_strength,
                p=0.8,
            ),
            albumentations.ToGray(p=0.2),
        ])
        if self.gaussian_blur:
            # @@@@@ TODO: check what kernel size is best? is auto good enough?
            # kernel_size = int(0.1 * self.input_height)
            # if kernel_size % 2 == 0:
            #     kernel_size += 1
            augment_transforms.append(
                albumentations.GaussianBlur(
                    blur_limit=(3, 5),
                    # blur_limit=kernel_size,
                    # sigma_limit=???
                    p=0.5,
                ))
    else:
        augment_transforms = bbox_transforms

    if self.seed_wrap_augments:
        assert thelper_available
        self.augment_transform = thelper.transforms.wrappers.SeededOpWrapper(
            operation=albumentations.Compose(augment_transforms),
            sample_kw="image",
        )
    else:
        self.augment_transform = albumentations.Compose(augment_transforms)

    self.convert_transform = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
    ])

    # add online train transform of the size of the global view
    self.online_augment_transform = albumentations.Compose([
        albumentations.RandomResizedCrop(
            height=self.input_height,
            width=self.input_height,
            scale=(0.5, 1.0),  # @@@@ adjust if needed?
        ),  # @@@@@@@@@ BAD W/O SEED WRAPPER?
        albumentations.HorizontalFlip(p=0.5),  # @@@@@@@@@ BAD W/O SEED WRAPPER?
    ])
    self.sync_hflip_transform = albumentations.Compose([
        albumentations.HorizontalFlip(p=1),  # @@@@@@@@@ BAD W/O SEED WRAPPER?
    ])
def get_training_augmentation(size):
    train_transform = [
        A.LongestMaxSize(max_size=size, always_apply=True),
        A.PadIfNeeded(min_height=size, min_width=size, always_apply=True, border_mode=0),
        # A.RandomCrop(height=size, width=size, always_apply=True),
        # A.VerticalFlip(p=0.5),
        # A.HorizontalFlip(p=0.5),
        # A.RandomRotate90(p=0.5),
        A.ShiftScaleRotate(scale_limit=0.2, rotate_limit=0, shift_limit=0.2,
                           p=0.1, border_mode=0),
        A.IAAPerspective(p=0.1),
        A.CoarseDropout(p=0.1),
        A.ChannelDropout(p=0.1),
        A.RGBShift(p=0.1),
        A.OneOf(
            [A.OpticalDistortion(p=0.5), A.GridDistortion(p=0.5)],
            p=0.1,
        ),
        A.OneOf(
            [
                A.CLAHE(p=0.5),
                A.RandomBrightness(p=0.5),
                A.RandomGamma(p=0.5),
            ],
            p=0.5,
        ),
        A.OneOf(
            [
                A.GaussianBlur(p=0.1),
                A.IAASharpen(p=0.5),
                A.Blur(blur_limit=5, p=0.5),
                A.MotionBlur(blur_limit=5, p=0.5),
            ],
            p=0.5,
        ),
        A.OneOf(
            [
                A.RandomContrast(p=0.5),
                A.HueSaturationValue(p=0.5),
            ],
            p=0.1,
        ),
        A.Lambda(mask=round_clip_0_1),
        A.Cutout(num_holes=8, max_h_size=20, max_w_size=20, fill_value=0, p=0.2),
        A.CoarseDropout(max_holes=8, max_height=20, max_width=20,
                        min_holes=None, min_height=None, min_width=None,
                        fill_value=0, p=0.2),
        # A.GlassBlur(sigma=0.7, max_delta=4, iterations=2, mode='fast', p=0.2)
    ]
    return A.Compose(train_transform)
args = {
    # ...
    'optim': 'sgd',
    "batch_size": 24,
    "n_splits": 5,
    "fold": 0,
    "seed": 0,
    "device": "cuda:0",
    "out_dim": 1049,
    "n_classes": 1049,
    'class_weights': "log",
    'class_weights_norm': 'batch',
    "normalization": "imagenet",
    "crop_size": 448,
}

args['tr_aug'] = A.Compose([
    A.LongestMaxSize(512, p=1),
    A.PadIfNeeded(512, 512, border_mode=cv2.BORDER_CONSTANT, p=1),
    A.RandomCrop(p=1.0, height=args['crop_size'], width=args['crop_size']),
    A.HorizontalFlip(p=0.5),
], p=1.0)

args['val_aug'] = A.Compose([
    A.LongestMaxSize(512, p=1),
    A.PadIfNeeded(512, 512, border_mode=cv2.BORDER_CONSTANT, p=1),
    A.CenterCrop(p=1.0, height=args['crop_size'], width=args['crop_size']),
], p=1.0)
def run(datadir, n_gpus, epochs, batch_size, learning_rate):
    n_max_gpus = torch.cuda.device_count()
    print(f'{n_max_gpus} GPUs available')
    n_gpus = min(n_gpus, n_max_gpus)
    print(f'Using {n_gpus} GPUs')
    device_ids = list(range(n_gpus))
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    preprocess = A.Compose([
        A.LongestMaxSize(max(IMAGE_SIZE)),
        A.PadIfNeeded(*IMAGE_SIZE),
        A.Normalize()
    ])
    augment = A.Compose([
        A.RandomBrightness(0.3),
        A.RandomContrast(0.2),
        A.HorizontalFlip()
    ])

    ds_train = StanfordDogs(datadir, split='train', preprocess=preprocess, augment=augment)
    ds_val = StanfordDogs(datadir, split='test', preprocess=preprocess)

    n_workers = 8
    dl_train = DataLoader(ds_train, batch_size=batch_size, shuffle=True,
                          num_workers=n_workers)
    dl_val = DataLoader(ds_val, batch_size=batch_size, num_workers=n_workers)

    model = EfficientNet(backbone='efficientnet_b2', n_classes=N_CLASSES)
    model = nn.DataParallel(model, device_ids=device_ids)
    model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    t_train_start = time.time()
    for e in range(epochs):
        t_epoch_start = time.time()
        model.train()
        for i, (images, labels) in enumerate(dl_train):
            optimizer.zero_grad()
            images, labels = images.to(device), labels.to(device)
            logits = model(images)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()
            acc = accuracy(logits, labels)
            epoch_time = time.time() - t_epoch_start
            show_progress(e, i, len(dl_train),
                          loss=loss.detach().cpu().numpy(),
                          acc=acc.detach().cpu().numpy(),
                          epoch_time=epoch_time)

        acc_val = []
        model.eval()
        with torch.no_grad():
            for images, labels in dl_val:
                images, labels = images.to(device), labels.to(device)
                logits = model(images)
                acc = accuracy(logits, labels)
                acc_val.append(acc.cpu().numpy())
        acc = np.mean(acc_val)
        t_epoch = time.time() - t_epoch_start
        print(f'\nEpoch {e} val-acc: {acc:.4}, time: {t_epoch:.4}s')

    t_train = time.time() - t_train_start
    print(f'Training finished in {t_train:.4}s')
import albumentations as A
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import cv2

image = Image.open('image.jpg')
# print('PIL before convert', image.size)
image = image.convert('RGB')  # needed for Normalize; without it the array is H x W x 4 (RGBA)
# print('PIL after convert', image.size)
image = np.array(image)

transform = A.Compose([
    # A.RandomResizedCrop(256, 256),
    A.LongestMaxSize(256),
    # A.SmallestMaxSize(256),
    # A.Normalize(),
    # A.RandomCrop(256, 256),
    # A.CLAHE(),  # sharpness
    # A.CoarseDropout(),  # rectangular
    # A.ColorJitter(),  # color aug
    # A.Cutout(),  # square
    # A.Equalize(),  # color aug
    # A.HorizontalFlip(),
    # A.HueSaturationValue(10, 10, 10, p=1),  # color aug
    # Pad sides of the image up to 256 if a side is smaller than the desired size
    A.PadIfNeeded(256, 256, border_mode=cv2.BORDER_CONSTANT),
    # A.RandomBrightness(),
    # A.RandomContrast(),
    # A.RandomBrightnessContrast(),
])
        kwarg = {
            'dataset': dataset,
            'batch_size': batch_size,
            'shuffle': shuffle,
            'validation_split': validation_split,
            'num_workers': num_workers,
            'collate_fn': dataset.collate_fn
        }
        super(ChargridDataloader, self).__init__(**kwarg)


if __name__ == "__main__":
    size = 64
    aug = alb.Compose([
        alb.LongestMaxSize(size + 24),
        alb.PadIfNeeded(size + 24, size + 24, border_mode=cv2.BORDER_CONSTANT),
        alb.RandomCrop(size, size, p=0.3),
        alb.Resize(size, size)
    ], bbox_params=alb.BboxParams(format='coco', label_fields=['lbl_id'], min_area=2.0))

    dataset = SegDataset('./data', 'train_files.txt', transform=aug)
    data_loader = DataLoader(dataset, batch_size=4, shuffle=True,
                             collate_fn=dataset.collate_fn)
    print(len(data_loader))

    for idx, sample in enumerate(data_loader):
        img, mask, boxes, lbl_boxes = sample
        print(img.size())
class Yolo2(object):
    width = height = 544

    train_transform = A.Compose(  # Yolo
        [
            # A.RandomSizedCrop(min_max_height=(800, 1024), height=1024, width=1024, p=0.5),
            # A.RandomScale(scale_limit=0.3, p=1.0),  # this one is problematic
            C.RandomResize(scale_limit=0.3, p=1.0),  # adjust the aspect ratio within [1/1.3, 1.3]
            A.OneOf(
                [
                    A.Sequential(
                        [
                            A.SmallestMaxSize(min(height, width), p=1.0),
                            # first resize the short side to 544, then crop to 544x544
                            A.RandomCrop(height, width, p=1.0)
                        ],
                        p=0.4),
                    A.LongestMaxSize(max(height, width), p=0.6),  # resize the long side to 544
                ],
                p=1.0),
            # A.LongestMaxSize(max(height, width), p=1.0),
            A.OneOf([
                A.HueSaturationValue(hue_shift_limit=0.4, sat_shift_limit=0.4,
                                     val_shift_limit=0.4, p=0.9),
                A.RandomBrightnessContrast(brightness_limit=0.3,
                                           contrast_limit=0.3, p=0.9),
            ], p=0.9),
            # A.PadIfNeeded(min_height=height, min_width=width, border_mode=0,
            #               value=(0.5, 0.5, 0.5), p=1.0),
            C.RandomPad(min_height=height, min_width=width, border_mode=0,
                        value=(0.5, 0.5, 0.5), p=1.0),
            A.HorizontalFlip(p=0.5),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc', min_area=0,
                                 min_visibility=0, label_fields=['labels']),
    )

    divisor = 32
    val_transform = A.Compose(  # Yolo
        [
            A.LongestMaxSize(width, p=1.0),
            A.PadIfNeeded(min_height=height, min_width=width, border_mode=0,
                          value=(0.5, 0.5, 0.5), p=1.0),
            ToTensorV2(p=1.0),
        ],
        p=1.0,
        bbox_params=A.BboxParams(format='pascal_voc', min_area=0,
                                 min_visibility=0, label_fields=['labels']))
            A.IAAEmboss(),
            A.RandomBrightnessContrast(),
        ], p=0.3),
        A.Normalize(),
        M.MyToTensorV2(),
    ],
    additional_targets={
        'right_img': 'image',
        'left_normal': 'normal',
        'right_normal': 'normal',
    }
)

img_transform = A.Compose(
    [
        A.LongestMaxSize(max_size=IMAGE_SIZE),
        A.PadIfNeeded(min_height=IMAGE_SIZE, min_width=IMAGE_SIZE,
                      border_mode=cv2.BORDER_CONSTANT, value=0),
        A.Normalize(),
        M.MyToTensorV2(),
    ],
    additional_targets={
        'right_img': 'image',
    }
)

_, dataloader = create_dataloader("../bdataset_stereo", "train.json", transform=my_transform)

left_imgs, right_imgs, left_normals, right_normals = next(iter(dataloader))
assert left_imgs.shape == right_imgs.shape, "dataset error"
assert right_normals.shape == left_normals.shape, "dataset error"
assert left_imgs.shape == (2, 3, 256, 256), f"dataset error {left_imgs.shape}"
assert left_normals.shape == (2, 3, 256, 256), f"dataset error {left_normals.shape}"
def pre_transforms(image_size=224):
    result = [
        A.LongestMaxSize(max_size=image_size),
        A.PadIfNeeded(image_size, image_size, border_mode=0)
    ]
    return result
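Since pre_transforms returns a plain list, it composes naturally with further steps. A small usage sketch (the Normalize step here is illustrative, not part of the function above):

import albumentations as A

# Splice the resize/pad list into a larger pipeline before normalization
pipeline = A.Compose([*pre_transforms(image_size=224), A.Normalize()])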
    state_dict = rename_layers(state_dict, {"model.": ""})
    model.load_state_dict(state_dict)
    return model


@st.cache(allow_output_mutation=True)
def cached_model():
    model = load_model()
    model.eval()
    return model


model = cached_model()

transform = albu.Compose(
    [albu.LongestMaxSize(max_size=MAX_SIZE), albu.Normalize(p=1)], p=1
)

st.title("Segment glomeruli")

# What about a TIFF image?
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png"])

if uploaded_file is not None:
    original_image = np.array(Image.open(uploaded_file))
    st.image(original_image, caption="Before", use_column_width=True)
    st.write("")
    st.write("Detecting glomeruli...")

    original_height, original_width = original_image.shape[:2]

    image = transform(image=original_image)["image"]
    padded_image, pads = pad(image, factor=MAX_SIZE, border=cv2.BORDER_CONSTANT)