def test_augmentation_list(self):
    input_shape = (100, 100)
    image = np.random.rand(*input_shape).astype("float32")
    sem_seg = (np.random.rand(*input_shape) < 0.5).astype("uint8")
    inputs = T.StandardAugInput(image, sem_seg=sem_seg)  # provide two args

    augs = T.AugmentationList([T.RandomFlip(), T.Resize(20)])
    _ = T.AugmentationList([augs, T.Resize(30)])(inputs)
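# Minimal usage sketch (not part of the original test): calling an
# AugmentationList on an AugInput mutates the input in place and returns a
# TransformList that can be reused on other data of the same image.
import numpy as np
import detectron2.data.transforms as T

image = np.random.rand(100, 100, 3).astype("float32")
aug_input = T.AugInput(image)
tfms = T.AugmentationList([T.RandomFlip(prob=0.5), T.Resize((20, 20))])(aug_input)
new_image = aug_input.image            # augmented image, shape (20, 20, 3)
boxes = np.array([[10, 10, 50, 50]], dtype="float32")
new_boxes = tfms.apply_box(boxes)      # the same transforms applied to boxes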
def __init__(
    self,
    is_train: bool,
    *,
    augmentations: List[Union[T.Augmentation, T.Transform]],
    image_format: str,
    # Extra data augmentation for point supervision
    sample_points: int = 0,
):
    """
    NOTE: this interface is experimental.

    Args:
        is_train: whether it's used in training or inference
        augmentations: a list of augmentations or deterministic transforms to apply
        image_format: an image format supported by :func:`detection_utils.read_image`.
        sample_points: subsample points at each iteration
    """
    # fmt: off
    self.is_train      = is_train
    self.augmentations = T.AugmentationList(augmentations)
    self.image_format  = image_format
    self.sample_points = sample_points
    # fmt: on

    logger = logging.getLogger(__name__)
    mode = "training" if is_train else "inference"
    logger.info(f"[DatasetMapper] Augmentations used in {mode}: {augmentations}")
    logger.info(f"Point Augmentations used in {mode}: sample {sample_points} points")
def test_print_augmentation(self):
    t = T.RandomCrop("relative", (100, 100))
    self.assertEqual(str(t), "RandomCrop(crop_type='relative', crop_size=(100, 100))")

    t0 = T.RandomFlip(prob=0.5)
    self.assertEqual(str(t0), "RandomFlip(prob=0.5)")

    t1 = T.RandomFlip()
    self.assertEqual(str(t1), "RandomFlip()")

    t = T.AugmentationList([t0, t1])
    self.assertEqual(str(t), f"AugmentationList[{t0}, {t1}]")
def __init__(
    self,
    is_train: bool,
    augmentations: List[Union[T.Augmentation, T.Transform]],
    image_format: str,
):
    # fmt: off
    self.is_train      = is_train
    self.augmentations = T.AugmentationList(augmentations)
    self.image_format  = image_format
    # fmt: on

    logger = logging.getLogger(__name__)
    mode = "training" if is_train else "inference"
    logger.info(f"[DatasetMapper] Augmentations used in {mode}: {augmentations}")
def __init__(
    self,
    is_train: bool,
    *,
    augmentations: List[Union[T.Augmentation, T.Transform]],
    image_format: str,
    mosaic_trans: Optional[CfgNode],
    use_instance_mask: bool = False,
    use_keypoint: bool = False,
    instance_mask_format: str = "polygon",
    recompute_boxes: bool = False,
    add_meta_infos: bool = False,
):
    """
    Args:
        is_train: whether it's used in training or inference
        augmentations: a list of augmentations or deterministic transforms to apply
        image_format: an image format supported by :func:`detection_utils.read_image`.
        mosaic_trans: a CfgNode for the Mosaic transformation, or None to disable it
        use_instance_mask: whether to process instance segmentation annotations, if available
        use_keypoint: whether to process keypoint annotations if available
        instance_mask_format: one of "polygon" or "bitmask". Process instance segmentation
            masks into this format.
        recompute_boxes: whether to overwrite bounding box annotations by computing
            tight bounding boxes from instance mask annotations.
        add_meta_infos: whether to add a `meta_infos` field
    """
    if recompute_boxes:
        assert use_instance_mask, "recompute_boxes requires instance masks"
    # fmt: off
    self.is_train             = is_train
    self.augmentations        = T.AugmentationList(augmentations)
    self.image_format         = image_format
    self.use_instance_mask    = use_instance_mask
    self.instance_mask_format = instance_mask_format
    self.use_keypoint         = use_keypoint
    self.recompute_boxes      = recompute_boxes
    self.add_meta_infos       = add_meta_infos
    # fmt: on

    logger = logging.getLogger(__name__)
    mode = "training" if is_train else "inference"
    logger.info(f"[DatasetMapper] Augmentations used in {mode}: {augmentations}")

    self.mosaic_trans = mosaic_trans
    # Guard against mosaic_trans=None (the annotation allows it) before reading ENABLED
    if self.mosaic_trans is not None and self.mosaic_trans.ENABLED:
        self.mosaic_pool = deque(maxlen=self.mosaic_trans.POOL_CAPACITY)
def __init__(self, cfg, is_train: bool = True):
    aug_kwargs = cfg.aug_kwargs
    aug_list = [
        # T.Resize((800, 800)),
    ]
    if is_train:
        aug_list.extend([
            getattr(T, name)(**kwargs) for name, kwargs in aug_kwargs.items()
        ])
    self.augmentations = T.AugmentationList(aug_list)
    self.is_train = is_train

    mode = "training" if is_train else "inference"
    print(f"[MyDatasetMapper] Augmentations used in {mode}: {self.augmentations}")
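# Illustrative only: `cfg.aug_kwargs` is project-specific, so the keys below are
# assumptions. Each key must name a class in detectron2.data.transforms and each
# value holds that class's keyword arguments, e.g.:
#
# aug_kwargs = {
#     "RandomBrightness": {"intensity_min": 0.8, "intensity_max": 1.2},
#     "RandomFlip": {"prob": 0.5},
#     "ResizeShortestEdge": {"short_edge_length": (640, 672), "max_size": 1333},
# }
#
# which the constructor above turns into
# [T.RandomBrightness(...), T.RandomFlip(...), T.ResizeShortestEdge(...)].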
def __init__(
    self,
    is_train: bool,
    *,
    augmentations: List[Union[T.Augmentation, T.Transform]],
    image_format: str,
    use_instance_mask: bool = False,
    use_keypoint: bool = False,
    instance_mask_format: str = "polygon",
    keypoint_hflip_indices: Optional[np.ndarray] = None,
    precomputed_proposal_topk: Optional[int] = None,
    recompute_boxes: bool = False,
):
    """
    NOTE: this interface is experimental.

    Args:
        is_train: whether it's used in training or inference
        augmentations: a list of augmentations or deterministic transforms to apply
        image_format: an image format supported by :func:`detection_utils.read_image`.
        use_instance_mask: whether to process instance segmentation annotations, if available
        use_keypoint: whether to process keypoint annotations if available
        instance_mask_format: one of "polygon" or "bitmask". Process instance segmentation
            masks into this format.
        keypoint_hflip_indices: see :func:`detection_utils.create_keypoint_hflip_indices`
        precomputed_proposal_topk: if given, will load pre-computed proposals from
            dataset_dict and keep the top k proposals for each image.
        recompute_boxes: whether to overwrite bounding box annotations by computing
            tight bounding boxes from instance mask annotations.
    """
    if recompute_boxes:
        assert use_instance_mask, "recompute_boxes requires instance masks"
    # fmt: off
    self.is_train               = is_train
    self.augmentations          = T.AugmentationList(augmentations)
    self.image_format           = image_format
    self.use_instance_mask      = use_instance_mask
    self.instance_mask_format   = instance_mask_format
    self.use_keypoint           = use_keypoint
    self.keypoint_hflip_indices = keypoint_hflip_indices
    self.proposal_topk          = precomputed_proposal_topk
    self.recompute_boxes        = recompute_boxes
    # fmt: on

    logger = logging.getLogger(__name__)
    logger.info("Augmentations used in training: " + str(augmentations))
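# Minimal sketch of constructing this mapper explicitly instead of via
# `DatasetMapper(cfg, is_train)`; the argument values below are assumptions.
import detectron2.data.transforms as T
from detectron2.data import DatasetMapper

mapper = DatasetMapper(
    is_train=True,
    augmentations=[T.ResizeShortestEdge(short_edge_length=800, max_size=1333),
                   T.RandomFlip()],
    image_format="BGR",
    use_instance_mask=True,
    instance_mask_format="bitmask",
    recompute_boxes=True,
)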
def __init__(
    self,
    project,
    fileServer,
    augmentations,
    is_train,
    image_format='BGR',
    instance_mask_format='bitmask',
    recompute_boxes=True,
    classIndexMap=None,
):
    super(DatasetMapper, self).__init__()
    self.project = project
    self.fileServer = fileServer
    self.augmentations = augmentations
    if not isinstance(self.augmentations, T.AugmentationList):
        self.augmentations = T.AugmentationList(self.augmentations)
    self.is_train = is_train
    self.image_format = image_format
    self.instance_mask_format = instance_mask_format
    self.recompute_boxes = recompute_boxes
    # Used to map e.g. segmentation index values from AIDE to the model
    self.classIndexMap = classIndexMap
    self.keypoint_hflip_indices = None  # TODO
def __init__(
    self,
    *,
    augmentations: List[Union[T.Augmentation, T.Transform]],
    image_format: str,
    panoptic_target_generator: Callable,
):
    """
    NOTE: this interface is experimental.

    Args:
        augmentations: a list of augmentations or deterministic transforms to apply
        image_format: an image format supported by :func:`detection_utils.read_image`.
        panoptic_target_generator: a callable that takes "panoptic_seg" and
            "segments_info" to generate training targets for the model.
    """
    # fmt: off
    self.augmentations = T.AugmentationList(augmentations)
    self.image_format  = image_format
    # fmt: on
    logger = logging.getLogger(__name__)
    logger.info("Augmentations used in training: " + str(augmentations))

    self.panoptic_target_generator = panoptic_target_generator
def build_train_dataloader(cfg):  # like 'build_detection_train_loader'
    if 'coco_2017_train' in cfg.DATASETS.TRAIN:
        descs_train: List[Dict] = DatasetCatalog.get("coco_2017_train")
        ds_train = DatasetFromList(descs_train, copy=False)
        mapper = DatasetMapper(cfg, True)
    else:  # Open Images Dataset
        # NOTE: the `if '<string>':` blocks below are always true; they only
        # serve as collapsible section labels.
        if 'get_detection_dataset_dicts':
            all_descs_train: List[Dict] = DatasetCatalog.get("oid_train")
        if 'rebalancing':
            image_id_vs_idx = {}
            for idx, desc in enumerate(all_descs_train):
                image_id_vs_idx[desc['image_id']] = idx
            descs_train = list(map(
                lambda img_id: all_descs_train[image_id_vs_idx[img_id]],
                sample_image_ids()
            ))
        print('_' * 50 + f'train dataset len: {len(descs_train)}')

        ds_train = DatasetFromList(descs_train, copy=False)
        if 'DatasetMapper':
            augs = [
                RandomContrast(0.8, 1.2),
                RandomBrightness(0.8, 1.2),
                RandomSaturation(0.8, 1.2),
            ]
            augs.extend(build_augmentation(cfg, is_train=True))
            mapper = make_mapper('oid_train', is_train=True,
                                 augmentations=T.AugmentationList(augs))
    ds_train = MapDataset(ds_train, mapper)

    sampler = TrainingSampler(len(ds_train))
    data_loader = build_batch_data_loader(
        ds_train,
        sampler,
        cfg.SOLVER.IMS_PER_BATCH,
        aspect_ratio_grouping=cfg.DATALOADER.ASPECT_RATIO_GROUPING,
        num_workers=cfg.DATALOADER.NUM_WORKERS,
    )
    global DATA_LOADER
    DATA_LOADER = data_loader
    return data_loader
from detectron2.utils.visualizer import Visualizer, ColorMode
import detectron2.data.transforms as T
import numpy as np
import torch

from utils.trainer import InvertColors

augs = T.AugmentationList([
    InvertColors(),
    T.Resize((300, 800)),
    # T.RandomContrast(1.5, 2.5),
    T.PadTransform(100, 100, 100, 100),
])


def augment(im):
    input = T.AugInput(im)
    transform = augs(input)  # type: T.Transform
    x = input.image  # new image
    return x


def sort_predictions(outputs):
    pred_classes = []
    scores = []
    for out in outputs:
        idxs = np.argsort(out["instances"].pred_boxes.tensor.to('cpu')[:, 0])
        pred_classes.append(out["instances"].pred_classes[idxs])
        scores.append(out["instances"].scores[idxs])
    return pred_classes, scores
from detectron2.utils.visualizer import Visualizer, ColorMode
import detectron2.data.transforms as T
import numpy as np
import torch

from utils.trainer import InvertColors

augs = T.AugmentationList([
    T.Resize((600, 400)),
    T.RandomContrast(1.5, 2.5),
    T.PadTransform(100, 100, 100, 100),
])


def augment(im):
    input = T.AugInput(im)
    transform = augs(input)  # type: T.Transform
    x = input.image  # new image
    return x


def sort_predictions(outputs):
    pred_classes = []
    scores = []
    for out in outputs:
        idxs = np.argsort(out["instances"].pred_boxes.tensor.to('cpu')[:, 0])
        pred_classes.append(out["instances"].pred_classes[idxs])
        scores.append(out["instances"].scores[idxs])
    return pred_classes, scores
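# Usage sketch (illustrative): `augment` above discards the returned transform,
# but the same TransformList can also be applied to annotations. The image and
# box values here are made up for demonstration.
im = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
aug_input = T.AugInput(im)
tfms = augs(aug_input)
augmented_image = aug_input.image
boxes = np.array([[20, 30, 200, 180]], dtype="float32")  # XYXY
augmented_boxes = tfms.apply_box(boxes)  # boxes in the augmented image's frame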
import json
import os
import cv2
import random
import numpy as np
import torch

import detectron2.data.transforms as transforms
from detectron2.data import DatasetMapper, build_detection_train_loader
from detectron2.engine import DefaultTrainer
from detectron2.utils.visualizer import Visualizer

print(torch.__version__)

import Params as P

USE_SAVED_MODEL = True
SHOW_INPUTS = False

# Keep this a plain list: DatasetMapper wraps `augmentations` in
# T.AugmentationList itself, and AugmentationList is not iterable,
# so passing an already-wrapped AugmentationList would fail.
augs = [
    transforms.RandomBrightness(0.5, 1.5),
    transforms.RandomContrast(0.5, 1.5),
    transforms.RandomSaturation(0.5, 1.5),
    transforms.RandomFlip(prob=0.5),
    transforms.RandomExtent(scale_range=(0.1, 3), shift_range=(0.5, 0.5)),
    transforms.Resize(P.CNN_INPUT_SHAPE),
]


class Trainer(DefaultTrainer):
    @classmethod
    def build_train_loader(cls, cfg):
        mapper = DatasetMapper(cfg, is_train=True, augmentations=augs)
        return build_detection_train_loader(cfg, mapper=mapper)


for d in ["train"]:  # , "valid"
    with open(P.DATASET_DIR + d + "/labels.json", 'r') as fp:
        dataset_dicts = json.load(fp)
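# Typical DefaultTrainer driver for the Trainer above (sketch; the cfg setup is
# project-specific and omitted here):
# cfg = get_cfg()
# ...merge the project's model/dataset config...
# trainer = Trainer(cfg)
# trainer.resume_or_load(resume=False)
# trainer.train()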
def __call__(self, dataset_dict):
    """Loads image & attributes into the dict and returns a pair of dicts - one for the
    original image and one for its x-flipped copy.

    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
            See the full list of keys here:
            https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html

    Returns:
        tuple(dict, dict): a tuple where the first dict contains the data for the image
        augmented in the default way, and the second dict contains the same image but
        x-flipped.

    Most of the code comes from the original `__call__`; the only difference is in the
    last few lines, where the list of instantiated transforms is extended with an
    additional x-flip that is then applied to the image. Note that the resulting
    transforms list may contain two x-flips (which is effectively no flip), and one
    might argue we could simply keep the original image untouched and flip its copy.
    However, we keep things as they are because only the original image (in the first
    dict) is used for supervised training, while the x-flipped image is used only for
    the CSD loss. If the original image were never x-flipped, the model would
    effectively never be trained on x-flipped images.
    """

    # Load the image (D2's original code)
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    utils.check_image_size(dataset_dict, image)

    if "sem_seg_file_name" in dataset_dict:
        sem_seg_gt = utils.read_image(
            dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
    else:
        sem_seg_gt = None

    def apply_image_augmentations(image, dataset_dict, sem_seg_gt, augmentations):
        """Applies the given augmentations to the given image and its attributes
        (segmentation, instances, etc).

        Almost no changes from D2's original code (apart from erasing non-relevant
        portions, e.g. for keypoints); just wrapped in a function to avoid duplicated
        code."""
        aug_input = T.AugInput(image, sem_seg=sem_seg_gt)
        transforms = augmentations(aug_input)
        image, sem_seg_gt = aug_input.image, aug_input.sem_seg

        image_shape = image.shape[:2]  # h, w
        # Pytorch's dataloader is efficient on torch.Tensor due to shared memory,
        # but not efficient on large generic data structures due to the use of
        # pickle & mp.Queue. Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))
        if sem_seg_gt is not None:
            dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            # Return a tuple here as well, so the callers below can always
            # unpack two values
            return dataset_dict, transforms

        if "annotations" in dataset_dict:
            for anno in dataset_dict["annotations"]:
                if not self.use_instance_mask:
                    anno.pop("segmentation", None)
                if not self.use_keypoint:
                    anno.pop("keypoints", None)

            annos = [
                utils.transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices,
                )
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.instance_mask_format)

            # After transforms such as cropping are applied, the bounding box may no
            # longer tightly bound the object. As an example, imagine a triangle object
            # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The
            # tight bounding box of the cropped triangle should be [(1,0),(2,1)], which
            # is not equal to the intersection of the original bounding box and the
            # cropping box.
            if self.recompute_boxes:
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)

        return dataset_dict, transforms

    # Store copies of the image and its metadata for the future x-flip
    dataset_dict_flipped, image_flipped, sem_seg_gt_flipped = (
        dataset_dict.copy(),
        image.copy(),
        # `is not None`: truth-testing a NumPy array raises a ValueError
        sem_seg_gt.copy() if sem_seg_gt is not None else None,
    )

    # Augment the original image
    original_dataset_dict, transforms = apply_image_augmentations(
        image, dataset_dict, sem_seg_gt, self.augmentations)

    # Extend the instantiated transforms with an additional x-flip at the end;
    # see `TransformList.__add__`
    transforms_w_flip = transforms + T.HFlipTransform(image.shape[1])
    # Turn the Transforms back into Augmentations; to learn more about how they
    # differ, see the note here:
    # https://www.notion.so/vlfom/How-augmentations-work-in-DatasetMapper-a4832df03489429ba04b9bc8d0e12dc6
    augs_w_flip = T.AugmentationList(transforms_w_flip)

    # Obtain the x-flipped data
    flipped_dataset_dict, _ = apply_image_augmentations(
        image_flipped, dataset_dict_flipped, sem_seg_gt_flipped, augs_w_flip)

    return (original_dataset_dict, flipped_dataset_dict)
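# Usage sketch (the mapper class name and cfg are assumptions; this `__call__`
# belongs to a CSD-style DatasetMapper from the surrounding project):
# mapper = CSDDatasetMapper(cfg, is_train=True)   # hypothetical constructor
# original_dict, flipped_dict = mapper(dataset_dicts[0])
# original_dict["image"]   # default-augmented image, used for supervised training
# flipped_dict["image"]    # the same image with an extra x-flip, for the CSD loss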