def tta_fn(image, rois):
    image = image.permute(1, 2, 0).to('cpu').numpy()
    dtype = image.dtype
    image = image.astype(np.uint8)
    out_images, out_rois = [], []
    for tfm_gen in size_gens:
        resized_image, tfm = T.apply_transform_gens([tfm_gen], image)
        resized_rois = tfm.transforms[0].apply_box(rois.to('cpu').numpy())
        if cfg.TEST.AUG.FLIP:
            flipped_image, tfm = T.apply_transform_gens([flip], resized_image)
            flipped_rois = tfm.transforms[0].apply_box(resized_rois)
            img_batch = torch.stack([
                torch.from_numpy(resized_image.astype(dtype)).permute(2, 0, 1),
                torch.from_numpy(flipped_image.astype(dtype)).permute(2, 0, 1),
            ])
            roi_batch = [
                torch.from_numpy(resized_rois),
                torch.from_numpy(flipped_rois),
            ]
        else:
            img_batch = torch.from_numpy(
                resized_image.astype(dtype)).permute(2, 0, 1).unsqueeze(0)
            roi_batch = [torch.from_numpy(resized_rois)]
        out_images.append(img_batch)
        out_rois.append(roi_batch)
    return out_images, out_rois
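# tta_fn above reads size_gens, flip, and cfg as free (module-level) variables. A minimal
# sketch of plausible definitions, assuming detectron2's standard TEST.AUG config keys;
# the exact values are illustrative, not from the original source:
import numpy as np
import torch
import detectron2.data.transforms as T
from detectron2.config import get_cfg

cfg = get_cfg()
size_gens = [
    T.ResizeShortestEdge(min_size, cfg.TEST.AUG.MAX_SIZE)
    for min_size in cfg.TEST.AUG.MIN_SIZES
]
flip = T.RandomFlip(prob=1.0, horizontal=True, vertical=False)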
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    if "annotations" not in dataset_dict:
        image, transforms = T.apply_transform_gens(
            ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image)
    else:
        # Crop around an instance if there are instances in the image.
        # USER: Remove if you don't use cropping
        if self.crop_gen:
            crop_tfm = utils.gen_crop_transform_with_instance(
                self.crop_gen.get_crop_size(image.shape[:2]),
                image.shape[:2],
                np.random.choice(dataset_dict["annotations"]),
            )
            image = crop_tfm.apply_image(image)
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        if self.crop_gen:
            transforms = crop_tfm + transforms

    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(
        np.ascontiguousarray(image.transpose(2, 0, 1)))

    if not self.is_train:
        # USER: Modify this if you want to keep them for some reason.
        dataset_dict.pop("annotations", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(obj, transforms, image_shape)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = annotations_to_instances(annos, image_shape)
        dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
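# A hedged sketch of the __init__ state the mapper above reads (self.crop_gen,
# self.tfm_gens, self.img_format, self.is_train), mirroring detectron2's default
# DatasetMapper setup; the class name here is illustrative, not from the original:
class CropAwareDatasetMapper:
    def __init__(self, cfg, is_train=True):
        if cfg.INPUT.CROP.ENABLED and is_train:
            self.crop_gen = T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE)
        else:
            self.crop_gen = None
        self.tfm_gens = utils.build_transform_gen(cfg, is_train)
        self.img_format = cfg.INPUT.FORMAT
        self.is_train = is_train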
def __call__(self, dataset_dict):
    dataset_dict = copy.deepcopy(dataset_dict)
    try:
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        utils.check_image_size(dataset_dict, image)
    except OSError:
        # Skip unreadable/corrupt images by returning None; detectron2's MapDataset
        # retries with a different index when the mapper returns None.
        return

    if "annotations" not in dataset_dict:
        image, transforms = T.apply_transform_gens(
            ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image)
    else:
        if self.crop_gen:
            crop_tfm = utils.gen_crop_transform_with_instance(
                self.crop_gen.get_crop_size(image.shape[:2]),
                image.shape[:2],
                np.random.choice(dataset_dict["annotations"]),
            )
            image = crop_tfm.apply_image(image)
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        if self.crop_gen:
            transforms = crop_tfm + transforms

    image_shape = image.shape[:2]  # h, w
    dataset_dict["image"] = torch.as_tensor(
        np.ascontiguousarray(image.transpose(2, 0, 1)))
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    if self.crop_gen is None:
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
    else:
        if np.random.rand() > 0.5:
            image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        else:
            # Insert the crop just before the final transform (the resize in
            # DETR-style pipelines).
            image, transforms = T.apply_transform_gens(
                self.tfm_gens[:-1] + self.crop_gen + self.tfm_gens[-1:], image)

    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(
        np.ascontiguousarray(image.transpose(2, 0, 1)))

    if not self.is_train:
        # USER: Modify this if you want to keep them for some reason.
        dataset_dict.pop("annotations", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            anno.pop("keypoints", None)
        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(obj, transforms, image_shape)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(annos, image_shape)
        dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
def test_build_transform_gen(self):
    default_cfg = Detectron2GoRunner().get_default_cfg()
    default_cfg.INPUT.MIN_SIZE_TRAIN = (30,)
    default_cfg.INPUT.MIN_SIZE_TEST = 30

    trans_train = build_transform_gen(default_cfg, is_train=True)
    trans_test = build_transform_gen(default_cfg, is_train=False)

    img = np.zeros((80, 60, 3))
    trans_img_train, tl_train = apply_transform_gens(trans_train, img)
    trans_img_test, tl_test = apply_transform_gens(trans_test, img)

    self.assertEqual(trans_img_train.shape, (40, 30, 3))
    self.assertEqual(trans_img_test.shape, (40, 30, 3))
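# The expected shapes follow from shortest-edge resizing: the 80x60 input has shortest
# edge 60 and the target is 30, so both sides scale by 30/60 = 0.5, giving 40x30.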
def test_build_transform_gen_resize_square(self):
    default_cfg = Detectron2GoRunner().get_default_cfg()
    default_cfg.INPUT.MIN_SIZE_TRAIN = (30,)
    default_cfg.INPUT.MIN_SIZE_TEST = 40
    default_cfg.D2GO_DATA.AUG_OPS.TRAIN = ["ResizeShortestEdgeSquareOp"]
    default_cfg.D2GO_DATA.AUG_OPS.TEST = ["ResizeShortestEdgeSquareOp"]

    trans_train = build_transform_gen(default_cfg, is_train=True)
    trans_test = build_transform_gen(default_cfg, is_train=False)

    img = np.zeros((80, 60, 3))
    trans_img_train, tl_train = apply_transform_gens(trans_train, img)
    trans_img_test, tl_test = apply_transform_gens(trans_test, img)

    self.assertEqual(trans_img_train.shape, (30, 30, 3))
    self.assertEqual(trans_img_test.shape, (40, 40, 3))
def test_apply_rotated_boxes_unequal_scaling_factor(self):
    np.random.seed(125)
    h, w = 400, 200
    newh, neww = 800, 800
    image = np.random.rand(h, w)
    transform_gen = []
    transform_gen.append(T.Resize(shape=(newh, neww)))
    image, transforms = T.apply_transform_gens(transform_gen, image)
    image_shape = image.shape[:2]  # h, w
    assert image_shape == (newh, neww)

    boxes = np.array(
        [
            [150, 100, 40, 20, 0],
            [150, 100, 40, 20, 30],
            [150, 100, 40, 20, 90],
            [150, 100, 40, 20, -90],
        ],
        dtype=np.float64,
    )
    transformed_boxes = transforms.apply_rotated_box(boxes)

    expected_bboxes = np.array(
        [
            [600, 200, 160, 40, 0],
            [600, 200, 144.22205102, 52.91502622, 49.10660535],
            [600, 200, 80, 80, 90],
            [600, 200, 80, 80, -90],
        ],
        dtype=np.float64,
    )
    err_msg = "transformed_boxes = {}, expected {}".format(transformed_boxes, expected_bboxes)
    assert np.allclose(transformed_boxes, expected_bboxes), err_msg
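# Why the expected values hold: with scale factors (sx, sy) = (neww/w, newh/h) = (4, 2),
# a rotated box (cx, cy, bw, bh, t) maps to center (sx*cx, sy*cy) while its width and
# height axes transform as vectors:
#   bw' = bw * sqrt((sx*cos t)^2 + (sy*sin t)^2)
#   bh' = bh * sqrt((sx*sin t)^2 + (sy*cos t)^2)
#   t'  = atan2(sx*sin t, sy*cos t)
# For t = 30: bw' = 40*sqrt(13) ~= 144.222, bh' = 20*sqrt(7) ~= 52.915, and
# t' = atan2(4*0.5, 2*cos 30) ~= 49.1066 degrees, matching expected_bboxes.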
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept (classification only)
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    image, transforms = T.apply_transform_gens(
        ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image)
    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(
        np.ascontiguousarray(image.transpose(2, 0, 1)))
    return dataset_dict
def custom_mapper(dataset_dict):
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    transform_list = [
        T.Resize((512, 512)),
        T.RandomBrightness(0.8, 1.8),
        T.RandomContrast(0.6, 1.3),
        T.RandomSaturation(0.8, 1.4),
        T.RandomRotation(angle=[30, 30]),
        T.RandomLighting(0.7),
        T.RandomFlip(prob=0.4, horizontal=False, vertical=True),
    ]
    image, transforms = T.apply_transform_gens(transform_list, image)
    dataset_dict["image"] = torch.as_tensor(
        image.transpose(2, 0, 1).astype("float32"))
    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
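# A hedged usage sketch: a mapper like custom_mapper above is typically wired into
# training via build_detection_train_loader, e.g. by overriding
# DefaultTrainer.build_train_loader (the trainer class name is illustrative):
from detectron2.data import build_detection_train_loader
from detectron2.engine import DefaultTrainer

class TrainerWithCustomMapper(DefaultTrainer):
    @classmethod
    def build_train_loader(cls, cfg):
        return build_detection_train_loader(cfg, mapper=custom_mapper)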
def custom_mapper(dataset_dict, size, flip_prob, min_brightness, max_brightness,
                  min_contrast, max_contrast, min_saturation, max_saturation):
    # Implement a mapper, similar to the default DatasetMapper, but with your own customizations
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = detection_utils.read_image(dataset_dict["file_name"], format="BGR")
    transform_list = [
        T.Resize(size),
        T.RandomBrightness(min_brightness, max_brightness),
        T.RandomContrast(min_contrast, max_contrast),
        T.RandomSaturation(min_saturation, max_saturation),
        T.RandomFlip(prob=flip_prob, horizontal=False, vertical=True),
        T.RandomFlip(prob=flip_prob, horizontal=True, vertical=False),
    ]
    image, transforms = T.apply_transform_gens(transform_list, image)
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
    annos = [
        detection_utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = detection_utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = detection_utils.filter_empty_instances(instances)
    return dataset_dict
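# Since this variant takes extra parameters beyond dataset_dict, it can be bound with
# functools.partial before being handed to the loader (all values shown are illustrative):
import functools
from detectron2.data import build_detection_train_loader

mapper = functools.partial(
    custom_mapper,
    size=(512, 512), flip_prob=0.5,
    min_brightness=0.8, max_brightness=1.2,
    min_contrast=0.8, max_contrast=1.2,
    min_saturation=0.8, max_saturation=1.2,
)
train_loader = build_detection_train_loader(cfg, mapper=mapper)  # cfg assumed configured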
def mapper(dataset_dict):
    # Implement a mapper, similar to the default DatasetMapper, but with your own customizations
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    image, transforms = T.apply_transform_gens([
        T.RandomFlip(prob=0.50, horizontal=True, vertical=False),
        T.RandomApply(tfm_or_aug=T.RandomBrightness(intensity_min=0.7, intensity_max=1.1),
                      prob=0.40),
        T.RandomApply(tfm_or_aug=T.RandomSaturation(intensity_min=0.7, intensity_max=1.1),
                      prob=0.40),
    ], image)
    dataset_dict["image"] = torch.as_tensor(
        image.transpose(2, 0, 1).astype("float32"))
    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
def custom_mapper(dataset_dict):
    # Implement a mapper, similar to the default DatasetMapper, but with your own customizations
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    # transform_list = [T.Resize(800, 600),
    #                   T.RandomFlip(prob=0.5, horizontal=True, vertical=True),
    #                   T.RandomContrast(0.8, 3),
    #                   T.RandomBrightness(0.8, 1.6),
    #                   ]
    transform_list = [
        # T.Resize((800, 800)),
        T.RandomContrast(0.8, 3),
        T.RandomBrightness(0.8, 1.6),
        T.RandomFlip(prob=0.5, horizontal=False, vertical=True),
        T.RandomFlip(prob=0.5, horizontal=True, vertical=False),
    ]  # data augmentation ops
    image, transforms = T.apply_transform_gens(transform_list, image)  # apply augmentations
    dataset_dict["image"] = torch.as_tensor(
        image.transpose(2, 0, 1).astype("float32"))  # convert to Tensor
    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = utils.annotations_to_instances(
        annos, image.shape[:2])  # convert annotations to Instances (Tensor)
    dataset_dict["instances"] = utils.filter_empty_instances(instances)  # drop empty instances
    return dataset_dict
def mapper(dataset_dict):  # custom mapper
    dataset_dict = copy.deepcopy(dataset_dict)  # the dict is modified below, so copy it first
    image = utils.read_image(dataset_dict["file_name"], format="BGR")  # read image as numpy array
    # image, transforms = T.apply_transform_gens(
    #     [T.Resize((800, 800)), T.RandomContrast(0.1, 3), T.RandomSaturation(0.1, 2),
    #      T.RandomRotation(angle=[0, 180]),
    #      T.RandomFlip(prob=0.4, horizontal=False, vertical=True),
    #      T.RandomCrop('relative_range', (0.4, 0.6))], image)  # apply augmentations
    # image, transforms = T.apply_transform_gens(
    #     [T.Resize((800, 800)), T.RandomContrast(0.1, 3), T.RandomSaturation(0.1, 2),
    #      T.RandomFlip(prob=0.4, horizontal=True, vertical=False),
    #      T.RandomCrop('relative_range', (0.4, 0.6))], image)
    image, transforms = T.apply_transform_gens(
        [T.Resize((800, 800)), T.RandomContrast(0.1, 3), T.RandomSaturation(0.1, 2),
         T.RandomFlip(prob=0.4, horizontal=True, vertical=False)], image)  # apply augmentations
    dataset_dict["image"] = torch.as_tensor(
        image.transpose(2, 0, 1).astype("float32"))  # convert to Tensor
    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]  # annotations must be transformed in sync with the image
    instances = utils.annotations_to_instances(annos, image.shape[:2])  # to Instances (Tensor)
    dataset_dict["instances"] = utils.filter_empty_instances(instances)  # drop empty instances
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Transform the dataset_dict according to the configured transformations.

    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a new dict that's going to be processed by the model.
            It currently does the following:
            1. Read the image from "file_name"
            2. Transform the image and annotations
            3. Prepare the annotations to :class:`Instances`
    """
    # get 3D models for each annotation and remove 3D mesh models from image dict
    mesh_models = []
    if "annotations" in dataset_dict:
        for anno in dataset_dict["annotations"]:
            mesh_models.append(
                [
                    self._all_mesh_models[anno["mesh"]][0].clone(),
                    self._all_mesh_models[anno["mesh"]][1].clone(),
                ]
            )

    dataset_dict = {key: value for key, value in dataset_dict.items() if key != "mesh_models"}
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    if "annotations" in dataset_dict:
        for i, anno in enumerate(dataset_dict["annotations"]):
            anno["mesh"] = mesh_models[i]

    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    image, transforms = T.apply_transform_gens(self.tfm_gens, image)
    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
    # Can use uint8 if it turns out to be slow some day

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        annos = [
            self.transform_annotations(obj, transforms, image_shape)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        # Should not be empty during training
        instances = annotations_to_instances(annos, image_shape)
        dataset_dict["instances"] = instances[instances.gt_boxes.nonempty()]
    return dataset_dict
def __call__(self, dataset_dict):
    self.tfm_gens = []
    dataset_dict = deepcopy(dataset_dict)
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    if self.is_train:
        # Crop
        if 'crop' in self.da.keys():
            crop_gen = T.RandomCrop(self.da['crop']['type'], self.da['crop']['size'])
            self.tfm_gens.append(crop_gen)
        # Horizontal flip
        if 'flip' in self.da.keys():
            flip_gen = T.RandomFlip(
                prob=self.da['flip']['prob'],
                horizontal=self.da['flip']['horizontal'],
                vertical=self.da['flip']['vertical'])
            self.tfm_gens.append(flip_gen)

    image, transforms = T.apply_transform_gens(self.tfm_gens, image)
    image_shape = image.shape[:2]  # h, w
    dataset_dict["image"] = torch.as_tensor(
        np.ascontiguousarray(image.transpose(2, 0, 1)))

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape,
                keypoint_hflip_indices=self.keypoint_hflip_indices)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.mask_format)
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    image, transforms = T.apply_transform_gens(self.augmentation, image)
    image_shape = image.shape[:2]  # h, w
    dataset_dict["image"] = torch.as_tensor(
        image.transpose(2, 0, 1).astype("float32"))

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        return dataset_dict

    for anno in dataset_dict["annotations"]:
        if not self.mask_on:
            anno.pop("segmentation", None)
        if not self.keypoint_on:
            anno.pop("keypoints", None)

    # USER: Implement additional transformations if you have other types of data
    # USER: Don't call transpose_densepose if you don't need
    annos = [
        self._transform_densepose(
            utils.transform_instance_annotations(
                obj, transforms, image_shape,
                keypoint_hflip_indices=self.keypoint_hflip_indices),
            transforms,
        )
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    if self.mask_on:
        self._add_densepose_masks_as_segmentation(annos, image_shape)
    instances = utils.annotations_to_instances(annos, image_shape, mask_format="bitmask")
    densepose_annotations = [obj.get("densepose") for obj in annos]
    if densepose_annotations and not all(v is None for v in densepose_annotations):
        instances.gt_densepose = DensePoseList(
            densepose_annotations, instances.gt_boxes, image_shape)
    dataset_dict["instances"] = instances[instances.gt_boxes.nonempty()]
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    # image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    rota = 0
    if self.rota_aug_on and dataset_dict["split"] != "val_mini" and dataset_dict["split"] != "test":
        rotaed_aug = [0, 90, 180, 270]
        rota = random.sample(rotaed_aug, 1)[0]
    image = read_image(dataset_dict["file_name"], format=self.img_format, rota=rota)
    utils.check_image_size(dataset_dict, image)

    image, transforms = T.apply_transform_gens(self.tfm_gens, image)
    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
    # Can use uint8 if it turns out to be slow some day

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Implement additional transformations if you have other types of data
        annos = [
            transform_dota_instance_annotations(obj, image_shape, rota, transforms)
            for obj in dataset_dict.pop("annotations")
        ]
        instances = dota_annotations_to_instances(annos, image_shape)
        dataset_dict["instances"] = filter_empty_instances(instances)
    return dataset_dict
def __call__(self, dataset_dict):
    self.tfm_gens = []
    dataset_dict = deepcopy(dataset_dict)
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    if self.is_train:
        # Crop
        '''print("Augmentation: ", "T.RandomCrop('relative', [0.8, 0.4])")
        crop_gen = T.RandomCrop('relative', [0.8, 0.4])
        self.tfm_gens.append(crop_gen)'''
        # Horizontal flip
        print("Augmentation: ", "T.RandomFlip(prob=0.5, horizontal=True, vertical=False)")
        flip_gen = T.RandomFlip(prob=0.5, horizontal=True, vertical=False)
        self.tfm_gens.append(flip_gen)

    image, transforms = T.apply_transform_gens(self.tfm_gens, image)
    image_shape = image.shape[:2]  # h, w
    dataset_dict["image"] = torch.as_tensor(
        np.ascontiguousarray(image.transpose(2, 0, 1)))

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape,
                keypoint_hflip_indices=self.keypoint_hflip_indices)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.mask_format)
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)
    image = utils.read_image(dataset_dict['file_name'], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    if self.crop_gen is None:
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
    elif np.random.rand() > 0.5:
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
    else:
        image, transforms = T.apply_transform_gens(
            self.tfm_gens[:-1] + self.crop_gen + self.tfm_gens[-1:], image)

    image_shape = image.shape[:2]
    dataset_dict['image'] = paddle.to_tensor(
        np.ascontiguousarray(image.transpose(2, 0, 1)))

    if not self.is_train:
        dataset_dict.pop('annotations', None)
        return dataset_dict

    if 'annotations' in dataset_dict:
        for anno in dataset_dict['annotations']:
            if not self.mask_on:
                anno.pop('segmentation', None)
            anno.pop('keypoints', None)
        annos = [
            utils.transform_instance_annotations(obj, transforms, image_shape)
            for obj in dataset_dict.pop('annotations')
            if obj.get('iscrowd', 0) == 0
        ]
        instances = utils.annotations_to_instances(annos, image_shape)
        dataset_dict['instances'] = utils.filter_empty_instances(instances)
    return dataset_dict
def mapper(dataset_dict):
    # Implement a mapper, similar to the default DatasetMapper, but with your own customizations
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    image, transforms = T.apply_transform_gens([T.Resize((800, 800))], image)
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
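# A hedged usage sketch for a deterministic mapper like the one above at evaluation time:
# build_detection_test_loader also accepts a mapper callable (the dataset name is
# illustrative, not from the original source).
from detectron2.data import build_detection_test_loader

test_loader = build_detection_test_loader(cfg, "my_dataset_val", mapper=mapper)  # cfg assumed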
def mapper(dataset_dict):
    dataset_dict = copy.deepcopy(dataset_dict)
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    image, transforms = T.apply_transform_gens([T.Resize((1152, 1152))], image)
    dataset_dict["image"] = torch.as_tensor(
        image.transpose(2, 0, 1).astype("float32"))
    annos = [
        transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = utils.annotations_to_instances_rotated(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
def __call__(self, dataset_dict):
    ret = super().__call__(dataset_dict=dataset_dict)
    numpy_image = dataset_dict["image"].permute(1, 2, 0).numpy()
    for angle in self.angles:
        rotate = RandomRotation(angle=angle, expand=True)
        new_numpy_image, tfms = apply_transform_gens([rotate], np.copy(numpy_image))
        torch_image = torch.from_numpy(
            np.ascontiguousarray(new_numpy_image.transpose(2, 0, 1)))
        dic = copy.deepcopy(dataset_dict)
        # In DatasetMapperTTA, there is a pre_tfm transform (resize or no-op) that is
        # added at the beginning of each TransformList. That's '.transforms[0]'.
        dic["transforms"] = TransformList(
            [ret[-1]["transforms"].transforms[0]] + tfms.transforms)
        dic["image"] = torch_image
        ret.append(dic)
    return ret
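# The method above reads self.angles and extends detectron2's DatasetMapperTTA. A minimal
# hedged skeleton of the enclosing class (the class name and default angles are
# illustrative, not from the original source):
from detectron2.modeling.test_time_augmentation import DatasetMapperTTA

class RotationDatasetMapperTTA(DatasetMapperTTA):
    def __init__(self, cfg, angles=(90, 180, 270)):
        super().__init__(cfg)  # DatasetMapperTTA is @configurable and accepts a cfg
        self.angles = angles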
def custom_mapper(dataset_dict, transform_list):
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    image, transforms = T.apply_transform_gens(transform_list, image)
    dataset_dict["image"] = torch.as_tensor(
        image.transpose(2, 0, 1).astype("float32"))
    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    image, transforms = T.apply_transform_gens(self.augmentation, image)
    image_shape = image.shape[:2]  # h, w
    dataset_dict["image"] = torch.as_tensor(
        image.transpose(2, 0, 1).astype("float32"))

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        return dataset_dict

    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
    ]
    dataset_dict["instances"] = utils.annotations_to_instances(annos, image.shape[:2])
    # # USER: Implement additional transformations if you have other types of data
    # # USER: Don't call transpose_densepose if you don't need
    # annos = [
    #     self._transform_densepose(
    #         utils.transform_instance_annotations(
    #             obj, transforms, image_shape,
    #             keypoint_hflip_indices=self.keypoint_hflip_indices),
    #         transforms,
    #     )
    #     for obj in dataset_dict.pop("annotations")
    #     if obj.get("iscrowd", 0) == 0
    # ]
    # instances = utils.annotations_to_instances(annos, image_shape, mask_format="bitmask")
    # dataset_dict["instances"] = instances[instances.gt_boxes.nonempty()]
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of ONE video, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    if self.is_train:
        # TODO: sample a fixed number of frames; for now all frames are kept
        pass

    new_dataset_dict = []
    for item in dataset_dict:
        image = utils.read_image(item["filename"], format=self.img_format)
        utils.check_image_size(item, image)
        # TODO: SSD random crop
        image, transforms = T.apply_transform_gens(
            self.tfm_gens[:-1] + self.crop_gen + self.tfm_gens[-1:], image)
        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        image = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
        sample = {"image": image}
        if not self.is_train:
            new_dataset_dict.append(sample)
            continue

        # USER: Implement additional transformations if you have other types of data
        boxes = [
            utils.transform_instance_annotations(box, transforms, image_shape)
            for box in item["boxes"]
        ]
        # TODO (original code left this unfinished): build Instances from the transformed
        # boxes; a plausible completion (an assumption, not from the original source):
        instances = utils.annotations_to_instances(boxes, image_shape)
        sample["instances"] = instances
        new_dataset_dict.append(sample)
    return new_dataset_dict
def test_apply_rotated_boxes(self):
    np.random.seed(125)
    cfg = get_cfg()
    is_train = True
    transform_gen = detection_utils.build_transform_gen(cfg, is_train)
    image = np.random.rand(200, 300)
    image, transforms = T.apply_transform_gens(transform_gen, image)
    image_shape = image.shape[:2]  # h, w
    assert image_shape == (800, 1200)

    annotation = {"bbox": [179, 97, 62, 40, -56]}
    boxes = np.array([annotation["bbox"]], dtype=np.float64)  # boxes.shape = (1, 5)
    transformed_bbox = transforms.apply_rotated_box(boxes)[0]

    expected_bbox = np.array([484, 388, 248, 160, 56], dtype=np.float64)
    err_msg = "transformed_bbox = {}, expected {}".format(transformed_bbox, expected_bbox)
    assert np.allclose(transformed_bbox, expected_bbox), err_msg
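# Why the expected values hold: detectron2's default train transforms are
# ResizeShortestEdge (INPUT.MIN_SIZE_TRAIN defaults to 800, so the 200x300 image scales
# by 4x to 800x1200) plus RandomFlip, which fires under this seed. Scaling by 4 maps
# (179, 97, 62, 40) to (716, 388, 248, 160); the horizontal flip then mirrors the center
# x (1200 - 716 = 484) and negates the angle (-56 -> 56), giving [484, 388, 248, 160, 56].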
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)
    assert "sem_seg_file_name" in dataset_dict

    image, transforms = T.apply_transform_gens(self.tfm_gens, image)
    if self.is_train:
        with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f:
            sem_seg_gt = Image.open(f)
            sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
        sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
        if self.crop_gen:
            image, sem_seg_gt = crop_transform(
                image,
                sem_seg_gt,
                self.crop_gen,
                self.single_category_max_area,
                self.ignore_value,
            )
        dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(
        np.ascontiguousarray(image.transpose(2, 0, 1)))

    if not self.is_train:
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dict: a dict in standard model input format. See tutorials for details.

    Returns:
        list[dict]: a list of dicts, which contain augmented version of the input image.
            The total number of dicts is ``len(min_sizes) * (2 if flip else 1)``.
            Each dict has field "transforms" which is a TransformList,
            containing the transforms that are used to generate this image.
    """
    numpy_image = dataset_dict["image"].permute(1, 2, 0).numpy()
    shape = numpy_image.shape
    orig_shape = (dataset_dict["height"], dataset_dict["width"])
    if shape[:2] != orig_shape:
        # It transforms the "original" image in the dataset to the input image
        pre_tfm = ResizeTransform(orig_shape[0], orig_shape[1], shape[0], shape[1])
    else:
        pre_tfm = NoOpTransform()

    # Create all combinations of augmentations to use
    tfm_gen_candidates = []  # each element is a list[TransformGen]
    for min_size in self.min_sizes:
        resize = ResizeShortestEdge(min_size, self.max_size)
        tfm_gen_candidates.append([resize])  # resize only
        if self.flip:
            flip = RandomFlip(prob=1.0)
            tfm_gen_candidates.append([resize, flip])  # resize + flip

    # Apply all the augmentations
    ret = []
    for tfm_gen in tfm_gen_candidates:
        new_image, tfms = apply_transform_gens(tfm_gen, np.copy(numpy_image))
        torch_image = torch.from_numpy(
            np.ascontiguousarray(new_image.transpose(2, 0, 1)))
        dic = copy.deepcopy(dataset_dict)
        dic["transforms"] = pre_tfm + tfms
        dic["image"] = torch_image
        ret.append(dic)
    return ret
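# A hedged usage sketch: this mirrors detectron2's DatasetMapperTTA, which
# GeneralizedRCNNWithTTA uses by default to generate and merge predictions across the
# augmented copies (cfg and a trained model are assumed to be in scope):
from detectron2.modeling import GeneralizedRCNNWithTTA

tta_model = GeneralizedRCNNWithTTA(cfg, model)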
def customMapper(dataset_dict):
    dataset_dict = copy.deepcopy(dataset_dict)
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    transform_list = [
        T.Resize((600, 800)),
        T.RandomFlip(prob=0.6, horizontal=True, vertical=False),
        T.RandomFlip(prob=0.6, horizontal=False, vertical=True),
    ]
    image, transforms = T.apply_transform_gens(transform_list, image)
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    # Implement a mapper, similar to the default DatasetMapper, but with your own customizations
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format="BGR")

    # Custom augs to be used while training.
    # Only HFlip and Resize are supported for rotated_boxes.
    augs = [T.RandomFlip(0.4, horizontal=True, vertical=False)]  # [T.RandomRotation([0, 90])]
    if self.is_train:
        tfm_gens = self.tfm_gens + augs
    else:
        tfm_gens = self.tfm_gens
    logging.getLogger(__name__).info("Original Augmentation: " + str(self.tfm_gens))
    logging.getLogger(__name__).info("Updated Augmentation List: " + str(tfm_gens))

    image, transforms = T.apply_transform_gens(tfm_gens, image)
    dataset_dict["image"] = torch.as_tensor(
        image.transpose(2, 0, 1).astype("float32"))

    for a in dataset_dict['annotations']:
        a['bbox'] = transforms.apply_rotated_box(np.asarray([a['bbox']]))[0].tolist()
    annos = dataset_dict['annotations']
    instances = utils.annotations_to_instances_rotated(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict