Example #1
    def test_augmentation_input_args(self):
        input_shape = (100, 100)
        output_shape = (50, 50)

        # define two augmentations with different args
        class TG1(T.Augmentation):
            input_args = ("image", "sem_seg")

            def get_transform(self, image, sem_seg):
                return T.ResizeTransform(input_shape[0], input_shape[1],
                                         output_shape[0], output_shape[1])

        class TG2(T.Augmentation):
            def get_transform(self, image):
                assert image.shape[:2] == output_shape  # check that TG1 is applied
                return T.HFlipTransform(output_shape[1])

        image = np.random.rand(*input_shape).astype("float32")
        sem_seg = (np.random.rand(*input_shape) < 0.5).astype("uint8")
        inputs = T.StandardAugInput(image, sem_seg=sem_seg)  # provide two args
        tfms = inputs.apply_augmentations([TG1(), TG2()])
        self.assertIsInstance(tfms[0], T.ResizeTransform)
        self.assertIsInstance(tfms[1], T.HFlipTransform)
        self.assertTrue(inputs.image.shape[:2] == output_shape)
        self.assertTrue(inputs.sem_seg.shape[:2] == output_shape)

        class TG3(T.Augmentation):
            input_args = ("image", "nonexist")

            def get_transform(self, image, nonexist):
                pass

        with self.assertRaises(AttributeError):
            inputs.apply_augmentations([TG3()])
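Not from the example above, but a minimal standalone sketch of the same `Augmentation` contract: `input_args` names the `AugInput` attributes passed to `get_transform`, and the returned `Transform` is then applied to every registered field. `ShrinkByHalf` is a hypothetical augmentation used only for illustration; this assumes detectron2 and numpy are installed.

import numpy as np
import detectron2.data.transforms as T


class ShrinkByHalf(T.Augmentation):
    # Hypothetical augmentation: resize the image (and all other fields) to half size.
    input_args = ("image",)

    def get_transform(self, image):
        h, w = image.shape[:2]
        return T.ResizeTransform(h, w, h // 2, w // 2)


image = np.random.rand(64, 64).astype("float32")
sem_seg = np.zeros((64, 64), dtype="uint8")
aug_input = T.StandardAugInput(image, sem_seg=sem_seg)
tfms = aug_input.apply_augmentations([ShrinkByHalf()])
assert aug_input.image.shape[:2] == (32, 32)
assert aug_input.sem_seg.shape[:2] == (32, 32)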
Example #2
    def test_augmentation_list(self):
        input_shape = (100, 100)
        image = np.random.rand(*input_shape).astype("float32")
        sem_seg = (np.random.rand(*input_shape) < 0.5).astype("uint8")
        inputs = T.StandardAugInput(image, sem_seg=sem_seg)  # provide two args

        augs = T.AugmentationList([T.RandomFlip(), T.Resize(20)])
        _ = T.AugmentationList([augs, T.Resize(30)])(inputs)
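As a hedged aside: applying an `AugmentationList` (or `apply_augmentations`) returns a `TransformList` that records the sampled transforms, so the exact same operations can be replayed on data that was not part of the `AugInput`. A short sketch assuming detectron2 and numpy:

import numpy as np
import detectron2.data.transforms as T

image = np.random.rand(100, 100, 3).astype("float32")
aug_input = T.StandardAugInput(image)
tfms = aug_input.apply_augmentations([T.Resize((50, 50))])  # TransformList

# Replay the recorded transforms on other data.
boxes = np.array([[10.0, 10.0, 40.0, 60.0]])  # XYXY, absolute pixels
new_boxes = tfms.apply_box(boxes)             # rescaled to the 50x50 image
other_image = np.random.rand(100, 100, 3).astype("float32")
assert tfms.apply_image(other_image).shape[:2] == (50, 50)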
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
        utils.check_image_size(dataset_dict, image)
        # `transform` is presumably an albumentations Compose defined elsewhere in
        # this module; here it is applied to the image only.
        image = transform(image=image)["image"]

        ############################################################################

        '''
        # Alternative: run the albumentations pipeline on image, boxes and masks together.
        image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
        h, w, _ = image.shape
        utils.check_image_size(dataset_dict, image)

        bboxes = [ann["bbox"] for ann in dataset_dict["annotations"]]
        labels = [ann["category_id"] for ann in dataset_dict["annotations"]]
        class_labels = [CLASSES[label] for label in labels]

        segmentations = [ann["segmentation"] for ann in dataset_dict["annotations"]]
        masks = convert_coco_poly_to_mask(segmentations, h, w)
        masks = [mask.numpy() for mask in masks]

        transformed = transform(image=image, bboxes=bboxes, class_labels=class_labels, masks=masks)

        image = transformed["image"]
        bboxes = transformed["bboxes"]
        class_labels = transformed["class_labels"]
        labels = [CLASSES.index(cl) for cl in class_labels]

        # optionally keep only non-empty masks, e.g. len(np.unique(mask)) > 1
        filtered_masks = [mask for mask in transformed["masks"]]
        if len(bboxes) != len(filtered_masks):
            print(len(bboxes), len(filtered_masks), len(labels))

        seg_masks = [binary_mask_to_polygon(mask, tolerance=2) for mask in masks]

        for idx in range(len(labels)):
            dataset_dict["annotations"][idx]["bbox"] = bboxes[idx]
            dataset_dict["annotations"][idx]["labels"] = labels[idx]
            dataset_dict["annotations"][idx]["segmentation"] = seg_masks[idx]

        dataset_dict["annotations"] = dataset_dict["annotations"][:len(labels)]
        '''
        
        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
        else:
            sem_seg_gt = None

        aug_input = T.StandardAugInput(image, sem_seg=sem_seg_gt)
        transforms = aug_input.apply_augmentations(self.augmentations)
        image, sem_seg_gt = aug_input.image, aug_input.sem_seg

        image_shape = image.shape[:2]  # h, w
        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
        if sem_seg_gt is not None:
            dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

        # USER: Remove if you don't use pre-computed proposals.
        # Most users would not need this feature.
        if self.proposal_topk is not None:
            utils.transform_proposals(
                dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk
            )

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.use_instance_mask:
                    anno.pop("segmentation", None)
                if not self.use_keypoint:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(
                    obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
                )
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.instance_mask_format
            )

            # After transforms such as cropping are applied, the bounding box may no longer
            # tightly bound the object. As an example, imagine a triangle object
            # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
            # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
            # the intersection of original bounding box and the cropping box.
            if self.recompute_boxes:
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)
        return dataset_dict
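A hedged usage sketch, separate from the mapper above: a `__call__` like this is typically wrapped in a mapper class and handed to detectron2's train loader. The default `DatasetMapper` is shown here as a stand-in; a custom mapper is passed the same way. This assumes the named dataset is registered and available locally.

from detectron2.config import get_cfg
from detectron2.data import DatasetMapper, build_detection_train_loader

cfg = get_cfg()
cfg.DATASETS.TRAIN = ("coco_2017_train",)  # assumes this dataset is registered

mapper = DatasetMapper(cfg, is_train=True)  # or a custom mapper like the one above
data_loader = build_detection_train_loader(cfg, mapper=mapper)
# Each element yielded by the loader is a list of dicts produced by the mapper's __call__.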
Example #4
    def __call__(self, dataset_dict):
        """
                Args:
                    dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

                Returns:
                    dict: a format that builtin models in detectron2 accept
                """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        image = utils.read_image(dataset_dict["file_name"],
                                 format=self.image_format)
        utils.check_image_size(dataset_dict, image)

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            sem_seg_gt = utils.read_image(
                dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
        else:
            sem_seg_gt = None

        aug_input = T.StandardAugInput(image, sem_seg=sem_seg_gt)
        transforms = aug_input.apply_augmentations(self.augmentations)
        image, sem_seg_gt = aug_input.image, aug_input.sem_seg

        image_shape = image.shape[:2]  # h, w
        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))
        if sem_seg_gt is not None:
            dataset_dict["sem_seg"] = torch.as_tensor(
                sem_seg_gt.astype("long"))

        # USER: Remove if you don't use pre-computed proposals.
        # Most users would not need this feature.
        if self.proposal_topk is not None:
            utils.transform_proposals(dataset_dict,
                                      image_shape,
                                      transforms,
                                      proposal_topk=self.proposal_topk)

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.use_instance_mask:
                    anno.pop("segmentation", None)
                if not self.use_keypoint:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices)
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.instance_mask_format)
            # @ Will Lee fine-grained classification categories: non-standard, roughly standard, or standard
            standard_ids = [obj["standard_id"] for obj in annos]
            standard_ids = torch.tensor(standard_ids, dtype=torch.int64)
            instances.gt_standards = standard_ids

            # After transforms such as cropping are applied, the bounding box may no longer
            # tightly bound the object. As an example, imagine a triangle object
            # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
            # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
            # the intersection of original bounding box and the cropping box.
            if self.recompute_boxes:
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)
        return dataset_dict
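The `gt_standards` line above relies on `Instances` accepting arbitrary extra fields. A minimal sketch, independent of the example, with illustrative field names and values:

import torch
from detectron2.structures import Boxes, Instances

instances = Instances((480, 640))  # (image height, image width)
instances.gt_boxes = Boxes(torch.tensor([[10.0, 10.0, 50.0, 80.0]]))
instances.gt_classes = torch.tensor([3], dtype=torch.int64)
instances.gt_standards = torch.tensor([1], dtype=torch.int64)  # custom per-instance field

# All fields are sliced together with the instances.
subset = instances[instances.gt_classes == 3]
print(subset.gt_standards)  # tensor([1])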
Example #5
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        #print("BELOW IS THE dataset_dict (FOR DEBUGGING)")
        #print(dataset_dict)
        # USER: Write your own image loading if it's not from a file
        try:
            image = utils.read_image(
                dataset_dict["file_name"], format=self.image_format
            )
        except Exception as e:
            print(dataset_dict["file_name"])
            print(e)
            raise e
        try:
            utils.check_image_size(dataset_dict, image)
        except SizeMismatchError as e:
            expected_wh = (dataset_dict["width"], dataset_dict["height"])
            image_wh = (image.shape[1], image.shape[0])
            if (image_wh[1], image_wh[0]) == expected_wh:
                print("transposing image {}".format(dataset_dict["file_name"]))
                image = image.transpose(1, 0, 2)
            else:
                raise e

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            sem_seg_gt = utils.read_image(
                dataset_dict.pop("sem_seg_file_name"), "L"
            ).squeeze(2)
        else:
            sem_seg_gt = None

        boxes = np.asarray(
            [
                BoxMode.convert(
                    instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS
                )
                for instance in dataset_dict["annotations"]
            ]
        )
        aug_input = T.StandardAugInput(image, boxes=boxes, sem_seg=sem_seg_gt)
        transforms = aug_input.apply_augmentations(self.augmentation)
        image, sem_seg_gt = aug_input.image, aug_input.sem_seg

        image_shape = image.shape[:2]  # h, w
        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1))
        )
        if sem_seg_gt is not None:
            dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

        # USER: Remove if you don't use pre-computed proposals.
        # Most users would not need this feature.
        if self.proposal_topk:
            utils.transform_proposals(
                dataset_dict,
                image_shape,
                transforms,
                proposal_topk=self.proposal_topk,
                min_box_size=self.proposal_min_box_size,
            )

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            dataset_dict.pop("pano_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.use_instance_mask:
                    anno.pop("segmentation", None)
                if not self.use_keypoint:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices,
                )
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = annotations_to_instances(
                annos, image_shape, mask_format=self.instance_mask_format
            )

            # After transforms such as cropping are applied, the bounding box may no longer
            # tightly bound the object. As an example, imagine a triangle object
            # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
            # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
            # the intersection of the original bounding box and the cropping box.
            if self.recompute_boxes:
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)

        if self.basis_loss_on and self.is_train:
            # load basis supervisions
            if self.ann_set == "coco":
                basis_sem_path = (
                    dataset_dict["file_name"]
                    .replace("train2017", "thing_train2017")
                    .replace("image/train", "thing_train")
                )
            else:
                basis_sem_path = (
                    dataset_dict["file_name"]
                    .replace("coco", "lvis")
                    .replace("train2017", "thing_train")
                )
            # change extension to npz
            basis_sem_path = osp.splitext(basis_sem_path)[0] + ".npz"
            basis_sem_gt = np.load(basis_sem_path)["mask"]
            basis_sem_gt = transforms.apply_segmentation(basis_sem_gt)
            basis_sem_gt = torch.as_tensor(basis_sem_gt.astype("long"))
            dataset_dict["basis_sem"] = basis_sem_gt
        return dataset_dict
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file

        category = dataset_dict["annotations"][0]['category_id']

        try:
            image = utils.read_image(dataset_dict["file_name"],
                                     format=self.image_format)
        except Exception as e:
            print(dataset_dict["file_name"])
            print(e)
            raise e
        try:
            utils.check_image_size(dataset_dict, image)
        except SizeMismatchError as e:
            expected_wh = (dataset_dict["width"], dataset_dict["height"])
            image_wh = (image.shape[1], image.shape[0])
            if (image_wh[1], image_wh[0]) == expected_wh:
                print("transposing image {}".format(dataset_dict["file_name"]))
                image = image.transpose(1, 0, 2)
            else:
                raise e

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            sem_seg_gt = utils.read_image(
                dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
        else:
            sem_seg_gt = None

        if int(category) != 5:  # category 5 (lane) is handled by the lane branch below
            boxes = np.asarray([
                BoxMode.convert(instance["bbox"], instance["bbox_mode"],
                                BoxMode.XYXY_ABS)
                for instance in dataset_dict["annotations"]
            ])
            aug_input = T.StandardAugInput(image,
                                           boxes=boxes,
                                           sem_seg=sem_seg_gt)
            transforms = aug_input.apply_augmentations(self.augmentation)
            image, sem_seg_gt = aug_input.image, aug_input.sem_seg
        else:
            boxes = np.asarray([0])

        # aug_input = T.StandardAugInput(image, boxes=boxes, sem_seg=sem_seg_gt)
        # transforms = aug_input.apply_augmentations(self.augmentation)
        # image, sem_seg_gt = aug_input.image, aug_input.sem_seg

        image_shape = image.shape[:2]  # h, w
        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))

        if sem_seg_gt is not None:
            dataset_dict["sem_seg"] = torch.as_tensor(
                sem_seg_gt.astype("long"))

        # USER: Remove if you don't use pre-computed proposals.
        # Most users would not need this feature.

        if self.proposal_topk:
            if (int(category) != 5):
                utils.transform_proposals(
                    dataset_dict,
                    image_shape,
                    transforms,
                    proposal_topk=self.proposal_topk,
                    min_box_size=self.proposal_min_box_size,
                )

        if not self.is_train:
            if (int(category) != 5):
                dataset_dict.pop("annotations", None)
                dataset_dict.pop("sem_seg_file_name", None)
                dataset_dict.pop("pano_seg_file_name", None)
                return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.use_instance_mask:
                    anno.pop("segmentation", None)
                if not self.use_keypoint:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            if (int(category) != 5):
                annos = [
                    transform_instance_annotations(
                        obj,
                        transforms,
                        image_shape,
                        keypoint_hflip_indices=self.keypoint_hflip_indices,
                    ) for obj in dataset_dict.pop("annotations")
                    if obj.get("iscrowd", 0) == 0
                ]
            segment_transform = transf.Compose([
                myTransform.FreeScaleMask((60, 100)),
                myTransform.MaskToTensor(),
            ])
            img_transform = transf.Compose([
                transf.Resize((288, 800)),
                transf.ToTensor(),
                transf.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
            ])
            use_ax = self.is_train

            if (int(category) != 5):
                # dataset_dict['seg_label'] = torch.zeros([36,100,3])
                # dataset_dict['cls_label'] = [[-1 for _ in range(4)] for _ in range(18)]
                instances = annotations_to_instances(
                    annos, image_shape, mask_format=self.instance_mask_format)
                if self.recompute_boxes:
                    instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
                dataset_dict["instances"] = utils.filter_empty_instances(instances)

            else:
                cl = LaneClsDataset(
                    '/home/ghr/hdd/traffic_sign/only_lane/images/CULANE_288',
                    img_path=dataset_dict['file_name'],
                    row_anchor=culane_row_anchor,
                    seg_path=dataset_dict['annotations'][0]['lanefilepath'],
                    segment_transform=segment_transform,
                    use_aux=use_ax)
                if use_ax:
                    img, cls, seg = cl.get_item()
                else:
                    img, cls = cl.get_item()
                    seg = 0
                # dataset_dict["image"] = img
                dataset_dict['seg_label'] = seg
                dataset_dict['cls_label'] = cls
                #instances = annotations_to_instances(dataset_dict['annotations'], image_shape, mask_format=self.instance_mask_format)
            # The lane dataset branch returns classification/segmentation labels instead of instances.

            # After transforms such as cropping are applied, the bounding box may no longer
            # tightly bound the object. As an example, imagine a triangle object
            # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
            # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
            # the intersection of the original bounding box and the cropping box.
            # if self.recompute_boxes:
            #     instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            # dataset_dict["instances"] = utils.filter_empty_instances(instances)

        if self.basis_loss_on and self.is_train:
            # load basis supervisions
            if self.ann_set == "coco":
                basis_sem_path = (dataset_dict["file_name"].replace(
                    "train2017",
                    "thing_train2017").replace("image/train", "thing_train"))
            else:
                basis_sem_path = (dataset_dict["file_name"].replace(
                    "coco", "lvis").replace("train2017", "thing_train"))
            # change extension to npz

            basis_sem_path = osp.splitext(basis_sem_path)[0] + ".npz"
            basis_sem_gt = np.load(basis_sem_path)["mask"]
            basis_sem_gt = transforms.apply_segmentation(basis_sem_gt)
            basis_sem_gt = torch.as_tensor(basis_sem_gt.astype("long"))
            dataset_dict["basis_sem"] = basis_sem_gt

        return dataset_dict
Example #7
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        try:
            utils.check_image_size(dataset_dict, image)
        except Exception as e:
            print(e)
            import moxing as mox
            mox.file.copy_parallel(dataset_dict["file_name"],
                                   's3://bucket-6756/liangxiwen/result/haitian_semi/unbiased-teacher/wrong_imgs/' +
                                   dataset_dict["file_name"].split('/')[-1])
            print(image.shape)
            image = np.rot90(image)
            print(image.shape)
            utils.check_image_size(dataset_dict, image)

        if "sem_seg_file_name" in dataset_dict:
            sem_seg_gt = utils.read_image(
                dataset_dict.pop("sem_seg_file_name"), "L"
            ).squeeze(2)
        else:
            sem_seg_gt = None

        aug_input = T.StandardAugInput(image, sem_seg=sem_seg_gt)
        transforms = aug_input.apply_augmentations(self.augmentation)
        image_weak_aug, sem_seg_gt = aug_input.image, aug_input.sem_seg
        image_shape = image_weak_aug.shape[:2]  # h, w

        if sem_seg_gt is not None:
            dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

        if self.load_proposals:
            utils.transform_proposals(
                dataset_dict,
                image_shape,
                transforms,
                proposal_topk=self.proposal_topk,
                min_box_size=self.proposal_min_box_size,
            )

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                if not self.keypoint_on:
                    anno.pop("keypoints", None)

            annos = [
                utils.transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices,
                )
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.mask_format
            )

            if self.compute_tight_boxes and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()

            bboxes_d2_format = utils.filter_empty_instances(instances)
            dataset_dict["instances"] = bboxes_d2_format

        # apply strong augmentation
        # We use torchvision augmentations, which are not compatible with
        # detectron2's numpy image format, so we need to convert to PIL first.
        image_pil = Image.fromarray(image_weak_aug.astype("uint8"), "RGB")
        image_strong_aug = np.array(self.strong_augmentation(image_pil))
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image_strong_aug.transpose(2, 0, 1))
        )

        dataset_dict_key = copy.deepcopy(dataset_dict)
        dataset_dict_key["image"] = torch.as_tensor(
            np.ascontiguousarray(image_weak_aug.transpose(2, 0, 1))
        )
        assert dataset_dict["image"].size(1) == dataset_dict_key["image"].size(1)
        assert dataset_dict["image"].size(2) == dataset_dict_key["image"].size(2)
        return (dataset_dict, dataset_dict_key)
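The snippet does not show how `self.strong_augmentation` is built; the block below is only a plausible sketch of such a torchvision pipeline (the specific transforms and parameters are assumptions, loosely following common semi-supervised recipes). It takes and returns a PIL image, which matches how the mapper converts `image_weak_aug` to PIL before applying it.

import torchvision.transforms as transforms

# Assumed strong-augmentation pipeline; not taken from the code above.
strong_augmentation = transforms.Compose([
    transforms.RandomApply([transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8),
    transforms.RandomGrayscale(p=0.2),
    transforms.RandomApply([transforms.GaussianBlur(kernel_size=9, sigma=(0.1, 2.0))], p=0.5),
])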
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        image = utils.read_image(dataset_dict["file_name"], format="BGR")
        imgh, imgw = image.shape[:2]
        utils.check_image_size(dataset_dict, image)
        transform = train_transforms()
        country = dataset_dict['file_name'].split('/')[-1].split('_')[0]
        # Augment the image by pasting sampled damage patches onto it
        for category_id, sample_prob in enumerate(self.sample_probs[country]):
            if np.random.random() <= sample_prob:
                damage_obj = sample_a_damage_of_type(
                    self.dataset_dicts_to_sample, category_id)
                if "annotations" not in dataset_dict:
                    dataset_dict["annotations"] = []
                # Duplicate the damage at the index
                image = copy.deepcopy(image)
                image.setflags(write=1)

                # damage = damage_obj['damage_masked']
                damage = damage_obj['damage']
                # the place to put
                posx, posy = random.sample(self.all_locs, 1)[0]
                dh, dw = damage.shape[:2]
                bboxes = np.array(
                    [obj['bbox'] for obj in dataset_dict['annotations']])
                counter = 0
                while len(bboxes) > 0 and check_conflict_boxes(
                    [posx, posy, posx + dw, posy + dh], bboxes):
                    posx, posy = random.sample(self.all_locs, 1)[0]
                    counter += 1
                    # only try for 1000 times maximum
                    if counter > 1000:
                        break
                # make sure that we don't place it out of the picture
                posy = min(posy, imgh - dh)
                posx = min(posx, imgw - dw)
                # make sure the damage patch is not out of bounds
                posy = 0 if posy < 0 else posy
                posx = 0 if posx < 0 else posx
                dh = imgh - posy if posy + dh > imgh else dh
                dw = imgw - posx if posx + dw > imgw else dw
                damage = damage[0:dh, 0:dw]  # crop the patch to the clamped size

                # scale its color to its underlying range
                area_tobe_replaced = image[posy:posy + dh, posx:posx + dw]
                # Also transfer the color from the original picture to this
                damage = color_transfer(area_tobe_replaced, damage)

                # rotate it
                if category_id == 0 or category_id == 1:
                    damage = rotate_image(damage, random.randint(-5, 5))
                if category_id == 2 or category_id == 3:
                    damage = rotate_image(damage, random.randint(-30, 30))
                dh, dw = damage.shape[:2]

                # Build a mask so the black corners introduced by rotation are not pasted
                mask = np.full((imgh, imgw), False)  # nothing selected by default

                mask1 = damage.max(axis=2) > 0
                mask[posy:posy + dh, posx:posx + dw] = mask1
                image[mask] = damage[mask1]
                image.setflags(write=0)
                # change the box location of the annotation
                damage_obj['annotation']['bbox'] = [
                    posx, posy, posx + dw, posy + dh
                ]
                # Add the annotation to the set
                dataset_dict["annotations"].append(damage_obj['annotation'])

        # TODO: Augmentation comes here
        if "annotations" in dataset_dict and len(
                dataset_dict['annotations']) > 0:
            bboxes = np.array(
                [obj['bbox'] for obj in dataset_dict['annotations']])
            # Make sure the bounding boxes are not out of range
            bw = bboxes[:, 2] - bboxes[:, 0]
            bh = bboxes[:, 3] - bboxes[:, 1]
            bw[bw <= 0] = 1
            bh[bh <= 0] = 1

            bboxes[:, 0] = np.maximum(bboxes[:, 0], 0)
            bboxes[:, 0] = np.minimum(bboxes[:, 0], imgw - 1)
            bboxes[:, 1] = np.maximum(bboxes[:, 1], 0)
            bboxes[:, 1] = np.minimum(bboxes[:, 1], imgh - 1)
            bboxes[:, 2] = bboxes[:, 0] + bw
            bboxes[:, 3] = bboxes[:, 1] + bh

            class_labels = np.array(
                [obj['category_id'] for obj in dataset_dict['annotations']])

            if transform:
                for i in range(10):
                    sample = {
                        'image': image,
                        'bboxes': bboxes,
                        'class_labels': class_labels
                    }
                    sample = transform(**sample)

                    if len(sample['bboxes']) > 0:
                        image = sample['image']
                        bboxes = np.asarray(sample['bboxes'])
                        class_labels = sample['class_labels']
                        break
                # Update the annotations
                annotations = []
                bbox_mode = dataset_dict.pop("annotations")[0]['bbox_mode']
                for i in range(len(bboxes)):
                    annotations.append({
                        'bbox': bboxes[i],
                        'bbox_mode': bbox_mode,
                        'category_id': class_labels[i]
                    })
                dataset_dict["annotations"] = annotations

        if "annotations" in dataset_dict and len(
                dataset_dict["annotations"]) > 0:
            bboxes = np.array(
                [obj['bbox'] for obj in dataset_dict['annotations']])
            aug_input = T.StandardAugInput(image, boxes=bboxes)

            apply_augmentations(self.augmentations, aug_input)

            image = aug_input.image
            image_shape = image.shape[:2]  # height, width

            # USER: Implement additional transformations if you have other types of data
            dataset_dict["image"] = torch.as_tensor(
                np.ascontiguousarray(image.transpose(2, 0, 1)))

            for i, obj in enumerate(dataset_dict["annotations"]):
                if obj.get("iscrowd", 0) == 0:
                    obj['bbox'] = aug_input.boxes[i]

            annos = list(dataset_dict["annotations"])  # keep a copy for visualization purposes

            if not self.for_vis:
                # remove annotations if we don't need them for visualization
                dataset_dict.pop('annotations')

            instances = utils.annotations_to_instances(annos, image_shape)

            # After transforms such as cropping are applied, the bounding box may no longer
            # tightly bound the object. As an example, imagine a triangle object
            # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
            # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
            # the intersection of original bounding box and the cropping box.

            dataset_dict["instances"] = utils.filter_empty_instances(instances)
        return dataset_dict
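`check_conflict_boxes` is referenced above but not shown; the sketch below is one plausible implementation that flags any overlap between the candidate location and the existing boxes (all boxes in absolute XYXY coordinates). Its exact behavior in the original code is an assumption.

import numpy as np

def check_conflict_boxes(candidate, boxes):
    # Return True if `candidate` [x1, y1, x2, y2] overlaps any row of `boxes` (N x 4).
    x1, y1, x2, y2 = candidate
    boxes = np.asarray(boxes, dtype=np.float64)
    ix1 = np.maximum(boxes[:, 0], x1)
    iy1 = np.maximum(boxes[:, 1], y1)
    ix2 = np.minimum(boxes[:, 2], x2)
    iy2 = np.minimum(boxes[:, 3], y2)
    inter = np.maximum(ix2 - ix1, 0) * np.maximum(iy2 - iy1, 0)
    return bool((inter > 0).any())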
Example #9
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        image = utils.read_image(dataset_dict["file_name"],
                                 format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        original_image = image

        if self.crop_gen is None or np.random.rand() > 0.5:
            tfm_gens = self.tfm_gens
        else:
            tfm_gens = self.tfm_gens[:-1] + self.crop_gen + self.tfm_gens[-1:]

        if "sem_seg_file_name" in dataset_dict:
            sem_seg_gt = utils.read_image(
                dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
            dataset_dict["sem_seg"] = torch.as_tensor(
                sem_seg_gt.astype("long"))
        else:
            sem_seg_gt = None

        aug_input = T.StandardAugInput(original_image, sem_seg=sem_seg_gt)
        transforms = aug_input.apply_augmentations(tfm_gens)
        image, sem_seg_gt = aug_input.image, aug_input.sem_seg
        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))

        if self.proposal_topk is not None:
            utils.transform_proposals(dataset_dict,
                                      image_shape,
                                      transforms,
                                      proposal_topk=self.proposal_topk)

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            return dataset_dict

        if type(transforms[0]) is FT.NoOpTransform:
            flip = 0
        elif type(transforms[0]) is FT.HFlipTransform:
            flip = 1
        else:
            flip = 2
        dataset_dict["flip"] = flip

        if sem_seg_gt is not None:
            sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
            if self.sem_seg_unlabeled_region_on:
                sem_seg_gt[sem_seg_gt ==
                           self.ignore_value] = self.num_sem_seg_classes
            dataset_dict["sem_seg"] = sem_seg_gt

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(obj, transforms,
                                                     image_shape)
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(annos, image_shape)
            dataset_dict["instances"] = utils.filter_empty_instances(instances)

            if self.unseen_label_set is not None:
                dataset_dict["instances"] = filter_unseen_class(
                    dataset_dict["instances"], self.unseen_label_set)

        if self.unlabeled_region_on:
            if self.sem_seg_unlabeled_region_on:
                cum_sem_seg = cum_map(dataset_dict["sem_seg"],
                                      self.num_sem_seg_classes)
            else:
                cum_sem_seg = cum_map(dataset_dict["sem_seg"],
                                      self.ignore_value)
            dataset_dict["integral_sem_seg"] = cum_sem_seg

        return dataset_dict