Example #1
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): a detection dataset dict

        Returns:
            list[dict]:
                a list of dataset dicts, each containing an augmented version of the input image.
                The total number of dicts is ``(len(min_sizes) + len(extra_sizes)) * (2 if flip else 1)``.
        """
        ret = []
        if "image" not in dataset_dict:
            numpy_image = read_image(dataset_dict["file_name"],
                                     self.image_format)
        else:
            numpy_image = dataset_dict["image"].permute(
                1, 2, 0).numpy().astype("uint8")

        image_sizes = [(min_size, self.max_size)
                       for min_size in self.min_sizes]
        image_sizes.extend(self.extra_sizes)

        for min_size, max_size in image_sizes:
            image = np.copy(numpy_image)
            tfm = ResizeShortestEdge(min_size, max_size).get_transform(image)
            resized = tfm.apply_image(image)
            resized = torch.as_tensor(
                resized.transpose(2, 0, 1).astype("float32"))

            dic = copy.deepcopy(dataset_dict)
            dic["horiz_flip"] = False
            dic["image"] = resized
            ret.append(dic)

            if self.flip:
                dic = copy.deepcopy(dataset_dict)
                dic["horiz_flip"] = True
                dic["image"] = torch.flip(resized, dims=[2])
                ret.append(dic)

        return ret
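
A standalone sketch of the same test-time-augmentation idea (the helper name tta_variants is hypothetical, and plain bilinear interpolation stands in for the ResizeShortestEdge transform used above): each min_size yields a shortest-edge resize of the input, optionally followed by a horizontal flip, so the number of output dicts doubles when flip is enabled.

import numpy as np
import torch
import torch.nn.functional as F

def tta_variants(numpy_image, min_sizes, max_size, flip=True):
    """Produce resized (and optionally flipped) variants of an HWC uint8 image."""
    ret = []
    h, w = numpy_image.shape[:2]
    tensor = torch.as_tensor(numpy_image.transpose(2, 0, 1).astype("float32"))
    for min_size in min_sizes:
        # shortest-edge resize, capped so the longest edge stays <= max_size
        scale = min(min_size / min(h, w), max_size / max(h, w))
        new_h, new_w = int(round(h * scale)), int(round(w * scale))
        resized = F.interpolate(tensor[None], size=(new_h, new_w),
                                mode="bilinear", align_corners=False)[0]
        ret.append({"image": resized, "horiz_flip": False})
        if flip:
            ret.append({"image": torch.flip(resized, dims=[2]),
                        "horiz_flip": True})
    return ret

# e.g. 2 sizes * 2 (flip) = 4 variants for a dummy 480x640 image
variants = tta_variants(np.zeros((480, 640, 3), dtype="uint8"), [400, 500], 800)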
Example #2
    def _read_data(self, file_name):
        return read_image(file_name, format=self.data_format)
Example #3
                    img = img[:, :, [2, 1, 0]]
                else:
                    img = np.asarray(
                        Image.fromarray(img,
                                        mode=cfg.INPUT.FORMAT).convert("RGB"))

                visualizer = Visualizer(img, metadata=metadata, scale=scale)
                target_fields = per_image["instances"].get_fields()
                labels = [
                    metadata.thing_classes[i]
                    for i in target_fields["gt_classes"]
                ]
                vis = visualizer.overlay_instances(
                    labels=labels,
                    boxes=target_fields.get("gt_boxes", None),
                    masks=target_fields.get("gt_masks", None),
                    keypoints=target_fields.get("gt_keypoints", None),
                )
                output(vis, str(per_image["image_id"]) + ".jpg")
    else:
        dicts = list(
            chain.from_iterable(
                [DatasetCatalog.get(k) for k in cfg.DATASETS.TRAIN]))
        if cfg.MODEL.KEYPOINT_ON:
            dicts = filter_images_with_few_keypoints(dicts, 1)
        for dic in tqdm.tqdm(dicts):
            img = utils.read_image(dic["file_name"], "RGB")
            visualizer = Visualizer(img, metadata=metadata, scale=scale)
            vis = visualizer.draw_dataset_dict(dic)
            output(vis, os.path.basename(dic["file_name"]))
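
One possible shape for the output helper called above (hypothetical; the original script defines its own): in detectron2, both overlay_instances and draw_dataset_dict return a VisImage, which provides a save method.

import os

def output(vis, fname, dirname="./vis_out"):
    # write the rendered VisImage to dirname/fname; a sketch of the helper
    # the snippet above assumes, not the original implementation
    os.makedirs(dirname, exist_ok=True)
    vis.save(os.path.join(dirname, fname))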
Example #4
    mp.set_start_method("spawn", force=True)
    args = get_parser().parse_args()
    logger = setup_logger()
    logger.info("Arguments: " + str(args))

    cfg = setup_cfg(args)

    demo = VisualizationDemo(cfg, parallel=args.parallel)

    if args.input:
        if len(args.input) == 1:
            args.input = glob.glob(os.path.expanduser(args.input[0]))
            assert args.input, "The input path(s) was not found"
        for path in tqdm.tqdm(args.input, disable=not args.output):
            # use PIL, to be consistent with evaluation
            img = read_image(path, format="BGR")
            start_time = time.time()
            predictions, visualized_output = demo.run_on_image(img)
            logger.info("{}: detected {} instances in {:.2f}s".format(
                path, len(predictions["instances"]),
                time.time() - start_time))

            if args.output:
                if os.path.isdir(args.output):
                    assert os.path.isdir(args.output), args.output
                    out_filename = os.path.join(args.output,
                                                os.path.basename(path))
                else:
                    assert len(
                        args.input
                    ) == 1, "Please specify a directory with args.output"
Example #5
    def __getitem__(self, index):
        """Load data, apply transforms, converto to Instances.
        """
        dataset_dict = copy.deepcopy(self.dataset_dicts[index])

        # read image
        image = read_image(dataset_dict["file_name"], format=self.data_format)
        check_image_size(dataset_dict, image)

        if "annotations" in dataset_dict:
            annotations = dataset_dict.pop("annotations")
            annotations = [
                ann for ann in annotations if ann.get("iscrowd", 0) == 0]
        else:
            annotations = None

        if "sem_seg_file_name" in dataset_dict:
            if annotations is None:
                annotations = []
            with PathManager.open(dataset_dict.get("sem_seg_file_name"), "rb") as f:
                sem_seg_gt = Image.open(f)
                sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")

            annotations.insert(0, {"sem_seg": sem_seg_gt})

        # apply transform
        image, annotations = self._apply_transforms(
            image, annotations, keypoint_hflip_indices=self.keypoint_hflip_indices)

        # mosaic transform
        mosaic_flag = np.random.randint(2)
        if self.is_train and self.mosaic is not None and mosaic_flag == 1:
            min_offset = self.mosaic.get('MIN_OFFSET', 0.2)
            mosaic_width = self.mosaic.get('MOSAIC_WIDTH', 640)
            mosaic_height = self.mosaic.get('MOSAIC_HEIGHT', 640)
            cut_x = np.random.randint(int(mosaic_width * min_offset),
                                      int(mosaic_width * (1 - min_offset)))
            cut_y = np.random.randint(int(mosaic_height * min_offset),
                                      int(mosaic_height * (1 - min_offset)))
            # init out image
            out_image = np.zeros([mosaic_height, mosaic_width, 3],
                                 dtype=np.float32)
            out_annotations = []
            # mosaic transform
            for m_idx in range(4):
                if m_idx != 0:
                    new_index = np.random.choice(
                        range(len(self.dataset_dicts)))
                    dataset_dict = copy.deepcopy(self.dataset_dicts[new_index])
                    # read image
                    image = read_image(dataset_dict["file_name"],
                                       format=self.data_format)
                    check_image_size(dataset_dict, image)
                    if "annotations" in dataset_dict:
                        annotations = dataset_dict.pop("annotations")
                        annotations = [
                            ann for ann in annotations if
                            ann.get("iscrowd", 0) == 0]
                    else:
                        annotations = None
                    # apply transform
                    image, annotations = self._apply_transforms(image,
                                                                annotations)

                image_size = image.shape[:2]  # h, w
                # as all meta_infos are the same, we just keep the first one
                meta_infos = [annotation.pop("meta_infos")
                              for annotation in annotations][0]
                pleft = meta_infos.get('jitter_pad_left', 0)
                pright = meta_infos.get('jitter_pad_right', 0)
                ptop = meta_infos.get('jitter_pad_top', 0)
                pbot = meta_infos.get('jitter_pad_bot', 0)
                swidth = meta_infos.get('jitter_swidth', image_size[1])
                sheight = meta_infos.get('jitter_sheight', image_size[0])
                # get shifts
                left_shift = int(
                    min(cut_x, max(0, (-int(pleft) * image_size[1] / swidth))))
                top_shift = int(
                    min(cut_y, max(0, (-int(ptop) * image_size[0] / sheight))))
                right_shift = int(min(image_size[1] - cut_x, max(0, (
                            -int(pright) * image_size[1] / swidth))))
                bot_shift = int(min(image_size[0] - cut_y, max(0, (
                            -int(pbot) * image_size[0] / sheight))))
                out_image, annos = self._blend_moasic(cut_x, cut_y,
                                                      out_image, image,
                                                      copy.deepcopy(
                                                          annotations),
                                                      (mosaic_height,
                                                       mosaic_width), m_idx,
                                                      (left_shift, top_shift,
                                                       right_shift, bot_shift))
                out_annotations.extend(annos)
            # replace image and annotation with out_image and out_annotation
            image = out_image
            annotations = out_annotations

        if "sem_seg_file_name" in dataset_dict:
            dataset_dict.pop("sem_seg_file_name")
            sem_seg_gt = annotations[0].pop("sem_seg")
            sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
            dataset_dict["sem_seg"] = sem_seg_gt
            annotations = annotations[1:]

            if not annotations:
                annotations = None

        if annotations is not None:  # got instances in annotations
            image_shape = image.shape[:2]  # h, w

            instances = annotations_to_instances(
                annotations, image_shape, mask_format=self.mask_format
            )

            # # Create a tight bounding box from masks, useful when image is cropped
            # if self.crop_gen and instances.has("gt_masks"):
            #     instances.gt_boxes = instances.gt_masks.get_bounding_boxes()

            dataset_dict["instances"] = filter_empty_instances(instances)

        # convert to Instance type
        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        # h, w, c -> c, h, w
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))

        return dataset_dict
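
For reference, a self-contained sketch of just the geometric part of the mosaic branch above (the helper simple_mosaic is hypothetical; the crop shifting and annotation remapping done by _blend_moasic are omitted): four HWC images are resized and pasted into the four quadrants defined by a random (cut_x, cut_y) split of the output canvas.

import numpy as np

def simple_mosaic(images, mosaic_w=640, mosaic_h=640, min_offset=0.2):
    # pick the split point, kept away from the borders as in the code above
    cut_x = np.random.randint(int(mosaic_w * min_offset),
                              int(mosaic_w * (1 - min_offset)))
    cut_y = np.random.randint(int(mosaic_h * min_offset),
                              int(mosaic_h * (1 - min_offset)))
    out = np.zeros((mosaic_h, mosaic_w, 3), dtype=np.float32)
    quadrants = [
        (slice(0, cut_y), slice(0, cut_x)),                # top-left
        (slice(0, cut_y), slice(cut_x, mosaic_w)),         # top-right
        (slice(cut_y, mosaic_h), slice(0, cut_x)),         # bottom-left
        (slice(cut_y, mosaic_h), slice(cut_x, mosaic_w)),  # bottom-right
    ]
    for img, (ys, xs) in zip(images, quadrants):
        h, w = ys.stop - ys.start, xs.stop - xs.start
        # crude nearest-neighbour resize via index sampling, to avoid extra deps
        yi = np.linspace(0, img.shape[0] - 1, h).astype(int)
        xi = np.linspace(0, img.shape[1] - 1, w).astype(int)
        out[ys, xs] = img[yi][:, xi]
    return out, (cut_x, cut_y)

# e.g. stitch four dummy images into one 640x640 mosaic
canvas, split = simple_mosaic([np.full((480, 640, 3), v, dtype=np.float32)
                               for v in (0, 64, 128, 255)])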