Example 1
# MetadataCatalog.get("VisualGenomeObjects").thing_classes = obj_vocab

cfg = get_cfg()
cfg.merge_from_file(
    'configs/COCO-Detection/faster_rcnn_X_101_64x4d_FPN_2x_vlp.yaml')
cfg.freeze()

predictor = DefaultPredictor(cfg)
demo = VisualizationDemo(cfg)

#model = build_model(cfg)
#model.eval()

#checkpointer = DetectionCheckpointer(model)
#checkpointer.load(os.path.join('model_weights', 'e2e_faster_rcnn_X-101-64x4d-FPN_2x-vlp.pkl'))

img1 = read_image(os.path.join('test_images', '12283150_12d37e6389_z.jpg'),
                  format="BGR")
img2 = read_image(os.path.join('test_images', '25691390_f9944f61b5_z.jpg'),
                  format="BGR")
img3 = read_image(os.path.join('test_images', '9247489789_132c0d534a_z.jpg'),
                  format="BGR")

preds, vis_out = demo.run_on_image(img3, obj_vocab)

cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
cv2.imshow(WINDOW_NAME, vis_out.get_image()[:, :, ::-1])
cv2.waitKey(0)  # wait for a key press so the window actually renders

#preds['instances'].get_fields()['box_features'].shape
#preds['instances'].get_fields()['probs'].shape
Example 2
    args = get_parser().parse_args()
    setup_logger(name="fvcore")
    logger = setup_logger()
    logger.info("Arguments: " + str(args))

    cfg = setup_cfg(args)
    print(cfg)
    # build the model
    model = build_model(cfg)
    # load the weights
    checkpointer = DetectionCheckpointer(model)
    checkpointer.load(cfg.MODEL.WEIGHTS)

    # load the image
    path = os.path.expanduser(args.input)
    original_image = read_image(path, format="BGR")
    height, width = original_image.shape[:2]
    transform_gen = T.ResizeShortestEdge(
        [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST],
        cfg.INPUT.MAX_SIZE_TEST)
    image = transform_gen.get_transform(original_image).apply_image(
        original_image)
    image = torch.as_tensor(image.astype("float32").transpose(
        2, 0, 1)).requires_grad_(True)

    inputs = {"image": image, "height": height, "width": width}

    # Grad-CAM
    layer_name = get_last_conv_name(model)
    grad_cam = GradCAM(model, layer_name)
    mask, box, class_id = grad_cam(inputs)  # cam mask
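To visualize the result, the CAM mask can be blended back onto the input. The lines below are a hedged sketch (not part of the original script); they assume `mask` is a float heatmap in [0, 1] with the same spatial size as `original_image`, `box` is an XYXY pixel box, and `cv2`/`numpy` are imported elsewhere in the script.

    # sketch: overlay the Grad-CAM heatmap and the detected box (see assumptions above)
    heatmap = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET)
    overlay = cv2.addWeighted(original_image, 0.5, heatmap, 0.5, 0)
    x1, y1, x2, y2 = map(int, box)
    cv2.rectangle(overlay, (x1, y1), (x2, y2), (0, 255, 0), 2)
    cv2.imwrite("grad_cam.jpg", overlay)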
Example 3
import argparse

import cv2
from detectron2.data import MetadataCatalog
from detectron2.data.detection_utils import read_image
from detectron2.utils.visualizer import Visualizer

from model import get_predictor

parser = argparse.ArgumentParser(description='Inference football helmets on image')
required = parser.add_argument_group('required')
required.add_argument('-i', '--input', type=str, help='path to an image', required=True)
required.add_argument('-o', '--output', type=str, help='output path', required=True)
args = parser.parse_args()

predictor = get_predictor()
image = read_image(args.input, "BGR")
model_output = predictor(image)

model_output = model_output["instances"].to("cpu")
confident_predictions = model_output[model_output.scores > 0.8]

img = cv2.imread(args.input)
v = Visualizer(img[:, :, ::-1], metadata=MetadataCatalog.get("nflimpact"))
img = v.draw_instance_predictions(confident_predictions).get_image()

success = cv2.imwrite(args.output, img)
if not success:
    raise ValueError("could not write image to {} (OpenCV does not report a reason)".format(args.output))
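Note that `get_predictor()` comes from a local `model` module that is not shown here. A minimal, hypothetical sketch of what such a helper could look like, assuming a standard Detectron2 `DefaultPredictor` fine-tuned on a single helmet class with locally stored weights (the config name, class count, and checkpoint path are assumptions):

from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

def get_predictor(weights_path="model_final.pth", score_thresh=0.5):
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(
        "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1          # assumed: helmets only
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = score_thresh
    cfg.MODEL.WEIGHTS = weights_path             # assumed local checkpoint
    return DefaultPredictor(cfg)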
Example 4
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        image = utils.read_image(dataset_dict["file_name"],
                                 format=self.image_format)
        utils.check_image_size(dataset_dict, image)

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            sem_seg_gt = utils.read_image(
                dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
        else:
            sem_seg_gt = None

        aug_input = T.StandardAugInput(image, sem_seg=sem_seg_gt)
        transforms = aug_input.apply_augmentations(self.augmentations)
        image, sem_seg_gt = aug_input.image, aug_input.sem_seg

        image_shape = image.shape[:2]  # h, w
        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))
        if sem_seg_gt is not None:
            dataset_dict["sem_seg"] = torch.as_tensor(
                sem_seg_gt.astype("long"))

        # USER: Remove if you don't use pre-computed proposals.
        # Most users would not need this feature.
        if self.proposal_topk is not None:
            utils.transform_proposals(dataset_dict,
                                      image_shape,
                                      transforms,
                                      proposal_topk=self.proposal_topk)

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.use_instance_mask:
                    anno.pop("segmentation", None)
                if not self.use_keypoint:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices)
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.instance_mask_format)

            # After transforms such as cropping are applied, the bounding box may no longer
            # tightly bound the object. As an example, imagine a triangle object
            # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
            # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
            # the intersection of original bounding box and the cropping box.
            if self.recompute_boxes:
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)
        return dataset_dict
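A mapper like this is typically wired into training through the data loader. The following is a hedged sketch, assuming the class that owns this `__call__` is named `MyDatasetMapper` and takes `(cfg, is_train)` in its constructor (both names are assumptions, not from the original code):

from detectron2.data import build_detection_train_loader
from detectron2.engine import DefaultTrainer

class MyTrainer(DefaultTrainer):
    @classmethod
    def build_train_loader(cls, cfg):
        # hand every dataset dict to the custom mapper defined above
        return build_detection_train_loader(cfg, mapper=MyDatasetMapper(cfg, is_train=True))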
Example 5
def inference(cfgs, logger, pseudo_label=False):
    # pseudo label
    pseudo_threshold = 0.4
    coco_annos = {
        "info": {},
        "licenses": [],
        "images": [],
        "annotations": [],
        "categories": [{
            "id": 1,
            "name": "wheat",
            "supercategory": "wheat",
            "skeleton": []
        }],
    }
    instance_id = 0
    predictors = []
    for cfg in cfgs:
        predictors.append(TTAPredictor(cfg))

    results = ["image_id,PredictionString\n"]
    if cfgs[0].INPUT_DIR:
        img_list = glob.glob(cfgs[0].INPUT_DIR + "/*.jpg")
        assert img_list, "The input path(s) was not found"
        for idx, path in tqdm.tqdm(enumerate(img_list)):
            # use PIL, to be consistent with evaluation
            img = read_image(path, format="BGR")
            img_size = img.shape[:2]
            start_time = time.time()
            predictions = []
            for predictor in predictors:
                predictions.append(predictor(img))

            # wbf
            merged_boxes = merge_multi_predictions(predictions,
                                                   img_size,
                                                   nms_threshold=0.6)
            predictions = {"instances": merged_boxes}

            # output result
            if not pseudo_label:
                result = format_result(
                    os.path.basename(path).split('.')[0], predictions)
                results.append(result)
            else:
                if 'instances' in predictions:
                    instances = predictions["instances"].to('cpu')
                    img_info, annos, instance_id = pred_instances_to_coco_json(
                        instances, path, idx, instance_id, img_size,
                        pseudo_threshold)
                    if img_info is not None and annos is not None:
                        coco_annos["images"].append(img_info)
                        coco_annos["annotations"].extend(annos)

            logger.info("{}: {} in {:.2f}s".format(
                path,
                "detected {} instances".format(len(predictions["instances"]))
                if "instances" in predictions else "finished",
                time.time() - start_time,
            ))

        if pseudo_label:
            # save pseudo label to json file
            json_name = cfgs[0].OUTPUT_DIR + '/pseudo_label.json'
            with open(json_name, 'w') as f:
                json.dump(coco_annos, f)
        else:
            with open(cfgs[0].OUTPUT_DIR + '/submission.csv', 'w') as f:
                f.writelines(results)
Example 6
def rotated_mapper(original_dataset_dict):
    # Implement a mapper, similar to the default DatasetMapper, but with our own customizations

    dataset_dict = copy.deepcopy(
        original_dataset_dict)  # it will be modified by code below
    original_gsd = dataset_dict["gsd"]
    target_gsd = np.random.uniform(0.09, 0.13)  # randomize target gsd
    scale = original_gsd / target_gsd

    target_size = 400
    target_crop = int(target_size / scale)
    target_crop = (target_crop, target_crop)

    image_np = detection_utils.read_image(dataset_dict["file_name"],
                                          format="BGR")

    boxes = np.asarray([anno['bbox'] for anno in dataset_dict['annotations']])

    # select anno at random
    # draw random center

    # h, w = image_np.shape[:2]
    # rand_box = boxes[np.random.randint(len(boxes))]
    # ch, cw = rand_box[:2]
    # xmin = np.min()
    # xmax = np.max()
    # ymin = 3
    # ymax = 4

    # h0 = np.random.randint(min(h, ymin), min(h, ymax) + 1)
    # w0 = np.random.randint(min(w, xmin), min(w, xmax) + 1)
    # assert h >= target_crop[1] and w >= target_crop[0], "Shape computation has bugs."

    # crop = T.CropTransform(w0, h0, target_crop)

    # make sure random crop contains annotations
    i = 0
    while True:
        random_crop = T.RandomCrop('absolute',
                                   target_crop).get_transform(image_np)
        cropped_boxes = RotatedBoxes(
            random_crop.apply_coords(copy.deepcopy(boxes)))
        inside_ind = cropped_boxes.inside_box(target_crop)
        if 1 < sum(inside_ind) <= 100:
            break
        i += 1
        if i > 150:
            return None

    image, transforms = T.apply_transform_gens([
        random_crop,
        T.Resize((target_size, target_size)),
    ], image_np)
    dataset_dict["image"] = torch.as_tensor(
        image.transpose(2, 0, 1).astype("float32"))

    annos = [
        rotated_transform_instance_annotations(obj, transforms,
                                               image.shape[:2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = detection_utils.annotations_to_instances_rotated(
        annos, image.shape[:2])
    instances = detection_utils.filter_empty_instances(instances)
    inside_ind = instances.gt_boxes.inside_box(image.shape[:2])
    instances = instances[inside_ind]

    assert ((instances.gt_boxes.tensor.numpy()[:, 2] > 0).all().item()
            ), "width not > 0\n\n" + str(instances.gt_boxes.tensor.numpy())

    dataset_dict["instances"] = instances
    return dataset_dict
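A function-style mapper such as `rotated_mapper` plugs directly into Detectron2's train loader; when it returns `None` (no valid crop after 150 attempts), `MapDataset` transparently retries with a different sample. A minimal usage sketch, assuming `cfg` is already configured for a rotated-box dataset:

from detectron2.data import build_detection_train_loader

train_loader = build_detection_train_loader(cfg, mapper=rotated_mapper)  # cfg assumed from context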
Example 7
    def mapper(self, dataset_dict, train_type: str = 'kpt'):
        if train_type != 'kpt':
            for item in dataset_dict["annotations"]:
                if 'keypoints' in item:
                    del item['keypoints']

        image = utils.read_image(dataset_dict["file_name"], format="BGR")
        img_h, img_w = image.shape[:2]
        num_pixels = img_w * img_h

        ann_dict = Detectron2_Annotation_Dict.from_dict(dataset_dict)
        bbox_list = [ann.bbox for ann in ann_dict.annotations]
        # if train_type == 'seg':
        #     printj.purple(len(ann_dict.annotations))
        #     for ann in ann_dict.annotations:
        #         seg = ann.segmentation
        #     mask = seg.to_mask()
        #     tranformed = self.aug(mask=mask)
        #     mask = tranformed['mask']
        #     image = tranformed['image']
            
        # else:
        image = self.aug(image=np.array(image))['image']
        seq_aug_for_no_seg = almost_always(iaa.Sequential(
            [
                # iaa.Rot90(ia.ALL, keep_size=False)
            ]
        ))
        seq_aug_for_seg = sometimes(iaa.Sequential(
            [
                iaa.Rot90(ia.ALL, keep_size=False),
                iaa.Affine(
                    scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
                    translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
                    rotate=(-180, 180),
                    order=[0, 1],
                    # cval=(0, 255),
                    cval=255,
                    mode=ia.ALL
                )
            ]
        ))
        imgaug_kpts = KeypointsOnImage(keypoints=[], shape=image.shape)
        imgaug_bboxes = BoundingBoxesOnImage(
            bounding_boxes=[], shape=image.shape)
        imgaug_polys = PolygonsOnImage(polygons=[], shape=image.shape)

        num_ann = len(ann_dict.annotations)
        num_kpts = None
        seg_len_list = []
        for ann in ann_dict.annotations:
            if num_kpts is None:
                num_kpts = len(ann.keypoints)
            if len(ann.keypoints.to_imgaug(img_shape=image.shape).keypoints) != len(ann_dict.annotations[0].keypoints):
                printj.red(
                    f'len(ann.keypoints.to_imgaug(img_shape=image.shape).keypoints) == {len(ann.keypoints.to_imgaug(img_shape=image.shape).keypoints)} != {len(ann_dict.annotations[0].keypoints)} == len(ann_dict.annotations[0].keypoints)')
                raise Exception
            imgaug_kpts.keypoints.extend(
                ann.keypoints.to_imgaug(img_shape=image.shape).keypoints)
            if ann.bbox.to_imgaug() is None:
                printj.red(f'ann.bbox.to_imgaug() is None')
                printj.red(f'ann.bbox: {ann.bbox}')
                raise Exception
            imgaug_bboxes.bounding_boxes.append(ann.bbox.to_imgaug())
            if ann.segmentation.to_imgaug(img_shape=image.shape).polygons is None:
                printj.red(
                    f'ann.segmentation.to_imgaug(img_shape=image.shape).polygons is None')
                printj.red(f'ann.segmentation:\n{ann.segmentation}')
                raise Exception
            seg_len_list.append(len(ann.segmentation))

            imgaug_polys.polygons.extend(
                ann.segmentation.to_imgaug(img_shape=image.shape).polygons)
        if len(imgaug_polys.polygons) > 0:
            if num_kpts > 0:
                image, imgaug_kpts_aug, imgaug_polys_aug = seq_aug_for_seg(
                    image=image, keypoints=imgaug_kpts, polygons=imgaug_polys)
            else:
                image, imgaug_polys_aug = seq_aug_for_seg(
                    image=image, polygons=imgaug_polys)
                imgaug_kpts_aug = None
            imgaug_bboxes_aug = None
        else:
            if num_kpts > 0:
                image, imgaug_kpts_aug, imgaug_bboxes_aug = seq_aug_for_no_seg(
                    image=image, keypoints=imgaug_kpts, bounding_boxes=imgaug_bboxes)
            else:
                image, imgaug_bboxes_aug = seq_aug_for_no_seg(
                    image=image, bounding_boxes=imgaug_bboxes)
                imgaug_kpts_aug = None
            imgaug_polys_aug = None

        kpts_aug0 = Keypoint2D_List.from_imgaug(
            imgaug_kpts=imgaug_kpts_aug) if num_kpts > 0 else Keypoint2D_List()
        kpts_aug_list = kpts_aug0.to_numpy(demarcation=True)[:, :2].reshape(
            num_ann, num_kpts, 2) if num_kpts > 0 else []
        kpts_aug_list = [[[x, y, 2] for x, y in kpts_aug]
                         for kpts_aug in kpts_aug_list]
        kpts_aug_list = [Keypoint2D_List.from_list(
            kpts_aug, demarcation=True) for kpts_aug in kpts_aug_list]

        if imgaug_polys_aug is not None and imgaug_bboxes_aug is None:
            poly_aug_list = [Polygon.from_imgaug(
                imgaug_polygon) for imgaug_polygon in imgaug_polys_aug.polygons]
            poly_aug_list_list = unflatten_list(
                poly_aug_list, part_sizes=seg_len_list)
            seg_aug_list = [Segmentation(poly_aug_list)
                            for poly_aug_list in poly_aug_list_list]
            bbox_aug_list = [seg_aug.to_bbox() for seg_aug in seg_aug_list]
            # Adjust BBoxes when Segmentation BBox does not contain all keypoints
            for i in range(len(bbox_aug_list)):
                kpt_points_aug = [
                    kpt_aug.point for kpt_aug in kpts_aug_list[i]] if num_kpts > 0 else []
                kpt_points_aug_contained = [kpt_point_aug.within(
                    bbox_aug_list[i]) for kpt_point_aug in kpt_points_aug]
                if len(kpt_points_aug) > 0:
                    if not np.any(np.array(kpt_points_aug_contained)):
                        printj.red(
                            f"Keypoints not contained in corresponding bbox.")
                    elif not np.all(np.array(kpt_points_aug_contained)):
                        pass
                    else:
                        break
        elif imgaug_polys_aug is None and imgaug_bboxes_aug is not None:
            bbox_aug_list = [BBox.from_imgaug(
                bbox_aug) for bbox_aug in imgaug_bboxes_aug.bounding_boxes]
            seg_aug_list = [None] * len(bbox_aug_list)
        else:
            printj.red(f'Unexpected error')
            raise Exception

        if num_kpts > 0:
            for ann, kpts_aug, bbox_aug, seg_aug in zip(ann_dict.annotations, kpts_aug_list, bbox_aug_list, seg_aug_list):
                ann.keypoints = kpts_aug
                ann.bbox = bbox_aug
                ann.segmentation = seg_aug if seg_aug is not None else Segmentation.from_list([
                ])
        else:
            for ann, bbox_aug, seg_aug in zip(ann_dict.annotations, bbox_aug_list, seg_aug_list):
                ann.keypoints = Keypoint2D_List()
                ann.bbox = bbox_aug
                ann.segmentation = seg_aug if seg_aug is not None else Segmentation.from_list([
                ])

        dataset_dict = ann_dict.to_dict()

        image, transforms = T.apply_transform_gens([], image)

        annots = []
        for item in dataset_dict["annotations"]:
            if 'keypoints' in item and num_kpts == 0:
                del item['keypoints']
            elif 'keypoints' in item:
                item['keypoints'] = np.array(
                    item['keypoints']).reshape(-1, 3).tolist()
            annots.append(item)
        dataset_dict["image"] = torch.as_tensor(
            image.transpose(2, 0, 1).astype("float32"))
        instances = utils.annotations_to_instances(annots, image.shape[:2])
        dataset_dict["instances"] = utils.filter_empty_instances(
            instances, by_box=True, by_mask=False)

        # if True:
        #     vis_img = image.copy()
        #     bbox_list = [BBox.from_list(vals) for vals in dataset_dict["instances"].gt_boxes.tensor.numpy().tolist()]
        #     seg_list = [Segmentation([Polygon.from_list(poly.tolist(), demarcation=False) for poly in seg_polys]) for seg_polys in dataset_dict["instances"].gt_masks.polygons]
        #     kpts_list = [Keypoint2D_List.from_numpy(arr, demarcation=True) for arr in dataset_dict["instances"].gt_keypoints.tensor.numpy()] if hasattr(dataset_dict["instances"], 'gt_keypoints') else []
        #     for seg in seg_list:
        #         vis_img = draw_segmentation(img=vis_img, segmentation=seg, transparent=True)
        #     for bbox in bbox_list:
        #         vis_img = draw_bbox(img=vis_img, bbox=bbox)
        #     for kpts in kpts_list:
        #         vis_img = draw_keypoints(img=vis_img, keypoints=kpts.to_numpy(demarcation=True)[:, :2].tolist(), radius=6)
        #     aug_visualizer.step(vis_img)

        return dataset_dict
Example 8
def create_annotation(image_folder, json_path, confidence_thresh=0.8):
    json_dict = {
        "images": [],
        "type": "instances",
        "annotations": [],
        "categories": []
    }

    mp.set_start_method("spawn", force=True)
    args = get_parser().parse_args()
    logger = setup_logger()
    logger.info("Arguments: " + str(args))

    cfg = setup_cfg(args)

    demo = VisualizationDemo(cfg)

    image_path = {}
    for path, subdirs, files in os.walk(image_folder):
        for name in files:
            print(name)
            if name.lower().endswith(('.jpg', '.png', '.jpeg')):
                image_path[name] = os.path.join(path, name)

    print("length: ", len(image_path.keys()))
    for path in tqdm.tqdm(image_path.keys(), disable=not args.output):
        # use PIL, to be consistent with evaluation
        start_time = time.time()
        try:
            img = read_image(image_path[path], format="BGR")
            # run detector
            predictions, visualized_output, shape = demo.run_on_image(img)
        except Exception as e:
            print("skipping {}: {}".format(image_path[path], e))
            continue
        height, width, channel = shape

        global count
        ## append image info
        image = {
            "file_name": str(path),
            "height": str(height),
            "width": str(width),
            "id": str(count),
        }
        count += 1
        # if count > 10:
        #     break
        json_dict["images"].append(image)
        ## append annotation info
        bnd_id = 0
        instances = predictions["instances"]
        for i in range(len(instances.pred_boxes)):
            if instances.scores[i] > confidence_thresh and \
                    instances.pred_classes[i] in [0, 2, 5, 7]:
                # print(instances.pred_boxes[i].tensor)
                x_center, y_center, o_width, o_height = \
                    instances.pred_boxes[i].tensor[0].cpu().detach().numpy()
                score = instances.scores[i].cpu().detach().numpy()
                pred_class = instances.pred_classes[i].cpu().detach().numpy()

                # print(x_center, y_center, o_width, o_height, score)
                ann = {
                    "area": str(o_width * o_height),
                    "iscrowd": 0,
                    "image_id": str(count),
                    "bbox": [
                        str(int(x_center - o_width / 2)),
                        str(int(y_center - o_height / 2)),
                        str(o_width),
                        str(o_height),
                    ],
                    "category_id": str(pred_class + 1),
                    "id": str(bnd_id),
                    "ignore": 0,
                    "segmentation": [],
                }
                bnd_id += 1
                json_dict["annotations"].append(ann)

        # cat = {"supercategory": "none", "id": cid, "name": cate}
        # json_dict["categories"].append(cat)

        # if args.output:
        #     if os.path.isdir(args.output):
        #         assert os.path.isdir(args.output), args.output
        #         out_filename = os.path.join(args.output, os.path.basename(path))
        #     else:
        #         assert len(args.input) == 1, "Please specify a directory with args.output"
        #         out_filename = args.output
        #     visualized_output.save(out_filename)
        # print("pred_boxes: ", predictions["instances"].pred_boxes)
        # print("scores: ", predictions["instances"].scores)
        # print("pred_classes: ", predictions["instances"].pred_classes)
        # print("shape: ", width, height, channel)
        # logger.info(
        #     "{}: detected {} instances in {:.2f}s".format(
        #         path, len(predictions["instances"]), time.time() - start_time
        #     )
        # )
        logger.info(
            ("progress: {:.0f} / {:.0f}".format(count,
                                                len(image_path.keys()))))

    ## append category info
    cat = {"supercategory": "none", "id": str(1), "name": "person"}
    json_dict["categories"].append(cat)
    cat = {"supercategory": "none", "id": str(3), "name": "car"}
    json_dict["categories"].append(cat)
    cat = {"supercategory": "none", "id": str(6), "name": "bus"}
    json_dict["categories"].append(cat)
    cat = {"supercategory": "none", "id": str(8), "name": "truck"}
    json_dict["categories"].append(cat)

    os.makedirs(os.path.dirname(json_path), exist_ok=True)
    with open(json_path, "w") as json_fp:
        json.dump(json_dict, json_fp)
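A typical invocation would look like the sketch below (the image folder, output path, and threshold are placeholders, not values from the original code):

if __name__ == "__main__":
    create_annotation("datasets/raw_images", "datasets/pseudo/annotations.json",
                      confidence_thresh=0.8)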
Example 9
        "--output",
        default=None,
        help="output images name",
    )
    parser.add_argument(
        "--confidence-threshold",
        type=float,
        default=0.5,
        help="Minimum score for instance predictions to be shown",
    )
    return parser

if __name__ == "__main__":
    args = get_parser().parse_args()
    cfg = setup_cfg(args)

    input = args.input
    output = args.output
    
    debug = output is not None
    demo = VisualizationDemo(cfg, debug)
    # use PIL, to be consistent with evaluation
    img = read_image(input, format="BGR")
    predictions, visualized_output, obj = demo.run_on_image(img, debug)

    if output is not None:
        visualized_output.save(output)
        print(output)
    else:
        print(json.dumps(obj))
    
Example 10
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        # get sample information and read image
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        image = utils.read_image(dataset_dict["file_name"],
                                 format=self.img_format)
        # check real image size match the sample description in dataset_dict
        utils.check_image_size(dataset_dict, image)

        # IMAGE AUGMENTATION
        if "annotations" not in dataset_dict:
            image, transforms = T.apply_transform_gens(
                ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens,
                image)
        else:
            # Crop around an instance if there are instances in the image.
            # USER: Remove if you don't use cropping
            if self.crop_gen:
                crop_tfm = utils.gen_crop_transform_with_instance(
                    self.crop_gen.get_crop_size(image.shape[:2]),
                    image.shape[:2],
                    np.random.choice(dataset_dict["annotations"]),
                )
                image = crop_tfm.apply_image(image)
            image, transforms = T.apply_transform_gens(self.tfm_gens, image)
            if self.crop_gen:
                transforms = crop_tfm + transforms

        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            image.transpose(2, 0, 1).astype("float32"))
        # Can use uint8 if it turns out to be slow some day

        # these fields are not needed at inference time
        if not self.is_train:
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("multi_labels", None)
            dataset_dict.pop("multi_label_names", None)
            dataset_dict.pop("seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(obj, transforms,
                                                     image_shape)
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]

            instances = utils.annotations_to_instances(annos, image_shape)

            # not strictly needed, but keep it anyway
            # Create a tight bounding box from masks, useful when image is cropped
            if self.crop_gen and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)

        if "seg_file_name" in dataset_dict:
            with PathManager.open(dataset_dict.pop("seg_file_name"),
                                  "rb") as f:
                sem_seg_gt = Image.open(f)
                sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
            sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
            # TODO: one-hot encoding after transformation
            sem_gt = np.zeros(
                (self.num_seg_class, sem_seg_gt.shape[0], sem_seg_gt.shape[1]),
                dtype="uint8")
            for c in range(self.num_seg_class):
                sem_gt[c][sem_seg_gt == c] = 1
            # sem_gt = np.transpose(sem_gt, (1, 2, 0))  # for later transform
            sem_gt = torch.as_tensor(sem_gt.astype("long"))
            dataset_dict["sem_seg"] = sem_gt

        if "multi_labels" in dataset_dict:
            dataset_dict["multi_labels"] = dataset_dict.pop("multi_labels")

        return dataset_dict
Example 11
    def __call__(self, dataset_dict):
        dataset_dict = copy.deepcopy(dataset_dict)
        image = utils.read_image(dataset_dict["file_name"],
                                 format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        # you can add your own code here to modify the input image

        if "annotations" not in dataset_dict:
            image, transforms = T.apply_transform_gens(
                ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens,
                image)
        else:
            if self.crop_gen:
                crop_tfm = utils.gen_crop_transform_with_instance(
                    self.crop_gen.get_crop_size(image.shape[:2]),
                    image.shape[:2],
                    np.random.choice(dataset_dict["annotations"]),
                )
                image = crop_tfm.apply_image(image)
            image, transforms = T.apply_transform_gens(self.tfm_gens, image)
            if self.crop_gen:
                transforms = crop_tfm + transforms

        image_shape = image.shape[:2]
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))

        if self.load_proposals:
            utils.transform_proposals(dataset_dict, image_shape, transforms,
                                      self.min_box_side_len,
                                      self.proposal_topk)

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                if not self.keypoint_on:
                    anno.pop("keypoints", None)
                if not self.attribute_on:
                    anno.pop("attribute_ids")

            annos = [
                utils.transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices)
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = annotations_to_instances_with_attributes(
                annos,
                image_shape,
                mask_format=self.mask_format,
                load_attributes=self.attribute_on,
                max_attr_per_ins=self.max_attr_per_ins)
            if self.crop_gen and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)

        if "sem_seg_file_name" in dataset_dict:
            with PathManager.open(dataset_dict.pop("sem_seg_file_name"),
                                  "rb") as f:
                sem_seg_gt = Image.open(f)
                sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
            sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
            sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
            dataset_dict["sem_seg"] = sem_seg_gt
        return dataset_dict
Example 12
def load_img(img_path, transform_gen):
    img1 = read_image(img_path, format="BGR")
    img = transform_gen.get_transform(img1).apply_image(img1)
    img_tensor = torch.as_tensor(img.astype("float32").transpose(2, 0, 1))
    return img1, img_tensor
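Example usage of `load_img`, with assumed test-time sizes (800/1333 are common Detectron2 defaults, not values taken from this snippet):

from detectron2.data import transforms as T

transform_gen = T.ResizeShortestEdge([800, 800], 1333)
original_img, img_tensor = load_img("input.jpg", transform_gen)
inputs = {"image": img_tensor,
          "height": original_img.shape[0],
          "width": original_img.shape[1]}
# outputs = model([inputs])  # assuming a Detectron2 model already in eval mode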
Example 13
    parser.add_argument("--onlyhighest",
                        action="store_true",
                        help="will return only the highest scoring detection")

    parser.add_argument(
        "opts",
        help="Modify model config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    return parser


if __name__ == "__main__":
    mp.set_start_method("spawn", force=True)
    args = get_parser().parse_args()
    logger = setup_logger()
    logger.info("Arguments: " + str(args))

    cfg = setup_cfg(args)

    im_name = args.input.split("/")[-1].split(".")[0]

    demo = VisualizationDemo(cfg,
                             vis_highest_scoring=args.onlyhighest,
                             output_dir=os.path.join(args.output, im_name))

    # use PIL, to be consistent with evaluation
    img = read_image(args.input, format="BGR")
    predictions = demo.run_on_image(img, focal_length=args.focal_length)
Example 14
    def load_image(self, dataset_dict):
        image = utils.read_image(dataset_dict["file_name"],
                                 format=self.img_format)
        utils.check_image_size(dataset_dict, image)
        return image
Example 15
                    img = img[:, :, [2, 1, 0]]
                else:
                    img = np.asarray(
                        Image.fromarray(img, mode=cfg.INPUT.FORMAT).convert(
                            "RGB"
                        )
                    )

                visualizer = Visualizer(img, metadata=metadata, scale=scale)
                target_fields = per_image["instances"].get_fields()
                labels = [
                    metadata.thing_classes[i]
                    for i in target_fields["gt_classes"]
                ]
                vis = visualizer.overlay_instances(
                    labels=labels,
                    boxes=target_fields.get("gt_boxes", None),
                )
                output(vis, str(per_image["image_id"]) + ".jpg")
    else:
        dicts = list(
            chain.from_iterable(
                [DatasetCatalog.get(k) for k in cfg.DATASETS.TRAIN]
            )
        )
        for dic in dicts:
            img = utils.read_image(dic["file_name"], "RGB")
            visualizer = Visualizer(img, metadata=metadata, scale=scale)
            vis = visualizer.draw_dataset_dict(dic)
            output(vis, os.path.basename(dic["file_name"]))
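The `output()` helper called above is defined elsewhere in the original script. A hedged sketch of what it does, assuming an `args.output_dir` option and an on-screen fallback (both assumptions):

def output(vis, fname):
    if args.output_dir:
        filepath = os.path.join(args.output_dir, fname)
        vis.save(filepath)            # VisImage.save writes the rendered image to disk
    else:
        cv2.imshow("window", vis.get_image()[:, :, ::-1])
        cv2.waitKey()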
Example 16
    demo = VisualizationDemo(cfg)
    all_img = []
    with open(args.input_txt, "r") as f:
        for line in f.readlines():
            line = line.strip('\n')
            all_img.append(line)

    if args.input_txt:
        #if len(args.input) == 1:
        #    args.input = glob.glob(os.path.expanduser(args.input[0]))
        #    assert args.input, "The input path(s) was not found"
        for path in tqdm.tqdm(all_img, disable=not args.output):
            # use PIL, to be consistent with evaluation
            path = path + '.jpg'
            img = read_image(
                '../../tmp/data/VOCdevkit2007/VOC2007/JPEGImages/' + path,
                format="BGR")
            start_time = time.time()
            predictions, visualized_output = demo.run_on_image(img)
            logger.info("{}: {} in {:.2f}s".format(
                path,
                "detected {} instances".format(len(predictions["instances"]))
                if "instances" in predictions else "finished",
                time.time() - start_time,
            ))

            if args.output:
                if os.path.isdir(args.output):
                    assert os.path.isdir(args.output), args.output
                    out_filename = os.path.join(args.output,
                                                os.path.basename(path))
Example 17
def main():
    mp.set_start_method("spawn", force=True)
    args = get_parser().parse_args()
    setup_logger(name="fvcore")
    logger = setup_logger()
    logger.info("Arguments: " + str(args))

    cfg = model_zoo.get_config(args.config_file)
    cfg = setup_cfg(cfg, args)
    demo = VisualizationDemo(cfg, args.config_file)

    if args.input:
        if len(args.input) == 1:
            args.input = glob.glob(os.path.expanduser(args.input[0]))
            assert args.input, "The input path(s) was not found"
        for path in tqdm.tqdm(args.input, disable=not args.output):
            # use PIL, to be consistent with evaluation
            img = read_image(path, format="BGR")
            start_time = time.time()
            predictions, visualized_output = demo.run_on_image(img)
            logger.info("{}: {} in {:.2f}s".format(
                path,
                "detected {} instances".format(len(predictions["instances"]))
                if "instances" in predictions else "finished",
                time.time() - start_time,
            ))

            if args.output:
                if os.path.isdir(args.output):
                    assert os.path.isdir(args.output), args.output
                    out_filename = os.path.join(args.output,
                                                os.path.basename(path))
                else:
                    assert len(
                        args.input
                    ) == 1, "Please specify a directory with args.output"
                    out_filename = args.output
                visualized_output.save(out_filename)
            else:
                cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
                cv2.imshow(WINDOW_NAME,
                           visualized_output.get_image()[:, :, ::-1])
                if cv2.waitKey(0) == 27:
                    break  # esc to quit
    elif args.video_input:
        video = cv2.VideoCapture(args.video_input)
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frames_per_second = video.get(cv2.CAP_PROP_FPS)
        num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        basename = os.path.basename(args.video_input)

        if args.output:
            if os.path.isdir(args.output):
                output_fname = os.path.join(args.output, basename)
                output_fname = os.path.splitext(output_fname)[0] + ".mkv"
            else:
                output_fname = args.output
            assert not os.path.isfile(output_fname), output_fname
            output_file = cv2.VideoWriter(
                filename=output_fname,
                # some installation of opencv may not support x264 (due to its license),
                # you can try other format (e.g. MPEG)
                fourcc=cv2.VideoWriter_fourcc(*"x264"),
                fps=float(frames_per_second),
                frameSize=(width, height),
                isColor=True,
            )
        assert os.path.isfile(args.video_input)
        for vis_frame in tqdm.tqdm(demo.run_on_video(video), total=num_frames):
            if args.output:
                output_file.write(vis_frame)
            else:
                cv2.namedWindow(basename, cv2.WINDOW_NORMAL)
                cv2.imshow(basename, vis_frame)
                if cv2.waitKey(1) == 27:
                    break  # esc to quit
        video.release()
        if args.output:
            output_file.release()
        else:
            cv2.destroyAllWindows()
Example 18
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        try:
            utils.check_image_size(dataset_dict, image)
        except Exception as e:
            print(e)
            import moxing as mox
            mox.file.copy_parallel(dataset_dict["file_name"],
                                   's3://bucket-6756/liangxiwen/result/haitian_semi/unbiased-teacher/wrong_imgs/' +
                                   dataset_dict["file_name"].split('/')[-1])
            print(image.shape)
            image = np.rot90(image)
            print(image.shape)
            utils.check_image_size(dataset_dict, image)

        if "sem_seg_file_name" in dataset_dict:
            sem_seg_gt = utils.read_image(
                dataset_dict.pop("sem_seg_file_name"), "L"
            ).squeeze(2)
        else:
            sem_seg_gt = None

        aug_input = T.StandardAugInput(image, sem_seg=sem_seg_gt)
        transforms = aug_input.apply_augmentations(self.augmentation)
        image_weak_aug, sem_seg_gt = aug_input.image, aug_input.sem_seg
        image_shape = image_weak_aug.shape[:2]  # h, w

        if sem_seg_gt is not None:
            dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

        if self.load_proposals:
            utils.transform_proposals(
                dataset_dict,
                image_shape,
                transforms,
                proposal_topk=self.proposal_topk,
                min_box_size=self.proposal_min_box_size,
            )

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                if not self.keypoint_on:
                    anno.pop("keypoints", None)

            annos = [
                utils.transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices,
                )
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.mask_format
            )

            if self.compute_tight_boxes and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()

            bboxes_d2_format = utils.filter_empty_instances(instances)
            dataset_dict["instances"] = bboxes_d2_format

        # apply strong augmentation
        # We use torchvision augmentations, which are not compatible with
        # detectron2's numpy image format, so convert to PIL first.
        image_pil = Image.fromarray(image_weak_aug.astype("uint8"), "RGB")
        image_strong_aug = np.array(self.strong_augmentation(image_pil))
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image_strong_aug.transpose(2, 0, 1))
        )

        dataset_dict_key = copy.deepcopy(dataset_dict)
        dataset_dict_key["image"] = torch.as_tensor(
            np.ascontiguousarray(image_weak_aug.transpose(2, 0, 1))
        )
        assert dataset_dict["image"].size(1) == dataset_dict_key["image"].size(1)
        assert dataset_dict["image"].size(2) == dataset_dict_key["image"].size(2)
        return (dataset_dict, dataset_dict_key)
Example 19
    # Input
    img_fpath = 'demo/data/input2.jpg'

    # Conduct instance segmentation
    predictor = ISEEInstanceSegmentation()
    # 1. Initialization
    err_no = predictor.init(config_file, params_dict)
    if err_no < 0:
        err_type = ISEEInstanceSegmentation.getErrType(err_no)
        print("ERROR: initialize the predictor FAILED - {}".format(err_type))
        exit(err_no)
    else:
        print("INFO: initialize the predictor SUCCESSFULLY!")
    # 2. Load image
    imgs_data = []
    img = read_image(img_fpath, format="BGR")
    imgs_data.append(img)
    # 3. Predict
    output = 'demo/data/'
    stamp1 = time.time()
    err_no = predictor.process(imgs_data, output=output)
    stamp2 = time.time()
    if err_no < 0:
        err_type = ISEEInstanceSegmentation.getErrType(err_no)
        print("ERROR: instance segmentation is conducted FAILED - {}".format(
            err_type))
    else:
        print("INFO: instance segmentation is conducted SUCCESSFULLY!")
    # 4. Get results
    segment_res_list = predictor.getResults()
    print(
Example 20
# Predicting random image from existing dataset

ravu_folder = Path(r"/media/jsieb/ED598/drone2go_ravu/drone2go_ravu")
image_paths = ravu_folder.rglob("*.JPG")
image_paths = [image_path for image_path in image_paths]

d = np.random.choice(data, size=1)[0]
random_image_path = d['file_name']
#random_image_path = str(np.random.choice(image_paths, size =1)[0])

#d = np.random.choice(data, 1)[0]
#random_image_path = d['file_name']
#random_image_path = r"/tmp/.X11-unix/e.jpeg"
#json_path = os.path.join(os.path.dirname(random_image_path),  f"{os.path.basename(random_image_path).split('.')[0]}.JSON")
im = read_image(random_image_path)
outputs = predictor(im[:, :, ::-1])
cpu_instances = outputs['instances'].to("cpu")

print(len(cpu_instances))

plt.imshow(im)

# Filtering
# threshold = 0.5
# class_check = cpu_instances.pred_classes.numpy() == 0
# score_check = cpu_instances.scores.numpy() > threshold
# indices = np.logical_and(class_check, score_check)
# if indices.any():
#     filtered_instances = cpu_instances[indices]
#     found = True
Example 21
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file

        category = dataset_dict["annotations"][0]['category_id']

        try:
            image = utils.read_image(dataset_dict["file_name"],
                                     format=self.image_format)
        except Exception as e:
            print(dataset_dict["file_name"])
            print(e)
            raise e
        try:
            utils.check_image_size(dataset_dict, image)
        except SizeMismatchError as e:
            expected_wh = (dataset_dict["width"], dataset_dict["height"])
            image_wh = (image.shape[1], image.shape[0])
            if (image_wh[1], image_wh[0]) == expected_wh:
                print("transposing image {}".format(dataset_dict["file_name"]))
                image = image.transpose(1, 0, 2)
            else:
                raise e

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            sem_seg_gt = utils.read_image(
                dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
        else:
            sem_seg_gt = None

        if (int(category) != 5):
            boxes = np.asarray([
                BoxMode.convert(instance["bbox"], instance["bbox_mode"],
                                BoxMode.XYXY_ABS)
                for instance in dataset_dict["annotations"]
            ])
            aug_input = T.StandardAugInput(image,
                                           boxes=boxes,
                                           sem_seg=sem_seg_gt)
            transforms = aug_input.apply_augmentations(self.augmentation)
            image, sem_seg_gt = aug_input.image, aug_input.sem_seg
        else:
            boxes = np.asarray([0])

        # aug_input = T.StandardAugInput(image, boxes=boxes, sem_seg=sem_seg_gt)
        # transforms = aug_input.apply_augmentations(self.augmentation)
        # image, sem_seg_gt = aug_input.image, aug_input.sem_seg

        image_shape = image.shape[:2]  # h, w
        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))

        if sem_seg_gt is not None:
            dataset_dict["sem_seg"] = torch.as_tensor(
                sem_seg_gt.astype("long"))

        # USER: Remove if you don't use pre-computed proposals.
        # Most users would not need this feature.

        if self.proposal_topk:
            if (int(category) != 5):
                utils.transform_proposals(
                    dataset_dict,
                    image_shape,
                    transforms,
                    proposal_topk=self.proposal_topk,
                    min_box_size=self.proposal_min_box_size,
                )

        if not self.is_train:
            if (int(category) != 5):
                dataset_dict.pop("annotations", None)
                dataset_dict.pop("sem_seg_file_name", None)
                dataset_dict.pop("pano_seg_file_name", None)
                return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.use_instance_mask:
                    anno.pop("segmentation", None)
                if not self.use_keypoint:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            if (int(category) != 5):
                annos = [
                    transform_instance_annotations(
                        obj,
                        transforms,
                        image_shape,
                        keypoint_hflip_indices=self.keypoint_hflip_indices,
                    ) for obj in dataset_dict.pop("annotations")
                    if obj.get("iscrowd", 0) == 0
                ]
            segment_transform = transf.Compose([
                myTransform.FreeScaleMask((60, 100)),
                myTransform.MaskToTensor(),
            ])
            img_transform = transf.Compose([
                transf.Resize((288, 800)),
                transf.ToTensor(),
                transf.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
            ])
            if self.is_train:
                use_ax = True
            else:
                use_ax = False

            if (int(category) != 5):
                # dataset_dict['seg_label'] = torch.zeros([36,100,3])
                # dataset_dict['cls_label'] = [[-1 for _ in range(4)] for _ in range(18)]
                instances = annotations_to_instances(
                    annos, image_shape, mask_format=self.instance_mask_format)
                if self.recompute_boxes:
                    instances.gt_boxes = instances.gt_masks.get_bounding_boxes(
                    )
                dataset_dict["instances"] = utils.filter_empty_instances(
                    instances)

            else:
                cl = LaneClsDataset(
                    '/home/ghr/hdd/traffic_sign/only_lane/images/CULANE_288',
                    img_path=dataset_dict['file_name'],
                    row_anchor=culane_row_anchor,
                    seg_path=dataset_dict['annotations'][0]['lanefilepath'],
                    segment_transform=segment_transform,
                    use_aux=use_ax)
                if use_ax:
                    img, cls, seg = cl.get_item()
                else:
                    img, cls = cl.get_item()
                    seg = 0
                # print('hahahahahahahahahah')
                # import pdb; pdb.set_trace()
                # dataset_dict["image"] = img
                dataset_dict['seg_label'] = seg
                dataset_dict['cls_label'] = cls
                #instances = annotations_to_instances(dataset_dict['annotations'], image_shape, mask_format=self.instance_mask_format)
            # Call lane class return label,...

            # After transforms such as cropping are applied, the bounding box may no longer
            # tightly bound the object. As an example, imagine a triangle object
            # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
            # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
            # the intersection of the original bounding box and the cropping box.
            # if self.recompute_boxes:
            #     instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            # dataset_dict["instances"] = utils.filter_empty_instances(instances)

        if self.basis_loss_on and self.is_train:
            # load basis supervisions
            if self.ann_set == "coco":
                basis_sem_path = (dataset_dict["file_name"].replace(
                    "train2017",
                    "thing_train2017").replace("image/train", "thing_train"))
            else:
                basis_sem_path = (dataset_dict["file_name"].replace(
                    "coco", "lvis").replace("train2017", "thing_train"))
            # change extension to npz

            basis_sem_path = osp.splitext(basis_sem_path)[0] + ".npz"
            basis_sem_gt = np.load(basis_sem_path)["mask"]
            basis_sem_gt = transforms.apply_segmentation(basis_sem_gt)
            basis_sem_gt = torch.as_tensor(basis_sem_gt.astype("long"))
            dataset_dict["basis_sem"] = basis_sem_gt

        return dataset_dict
Example 22
    rval = False

count = 0  # Count frames
predictedd = 3
font = cv2.FONT_HERSHEY_SIMPLEX

while rval:
    cv2.imshow("Yoga-Pose-Estimation", frame)
    rval, frame = cam.read()
    count = count + 1
    print(count)

    # YOGA POSE ESTIMATION #
    # Get frame and load image
    cv2.imwrite('poseframe.png', frame)
    image = read_image('poseframe.png', format='BGR')
    height, width, _ = image.shape
    transform = detectron2.data.transforms.transform.ResizeTransform(h=height,
                                                                     w=width,
                                                                     new_h=800,
                                                                     new_w=800,
                                                                     interp=2)
    image = transform.apply_image(image)
    image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))

    test_loader = DataLoader([(image, 0)],
                             batch_size=1,
                             shuffle=False,
                             pin_memory=True)

    if count % 20 == 0:
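
The snippet is cut off here, but the per-frame preprocessing it performs (dump the frame to poseframe.png, re-read it with read_image, resize to 800x800, convert to a CHW float32 tensor) can be gathered into a small helper. This is a sketch under those assumptions; the round trip through disk is kept only to mirror the original, and passing the frame array directly would avoid it:

import torch
from detectron2.data.detection_utils import read_image
from detectron2.data.transforms import ResizeTransform

def preprocess_frame(frame_path, size=800):
    # Load as BGR to match the original snippet.
    image = read_image(frame_path, format="BGR")
    height, width, _ = image.shape
    # Resize to size x size; interp=2 is PIL's bilinear interpolation code.
    transform = ResizeTransform(h=height, w=width, new_h=size, new_w=size, interp=2)
    image = transform.apply_image(image)
    # HWC uint8 -> CHW float32 tensor expected by the model.
    return torch.as_tensor(image.astype("float32").transpose(2, 0, 1))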
Exemplo n.º 23
0
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        #print("BELOW IS THE dataset_dict (FOR DEBUGGING)")
        #print(dataset_dict)
        # USER: Write your own image loading if it's not from a file
        try:
            image = utils.read_image(
                dataset_dict["file_name"], format=self.image_format
            )
        except Exception as e:
            print(dataset_dict["file_name"])
            print(e)
            raise e
        try:
            utils.check_image_size(dataset_dict, image)
        except SizeMismatchError as e:
            expected_wh = (dataset_dict["width"], dataset_dict["height"])
            image_wh = (image.shape[1], image.shape[0])
            if (image_wh[1], image_wh[0]) == expected_wh:
                print("transposing image {}".format(dataset_dict["file_name"]))
                image = image.transpose(1, 0, 2)
            else:
                raise e

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            sem_seg_gt = utils.read_image(
                dataset_dict.pop("sem_seg_file_name"), "L"
            ).squeeze(2)
        else:
            sem_seg_gt = None

        boxes = np.asarray(
            [
                BoxMode.convert(
                    instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS
                )
                for instance in dataset_dict["annotations"]
            ]
        )
        aug_input = T.StandardAugInput(image, boxes=boxes, sem_seg=sem_seg_gt)
        transforms = aug_input.apply_augmentations(self.augmentation)
        image, sem_seg_gt = aug_input.image, aug_input.sem_seg

        image_shape = image.shape[:2]  # h, w
        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1))
        )
        if sem_seg_gt is not None:
            dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

        # USER: Remove if you don't use pre-computed proposals.
        # Most users would not need this feature.
        if self.proposal_topk:
            utils.transform_proposals(
                dataset_dict,
                image_shape,
                transforms,
                proposal_topk=self.proposal_topk,
                min_box_size=self.proposal_min_box_size,
            )

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            dataset_dict.pop("pano_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.use_instance_mask:
                    anno.pop("segmentation", None)
                if not self.use_keypoint:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices,
                )
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = annotations_to_instances(
                annos, image_shape, mask_format=self.instance_mask_format
            )

            # After transforms such as cropping are applied, the bounding box may no longer
            # tightly bound the object. As an example, imagine a triangle object
            # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
            # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal
            # to the intersection of the original bounding box and the cropping box.
            if self.recompute_boxes:
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)

        if self.basis_loss_on and self.is_train:
            # load basis supervisions
            if self.ann_set == "coco":
                basis_sem_path = (
                    dataset_dict["file_name"]
                    .replace("train2017", "thing_train2017")
                    .replace("image/train", "thing_train")
                )
            else:
                basis_sem_path = (
                    dataset_dict["file_name"]
                    .replace("coco", "lvis")
                    .replace("train2017", "thing_train")
                )
            # change extension to npz
            basis_sem_path = osp.splitext(basis_sem_path)[0] + ".npz"
            basis_sem_gt = np.load(basis_sem_path)["mask"]
            basis_sem_gt = transforms.apply_segmentation(basis_sem_gt)
            basis_sem_gt = torch.as_tensor(basis_sem_gt.astype("long"))
            dataset_dict["basis_sem"] = basis_sem_gt
        return dataset_dict
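
The basis-supervision branch derives the .npz ground-truth path purely by string substitution on file_name. A small illustration of what that mapping produces for the COCO case; the input path is a hypothetical example, not one from the original source:

import os.path as osp

file_name = "datasets/coco/train2017/000000000139.jpg"  # hypothetical example path
basis_sem_path = (
    file_name
    .replace("train2017", "thing_train2017")
    .replace("image/train", "thing_train")
)
basis_sem_path = osp.splitext(basis_sem_path)[0] + ".npz"
print(basis_sem_path)  # datasets/coco/thing_train2017/000000000139.npz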
Exemplo n.º 24
0

if __name__ == "__main__":

    args = get_parser().parse_args()

    logger = setup_logger(name='deekongai')
    logger.info("Arguments: " + str(args))

    cfg = setup_cfg(args)
    demo = VisualizationDemo(cfg)

    if args.input:
        for path in tqdm.tqdm(os.listdir(args.input), disable=not args.output):
            # use PIL, to be consistent with evaluation
            img = read_image(os.path.join(args.input, path), format="BGR")
            start_time = time.time()
            predictions, visualized_output = demo.run_on_image(img)

            logger.info("{}: {} in {:.2f}s".format(
                path,
                "detected {} instances".format(len(predictions["instances"]))
                if "instances" in predictions else "finished",
                time.time() - start_time,
            ))
            if args.output:
                if os.path.isdir(args.output):
                    assert os.path.isdir(args.output), args.output
                    out_filename = os.path.join(args.output,
                                                os.path.basename(path))
                else:
Exemplo n.º 25
0
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        image = utils.read_image(dataset_dict["file_name"],
                                 format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        if "annotations" not in dataset_dict:
            image, transforms = T.apply_transform_gens(
                ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens,
                image)
        else:
            # Crop around an instance if there are instances in the image.
            # USER: Remove if you don't use cropping
            if self.crop_gen:
                crop_tfm = utils.gen_crop_transform_with_instance(
                    self.crop_gen.get_crop_size(image.shape[:2]),
                    image.shape[:2],
                    np.random.choice(dataset_dict["annotations"]),
                )
                image = crop_tfm.apply_image(image)
            image, transforms = T.apply_transform_gens(self.tfm_gens, image)
            if self.crop_gen:
                transforms = crop_tfm + transforms

        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))

        # USER: Remove if you don't use pre-computed proposals.
        if self.load_proposals:
            utils.transform_proposals(dataset_dict, image_shape, transforms,
                                      self.min_box_side_len,
                                      self.proposal_topk)

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                if not self.keypoint_on:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices,
                ) for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = annotations_to_instances(annos,
                                                 image_shape,
                                                 mask_format=self.mask_format)
            # Create a tight bounding box from masks, useful when image is cropped
            if self.crop_gen and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            with PathManager.open(dataset_dict.pop("sem_seg_file_name"),
                                  "rb") as f:
                sem_seg_gt = Image.open(f)
                sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
            sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
            sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
            dataset_dict["sem_seg"] = sem_seg_gt
        return dataset_dict
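
This mapper assumes its __init__ already built self.crop_gen and self.tfm_gens from the config. A minimal sketch of what that setup commonly looks like with detectron2's stock transform generators; the concrete sizes and probabilities are illustrative and not taken from the original source:

from detectron2.data import transforms as T

# Illustrative values; a real mapper would read these from cfg.INPUT.*
crop_gen = T.RandomCrop("relative_range", (0.9, 0.9))
tfm_gens = [
    T.ResizeShortestEdge(
        short_edge_length=(640, 672, 704, 736, 768, 800),
        max_size=1333,
        sample_style="choice",
    ),
    T.RandomFlip(horizontal=True),
]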
Exemplo n.º 26
0
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        import os
        if not os.path.exists(dataset_dict["file_name"]):
            name = dataset_dict["file_name"].replace("hico/train2015",
                                                     "coco2014/train2014")
        else:
            name = dataset_dict["file_name"]
        image = utils.read_image(name, format=self.img_format)
        # import ipdb;ipdb.set_trace()
        if dataset_dict['height'] != image.shape[0]:
            image = image.transpose([1, 0, 2])
        # print(dataset_dict)
        # np.asarray().transpose()
        # print(image.shape, type(image))
        utils.check_image_size(dataset_dict, image)

        if "annotations" not in dataset_dict:
            image, transforms = T.apply_transform_gens(
                ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens,
                image)
        else:
            # Crop around an instance if there are instances in the image.
            # USER: Remove if you don't use cropping
            if self.crop_gen:
                crop_tfm = utils.gen_crop_transform_with_instance(
                    self.crop_gen.get_crop_size(image.shape[:2]),
                    image.shape[:2],
                    np.random.choice(dataset_dict["annotations"]),
                )
                image = crop_tfm.apply_image(image)
            image, transforms = T.apply_transform_gens(self.tfm_gens, image)
            if self.crop_gen:
                transforms = crop_tfm + transforms

        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("annotations", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                ) for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = annotations_to_instances(annos, image_shape)
            dataset_dict["instances"] = utils.filter_empty_instances(instances)

        return dataset_dict
Exemplo n.º 27
0
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        utils.check_image_size(dataset_dict, image)

#         ################################################################################################################
#         print("AutoAugDet:", dataset_dict["file_name"])
#         h, w, c = image.shape
#         if h <= 0 or w <=0:
#             print("Empty image")
#         if self.autoaugdet and "annotations" in dataset_dict:
#             from detectron2.structures.boxes import BoxMode
#             bboxes = []
#             for label in dataset_dict["annotations"]:
#                 assert label['bbox_mode'] == BoxMode.XYWH_ABS
#                 bboxes.append(label['bbox'])
#             # import cv2, random
#             # showimg_in = image.copy()
#             # for box in bboxes:
#             #     cv2.rectangle(showimg_in, (int(box[0]), int(box[1])), (int(box[0] + box[2]), int(box[1] + box[3])),(random.randint(0,255), random.randint(0,255), random.randint(0,255)))
#             try:
#                 image, bboxes = autoaugdet.autoaugdet(image, bboxes, self.autoaugdet)
#             except Exception as  e:
#                 print("AutoAug Error:", e)
#             # showimg_out = image.copy()
#             # for box in bboxes:
#             #     cv2.rectangle(showimg_out, (int(box[0]), int(box[1])), (int(box[0] + box[2]), int(box[1] + box[3])),(random.randint(0,255), random.randint(0,255), random.randint(0,255)))
#             # cv2.imshow("in", showimg_in)
#             # cv2.imshow("out", showimg_out)
#             # cv2.waitKey(0)
#             for i in range(len(bboxes)):
#                 dataset_dict["annotations"][i]['bbox'] = bboxes[i]

#         #################################################################################################       
        
        if "annotations" not in dataset_dict:
            image, transforms = T.apply_transform_gens(
                ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image
            )
        else:
            # Crop around an instance if there are instances in the image.
            # USER: Remove if you don't use cropping
            if self.crop_gen:
                crop_tfm = utils.gen_crop_transform_with_instance(
                    self.crop_gen.get_crop_size(image.shape[:2]),
                    image.shape[:2],
                    np.random.choice(dataset_dict["annotations"]),
                )
                image = crop_tfm.apply_image(image)

            image, transforms = T.apply_transform_gens(self.tfm_gens, image)

            if self.crop_gen:
                transforms = crop_tfm + transforms

        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
        # Can use uint8 if it turns out to be slow some day

        # USER: Remove if you don't use pre-computed proposals.
        if self.load_proposals:
            utils.transform_proposals(
                dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk
            )

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                if not self.keypoint_on:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(
                    obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
                )
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.mask_format
            )
            # Create a tight bounding box from masks, useful when image is cropped
            if self.crop_gen and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
                
            dataset_dict["instances"] = utils.filter_empty_instances(instances)
            
            # Generate heatmaps of keypoints
            # (e.g. if dataset_dict["instances"].has("gt_keypoints"))

            # For segmentation-based detection, transform the instance-level segmentation
            # masks into semantic segmentation maps and contour maps.
            dataset_dict["contours"], dataset_dict["semseg"] = utils.annotations_to_segmaps(annos, self.num_classes, image_shape)
            
            kpts = [obj.get("keypoints", []) for obj in annos]
            map_shape = (image_shape[0], image_shape[1])
            kp_maps, short_offsets = get_keypoint_maps(None, kpts, map_shape)
            dataset_dict["kp_maps"] = kp_maps.transpose(2, 0, 1)
            dataset_dict["short_offsets"] = short_offsets.transpose(2, 0, 1)
            
            ################################################################
#             # visualize the keypoints
#             from detectron2.utils.visualizer import Visualizer
#             from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES
#             from os import path

#             image_rgb = image[..., ::-1]
#             V = Visualizer(image_rgb, dataset_dict)
#             # draw the foreground mask of each object category
#             binary_masks = kp_maps>0.1
#             _, fn = path.split(dataset_dict["file_name"])
#             fn_next, ext = path.splitext(fn)
#             print('Mask size: ', binary_masks.shape)
#             print('Image size: ', image_rgb.shape)
#             assert binary_masks.shape[1]==image_rgb.shape[0], (binary_masks.shape[1], image_rgb.shape[0])
#             assert binary_masks.shape[2]==image_rgb.shape[1], (binary_masks.shape[2], image_rgb.shape[1])
#             assert image_rgb.shape[2]==3, image_rgb.shape[2]
#             bm = binary_masks

#             for i in range(binary_masks.shape[0]):
#                 masked_image = V.draw_binary_mask(
#                     bm[i, :, :].squeeze(), color=None, edge_color='r', alpha=0.5, area_threshold=10
#                 ) # COCO_CATEGORIES[i]["color"]
# #                 filepath = "tmp/" + fn_next + '_' + COCO_CATEGORIES[i]["name"] + '.png'
# #                 masked_image.save(filepath)
#             filepath = "tmp/" + fn_next + '.png'
#             masked_image.save(filepath)
            ################################################################
            
            ################################################
#             # visualize the segmentation mask
#             from os import path
#             image_rgb = image[..., ::-1]  #utils.read_image(dataset_dict["file_name"], format="RGB")
#             segmask = dataset_dict["semseg"].tensor.numpy()
#             _, fn = path.split(dataset_dict["file_name"])
#             fn_next, ext = path.splitext(fn)
            
#             im = Image.fromarray(np.uint8(image_rgb))
#             filepath = "tmp_segmap_sorted/" + fn_next + '_raw.png'
#             im.save(filepath)
            
#             im2 = Image.fromarray(np.uint8(segmask*3))
#             filepath2 = "tmp_segmap_sorted/" + fn_next + '_seg.png'
#             im2.save(filepath2)
            
            ################################################
            
            ###############
#             # visualize the segmentation map and contours
#             from detectron2.utils.visualizer import Visualizer
#             from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES
#             from os import path
#             #V.draw_sem_seg(self, sem_seg, area_threshold=None, alpha=0.8)            
#             image_rgb = image[..., ::-1]  #utils.read_image(dataset_dict["file_name"], format="RGB")
#             V = Visualizer(image_rgb, dataset_dict)
#             # draw the foreground mask of each object category
#             #binary_masks = dataset_dict["contours"].gt_segmasks.tensor
#             binary_masks = dataset_dict["contours"].gt_contours.tensor
#             _, fn = path.split(dataset_dict["file_name"])
#             fn_next, ext = path.splitext(fn)
#             print('Mask size: ', binary_masks.size())
#             print('Image size: ', image_rgb.shape)
#             assert binary_masks.size(1)==image_rgb.shape[0], (binary_masks.size(1), image_rgb.shape[0])
#             assert binary_masks.size(2)==image_rgb.shape[1], (binary_masks.size(2), image_rgb.shape[1])
#             assert image_rgb.shape[2]==3, image_rgb.shape[2]
#             bm = binary_masks.numpy()
# #             bm_uint8 = bm.astype("uint8")
# #             print(bm)
#             for i in range(binary_masks.size(0)):
#                 masked_image = V.draw_binary_mask(
#                     bm[i, :, :].squeeze(), color=None, edge_color='r', alpha=0.5, area_threshold=10
#                 ) # COCO_CATEGORIES[i]["color"]
# #                 filepath = "tmp/" + fn_next + '_' + COCO_CATEGORIES[i]["name"] + '.png'
# #                 masked_image.save(filepath)
#             filepath = "tmp/" + fn_next + '.png'
#             masked_image.save(filepath)
            
################################################################################################# 

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f:
                sem_seg_gt = Image.open(f)
                sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
            sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
            sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
            dataset_dict["sem_seg"] = sem_seg_gt
            
        return dataset_dict
    def __call__(self, dataset_dict):
        assert 'annotations' in dataset_dict, 'only instance segmentation is handled here'
        assert 'sem_seg_file_name' not in dataset_dict, 'panoptic segmentation is not performed'

        dataset_dict = copy.deepcopy(dataset_dict)

        image = utils.read_image(dataset_dict['file_name'],
                                 format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        # Brightness, contrast, saturation, and cutout
        if self.cont_gen is not None:
            tfm = self.cont_gen.get_transform(image)
            image = tfm.apply_image(image)
        if self.bright_gen is not None:
            tfm = self.bright_gen.get_transform(image)
            image = tfm.apply_image(image)
        if self.sat_gen is not None:
            tfm = self.sat_gen.get_transform(image)
            image = tfm.apply_image(image)
        if self.cutout_gen is not None:
            tfm = self.cutout_gen.get_transform(image)
            image = tfm.apply_image(image)

        # Affine transforms
        if self.rotate_gen is not None:
            rotate_tfm = self.rotate_gen.get_transform(image)
            image = rotate_tfm.apply_image(image)
        if self.shear_gen is not None:
            shear_tfm = self.shear_gen.get_transform(image)
            image = shear_tfm.apply_image(image)
        if self.extent_gen is not None:
            extent_tfm = self.extent_gen.get_transform(image)
            image = extent_tfm.apply_image(image)
        if self.crop_gen is not None:
            crop_tfm = utils.gen_crop_transform_with_instance(
                self.crop_gen.get_crop_size(image.shape[:2]), image.shape[:2],
                np.random.choice(dataset_dict['annotations']))
            image = crop_tfm.apply_image(image)

        image, transforms = T.apply_transform_gens(self.tfm_gens, image)

        if self.crop_gen is not None:
            transforms = crop_tfm + transforms
        if self.extent_gen is not None:
            transforms = extent_tfm + transforms
        if self.shear_gen is not None:
            transforms = shear_tfm + transforms
        if self.rotate_gen is not None:
            transforms = rotate_tfm + transforms

        # At test time, annotations are not needed, so drop them and return
        if not self.is_train:
            dataset_dict.pop('annotations', None)
            dataset_dict.pop('sem_seg_file_name', None)
            return dataset_dict

        image_shape = image.shape[:2]  # h, w
        dataset_dict['image'] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))

        annos = [
            utils.transform_instance_annotations(obj,
                                                 transforms,
                                                 image_shape,
                                                 keypoint_hflip_indices=None)
            for obj in dataset_dict.pop('annotations')
            if obj.get("iscrowd", 0) == 0
        ]

        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.mask_format)

        # Build bounding boxes from the masks
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()

        dataset_dict["instances"] = utils.filter_empty_instances(instances)

        return dataset_dict
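
The __call__ above expects __init__ to have created the optional generators self.cont_gen, self.bright_gen, self.sat_gen, self.cutout_gen, self.rotate_gen, self.shear_gen, self.extent_gen and self.crop_gen. A hedged sketch of how the colour-jitter, rotation and crop generators could be built with detectron2's built-in augmentations; cutout, shear and extent have no stock TransformGen, so they are assumed to be custom classes and are omitted here, and the ranges are illustrative:

from detectron2.data import transforms as T

# Illustrative ranges; the original source does not specify them.
cont_gen = T.RandomContrast(0.8, 1.2)
bright_gen = T.RandomBrightness(0.8, 1.2)
sat_gen = T.RandomSaturation(0.8, 1.2)
rotate_gen = T.RandomRotation(angle=[-10, 10], expand=False)
crop_gen = T.RandomCrop("relative_range", (0.9, 0.9))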
Exemplo n.º 29
0
if __name__ == "__main__":
    mp.set_start_method("spawn", force=True)
    args = get_parser().parse_args()
    cfg = setup_cfg(args)

    demo = VisualizationDemo(cfg)

    if args.input:
        if len(args.input) == 1:
            args.input = glob.glob(os.path.expanduser(args.input[0]))
            assert args.input, "The input path(s) was not found"

        for path in tqdm.tqdm(args.input, disable=not args.output):
            # use PIL, to be consistent with evaluation
            img = read_image(path, format="BGR")

            predictions, visualized_output = demo.run_on_image(img)

            if args.output:
                if os.path.isdir(args.output):
                    assert os.path.isdir(args.output), args.output
                    out_filename = os.path.join(args.output,
                                                os.path.basename(path))
                else:
                    assert len(
                        args.input
                    ) == 1, "Please specify a directory with args.output"
                    out_filename = args.output
                visualized_output.save(out_filename)
            else:
Exemplo n.º 30
0
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        image = utils.read_image(dataset_dict["file_name"],
                                 format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        if "annotations" not in dataset_dict:
            image, transforms = T.apply_transform_gens(
                ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens,
                image)
        else:
            # Crop around an instance if there are instances in the image.
            # USER: Remove if you don't use cropping
            if self.crop_gen:
                crop_tfm = utils.gen_crop_transform_with_instance(
                    self.crop_gen.get_crop_size(image.shape[:2]),
                    image.shape[:2],
                    np.random.choice(dataset_dict["annotations"]),
                )
                image = crop_tfm.apply_image(image)
            image, transforms = T.apply_transform_gens(self.tfm_gens, image)
            if self.crop_gen:
                transforms = crop_tfm + transforms

        image_shape = image.shape[:2]  # h, w

        dataset_dict["image"] = torch.as_tensor(
            image.transpose(2, 0, 1).astype("float32"))

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            return dataset_dict

        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        annos = [
            transform_rotated_boxes_annotations(obj, transforms)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]

        instances = rotated_annotations_to_instances(annos, image_shape)

        # Create a tight bounding box from masks, useful when image is cropped
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()

        dataset_dict["instances"] = utils.filter_empty_instances(instances)

        del annos, instances
        return dataset_dict
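
The rotated-box path relies on project-specific helpers (transform_rotated_boxes_annotations, rotated_annotations_to_instances) that are not shown here; it expects each annotation's bbox to hold five numbers (cx, cy, w, h, angle in degrees) in XYWHA_ABS mode. A hypothetical example of the input format such a mapper would consume:

from detectron2.structures import BoxMode

# Hypothetical single-image dict in Detectron2 Dataset format with one rotated box.
dataset_dict = {
    "file_name": "images/0001.jpg",  # placeholder path
    "height": 480,
    "width": 640,
    "annotations": [
        {
            "bbox": [320.0, 240.0, 100.0, 40.0, 30.0],  # cx, cy, w, h, angle (degrees)
            "bbox_mode": BoxMode.XYWHA_ABS,
            "category_id": 0,
        }
    ],
}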