def doit(detector, raw_images):
    with torch.no_grad():
        # Preprocessing
        inputs = []
        for raw_image in raw_images:
            image = detector.transform_gen.get_transform(
                raw_image).apply_image(raw_image)
            image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
            inputs.append({
                "image": image,
                "height": raw_image.shape[0],
                "width": raw_image.shape[1]
            })
        images = detector.model.preprocess_image(inputs)

        # Run Backbone Res1-Res4
        features = detector.model.backbone(images.tensor)

        # Generate proposals with RPN
        proposals, _ = detector.model.proposal_generator(
            images, features, None)

        # Run RoI head for each proposal (RoI Pooling + Res5)
        proposal_boxes = [x.proposal_boxes for x in proposals]
        features = [features[f] for f in detector.model.roi_heads.in_features]
        box_features = detector.model.roi_heads._shared_roi_transform(
            features, proposal_boxes)
        feature_pooled = box_features.mean(
            dim=[2, 3])  # (sum_proposals, 2048), pooled to 1x1

        # Predict classes and boxes for each proposal.
        pred_class_logits, pred_proposal_deltas = detector.model.roi_heads.box_predictor(
            feature_pooled)
        rcnn_outputs = FastRCNNOutputs(
            detector.model.roi_heads.box2box_transform,
            pred_class_logits,
            pred_proposal_deltas,
            proposals,
            detector.model.roi_heads.smooth_l1_beta,
        )

        # NMS: relax the threshold until at least MIN_BOXES survive
        instances_list, ids_list = [], []
        probs_list = rcnn_outputs.predict_probs()
        boxes_list = rcnn_outputs.predict_boxes()
        for probs, boxes, image_size in zip(probs_list, boxes_list,
                                            images.image_sizes):
            for nms_thresh in np.arange(0.3, 1.0, 0.1):
                instances, ids = fast_rcnn_inference_single_image(
                    boxes,
                    probs,
                    image_size,
                    score_thresh=0.2,
                    nms_thresh=nms_thresh,
                    topk_per_image=MAX_BOXES)
                if len(ids) >= MIN_BOXES:
                    break
            instances_list.append(instances)
            ids_list.append(ids)

        # Post processing for features
        features_list = feature_pooled.split(
            rcnn_outputs.num_preds_per_image
        )  # (sum_proposals, 2048) --> [(p1, 2048), (p2, 2048), ..., (pn, 2048)]
        roi_features_list = []
        for ids, features in zip(ids_list, features_list):
            roi_features_list.append(features[ids].detach())

        # Post processing for bounding boxes (rescale to raw_image)
        raw_instances_list = []
        for instances, input_per_image, image_size in zip(
                instances_list, inputs, images.image_sizes):
            height = input_per_image.get("height", image_size[0])
            width = input_per_image.get("width", image_size[1])
            raw_instances = detector_postprocess(instances, height, width)
            raw_instances_list.append(raw_instances)

        return raw_instances_list, roi_features_list
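
A minimal driver for the batched doit above might look like the following sketch. It assumes doit is in scope together with the detectron2 0.1-era APIs it uses (FastRCNNOutputs, DefaultPredictor.transform_gen); the config choice and the MIN_BOXES/MAX_BOXES values are assumptions, since the snippet references those constants without defining them.

import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

MIN_BOXES, MAX_BOXES = 10, 36  # assumed values for the globals used by doit

cfg = get_cfg()
# any C4 (Res5ROIHeads) model suits the _shared_roi_transform call above
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_C4_1x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_C4_1x.yaml")
detector = DefaultPredictor(cfg)

raw_images = [cv2.imread("example1.jpg"), cv2.imread("example2.jpg")]  # BGR, as detectron2 expects
instances_list, roi_features_list = doit(detector, raw_images)
print(len(instances_list[0]), roi_features_list[0].shape)  # boxes kept, (boxes, 2048)
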
Example #2
    def get_image_features(img_paths, folder):
        with torch.no_grad():  # https://github.com/sphinx-doc/sphinx/issues/4258
            inputs = []
            for img_path in img_paths:
                original_image = cv2.imread(img_path)

                transform_gen = T.ResizeShortestEdge(
                    [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST],
                    cfg.INPUT.MAX_SIZE_TEST)
                # inputs = {"image": image, "height": height, "width": width}
                # predictions = model.backbone(torch.tensor([image]))

                height, width = original_image.shape[:2]
                image = transform_gen.get_transform(
                    original_image).apply_image(original_image)
                image = torch.as_tensor(
                    image.astype("float32").transpose(2, 0, 1))

                inputs.append({
                    "image": image,
                    "height": height,
                    "width": width
                })

            images = image_model.preprocess_image(inputs)

            predictions = image_model.backbone(images.tensor.cuda())
            proposals, _ = image_model.proposal_generator(
                images, predictions, None)
            features = [
                predictions[f] for f in image_model.roi_heads.in_features
            ]
            head_outputs = []
            image_sizes = [x.image_size for x in proposals]
            for k in range(image_model.roi_heads.num_cascade_stages):
                if k > 0:
                    # The output boxes of the previous stage are the input proposals of the next stage
                    proposals = image_model.roi_heads._create_proposals_from_boxes(
                        head_outputs[-1].predict_boxes(), image_sizes)
                box_features = image_model.roi_heads.box_pooler(
                    features, [x.proposal_boxes for x in proposals])
                box_features = _ScaleGradient.apply(
                    box_features,
                    1.0 / image_model.roi_heads.num_cascade_stages)
                box_features = image_model.roi_heads.box_head[k](box_features)
                pred_class_logits, pred_proposal_deltas = image_model.roi_heads.box_predictor[
                    k](box_features)
                outputs = FastRCNNOutputs(
                    image_model.roi_heads.box2box_transform[k],
                    pred_class_logits,
                    pred_proposal_deltas,
                    proposals,
                    image_model.roi_heads.smooth_l1_beta,
                )
                head_outputs.append(outputs)
            scores_per_stage = [h.predict_probs() for h in head_outputs]
            scores = [
                sum(list(scores_per_image)) *
                (1.0 / image_model.roi_heads.num_cascade_stages)
                for scores_per_image in zip(*scores_per_stage)
            ]
            boxes = head_outputs[-1].predict_boxes()
            # NOTE: relies on a patched fast_rcnn_inference that also takes
            # box_features and returns the kept features as a third output
            pred_instances = fast_rcnn_inference(
                boxes, scores, image_sizes,
                image_model.roi_heads.test_score_thresh,
                image_model.roi_heads.test_nms_thresh,
                image_model.roi_heads.test_detections_per_img, box_features)
            outputs = GeneralizedRCNN._postprocess(
                pred_instances[0], inputs,
                images.image_sizes), pred_instances[2]
        for path, instance, feats in zip(img_paths, outputs[0], outputs[1]):
            d = {}
            d['boxes'] = instance['instances'].pred_boxes.tensor.cpu().numpy()
            d['x'] = feats.cpu().numpy()
            np.savez_compressed(folder + path.split('/')[-1] + '.npz', d)
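
One subtlety worth noting: the dict d is passed to np.savez_compressed positionally, so NumPy pickles it under the default key arr_0. Reading it back therefore needs allow_pickle; a loader sketch (the file name is illustrative):

import numpy as np

data = np.load("features/example.jpg.npz", allow_pickle=True)
d = data["arr_0"].item()  # recover the dict saved above
print(d["boxes"].shape, d["x"].shape)
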
def doit(raw_image, predictor):
  NUM_OBJECTS = 36
  with torch.no_grad():
    raw_height, raw_width = raw_image.shape[:2]
    # print("Original image size: ", (raw_height, raw_width))
    
    # Preprocessing
    image = predictor.transform_gen.get_transform(raw_image).apply_image(raw_image)
    # print("Transformed image size: ", image.shape[:2])
    image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
    inputs = [{"image": image, "height": raw_height, "width": raw_width}]
    images = predictor.model.preprocess_image(inputs)
    
    # Run Backbone Res1-Res4
    features = predictor.model.backbone(images.tensor)
    
    # Generate proposals with RPN
    proposals, _ = predictor.model.proposal_generator(images, features, None)
    proposal = proposals[0]
    # print('Proposal Boxes size:', proposal.proposal_boxes.tensor.shape)
    
    # Run RoI head for each proposal (RoI Pooling + Res5)
    proposal_boxes = [x.proposal_boxes for x in proposals]
    features = [features[f] for f in predictor.model.roi_heads.in_features]
    box_features = predictor.model.roi_heads._shared_roi_transform(
        features, proposal_boxes
    )
    feature_pooled = box_features.mean(dim=[2, 3])  # pooled to 1x1
    # print('Pooled features size:', feature_pooled.shape)
    
    # Predict classes and boxes for each proposal.
    pred_class_logits, pred_attr_logits, pred_proposal_deltas = predictor.model.roi_heads.box_predictor(feature_pooled)
    outputs = FastRCNNOutputs(
        predictor.model.roi_heads.box2box_transform,
        pred_class_logits,
        pred_proposal_deltas,
        proposals,
        predictor.model.roi_heads.smooth_l1_beta,
    )
    probs = outputs.predict_probs()[0]
    boxes = outputs.predict_boxes()[0]
    
    attr_prob = pred_attr_logits[..., :-1].softmax(-1)
    max_attr_prob, max_attr_label = attr_prob.max(-1)
    
    # Note: BUTD uses raw RoI predictions,
    #       we use the predicted boxes instead.
    # boxes = proposal_boxes[0].tensor    
    
    # NMS
    for nms_thresh in np.arange(0.5, 1.0, 0.1):
        instances, ids = fast_rcnn_inference_single_image(
            boxes, probs, image.shape[1:], 
            score_thresh=0.2, nms_thresh=nms_thresh, topk_per_image=NUM_OBJECTS
        )
        if len(ids) == NUM_OBJECTS:
            break
            
    instances = detector_postprocess(instances, raw_height, raw_width)
    roi_features = feature_pooled[ids].detach()
    # max_attr_prob = max_attr_prob[ids].detach()
    # max_attr_label = max_attr_label[ids].detach()
    # instances.attr_scores = max_attr_prob
    # instances.attr_classes = max_attr_label

    img_bboxes = instances.get_fields()['pred_boxes'].tensor.cpu().numpy()
    roi_features = roi_features.cpu().numpy()

    #print(instances)
    
    return img_bboxes, roi_features
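
Since this variant returns plain NumPy arrays, a quick sanity check is to draw the boxes back onto the input image. A sketch, assuming predictor is a DefaultPredictor built as in the other examples:

import cv2

img_bboxes, roi_features = doit(raw_image, predictor)
for x1, y1, x2, y2 in img_bboxes.astype(int).tolist():
    cv2.rectangle(raw_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
cv2.imwrite("boxes_preview.jpg", raw_image)
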
    def doit(self, raw_image, autocasting=False):
        raw_image = self.get_cv2_image(raw_image)
        from detectron2.modeling.postprocessing import detector_postprocess
        from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputs, fast_rcnn_inference_single_image
        predictor = self.predictor
        with torch.no_grad():
            NUM_OBJECTS = 36
            raw_height, raw_width = raw_image.shape[:2]
            image = predictor.transform_gen.get_transform(
                raw_image).apply_image(raw_image)
            image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
            inputs = [{
                "image": image,
                "height": raw_height,
                "width": raw_width
            }]
            images = predictor.model.preprocess_image(inputs)

            # Run Backbone Res1-Res4
            features = predictor.model.backbone(images.tensor)

            # Generate proposals with RPN
            proposals, _ = predictor.model.proposal_generator(
                images, features, None)
            proposal = proposals[0]
            proposal_boxes = [x.proposal_boxes for x in proposals]
            features = [
                features[f] for f in predictor.model.roi_heads.in_features
            ]
            if autocasting:
                # Half-precision casting for fp16
                for box in proposal_boxes:
                    box.tensor = box.tensor.type(torch.cuda.HalfTensor)
                features = [f.type(torch.cuda.HalfTensor) for f in features]

            # Run RoI head for each proposal (RoI Pooling + Res5)
            box_features = predictor.model.roi_heads._shared_roi_transform(
                features, proposal_boxes)
            feature_pooled = box_features.mean(dim=[2, 3])  # pooled to 1x1
            # Predict classes and boxes for each proposal.
            pred_class_logits, pred_attr_logits, pred_proposal_deltas = predictor.model.roi_heads.box_predictor(
                feature_pooled)

            outputs = FastRCNNOutputs(
                predictor.model.roi_heads.box2box_transform,
                pred_class_logits,
                pred_proposal_deltas,
                proposals,
                predictor.model.roi_heads.smooth_l1_beta,
            )
            probs = outputs.predict_probs()[0]
            boxes = outputs.predict_boxes()[0]

            # probs = predictor.model.roi_heads.box_predictor.predict_probs((pred_class_logits, pred_proposal_deltas,), proposals)[0]
            # boxes = predictor.model.roi_heads.box_predictor.predict_boxes((pred_class_logits, pred_proposal_deltas,), proposals)[0]

            attr_prob = pred_attr_logits[..., :-1].softmax(-1)
            max_attr_prob, max_attr_label = attr_prob.max(-1)

            # Note: BUTD uses raw RoI predictions,
            #       we use the predicted boxes instead.
            # boxes = proposal_boxes[0].tensor

            # NMS
            for nms_thresh in np.arange(0.5, 1.0, 0.1):
                instances, ids = fast_rcnn_inference_single_image(
                    boxes,
                    probs,
                    image.shape[1:],
                    score_thresh=0.2,
                    nms_thresh=nms_thresh,
                    topk_per_image=NUM_OBJECTS)
                if len(ids) == NUM_OBJECTS:
                    break

            instances = detector_postprocess(instances, raw_height, raw_width)
            roi_features = feature_pooled[ids].detach()
            max_attr_prob = max_attr_prob[ids].detach()
            max_attr_label = max_attr_label[ids].detach()
            instances.attr_scores = max_attr_prob
            instances.attr_classes = max_attr_label
            return instances, roi_features
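
The autocasting branch only casts the RoI-head inputs, so it implicitly assumes the Res5 weights are already half precision. One way to satisfy that, sketched with a hypothetical wrapper instance (converting the whole model is an assumption, not something the snippet itself does):

extractor = FeatureExtractor()    # hypothetical wrapper class owning self.predictor
extractor.predictor.model.half()  # assumed: cast weights to fp16 so the HalfTensor inputs match
instances, roi_features = extractor.doit(raw_image, autocasting=True)
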
Example #6
    def _get_image_features(self,
                            imgs,
                            score_thresh=0.2,
                            min_num_image=10,
                            max_regions=36,
                            tokens=[]):
        # imgs tensor(batch, H, W, C)
        with torch.no_grad():
            inputs = []
            for img in imgs:
                raw_img = img.permute(2, 0, 1)
                raw_img = raw_img.to(self.detectron2_gpu_device)
                (_, height, width) = raw_img.shape
                inputs.append({
                    "image": raw_img,
                    "height": height,
                    "width": width
                })

            # Normalize the image by subtracting the mean
            # (preprocess_image also moves it to the device; here it is already there)
            images = self.detector.model.preprocess_image(inputs)
            sizes = images.image_sizes

            # Features from the backbone
            features = self.detector.model.backbone(images.tensor)

            # Get RPN proposals
            # proposal_generator takes (images, features, gt_instances);
            # at inference time there are no gt instances, so we pass None
            proposals, _ = self.detector.model.proposal_generator(
                images, features, None)
            images = None
            # The C4 model uses Res5ROIHeads, from which the pooled features can be extracted
            proposal_boxes = [x.proposal_boxes for x in proposals]
            features = [
                features[f] for f in self.detector.model.roi_heads.in_features
            ]
            box_features = self.detector.model.roi_heads._shared_roi_transform(
                features, proposal_boxes)
            features = None
            # Pooled features to use in the agent
            feature_pooled = box_features.mean(dim=[2, 3])

            # Predict classes and boxes for each proposal.
            pred_class_logits, pred_proposal_deltas = \
                self.detector.model.roi_heads.box_predictor(feature_pooled)

            rcnn_outputs = FastRCNNOutputs(
                self.detector.model.roi_heads.box2box_transform,
                pred_class_logits,
                pred_proposal_deltas,
                proposals,
                self.detector.model.roi_heads.smooth_l1_beta,
            )
            proposals = None

            # Filter proposals using Non-Maximum Suppression (NMS)
            instances_list, ids_list = [], []
            probs_list = rcnn_outputs.predict_probs()
            boxes_list = rcnn_outputs.predict_boxes()
            num_boxes = []
            for probs, boxes, image_size in zip(probs_list, boxes_list, sizes):

                # We want up to max_regions boxes per image, so gradually relax
                # the NMS threshold until at least min_num_image boxes survive
                for nms_thresh in np.arange(0.3, 1.0, 0.1):
                    instances, ids = fast_rcnn_inference_single_image(
                        boxes,
                        probs,
                        image_size,
                        score_thresh=score_thresh,
                        nms_thresh=nms_thresh,
                        topk_per_image=max_regions,
                        device=self.detectron2_gpu_device,
                        preferred_labels=self.class_names,
                        tokens=tokens,
                        tokenizer=self.tokenizer)
                    if len(ids) >= min_num_image:
                        break
                num_boxes.append(len(ids) + 1)
                instances_list.append(instances)
                ids_list.append(ids)

            # Post processing for features
            features_list = feature_pooled.split(
                rcnn_outputs.num_preds_per_image
            )  # (sum_proposals, 2048) --> [(p1, 2048), (p2, 2048), ..., (pn, 2048)]
            feature_pooled = None
            roi_features_list = []
            for ids, features in zip(ids_list, features_list):
                head_box = torch.sum(features[ids], axis=0) / \
                           len(features[ids])
                head_box = head_box.unsqueeze(0)
                roi_features_list.append(
                    torch.cat((head_box, features[ids]), 0))

            # Post processing for bounding boxes (rescale to raw_image)
            boxes = []
            classes = []
            for instances, input_per_image, image_size in zip(
                    instances_list, inputs, sizes):
                height = input_per_image.get("height", image_size[0])
                width = input_per_image.get("width", image_size[1])
                raw_instances = detector_postprocess(instances, height, width)

                box = torch.zeros((len(raw_instances) + 1, 5),
                                  device=self.detectron2_gpu_device)
                box[1:, :4] = raw_instances.pred_boxes.tensor
                box[1:, 0] /= float(width)
                box[1:, 1] /= float(height)
                box[1:, 2] /= float(width)
                box[1:, 3] /= float(height)
                # relative area of each region in [0, 1]
                box[1:, 4] = (box[1:, 3] - box[1:, 1]) * (box[1:, 2] - box[1:, 0])
                # whole-image "head" box matching the mean feature prepended above
                # (set last, so the normalization does not divide it again)
                box[0] = torch.tensor([0., 0., 1., 1., 1.],
                                      device=self.detectron2_gpu_device)

                boxes.append(box)
                classes.append(raw_instances.pred_classes)
            # features, boxes, image_mask
            return roi_features_list, boxes, num_boxes, classes  #, pred_proposal_deltas
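
The returned lists line up per image: roi_features_list[i] holds a mean "head" feature followed by the kept region features, boxes[i] has the matching whole-image row prepended, and num_boxes[i] counts both. A consistency sketch (extractor and imgs are hypothetical):

feats, boxes, num_boxes, classes = extractor._get_image_features(imgs)
for f, b, n in zip(feats, boxes, num_boxes):
    assert f.shape[0] == b.shape[0] == n  # head row + kept regions
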
Example #7
    def inference(self, batched_inputs, do_postprocess=True):
        """
        Run inference on the given inputs.

        Args:
            batched_inputs (list[dict]): same as in :meth:`forward`
            detected_instances (None or list[Instances]): if not None, it
                contains an `Instances` object per image. The `Instances`
                object contains "pred_boxes" and "pred_classes" which are
                known boxes in the image.
                The inference will then skip the detection of bounding boxes,
                and only predict other per-ROI outputs.
            do_postprocess (bool): whether to apply post-processing on the outputs.

        Returns:
            same as in :meth:`forward`.
        """
        assert not self.training
        images = self.preprocess_image(batched_inputs)
        features = self.backbone(images.tensor)

        if self.attention is not None:
            attn_weights = self.attention(features['res4'])
            features['res4'] = features['res4'] * attn_weights.repeat(
                1, features['res4'].size(1), 1, 1)

        if self.proposal_generator:
            proposals, _ = self.proposal_generator(images, features, None)
        else:
            assert "proposals" in batched_inputs[0]
            proposals = [
                x["proposals"].to(self.device) for x in batched_inputs
            ]

        proposals, box_features_0 = self.roi_heads.transform_forward(
            images, features, proposals)

        box_features = self.roi_heads.box_head(box_features_0)
        pred_class_logits_0, pred_proposal_deltas_0 = self.roi_heads.box_predictor(
            box_features)

        box_features_T = box_features_0 + self.transformation(box_features_0)
        box_features = self.box_head(box_features_T)
        pred_class_logits_1, pred_proposal_deltas_1 = self.box_predictor(
            box_features)

        outputs = FastRCNNOutputs(
            self.roi_heads.box2box_transform,
            pred_class_logits_1,
            pred_proposal_deltas_0,
            proposals,
            self.roi_heads.smooth_l1_beta,
        )

        results, _ = outputs.inference(self.roi_heads.test_score_thresh,
                                       self.roi_heads.test_nms_thresh,
                                       self.roi_heads.test_detections_per_img)

        if do_postprocess:
            processed_results = []
            for results_per_image, input_per_image, image_size in zip(
                    results, batched_inputs, images.image_sizes):
                height = input_per_image.get("height", image_size[0])
                width = input_per_image.get("width", image_size[1])
                r = detector_postprocess(results_per_image, height, width)
                processed_results.append({"instances": r})
            return processed_results
        else:
            return results
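
A minimal call sketch for this method, following detectron2's standard input contract (model is assumed to be an instance of this class, already in eval mode on the right device):

import cv2
import torch

img = cv2.imread("example.jpg")
inputs = [{
    "image": torch.as_tensor(img.astype("float32").transpose(2, 0, 1)),
    "height": img.shape[0],
    "width": img.shape[1],
}]
with torch.no_grad():
    results = model.inference(inputs)  # [{"instances": Instances}, ...]
print(results[0]["instances"].pred_boxes)
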
Example #8
    def doit(self, raw_image):
        with torch.no_grad():
            raw_height, raw_width = raw_image.shape[:2]
            #print("Original image size: ", (raw_height, raw_width))

            # Preprocessing
            image = self.predictor.transform_gen.get_transform(
                raw_image).apply_image(raw_image)
            #print("Transformed image size: ", image.shape[:2])
            image_height, image_width = image.shape[:2]
            image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
            inputs = [{
                "image": image,
                "height": raw_height,
                "width": raw_width
            }]
            images = self.predictor.model.preprocess_image(inputs)

            # Run Backbone Res1-Res4
            features = self.predictor.model.backbone(images.tensor)

            # Generate proposals with RPN
            proposals, _ = self.predictor.model.proposal_generator(
                images, features, None)
            proposal = proposals[0]
            #print('Proposal Boxes size:', proposal.proposal_boxes.tensor.shape)

            # Run RoI head for each proposal (RoI Pooling + Res5)
            proposal_boxes = [x.proposal_boxes for x in proposals]
            features = [
                features[f] for f in self.predictor.model.roi_heads.in_features
            ]
            box_features = self.predictor.model.roi_heads._shared_roi_transform(
                features, proposal_boxes)
            feature_pooled = box_features.mean(dim=[2, 3])  # pooled to 1x1
            #print('Pooled features size:', feature_pooled.shape)

            # Predict classes and boxes for each proposal.
            pred_class_logits, pred_attr_logits, pred_proposal_deltas = self.predictor.model.roi_heads.box_predictor(
                feature_pooled)
            outputs = FastRCNNOutputs(
                self.predictor.model.roi_heads.box2box_transform,
                pred_class_logits,
                pred_proposal_deltas,
                proposals,
                self.predictor.model.roi_heads.smooth_l1_beta,
            )
            probs = outputs.predict_probs()[0]
            boxes = outputs.predict_boxes()[0]

            box_width = boxes[:, 2] - boxes[:, 0]
            box_height = boxes[:, 3] - boxes[:, 1]
            scaled_width = box_width / image_width
            scaled_height = box_height / image_height
            scaled_x = boxes[:, 0] / image_width
            scaled_y = boxes[:, 1] / image_height
            scaled_width = scaled_width[..., np.newaxis]
            scaled_height = scaled_height[..., np.newaxis]
            scaled_x = scaled_x[..., np.newaxis]
            scaled_y = scaled_y[..., np.newaxis]
            spatial_features = torch.cat(
                (scaled_x, scaled_y, scaled_x + scaled_width,
                 scaled_y + scaled_height, scaled_width, scaled_height), 1)
            oscar_features = torch.cat((feature_pooled, spatial_features), 1)

            attr_prob = pred_attr_logits[..., :-1].softmax(-1)
            max_attr_prob, max_attr_label = attr_prob.max(-1)

            # Note: BUTD uses raw RoI predictions,
            #       we use the predicted boxes instead.
            # boxes = proposal_boxes[0].tensor

            # NMS
            for nms_thresh in np.arange(0.5, 1.0, 0.1):
                instances, ids = fast_rcnn_inference_single_image(
                    boxes,
                    probs,
                    image.shape[1:],
                    score_thresh=0.2,
                    nms_thresh=nms_thresh,
                    topk_per_image=self.NUM_OBJECTS)
                if len(ids) >= self.NUM_OBJECTS:
                    break
            instances = detector_postprocess(instances, raw_height, raw_width)
            roi_features = feature_pooled[ids].detach()
            oscar_features = oscar_features[ids].detach()
            max_attr_prob = max_attr_prob[ids].detach()
            max_attr_label = max_attr_label[ids].detach()
            instances.attr_scores = max_attr_prob
            instances.attr_classes = max_attr_label

            return instances, roi_features, oscar_features
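
The concatenation above yields Oscar-style region features: 2048 pooled dimensions plus 6 normalized spatial dimensions. A quick shape check (extractor is a hypothetical instance of this class):

instances, roi_features, oscar_features = extractor.doit(raw_image)
assert oscar_features.shape[1] == roi_features.shape[1] + 6  # e.g. 2054 = 2048 + 6
assert oscar_features.shape[0] == len(instances)             # one row per kept box
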
Example #9
    def doit(self):
        data_path = 'data/genome/'
        vg_classes = []
        with open(os.path.join(data_path, 'objects_vocab.txt')) as f:
            for line in f:  # avoid shadowing the builtin `object`
                vg_classes.append(line.split(',')[0].lower().strip())

        vg_attrs = []
        with open(os.path.join(data_path, 'attributes_vocab.txt')) as f:
            for line in f:
                vg_attrs.append(line.split(',')[0].lower().strip())

        MetadataCatalog.get("vg").thing_classes = vg_classes
        MetadataCatalog.get("vg").attr_classes = vg_attrs

        cfg = get_cfg()
        cfg.merge_from_file(
            "./configs/VG-Detection/faster_rcnn_R_101_C4_attr_caffemaxpool.yaml"
        )
        cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 300
        cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.6
        cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.2
        # VG Weight
        cfg.MODEL.WEIGHTS = "http://nlp.cs.unc.edu/models/faster_rcnn_from_caffe_attr_original.pkl"
        predictor = DefaultPredictor(cfg)
        with torch.no_grad():
            raw_height, raw_width = self.raw_image.shape[:2]
            print("Original image size: ", (raw_height, raw_width))

            # Preprocessing
            image = predictor.transform_gen.get_transform(
                self.raw_image).apply_image(self.raw_image)
            print("Transformed image size: ", image.shape[:2])
            image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
            inputs = [{
                "image": image,
                "height": raw_height,
                "width": raw_width
            }]
            images = predictor.model.preprocess_image(inputs)

            # Run Backbone Res1-Res4
            features = predictor.model.backbone(images.tensor)

            # Generate proposals with RPN
            proposals, _ = predictor.model.proposal_generator(
                images, features, None)
            proposal = proposals[0]
            print('Proposal Boxes size:', proposal.proposal_boxes.tensor.shape)

            # Run RoI head for each proposal (RoI Pooling + Res5)
            proposal_boxes = [x.proposal_boxes for x in proposals]
            features = [
                features[f] for f in predictor.model.roi_heads.in_features
            ]
            box_features = predictor.model.roi_heads._shared_roi_transform(
                features, proposal_boxes)
            feature_pooled = box_features.mean(dim=[2, 3])  # pooled to 1x1
            print('Pooled features size:', feature_pooled.shape)

            # Predict classes and boxes for each proposal.
            pred_class_logits, pred_attr_logits, pred_proposal_deltas = predictor.model.roi_heads.box_predictor(
                feature_pooled)
            outputs = FastRCNNOutputs(
                predictor.model.roi_heads.box2box_transform,
                pred_class_logits,
                pred_proposal_deltas,
                proposals,
                predictor.model.roi_heads.smooth_l1_beta,
            )
            probs = outputs.predict_probs()[0]
            boxes = outputs.predict_boxes()[0]

            attr_prob = pred_attr_logits[..., :-1].softmax(-1)
            max_attr_prob, max_attr_label = attr_prob.max(-1)

            # Note: BUTD uses raw RoI predictions,
            #       we use the predicted boxes instead.
            # boxes = proposal_boxes[0].tensor

            # NMS
            for nms_thresh in np.arange(0.5, 1.0, 0.1):
                instances, ids = fast_rcnn_inference_single_image(
                    boxes,
                    probs,
                    image.shape[1:],
                    score_thresh=0.2,
                    nms_thresh=nms_thresh,
                    topk_per_image=self.NUM_OBJECTS)
                if len(ids) >= self.NUM_OBJECTS:
                    break

            instances = detector_postprocess(instances, raw_height, raw_width)
            roi_features = feature_pooled[ids].detach()
            max_attr_prob = max_attr_prob[ids].detach()
            max_attr_label = max_attr_label[ids].detach()
            instances.attr_scores = max_attr_prob
            instances.attr_classes = max_attr_label

            print(instances)
            print((roi_features).size())

            return roi_features
Example #10
def extract_feature(predictor, split, dataset_name, data, th=0.2):
    save_path = f'/home/sunjiamei/work/ImageCaptioning/dataset/{dataset_name}_bu_features/{split}'
    if not os.path.isdir(save_path):
        os.makedirs(save_path)
    h5_file_name = []
    print(len(data))
    for idx, item in enumerate(data):
        print(idx, len(data))
        img_path = item['image_path']
        raw_image = cv2.imread(img_path)
        # im_rgb = cv2.cvtColor(raw_image, cv2.COLOR_BGR2RGB)
        image_filename = img_path.split('/')[-1]
        if image_filename not in h5_file_name:
            h5_file_name.append(image_filename)
        else:
            continue
        with torch.no_grad():
            raw_height, raw_width = raw_image.shape[:2]
            # print("Original image size: ", (raw_height, raw_width))
            # Preprocessing
            image = predictor.transform_gen.get_transform(raw_image).apply_image(raw_image)
            # print("Transformed image size: ", image.shape[:2])
            image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
            inputs = [{"image": image, "height": raw_height, "width": raw_width}]
            images = predictor.model.preprocess_image(inputs)

            # Run Backbone Res1-Res4
            features = predictor.model.backbone(images.tensor)

            # Generate proposals with RPN
            proposals, _ = predictor.model.proposal_generator(images, features, None)
            proposal = proposals[0]
            # print('Proposal Boxes size:', proposal.proposal_boxes.tensor.shape)

            # Run RoI head for each proposal (RoI Pooling + Res5)
            proposal_boxes = [x.proposal_boxes for x in proposals]
            features = [features[f] for f in predictor.model.roi_heads.in_features]
            box_features = predictor.model.roi_heads._shared_roi_transform(
                features, proposal_boxes
            )
            feature_pooled = box_features.mean(dim=[2, 3])  # pooled to 1x1
            # print('Pooled features size:', feature_pooled.shape)

            # Predict classes and boxes for each proposal.
            pred_class_logits, pred_attr_logits, pred_proposal_deltas = predictor.model.roi_heads.box_predictor(
                feature_pooled)
            outputs = FastRCNNOutputs(
                predictor.model.roi_heads.box2box_transform,
                pred_class_logits,
                pred_proposal_deltas,
                proposals,
                predictor.model.roi_heads.smooth_l1_beta,
            )
            probs = outputs.predict_probs()[0]
            boxes = outputs.predict_boxes()[0]

            attr_prob = pred_attr_logits[..., :-1].softmax(-1)
            max_attr_prob, max_attr_label = attr_prob.max(-1)

            # Note:
            #       we use the predicted boxes instead.
            # boxes = proposal_boxes[0].tensor

            # NMS
            for nms_thresh in np.arange(0.5, 1.0, 0.1):
                instances, ids = fast_rcnn_inference_single_image(
                    boxes, probs, image.shape[1:],
                    score_thresh=th, nms_thresh=nms_thresh, topk_per_image=NUM_OBJECTS
                )
                if len(ids) == NUM_OBJECTS:
                    break
            instances = detector_postprocess(instances, raw_height, raw_width)
            roi_features = feature_pooled[ids].detach()
            max_attr_prob = max_attr_prob[ids].detach()
            max_attr_label = max_attr_label[ids].detach()
            instances.attr_scores = max_attr_prob
            instances.attr_classes = max_attr_label
            if roi_features.size(0) < NUM_OBJECTS:
                extract_single(img_path, save_path, image_filename)
                continue
            h5_save_file = h5py.File(os.path.join(save_path, image_filename+'.hdf5'), 'w')
            h5_save_file.create_dataset('image_features', data=roi_features.cpu().numpy())
            h5_save_file.create_dataset('image_boxes', data=instances.pred_boxes.tensor.cpu().numpy())
            h5_save_file.close()
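
Reading the saved features back is symmetric (the file name is illustrative):

import h5py

with h5py.File("image.jpg.hdf5", "r") as f:
    image_features = f["image_features"][()]  # (NUM_OBJECTS, 2048) pooled features
    image_boxes = f["image_boxes"][()]        # (NUM_OBJECTS, 4) XYXY in raw-image pixels
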
def run_detector(raw_image, predictor, num_objects=8000, verbose=True):
    with torch.no_grad():
        raw_height, raw_width = raw_image.shape[:2]
        if verbose:
            tqdm.write("Original image size: " + str((raw_height, raw_width)))

        # Preprocessing
        image = predictor.transform_gen.get_transform(raw_image).apply_image(
            raw_image)
        if verbose:
            tqdm.write("Transformed image size: " + str(image.shape[:2]))
        image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
        inputs = [{"image": image, "height": raw_height, "width": raw_width}]
        images = predictor.model.preprocess_image(inputs)

        # Run Backbone Res1-Res4
        features = predictor.model.backbone(images.tensor)

        # Generate proposals with RPN
        proposals, _ = predictor.model.proposal_generator(
            images, features, None)
        proposal = proposals[0]
        if verbose:
            tqdm.write('Proposal Boxes size: ' +
                       str(proposal.proposal_boxes.tensor.shape))

        # Run RoI head for each proposal (RoI Pooling + Res5)
        proposal_boxes = [x.proposal_boxes for x in proposals]
        features = [features[f] for f in predictor.model.roi_heads.in_features]
        box_features = predictor.model.roi_heads._shared_roi_transform(
            features, proposal_boxes)
        feature_pooled = box_features.mean(dim=[2, 3])  # pooled to 1x1
        if verbose:
            tqdm.write('Pooled features size: ' + str(feature_pooled.shape))

        # Predict classes and boxes for each proposal.
        pred_class_logits, pred_proposal_deltas = predictor.model.roi_heads.box_predictor(
            feature_pooled)
        outputs = FastRCNNOutputs(
            predictor.model.roi_heads.box2box_transform,
            pred_class_logits,
            pred_proposal_deltas,
            proposals,
            predictor.model.roi_heads.smooth_l1_beta,
        )
        probs = outputs.predict_probs()[0]
        boxes = outputs.predict_boxes()[0]

        # Note: BUTD uses raw RoI predictions,
        #       we use the predicted boxes instead.
        # boxes = proposal_boxes[0].tensor

        # NMS
        for nms_thresh in np.arange(0.5, 1.0, 0.1):
            instances, ids = fast_rcnn_inference_single_image(
                boxes,
                probs,
                image.shape[1:],
                score_thresh=0.0,
                nms_thresh=nms_thresh,
                topk_per_image=num_objects)
            if len(ids) == num_objects:
                break

        instances = detector_postprocess(instances, raw_height, raw_width)
        #roi_features = feature_pooled[ids].detach()
        if verbose: tqdm.write(str(instances))

        return instances  #, roi_features
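
Because run_detector runs NMS with score_thresh=0.0 and a very large topk by default, downstream code typically filters by score afterwards; detectron2's Instances supports boolean masking for that. A sketch:

instances = run_detector(raw_image, predictor, num_objects=100, verbose=False)
keep = instances.scores > 0.5
instances = instances[keep]
print(len(instances), instances.pred_boxes)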