Example #1
    def postprocess_detections(self, class_logits, box_regression, proposals, image_shapes):
        # type: (Tensor, Tensor, List[Tensor], List[Tuple[int, int]]) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]
        device = class_logits.device
        num_classes = class_logits.shape[-1]

        boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
        pred_boxes = self.box_coder.decode(box_regression, proposals)

        pred_scores = F.softmax(class_logits, -1)

        # split boxes and scores per image
        if len(boxes_per_image) == 1:
            # TODO: remove this when ONNX supports dynamic split sizes
            # and just assign to pred_boxes instead of pred_boxes_list
            pred_boxes_list = [pred_boxes]
            pred_scores_list = [pred_scores]
        else:
            pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
            pred_scores_list = pred_scores.split(boxes_per_image, 0)

        all_boxes = []
        all_scores = []
        all_labels = []
        for boxes, scores, image_shape in zip(pred_boxes_list, pred_scores_list, image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            # create labels for each prediction
            labels = torch.arange(num_classes, device=device)
            labels = labels.view(1, -1).expand_as(scores)

            # remove predictions with the background label
            boxes = boxes[:, 1:]
            scores = scores[:, 1:]
            labels = labels[:, 1:]

            # batch everything, by making every class prediction be a separate instance
            boxes = boxes.reshape(-1, 4)
            scores = scores.reshape(-1)
            labels = labels.reshape(-1)

            # remove low scoring boxes
            inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
            boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

            # remove empty boxes
            keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            # non-maximum suppression, independently done per class
            keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[:self.detections_per_img]
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            all_boxes.append(boxes)
            all_scores.append(scores)
            all_labels.append(labels)

        return all_boxes, all_scores, all_labels
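
A minimal standalone sketch of what box_ops.clip_boxes_to_image does in the example above (not part of the scraped code; assumes torchvision.ops is importable and uses made-up values): the size argument is (height, width), and x coordinates are clamped to [0, width] while y coordinates are clamped to [0, height].

    import torch
    from torchvision.ops import clip_boxes_to_image

    boxes = torch.tensor([[-5.0, 10.0, 120.0, 90.0],
                          [30.0, -2.0, 60.0, 200.0]])
    clipped = clip_boxes_to_image(boxes, (100, 100))  # size is (height, width)
    # clipped == tensor([[  0.,  10., 100.,  90.],
    #                    [ 30.,   0.,  60., 100.]])
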
Example #2
    def filter_proposals(self, loc_delta, anchors):
        decoded_boxes = decode(loc_delta, anchors, self.variance)
        decoded_boxes = box_ops.clip_boxes_to_image(decoded_boxes,
                                                    self.img_size)
        keep = box_ops.remove_small_boxes(decoded_boxes, self.min_size)
        decoded_boxes = decoded_boxes[keep]

        return decoded_boxes
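
For reference, a minimal sketch of box_ops.remove_small_boxes as used above (not part of the scraped code; assumes torchvision.ops and toy values): it returns the indices of boxes whose width and height are both at least min_size, which the caller then uses to index the boxes.

    import torch
    from torchvision.ops import remove_small_boxes

    boxes = torch.tensor([[0.0, 0.0, 10.0, 10.0],   # 10 x 10
                          [5.0, 5.0, 5.5, 20.0]])   # 0.5 x 15, too thin
    keep = remove_small_boxes(boxes, min_size=1.0)  # -> tensor([0])
    boxes = boxes[keep]
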
Example #3
    def get_linear_boxes(self):
        box = []
        for t in self.track_actives:
            box.append(self.get_linear_box(t))

        box = torch.stack([torch.Tensor(t) for t in box], 0)
        box = clip_boxes_to_image(box, self.img.shape[-2:])
        return box
Example #4
    def ssm_postprocess_detections(self, class_logits, box_regression, proposals, image_shapes):
        device = class_logits.device
        num_classes = class_logits.shape[-1]

        boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
        pred_boxes = self.box_coder.decode(box_regression, proposals)

        pred_scores = F.softmax(class_logits, -1)

        # split boxes and scores per image
        pred_boxes = pred_boxes.split(boxes_per_image, 0)
        pred_scores = pred_scores.split(boxes_per_image, 0)
        al_idx = 0
        all_boxes = torch.empty([0, 4]).cuda()
        all_scores = torch.tensor([]).cuda()
        all_labels = []
        CONF_THRESH = 0.5  # a larger threshold yields more active-learning samples
        for boxes, scores, image_shape in zip(pred_boxes, pred_scores, image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)
            # create labels for each prediction
            labels = torch.arange(num_classes, device=device)
            labels = labels.view(1, -1).expand_as(scores)

            # remove predictions with the background label
            boxes = boxes[:, 1:]
            scores = scores[:, 1:]
            labels = labels[:, 1:]
            if torch.max(scores) < CONF_THRESH:
                al_idx = 1
                continue
            for cls_ind in range(num_classes - 1):
                cls_boxes = boxes[:, cls_ind]
                cls_scores = scores[:, cls_ind]
                cls_labels = labels[:, cls_ind]
                # batch everything, by making every class prediction be a separate instance
                cls_boxes = cls_boxes.reshape(-1, 4)
                cls_scores = cls_scores.flatten()
                cls_labels = cls_labels.flatten()

                # remove low scoring boxes

                # non-maximum suppression, independently done per class
                keep = box_ops.batched_nms(cls_boxes, cls_scores, cls_labels, self.nms_thresh)
                # keep only topk scoring predictions
                keep = keep[:self.detections_per_img]
                cls_boxes, cls_scores, cls_labels = cls_boxes[keep], cls_scores[keep], cls_labels[keep]
                inds = torch.nonzero(cls_scores > self.score_thresh).squeeze(1)
                if len(inds) == 0:
                    continue
                for j in inds:
                    # boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

                    all_boxes = torch.cat((all_boxes, cls_boxes[j].unsqueeze(0)), 0)
                    k = keep[j]
                    all_scores = torch.cat((all_scores, scores[k].unsqueeze(0)), 0)
                    all_labels.append(judge_y(scores[k]))
        # all_scores = [torch.cat(all_scores, 1)]
        return [all_boxes], [all_scores], [all_labels], al_idx
Example #5
    def filter_proposals(
        self,
        proposals: Tensor,
        objectness: Tensor,
        image_shapes: List[Tuple[int, int]],
        num_anchors_per_level: List[int],
    ) -> Tuple[List[Tensor], List[Tensor]]:

        num_images = proposals.shape[0]
        device = proposals.device
        # do not backprop through objectness
        objectness = objectness.detach()
        objectness = objectness.reshape(num_images, -1)

        levels = [
            torch.full((n, ), idx, dtype=torch.int64, device=device)
            for idx, n in enumerate(num_anchors_per_level)
        ]
        levels = torch.cat(levels, 0)
        levels = levels.reshape(1, -1).expand_as(objectness)

        # select top_n boxes independently per level before applying nms
        top_n_idx = self._get_top_n_idx(objectness, num_anchors_per_level)

        image_range = torch.arange(num_images, device=device)
        batch_idx = image_range[:, None]

        objectness = objectness[batch_idx, top_n_idx]
        levels = levels[batch_idx, top_n_idx]
        proposals = proposals[batch_idx, top_n_idx]

        objectness_prob = torch.sigmoid(objectness)

        final_boxes = []
        final_scores = []
        for boxes, scores, lvl, img_shape in zip(proposals, objectness_prob,
                                                 levels, image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, img_shape)

            # remove small boxes
            keep = box_ops.remove_small_boxes(boxes, self.min_size)
            boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]

            # remove low scoring boxes
            # use >= for Backwards compatibility
            keep = torch.where(scores >= self.score_thresh)[0]
            boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]

            # non-maximum suppression, independently done per level
            keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh)

            # keep only topk scoring predictions
            keep = keep[:self.post_nms_top_n()]
            boxes, scores = boxes[keep], scores[keep]

            final_boxes.append(boxes)
            final_scores.append(scores)
        return final_boxes, final_scores
Example #6
    def postprocess_detections(self, class_logits, box_regression, proposals,
                               image_shapes):
        device = class_logits.device
        num_classes = class_logits.shape[-1]

        boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
        pred_boxes = self.box_coder.decode(box_regression, proposals)

        pred_scores = F.softmax(class_logits, -1)

        # split boxes and scores per image
        pred_boxes = pred_boxes.split(boxes_per_image, 0)
        pred_scores = pred_scores.split(boxes_per_image, 0)

        all_boxes = []
        all_scores = []
        all_labels = []
        for boxes, scores, image_shape in zip(pred_boxes, pred_scores,
                                              image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            # create labels for each prediction
            labels = torch.arange(num_classes, device=device)
            labels = labels.view(1, -1).expand_as(scores)

            # remove predictions with the background label
            boxes = boxes[:, 1:]
            scores = scores[:, 1:]
            labels = labels[:, 1:]

            # batch everything, by making every class prediction be a separate instance
            boxes = boxes.reshape(-1, 4)
            scores = scores.flatten()
            labels = labels.flatten()

            # remove low scoring boxes
            inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
            boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

            # remove empty boxes
            # TODO: min_size=1.0 does not seem to be enough here; we need min_size=1.01
            # because the bbox is discretized with int(.), and float rounding can give
            # a - b > 1.0 while int(a) - int(b) == 0
            keep = box_ops.remove_small_boxes(boxes, min_size=1.01)
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            # non-maximum suppression, independently done per class
            keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[:self.detections_per_img]
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            all_boxes.append(boxes)
            all_scores.append(scores)
            all_labels.append(labels)

        return all_boxes, all_scores, all_labels
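
A small sketch of the class-aware NMS call shared by these postprocess_detections examples (not part of the scraped code; assumes torchvision.ops.batched_nms and toy values): boxes are suppressed only against boxes with the same label, and the returned indices are sorted by decreasing score.

    import torch
    from torchvision.ops import batched_nms

    boxes = torch.tensor([[0.0, 0.0, 10.0, 10.0],
                          [1.0, 1.0, 11.0, 11.0],    # overlaps box 0
                          [0.0, 0.0, 10.0, 10.0]])   # same box, different class
    scores = torch.tensor([0.9, 0.8, 0.7])
    labels = torch.tensor([1, 1, 2])
    keep = batched_nms(boxes, scores, labels, iou_threshold=0.5)
    # keep == tensor([0, 2]); box 1 is suppressed only within its own class
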
Example #7
    def postprocess_detections(self,
                               class_logits,    # type: Tensor
                               box_regression,  # type: Tensor
                               proposals,       # type: List[Tensor]
                               image_shapes     # type: List[Tuple[int, int]]
                               ):
        # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]
        device = class_logits.device
        num_classes = class_logits.shape[-1]

        boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals]
        pred_boxes = self.box_coder.decode(box_regression, proposals)

        pred_scores = F.softmax(class_logits, -1)

        pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
        pred_scores_list = pred_scores.split(boxes_per_image, 0)

        all_boxes = []
        all_scores = []
        all_labels = []
        for boxes, scores, image_shape in zip(pred_boxes_list, pred_scores_list, image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            # create labels for each prediction
            labels = torch.arange(num_classes, device=device)
            labels = labels.view(1, -1).expand_as(scores)

            # remove predictions with the background label
            boxes = boxes[:, 1:]
            scores = scores[:, 1:]
            labels = labels[:, 1:]

            # batch everything, by making every class prediction be a separate instance
            boxes = boxes.reshape(-1, 4)
            scores = scores.reshape(-1)
            labels = labels.reshape(-1)

            # remove low scoring boxes
            inds = torch.where(scores > self.score_thresh)[0]
            boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

            # remove empty boxes
            keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            # non-maximum suppression, independently done per class
            keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[:self.detections_per_img]
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            all_boxes.append(boxes)
            all_scores.append(scores)
            all_labels.append(labels)

        return all_boxes, all_scores, all_labels
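
The "batch everything" step that several of these examples share flattens the per-class dimension so that every (proposal, class) pair becomes an independent detection. A toy sketch with made-up shapes (plain torch, not part of the scraped code):

    import torch

    N, C = 3, 4                          # 3 proposals, 4 classes (class 0 = background)
    boxes = torch.rand(N, C, 4)          # one box per (proposal, class)
    scores = torch.rand(N, C).softmax(-1)
    labels = torch.arange(C).view(1, -1).expand_as(scores)

    boxes, scores, labels = boxes[:, 1:], scores[:, 1:], labels[:, 1:]  # drop the background column
    boxes = boxes.reshape(-1, 4)         # (N*(C-1), 4)
    scores = scores.reshape(-1)          # (N*(C-1),)
    labels = labels.reshape(-1)          # row i belongs to class labels[i]
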
Example #8
    def regress_tracks(self, blob, plot_compare=False, frame=None):
        """Regress the position of the tracks and also checks their scores."""
        if self.finetuning_config["enabled"]:
            scores = []
            pos = []
            for track in self.tracks:
                # Regress with finetuned bbox head for each track
                assert track.box_head is not None
                assert track.box_predictor is not None

                box, score = self.obj_detect.predict_boxes(
                    track.pos,
                    box_head=track.box_head,
                    box_predictor=track.box_predictor)

                if plot_compare:
                    box_no_finetune, score_no_finetune = self.obj_detect.predict_boxes(
                        track.pos)
                    plot_compare_bounding_boxes(box, box_no_finetune,
                                                blob['img'])
                scores.append(score)
                bbox = clip_boxes_to_image(box, blob['img'].shape[-2:])
                pos.append(bbox)
            scores = torch.cat(scores)
            pos = torch.cat(pos)
        else:
            pos = self.get_pos()
            boxes, scores = self.obj_detect.predict_boxes(pos)
            pos = clip_boxes_to_image(boxes, blob['img'].shape[-2:])

        s = []
        for i in range(len(self.tracks) - 1, -1, -1):
            t = self.tracks[i]
            t.score = scores[i]
            if scores[i] <= self.regression_person_thresh:
                self.tracks_to_inactive([t])
            else:
                s.append(scores[i])
            # t.prev_pos = t.pos
            t.pos = pos[i].view(1, -1)

        scores_of_active_tracks = torch.Tensor(s[::-1]).to(device)

        return scores_of_active_tracks
Example #9
    def check_vis_results(self):
        # dummy inputs. do not affect the vis results
        last_pos_1 = [t.last_pos[-2] for t in self.tracks]
        last_pos_2 = [t.last_pos[-1] for t in self.tracks]  # same as t.pos
        last_pos_1 = torch.cat(last_pos_1, 0)
        last_pos_2 = torch.cat(last_pos_2, 0)

        curr_pos = self.get_pos()
        curr_pos = clip_boxes_to_image(curr_pos, self.last_image.shape[-2:])

        conv_features, repr_features = self.get_pooled_features(curr_pos)

        if isinstance(self.vis_model, MotionModel) or isinstance(
                self.vis_model, MotionModelV2):
            _, vis = self.vis_model(conv_features, repr_features, last_pos_1,
                                    last_pos_2)
        elif isinstance(self.vis_model, BackboneMotionModel):
            img = [self.last_image.cuda()]
            target = [{"boxes": curr_pos}]

            _, vis = self.vis_model(img, target, last_pos_1, last_pos_2)
        elif isinstance(self.vis_model, MotionModelReID):
            historical_reid_features = [
                torch.cat(list(t.features), 0) for t in self.tracks
            ]
            curr_reid_features = self.reid_network.test_rois(
                self.last_image.unsqueeze(0), curr_pos)

            _, vis = self.vis_model(historical_reid_features,
                                    curr_reid_features, conv_features,
                                    repr_features, last_pos_1, last_pos_2)
        elif isinstance(self.vis_model, MotionModelSimpleReID) or isinstance(
                self.vis_model, MotionModelV3):
            early_reid_features = torch.stack([
                torch.mean(torch.cat(t.early_features, 0), 0)
                for t in self.tracks
            ], 0)
            curr_reid_features = self.reid_network.test_rois(
                self.last_image.unsqueeze(0), curr_pos)

            _, vis = self.vis_model(early_reid_features, curr_reid_features,
                                    conv_features, repr_features, last_pos_1,
                                    last_pos_2)
        elif isinstance(self.vis_model, MotionModelSimpleReIDV2):
            early_reid_features = torch.stack([
                torch.mean(torch.cat(t.early_features, 0), 0)
                for t in self.tracks
            ], 0)
            curr_reid_features = self.reid_network.test_rois(
                self.last_image.unsqueeze(0), curr_pos)

            _, vis = self.vis_model(early_reid_features, curr_reid_features,
                                    repr_features, last_pos_1, last_pos_2)

        for i, t in enumerate(self.tracks):
            t.vis = vis[i].item()
Example #10
    def regress_tracks(self, blob, prev_boxes):
        """Regress the position of the tracks and also checks their scores."""
        if prev_boxes is None:
            prev_boxes = self.get_pos()
            boxes_to_shift = prev_boxes
            enlarged_boxes = clip_boxes_to_image(
                self.enlarge_boxes(boxes_to_shift), blob['img'].shape[-2:])
        else:
            boxes_to_shift = self.get_pos()
            enlarged_boxes = clip_boxes_to_image(
                self.enlarge_boxes(boxes_to_shift), blob['img'].shape[-2:])
        positions = enlarged_boxes

        if self.use_correlation:
            correlated_boxes = self.obj_detect.predict_with_correlation(
                prev_boxes, enlarged_boxes, boxes_to_shift)
            correlated_boxes = clip_boxes_to_image(correlated_boxes,
                                                   blob['img'].shape[-2:])
            positions = correlated_boxes
            if self.write_debug_images:
                plot_tracktor_image(blob, positions,
                                    [t.id for t in self.tracks],
                                    "2_after_correlation")

        boxes, scores = self.obj_detect.predict_boxes(positions)
        pos = clip_boxes_to_image(boxes, blob['img'].shape[-2:])

        s = []
        for i in range(len(self.tracks) - 1, -1, -1):
            t = self.tracks[i]
            t.score = scores[i]
            if scores[i] <= self.regression_person_thresh:
                self.tracks_to_inactive([t])
                self.score_killed_tracks.append({
                    'id': t.id,
                    'frame': self.im_index
                })
            else:
                s.append(scores[i])
                # t.prev_pos = t.pos
                t.pos = pos[i].view(1, -1)

        return torch.Tensor(s[::-1]).cuda()
Example #11
    def match(self, boxes, targets):

        m, n = boxes.shape[0], targets.shape[0]
        idx = torch.zeros((m, n))
        for i in range(m):
            for j in range(n):
                if boxes[i, 1] == targets[j, 1]:
                    idx[i, j] = 1
                    break
        boxes = boxes[idx.sum(dim=1, dtype=torch.uint8)]
        targets = targets[idx.sum(dim=0, dtype=torch.uint8)]
        boxes = boxes[torch.argsort(boxes[:, 1])]
        targets = targets[torch.argsort(targets[:, 1])]
        boxes[:,
              2:6] = clip_boxes_to_image(boxes[:, 2:6],
                                         (self.img_size[1], self.img_size[0]))
        targets[:, 2:6] = clip_boxes_to_image(
            targets[:, 2:6], (self.img_size[1], self.img_size[0]))
        return boxes, targets
Example #12
 def box_decoder(self, box_regression, proposals, image_shapes):
     boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
     pred_boxes = self.box_coder.decode(box_regression, proposals)
     pred_boxes = pred_boxes.split(boxes_per_image, 0)
     all_boxes = []
     for boxes, image_shape in zip(pred_boxes, image_shapes):
         boxes = box_ops.clip_boxes_to_image(boxes, image_shape)
         boxes = boxes[:, 1:]
         boxes = boxes.reshape(-1, 4)
         all_boxes.append(boxes)
     return all_boxes
Example #13
    def postprocess_detections(self, class_logits, box_regression, proposals,
                               image_shapes):
        device = class_logits.device
        num_classes = class_logits.shape[-1]

        boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
        pred_boxes = self.box_coder.decode(box_regression, proposals)

        pred_scores = F.softmax(class_logits, -1)

        # split boxes and scores per image
        pred_boxes = pred_boxes.split(boxes_per_image, 0)
        pred_scores = pred_scores.split(boxes_per_image, 0)

        all_boxes = []
        all_scores = []
        all_labels = []
        for boxes, scores, image_shape in zip(pred_boxes, pred_scores,
                                              image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            # create labels for each prediction
            labels = torch.arange(num_classes, device=device)
            labels = labels.view(1, -1).expand_as(scores)

            # remove predictions with the background label
            boxes = boxes[:, 1:]
            scores = scores[:, 1:]
            labels = labels[:, 1:]

            # boxes = boxes[:, 0]
            # scores = scores[:, 0]
            # labels = labels[:, 0]

            # batch everything, by making every class prediction be a separate instance
            boxes = boxes.reshape(-1, 4)
            scores = scores.flatten()
            labels = labels.flatten()

            # remove low scoring boxes
            inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
            boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

            # non-maximum suppression, independently done per class
            keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[:self.detections_per_img]
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            all_boxes.append(boxes)
            all_scores.append(scores)
            all_labels.append(labels)

        return all_boxes, all_scores, all_labels
Example #14
    def forward(self, cls_logits:torch.Tensor, reg_deltas:torch.Tensor,
            fmap_dims:Tuple[int,int], img_dims:Tuple[int,int],
            nms_threshold:float=.7, keep_pre_nms:int=1000, keep_post_nms:int=300,
            dtype=torch.float32, device='cpu'):
        """
        Params:
            cls_logits torch.Tensor: torch.Tensor(bs x (h'*w'*nA) x 1) 
            reg_deltas torch.Tensor: torch.Tensor(bs x (h'*w'*nA) x 4)
            fmap_dims:Tuple[int,int] h',w'
            img_dims:Tuple[int,int] h,w

        Returns:
            batched_dets: List[torch.Tensor(N,5)] as xmin,ymin,xmax,ymax,score
        """
        bs = cls_logits.size(0)
        if self.cached_fmap_dims != fmap_dims:
            # generate anchors for each input
            self.anchors = self.anchor_generator(fmap_dims, img_dims, dtype=dtype, device=device)
            self.cached_fmap_dims = fmap_dims

        batched_dets:List[torch.Tensor] = []

        scores = torch.sigmoid(cls_logits.detach()).reshape(bs,-1)
        offsets = reg_deltas.detach().reshape(bs,-1,4)

        # convert offsets to boxes
        # bs,N,4 | N,4 => bs,N,4 as xmin,ymin,xmax,ymax
        boxes = offsets2boxes(offsets, self.anchors)

        # TODO vectorize this loop
        for i in range(bs):
            single_boxes = boxes[i]
            single_scores = scores[i]
            N = single_scores.size(0)
            
            # select top n
            _,selected_ids = single_scores.topk( min(keep_pre_nms,N) )
            single_scores,single_boxes = single_scores[selected_ids], single_boxes[selected_ids]

            # clip boxes
            single_boxes = box_ops.clip_boxes_to_image(single_boxes, img_dims)

            # nms
            keep = box_ops.nms(single_boxes, single_scores, nms_threshold)
            single_scores,single_boxes = single_scores[keep], single_boxes[keep]

            # post_n
            keep_post_nms = min(keep_post_nms, single_boxes.size(0))
            single_scores,single_boxes = single_scores[:keep_post_nms], single_boxes[:keep_post_nms]

            batched_dets.append( torch.cat([single_boxes,single_scores.unsqueeze(-1)], dim=-1) )

        return batched_dets
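
Example #14 uses plain class-agnostic NMS rather than the batched, per-class variant. A minimal sketch (not part of the scraped code; assumes torchvision.ops.nms and toy values):

    import torch
    from torchvision.ops import nms

    boxes = torch.tensor([[0.0, 0.0, 10.0, 10.0],
                          [1.0, 1.0, 11.0, 11.0],
                          [50.0, 50.0, 60.0, 60.0]])
    scores = torch.tensor([0.9, 0.8, 0.7])
    keep = nms(boxes, scores, iou_threshold=0.5)  # -> tensor([0, 2])
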
Example #15
    def predict_boxes(self, boxes):
        device = list(self.parameters())[0].device

        boxes = boxes.to(device)

        try:
            boxes = resize_boxes(boxes, self.original_image_sizes[0],
                                 self.preprocessed_images.image_sizes[0])
        except IndexError:
            print(boxes.size())
            raise IndexError
        proposals = [boxes]

        box_features = self.roi_heads.box_roi_pool(
            self.features, proposals, self.preprocessed_images.image_sizes)
        box_features = self.roi_heads.box_head(box_features)
        class_logits, box_regression = self.roi_heads.box_predictor(
            box_features)

        pred_boxes = self.roi_heads.box_coder.decode(box_regression, proposals)
        pred_scores = F.softmax(class_logits, -1)

        # score_thresh = self.roi_heads.score_thresh
        # nms_thresh = self.roi_heads.nms_thresh

        # self.roi_heads.score_thresh = self.roi_heads.nms_thresh = 1.0
        # self.roi_heads.score_thresh = 0.0
        # self.roi_heads.nms_thresh = 1.0
        # detections, detector_losses = self.roi_heads(
        #     features, [boxes.squeeze(dim=0)], images.image_sizes, targets)

        # self.roi_heads.score_thresh = score_thresh
        # self.roi_heads.nms_thresh = nms_thresh

        # detections = self.transform.postprocess(
        #     detections, images.image_sizes, original_image_sizes)

        # detections = detections[0]
        # return detections['boxes'].detach().cpu(), detections['scores'].detach().cpu()

        pred_boxes = pred_boxes[:, 1:].squeeze(dim=1).detach()
        pred_boxes = resize_boxes(pred_boxes,
                                  self.preprocessed_images.image_sizes[0],
                                  self.original_image_sizes[0])
        pred_scores = pred_scores[:, 1:].squeeze(dim=1).detach()
        pred_boxes = box_ops.clip_boxes_to_image(pred_boxes,
                                                 self.original_image_sizes[0])
        if self.version == 'v2':
            for box, box_feature in zip(pred_boxes, box_features):
                self.box_features[str(int(box[0])) + ',' + str(int(box[1])) +
                                  ',' + str(int(box[2])) + ',' +
                                  str(int(box[3]))] = box_feature
        return pred_boxes, pred_scores
Example #16
    def postprocess_detections(
            self,
            pred_scores,  # type: Tensor
            pred_boxes,  # type: Tensor
            proposals,  # type: List[Tensor]
            image_shapes  # type: List[Tuple[int, int]]
    ):
        # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]
        # device = class_logits.device
        # num_classes = class_logits.shape[-1]

        boxes_per_image = [
            boxes_in_image.shape[0] for boxes_in_image in proposals
        ]

        pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
        pred_scores_list = pred_scores.split(boxes_per_image, 0)

        all_boxes = []
        all_scores = []
        all_labels = []
        for boxes, scores, image_shape in zip(pred_boxes_list,
                                              pred_scores_list, image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            # drop the background-class scores
            scores = scores[:, 1:]
            # labels = labels[:, 1:]

            scores, labels = scores.max(dim=1)
            labels += 1  # object labels start from 1

            # remove low scoring boxes
            inds = torch.where(scores > self.score_thresh)[0]
            boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

            # remove empty boxes
            keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            # non-maximum suppression, independently done per class
            keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[:self.detections_per_img]
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            all_boxes.append(boxes)
            all_scores.append(scores)
            all_labels.append(labels)

        return all_boxes, all_scores, all_labels
Example #17
    def regress_tracks(self, blob):
        """Regress the position of the tracks and also checks their scores."""
        pos = self.get_pos()
        pos_now = pos
        # regress
        boxes, scores = self.obj_detect.predict_boxes(
            blob['img'], pos)  # raw boxes -> (x1,y1,x2,y2) N*4
        pos = clip_boxes_to_image(boxes, blob['img'].shape[-2:])

        s = []
        for i in range(len(self.tracks) - 1, -1, -1):
            t = self.tracks[i]
            t.score = scores[i]

            # get crf metrics
            if t.track_count > 2:
                crf_metric = self.get_crf_metrics(t)
                self.all_metric_info.append(
                    crf_metric)  # append per track crf metric of this frame
            # ----

        # do crf inference
        if len(self.all_metric_info):
            marg_tuples = self.crf_inference(self.all_metric_info)
            for t_id, marg in marg_tuples:
                vals = range(t_id.n_opts)
                if len(t_id.labels) > 0:
                    vals = t_id.labels
                map_rv = np.argmax(marg)
                if map_rv == 0:
                    self.tracks_to_inactive([
                        inactive_t for inactive_t in self.tracks
                        if inactive_t.id == eval(t_id.name)
                    ])

        else:
            print(
                "the CRF metric list is empty; skipping CRF inference and using the score instead"
            )
            # use the score metric to deactivate the track; the score-only case is not handled here yet --
            # we assume CRF-based deactivation runs first, followed by score-based deactivation

        for i in range(len(self.tracks) - 1, -1, -1):
            t = self.tracks[i]  # re-bind the track; without this, `t` is stale from the loop above
            if scores[i] <= self.regression_person_thresh:
                self.tracks_to_inactive([t])
            else:
                s.append(scores[i])
                # t.prev_pos = t.pos
                t.pos = pos[i].view(1, -1)

        return torch.Tensor(s[::-1]).cuda()
Example #18
    def generate_anchors(self, x: Tensor) -> None:
        anchors = torch.cat([
            _generate_anchors(self.input_size, x.size(-1),
                              listify(anchor_sizes), self.aspect_ratios,
                              stride)
            for anchor_sizes, stride in zip(self.anchor_sizes, self.strides)
        ],
                            dim=0)

        # Filter anchors
        anchors = box_ops.clip_boxes_to_image(
            anchors, (self.input_size, self.input_size))
        keep = box_ops.remove_small_boxes(anchors, 1e-3)
        self.anchors = anchors[keep]
Example #19
    def postprocess_detections(self, class_logits, box_regression, proposals,
                               image_shapes):
        device = class_logits.device
        # bgr, GGO, C
        num_classes = class_logits.shape[-1]
        boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
        pred_boxes = self.box_coder.decode(box_regression, proposals)
        pred_scores = F.softmax(class_logits, -1)

        # split boxes and scores per image
        pred_boxes = pred_boxes.split(boxes_per_image, 0)
        pred_scores = pred_scores.split(boxes_per_image, 0)
        all_boxes = []
        all_scores = []
        all_labels = []
        for boxes, scores, image_shape in zip(pred_boxes, pred_scores,
                                              image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            # create labels for each prediction
            labels = torch.arange(num_classes, device=device)
            labels = labels.view(1, -1).expand_as(scores)
            # remove predictions with the background label
            boxes = boxes[:, 1:]
            scores = scores[:, 1:]
            labels = labels[:, 1:]
            # batch everything, by making every class prediction be a separate instance
            boxes = boxes.reshape(-1, 4)
            scores = scores.flatten()
            labels = labels.flatten()
            # for prediction/segmentation: keep only detections exceeding the threshold (scores > box_score_thresh)
            inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
            boxes, scores, labels = boxes[inds], scores[inds], labels[inds]
            #scores_c, labels_c, res_box = scores[inds_classifier], labels[inds_classifier], res_box[inds_classifier]
            # Since we have 2 predictions per RoI, we need to match the final indices to the corresponding RoI (floor(div(2)))
            roi_inds = torch.arange(labels.size()[0]).div_(2).to(device)
            # non-maximum suppression, independently done per class
            keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[:self.detections_per_img]
            # Keep the best boxes, sorted, discard the rest
            scores, boxes, labels = scores[keep], boxes[keep], labels[keep]
            #all_boxes.append(boxes)
            all_scores.append(scores)
            # detections
            all_boxes.append(boxes)
            all_labels.append(labels)

        return all_boxes, all_scores, all_labels
Example #20
    def __getitem__(self, index):
        single_item = self.roidb[index]
        # Image
        im = imread(single_item['img_path'])  # RGB, HWC, 0-255
        # im = np.array(Image.open(single_item['img_path']).convert('RGB'))
        if len(im.shape) == 2:
            im = im[:, :, np.newaxis]
            im = np.concatenate((im, im, im), axis=2)

        # divided by 255 for PyTorch pre-trained model
        if self.div:
            im = im / 255.
        # flip the channel, RGB to BGR for caffe pre-trained model
        if self.BGR:
            im = im[:, :, ::-1]
        if single_item['flipped']:
            im = im[:, ::-1, :]
        im = im.astype(np.float32, copy=False)
        image = torch.from_numpy(im).permute(2, 0, 1)  # HWC to CHW

        # Targets
        gt_boxes = single_item['boxes'].astype(np.int32, copy=False)
        gt_boxes = torch.from_numpy(gt_boxes).float()  # TODO(BUG): dtype
        # clip boxes of which coordinates out of the image resolution
        gt_boxes = clip_boxes_to_image(gt_boxes, tuple(image.shape[1:]))

        target = dict(
            boxes=gt_boxes,  # (num_boxes 4)
            labels=torch.from_numpy(single_item['gt_classes']).int(),
            pids=torch.from_numpy(single_item['gt_pids']).long(),
            img_name=single_item['img_name'],
        )
        if 'mask_path' in single_item:
            # foreground mask
            mask = imread(single_item['mask_path']).astype(
                np.int32, copy=False)  # (h w) in {0,255}
            assert np.ndim(mask) == 2
            if single_item['flipped']: mask = mask[:, ::-1]
            target['mask'] = torch.from_numpy(
                mask.copy())[None] / 255.  # 3D tensor(1HW) in {0,1}

        item = dict(image=image, target=target)

        # visualization
        # util.plot_gt_on_img([image], [target], write_path=
        # "/home/caffe/code/deep-person-search/cache/img_with_gt_box/gt%d.jpg" % np.random.choice(list(range(10)), 1))

        return item
Example #21
    def get_boxes(self, box_regression, proposals, image_shapes):
        """
        Get boxes from proposals.
        """
        boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
        pred_boxes = self.box_coder.decode(box_regression, proposals)
        pred_boxes = pred_boxes.split(boxes_per_image, 0)

        all_boxes = []
        for boxes, image_shape in zip(pred_boxes, image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)
            # remove predictions with the background label
            boxes = boxes[:, 1:].reshape(-1, 4)
            all_boxes.append(boxes)

        return all_boxes
Example #22
def crop(img: Image, target: Dict[str, Any],
         region: Tuple[int]) -> Tuple[Image, Dict[str, Any]]:
    """
    Args:
        region: [Top, Left, H, W]
    """
    # crop image
    src_w, src_h = img.size
    img = TF.crop(img, *region)

    target = deepcopy(target)
    top, left, h, w = region

    # set new image size
    if "size" in target.keys():
        target["size"] = (h, w)

    fields: List[str] = list()
    for k, v in target.items():
        if isinstance(v, Tensor):
            fields.append(k)

    # crop bounding boxes
    if "boxes" in target:
        boxes = target["boxes"]
        boxes[:, [0, 2]] *= src_w
        boxes[:, [1, 3]] *= src_h
        boxes = box_op.box_convert(boxes, "cxcywh", "xyxy")
        boxes -= torch.tensor([left, top, left, top])
        boxes = box_op.clip_boxes_to_image(boxes, (h, w))
        keep = box_op.remove_small_boxes(boxes, 1)
        boxes[:, [0, 2]] /= w
        boxes[:, [1, 3]] /= h
        boxes = box_op.box_convert(boxes, "xyxy", "cxcywh")
        target["boxes"] = boxes
        for field in fields:
            target[field] = target[field][keep]

    if "masks" in target:
        target['masks'] = target['masks'][:, top:top + h, left:left + w]
        keep = target['masks'].flatten(1).any(1)
        for field in fields:
            target[field] = target[field][keep]

    return img, target
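
The crop() example converts boxes between normalized cxcywh and absolute xyxy around the clipping step. A minimal round-trip sketch (not part of the scraped code; assumes torchvision.ops.box_convert, available in torchvision >= 0.8, and toy values):

    import torch
    from torchvision.ops import box_convert

    # normalized (cx, cy, w, h) on a 100 x 80 (W x H) image
    boxes = torch.tensor([[0.5, 0.5, 0.2, 0.4]])
    scale = torch.tensor([100.0, 80.0, 100.0, 80.0])
    xyxy = box_convert(boxes * scale, "cxcywh", "xyxy")  # tensor([[40., 24., 60., 56.]])
    back = box_convert(xyxy, "xyxy", "cxcywh") / scale   # back to normalized cxcywh
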
Example #23
    def filter_proposals(self, proposals, objectness, image_shapes,
                         num_anchors_per_level):
        # type: (Tensor, Tensor, List[Tuple[int, int]], List[int])
        num_images = proposals.shape[0]
        device = proposals.device
        # do not backprop through objectness
        objectness = objectness.detach()
        objectness = objectness.reshape(num_images, -1)

        levels = [
            torch.full((n, ), idx, dtype=torch.int64, device=device)
            for idx, n in enumerate(num_anchors_per_level)
        ]
        levels = torch.cat(levels, 0)
        levels = levels.reshape(1, -1).expand_as(objectness)

        # select top_n boxes independently per level before applying nms
        top_n_idx = self._get_top_n_idx(objectness, num_anchors_per_level)

        image_range = torch.arange(num_images, device=device)
        batch_idx = image_range[:, None]

        objectness = objectness[batch_idx, top_n_idx]
        levels = levels[batch_idx, top_n_idx]
        proposals = proposals[batch_idx, top_n_idx]

        final_boxes = []
        final_scores = []
        for boxes, scores, lvl, img_shape in zip(proposals, objectness, levels,
                                                 image_shapes):
            # For onnx export, Clip's min max can not be traced as tensor.
            if torchvision._is_tracing():
                boxes = _onnx_clip_boxes_to_image(boxes, img_shape)
            else:
                boxes = box_ops.clip_boxes_to_image(boxes, img_shape)
            keep = box_ops.remove_small_boxes(boxes, self.min_size)
            boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]
            # non-maximum suppression, independently done per level
            keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[:self.post_nms_top_n()]
            boxes, scores = boxes[keep], scores[keep]
            final_boxes.append(boxes)
            final_scores.append(scores)
        return final_boxes, final_scores
Example #24
    def __call__(self, proposals, image_shapes):

        # randomly choose an augmentation op
        augmentop = random.choice(self.ops)

        prealization = []

        for pboxes, image_shape in zip(proposals, image_shapes):

            # noinspection PyArgumentList
            boxes = augmentop(pboxes)

            # make sure it still fits within the image bounds
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            prealization.append(boxes)

        return prealization
Example #25
    def regress_tracks(self, blob):
        '''
        Regresses the position of the tracks and also checks their scores
        '''
        pos = self.get_pos()
        boxes, scores = self.obj_detect.predict_boxes(blob['img'], pos)
        pos = clip_boxes_to_image(boxes, blob['img'].shape[-2:])

        s = []
        for i in range(len(self.tracks) - 1, -1, -1):
            t = self.tracks[i]
            t.score = scores[i]
            if scores[i] < self.regression_person_thresh:
                self.tracks_to_inactive([t])
            else:
                s.append(scores[i])
                t.pos = pos[i].view(1, -1)

        return torch.Tensor(s[::-1]).cuda()
Example #26
File: oracle.py  Project: bjuncek/detr
	def regress_tracks(self, blob):
		pos = self.get_pos()

		# regress
		boxes, scores = self.obj_detect.predict_boxes(pos)
		pos = clip_boxes_to_image(boxes, blob['img'].shape[-2:])

		s = []
		for i in range(len(self.tracks) - 1, -1, -1):
			t = self.tracks[i]
			t.score = scores[i]

			if scores[i] <= self.regression_person_thresh and not self.kill_oracle:
				self.tracks_to_inactive([t])
			else:
				s.append(scores[i])
				if self.regress:
					t.pos = pos[i].view(1, -1)

		return torch.Tensor(s[::-1]).cuda()
Example #27
    def regress_tracks(self, boxes, cls_conf, img):
        """Regress the position of the tracks and also checks their scores."""
        pos = self.get_pos()

        # regress
        boxes, scores = boxes, cls_conf
        pos = clip_boxes_to_image(boxes, img.shape[-2:])

        s = []
        for i in range(len(self.tracks) - 1, -1, -1):
            t = self.tracks[i]
            t.score = scores[i]
            if scores[i] <= self.regression_person_thresh:
                self.tracks_to_inactive([t])
            else:
                s.append(scores[i])
                # t.prev_pos = t.pos
                t.pos = pos[i].view(1, -1)

        return torch.Tensor(s[::-1]).cuda()
Example #28
    def regress_tracks(self, blob):
        """Regress the position of the tracks and also checks their scores."""
        pos = self.get_pos()
        #         print('pos: ',pos)
        # regress
        boxes, scores = self.obj_detect.predict_boxes(
            pos)  # FIX THIS, can I just replace with detect, don't think so
        #boxes,scores self.obj_detect.detect(pos)
        pos = clip_boxes_to_image(boxes, blob['img'].shape[-2:])

        s = []
        for i in range(len(self.tracks) - 1, -1, -1):
            t = self.tracks[i]
            t.score = scores[i]
            if scores[i] <= self.regression_person_thresh:
                self.tracks_to_inactive([t])
            else:
                s.append(scores[i])
                # t.prev_pos = t.pos
                t.pos = pos[i].view(1, -1)

        return torch.Tensor(s[::-1]).cuda()
Example #29
    def filter_proposals(self, proposals, objectness, image_shapes,
                         num_anchors_per_level):
        num_images = proposals.shape[0]
        device = proposals.device
        # do not backprop through objectness
        objectness = objectness.detach()
        objectness = objectness.reshape(num_images, -1)

        levels = [
            torch.full((n, ), idx, dtype=torch.int64, device=device)
            for idx, n in enumerate(num_anchors_per_level)
        ]
        levels = torch.cat(levels, 0)
        levels = levels.reshape(1, -1).expand_as(objectness)

        # select top_n boxes independently per level before applying nms
        top_n_idx = self._get_top_n_idx(objectness, num_anchors_per_level)
        batch_idx = torch.arange(num_images, device=device)[:, None]
        objectness = objectness[batch_idx, top_n_idx]
        levels = levels[batch_idx, top_n_idx]
        proposals = proposals[batch_idx, top_n_idx]

        final_boxes = []
        final_scores = []
        for boxes, scores, lvl, img_shape in zip(proposals, objectness, levels,
                                                 image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, img_shape)
            keep = box_ops.remove_small_boxes(boxes, self.min_size)
            boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]
            # non-maximum suppression, independently done per level
            #lvl=torch.tensor(np.arange(len(lvl))).to(device)
            keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[:self.post_nms_top_n]
            boxes, scores = boxes[keep], scores[keep]
            final_boxes.append(boxes)
            final_scores.append(scores)
        return final_boxes, final_scores
Example #30
    def step(self, blob):
        """This function should be called every timestep to perform tracking with a blob
		containing the image information.
		"""
        for t in self.tracks:
            # add current position to last_pos list
            t.last_pos.append(t.pos.clone())

        ###########################
        # Look for new detections #
        ###########################

        self.obj_detect.load_image(blob['img'])

        if self.public_detections:
            dets = blob['dets'].squeeze(dim=0)
            if dets.nelement() > 0:
                boxes, scores = self.obj_detect.predict_boxes(dets)
            else:
                boxes = scores = torch.zeros(0).cuda()
        else:
            boxes, scores = self.obj_detect.detect(blob['img'])

        if boxes.nelement() > 0:
            boxes = clip_boxes_to_image(boxes, blob['img'].shape[-2:])

            # Filter out tracks that have too low person score
            inds = torch.gt(scores,
                            self.detection_person_thresh).nonzero().view(-1)
        else:
            inds = torch.zeros(0).cuda()

        if inds.nelement() > 0:
            det_pos = boxes[inds]

            det_scores = scores[inds]
        else:
            det_pos = torch.zeros(0).cuda()
            det_scores = torch.zeros(0).cuda()

        ##################
        # Predict tracks #
        ##################

        num_tracks = 0
        nms_inp_reg = torch.zeros(0).cuda()
        if len(self.tracks):
            # align
            if self.do_align:
                self.align(blob)

            # apply motion model
            if self.motion_model_cfg['enabled']:
                self.motion()
                self.tracks = [t for t in self.tracks if t.has_positive_area()]

            # regress
            person_scores = self.regress_tracks(blob)

            if len(self.tracks):
                # create nms input

                # nms here if tracks overlap
                keep = nms(self.get_pos(), person_scores,
                           self.regression_nms_thresh)

                self.tracks_to_inactive([
                    self.tracks[i] for i in list(range(len(self.tracks)))
                    if i not in keep
                ])

                if keep.nelement() > 0 and self.do_reid:
                    new_features = self.get_appearances(blob)
                    self.add_features(new_features)

        #####################
        # Create new tracks #
        #####################

        # !!! Here NMS is used to filter out detections that are already covered by tracks. This is
        # !!! done by iterating through the active tracks one by one, assigning them a bigger score
        # !!! than 1 (maximum score for detections) and then filtering the detections with NMS.
        # !!! In the paper this is done by calculating the overlap with existing tracks, but the
        # !!! result stays the same.
        if det_pos.nelement() > 0:
            keep = nms(det_pos, det_scores, self.detection_nms_thresh)
            det_pos = det_pos[keep]
            det_scores = det_scores[keep]

            # check with every track in a single run (problem if tracks delete each other)
            for t in self.tracks:
                nms_track_pos = torch.cat([t.pos, det_pos])
                nms_track_scores = torch.cat(
                    [torch.tensor([2.0]).to(det_scores.device), det_scores])
                keep = nms(nms_track_pos, nms_track_scores,
                           self.detection_nms_thresh)

                keep = keep[torch.ge(keep, 1)] - 1

                det_pos = det_pos[keep]
                det_scores = det_scores[keep]
                if keep.nelement() == 0:
                    break

        if det_pos.nelement() > 0:
            new_det_pos = det_pos
            new_det_scores = det_scores

            # try to reidentify tracks
            new_det_pos, new_det_scores, new_det_features = self.reid(
                blob, new_det_pos, new_det_scores)

            # add new
            if new_det_pos.nelement() > 0:
                self.add(new_det_pos, new_det_scores, new_det_features)

        ####################
        # Generate Results #
        ####################

        for t in self.tracks:
            if t.id not in self.results.keys():
                self.results[t.id] = {}
            self.results[t.id][self.im_index] = np.concatenate(
                [t.pos[0].cpu().numpy(),
                 np.array([t.score])])

        for t in self.inactive_tracks:
            t.count_inactive += 1

        self.inactive_tracks = [
            t for t in self.inactive_tracks if t.has_positive_area()
            and t.count_inactive <= self.inactive_patience
        ]

        self.im_index += 1
        self.last_image = blob['img'][0]
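
Taken together, most of the post-processing snippets above follow the same clip -> size filter -> score threshold -> per-class NMS -> top-k pattern. A condensed sketch of that pattern on generic tensors (not taken from any single example above; assumes torchvision.ops, and the default thresholds are placeholders):

    import torch
    from torchvision.ops import clip_boxes_to_image, remove_small_boxes, batched_nms

    def postprocess(boxes, scores, labels, image_shape,
                    score_thresh=0.05, nms_thresh=0.5, min_size=1e-2, topk=100):
        boxes = clip_boxes_to_image(boxes, image_shape)       # clamp boxes to the image
        keep = remove_small_boxes(boxes, min_size)            # drop degenerate boxes
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]
        keep = torch.where(scores > score_thresh)[0]          # drop low-confidence boxes
        boxes, scores, labels = boxes[keep], scores[keep], labels[keep]
        keep = batched_nms(boxes, scores, labels, nms_thresh)[:topk]  # per-class NMS, then top-k
        return boxes[keep], scores[keep], labels[keep]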