Example #1
    def loss_boxes(self, outputs, targets, indices, num_boxes):
        """Compute the losses related to the bounding boxes, the L1 regression loss and the GIoU loss
           targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4]
           The target boxes are expected in format (center_x, center_y, w, h), normalized by the image size.
        """
        assert 'pred_boxes' in outputs
        idx = self._get_src_permutation_idx(indices)
        src_boxes = outputs['pred_boxes'][idx]
        target_boxes = torch.cat(
            [t['boxes'][i] for t, (_, i) in zip(targets, indices)], dim=0)

        src_boxes_coordinates = src_boxes[:, :2]
        src_boxes_dimensions = src_boxes[:, 2:]
        target_boxes_coordinates = target_boxes[:, :2]
        target_boxes_dimensions = target_boxes[:, 2:]

        loss_bbox_coordinates = F.l1_loss(src_boxes_coordinates,
                                          target_boxes_coordinates,
                                          reduction='none')
        loss_bbox_dimensions = F.l1_loss(src_boxes_dimensions,
                                         target_boxes_dimensions,
                                         reduction='none')

        losses = {}
        losses['loss_bbox_coordinates'] = loss_bbox_coordinates.sum(
        ) / num_boxes
        losses['loss_bbox_dimensions'] = loss_bbox_dimensions.sum() / num_boxes

        loss_giou = 1 - torch.diag(
            box_ops.generalized_box_iou(
                box_ops.box_cxcywh_to_xyxy(src_boxes),
                box_ops.box_cxcywh_to_xyxy(target_boxes)))
        losses['loss_giou'] = loss_giou.sum() / num_boxes
        return losses
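
The helpers box_ops.box_cxcywh_to_xyxy and box_ops.generalized_box_iou used above (and in most examples below) are not shown on this page. A minimal sketch, following DETR's util/box_ops.py, looks roughly like this:

import torch
from torchvision.ops.boxes import box_area

def box_cxcywh_to_xyxy(x):
    # (center_x, center_y, w, h) -> (x0, y0, x1, y1)
    x_c, y_c, w, h = x.unbind(-1)
    b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
         (x_c + 0.5 * w), (y_c + 0.5 * h)]
    return torch.stack(b, dim=-1)

def box_iou(boxes1, boxes2):
    # pairwise IoU and union area, boxes in (x0, y0, x1, y1) format
    area1 = box_area(boxes1)
    area2 = box_area(boxes2)
    lt = torch.max(boxes1[:, None, :2], boxes2[:, :2])  # [N, M, 2]
    rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # [N, M, 2]
    wh = (rb - lt).clamp(min=0)
    inter = wh[:, :, 0] * wh[:, :, 1]
    union = area1[:, None] + area2 - inter
    return inter / union, union

def generalized_box_iou(boxes1, boxes2):
    # GIoU = IoU - |C \ (A U B)| / |C|, where C is the smallest box enclosing both
    iou, union = box_iou(boxes1, boxes2)
    lt = torch.min(boxes1[:, None, :2], boxes2[:, :2])
    rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:])
    wh = (rb - lt).clamp(min=0)
    area = wh[:, :, 0] * wh[:, :, 1]
    return iou - (area - union) / area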
Example #2
    def loss_boxes(self, outputs, gt_instances: List[Instances], indices: List[tuple], num_boxes):
        """Compute the losses related to the bounding boxes, the L1 regression loss and the GIoU loss
           targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4]
           The target boxes are expected in format (center_x, center_y, w, h), normalized by the image size.
        """
        # We ignore the regression loss of the track-disappear slots.
        #TODO: Make this filter process more elegant.
        filtered_idx = []
        for src_per_img, tgt_per_img in indices:
            keep = tgt_per_img != -1
            filtered_idx.append((src_per_img[keep], tgt_per_img[keep]))
        indices = filtered_idx
        idx = self._get_src_permutation_idx(indices)
        src_boxes = outputs['pred_boxes'][idx]
        target_boxes = torch.cat([gt_per_img.boxes[i] for gt_per_img, (_, i) in zip(gt_instances, indices)], dim=0)

        # For padded targets (obj_id == -1), skip the regression loss.
        target_obj_ids = torch.cat([gt_per_img.obj_ids[i] for gt_per_img, (_, i) in zip(gt_instances, indices)], dim=0) # size(16)
        mask = (target_obj_ids != -1)

        loss_bbox = F.l1_loss(src_boxes[mask], target_boxes[mask], reduction='none')
        loss_giou = 1 - torch.diag(box_ops.generalized_box_iou(
            box_ops.box_cxcywh_to_xyxy(src_boxes[mask]),
            box_ops.box_cxcywh_to_xyxy(target_boxes[mask])))

        losses = {}
        losses['loss_bbox'] = loss_bbox.sum() / num_boxes
        losses['loss_giou'] = loss_giou.sum() / num_boxes

        return losses
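
self._get_src_permutation_idx, called by most of the loss functions on this page, is also not shown here. In DETR's SetCriterion it is essentially the following (sketch):

    def _get_src_permutation_idx(self, indices):
        # flatten the per-image (src, tgt) index pairs into
        # (batch indices, query indices) suitable for advanced indexing
        batch_idx = torch.cat([torch.full_like(src, i) for i, (src, _) in enumerate(indices)])
        src_idx = torch.cat([src for (src, _) in indices])
        return batch_idx, src_idx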
Example #3
    def _add_fp_tracks(self, track_instances: Instances,
                       active_track_instances: Instances) -> Instances:
        inactive_instances = track_instances[track_instances.obj_idxes < 0]

        # add a false positive for each active track with probability fp_ratio.
        fp_prob = torch.ones_like(
            active_track_instances.scores) * self.fp_ratio
        selected_active_track_instances = active_track_instances[
            torch.bernoulli(fp_prob).bool()]

        if len(inactive_instances) > 0 and len(
                selected_active_track_instances) > 0:
            num_fp = len(selected_active_track_instances)
            if num_fp >= len(inactive_instances):
                fp_track_instances = inactive_instances
            else:
                inactive_boxes = Boxes(
                    box_ops.box_cxcywh_to_xyxy(inactive_instances.pred_boxes))
                selected_active_boxes = Boxes(
                    box_ops.box_cxcywh_to_xyxy(
                        selected_active_track_instances.pred_boxes))
                ious = pairwise_iou(inactive_boxes, selected_active_boxes)
                # select the fp with the largest IoU for each active track.
                fp_indexes = ious.max(dim=0).indices

                # remove duplicate fp.
                fp_indexes = torch.unique(fp_indexes)
                fp_track_instances = inactive_instances[fp_indexes]

            merged_track_instances = Instances.cat(
                [active_track_instances, fp_track_instances])
            return merged_track_instances

        return active_track_instances
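
For reference, torch.bernoulli above draws one independent 0/1 sample per active track, so each track contributes a false-positive candidate with probability fp_ratio. A standalone sketch with made-up values:

import torch

fp_ratio = 0.3                          # hypothetical injection rate
scores = torch.rand(5)                  # stand-in for active_track_instances.scores
fp_prob = torch.ones_like(scores) * fp_ratio
keep = torch.bernoulli(fp_prob).bool()  # True with probability fp_ratio, per track
print(keep)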
Example #4
    def loss_boxes(self, outputs, targets, indices, num_boxes):
        """
        Compute the losses related to the bounding boxes, the L1 regression loss and the GIoU loss
        targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4]
        The target boxes are expected in format (center_x, center_y, w, h), normalized by the image size.
        """
        assert "pred_boxes" in outputs
        idx = self._get_src_permutation_idx(indices)
        src_boxes = outputs["pred_boxes"].numpy()[
            idx[0].numpy(), idx[1].numpy(), :]  # [num_objects, 4]
        src_boxes = dg.to_variable(src_boxes)

        target_boxes = [
            t["boxes"].numpy()[i.numpy()]
            for t, (_, i) in zip(targets, indices)
        ]
        target_boxes = [dg.to_variable(t) for t in target_boxes]
        target_boxes = L.concat(target_boxes,
                                0).astype("float32")  # [num_objects, 4]
        loss_bbox = F.loss.l1_loss(src_boxes, target_boxes, reduction="sum")

        losses = {}
        losses["loss_bbox"] = loss_bbox / num_boxes

        num_boxes = src_boxes.shape[0]
        mask = T.creation.diag(dg.to_variable(
            np.ones(num_boxes)))  # mask out non-diag element
        loss_giou = (1 - box_ops.generalized_box_iou(
            box_ops.box_cxcywh_to_xyxy(src_boxes),
            box_ops.box_cxcywh_to_xyxy(target_boxes))) * mask
        losses["loss_giou"] = L.reduce_sum(loss_giou) / num_boxes
        return losses
Example #5
    def forward(self, outputs, targets):
        """ Performs the matching

        Params:
            outputs: This is a dict that contains at least these entries:
                 "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits
                 "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates

            targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing:
                 "labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth
                           objects in the target) containing the class labels
                 "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates

        Returns:
            A list of size batch_size, containing tuples of (index_i, index_j) where:
                - index_i is the indices of the selected predictions (in order)
                - index_j is the indices of the corresponding selected targets (in order)
            For each batch element, it holds:
                len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
        """
        bs, num_queries = outputs["pred_logits"].shape[:2]

        # We flatten to compute the cost matrices in a batch
        out_prob = outputs["pred_logits"].flatten(0, 1).softmax(
            -1)  # [batch_size * num_queries, num_classes]
        out_bbox = outputs["pred_boxes"].flatten(
            0, 1)  # [batch_size * num_queries, 4]

        # Also concat the target labels and boxes
        tgt_ids = torch.cat([v["labels"] for v in targets])
        tgt_bbox = torch.cat([v["boxes"] for v in targets])

        # Compute the classification cost. Contrary to the loss, we don't use the NLL,
        # but approximate it by 1 - proba[target class].
        # The 1 is a constant that doesn't change the matching, so it can be omitted.
        # In out_prob, every entry along dim 1 is the probability of that bbox belonging to a class; with 92 classes there are 92 numbers.
        # Since there are far more candidate boxes than actual boxes, we don't know in advance which candidate matches which gt box.
        # So we gather all tgt_ids and take the corresponding probabilities from out_prob, knowing that among the many candidates some bbox must best match each gt bbox.
        # The minus sign measures the gap from the ideal probability of 1, though whether the 1 is added makes no difference here.
        cost_class = -out_prob[:, tgt_ids]

        # Compute the L1 cost between boxes
        cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1)

        # Compute the giou cost between boxes
        cost_giou = -generalized_box_iou(box_cxcywh_to_xyxy(out_bbox),
                                         box_cxcywh_to_xyxy(tgt_bbox))

        # Final cost matrix
        C = self.cost_bbox * cost_bbox + self.cost_class * cost_class + self.cost_giou * cost_giou
        C = C.view(bs, num_queries, -1).cpu()

        sizes = [len(v["boxes"]) for v in targets]
        indices = [
            linear_sum_assignment(c[i])
            for i, c in enumerate(C.split(sizes, -1))
        ]
        return [(torch.as_tensor(i, dtype=torch.int64),
                 torch.as_tensor(j, dtype=torch.int64)) for i, j in indices]
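
The last step hands each per-image cost matrix to SciPy's Hungarian solver. A tiny illustration of what linear_sum_assignment returns, with toy numbers:

import torch
from scipy.optimize import linear_sum_assignment

C = torch.tensor([[0.9, 0.1],   # 3 queries (rows) x 2 targets (columns)
                  [0.2, 0.8],
                  [0.5, 0.5]])
row_ind, col_ind = linear_sum_assignment(C.numpy())
print(row_ind, col_ind)  # [0 1] [1 0]: query 0 -> target 1, query 1 -> target 0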
Example #6
    def forward(self, outputs, targets, positive_map):
        """Performs the matching

        Params:
            outputs: This is a dict that contains at least these entries:
                 "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits
                 "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates

            targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing:
                 "labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth
                           objects in the target) containing the class labels
                 "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates

        Returns:
            A list of size batch_size, containing tuples of (index_i, index_j) where:
                - index_i is the indices of the selected predictions (in order)
                - index_j is the indices of the corresponding selected targets (in order)
            For each batch element, it holds:
                len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
        """
        bs, num_queries = outputs["pred_logits"].shape[:2]

        # We flatten to compute the cost matrices in a batch
        out_prob = self.norm(outputs["pred_logits"].flatten(
            0, 1))  # [batch_size * num_queries, num_classes]
        out_bbox = outputs["pred_boxes"].flatten(
            0, 1)  # [batch_size * num_queries, 4]

        # Also concat the target labels and boxes
        tgt_bbox = torch.cat([v["boxes"] for v in targets])
        assert len(tgt_bbox) == len(positive_map)

        # Compute the soft-cross entropy between the predicted token alignment and the GT one for each box
        cost_class = -(out_prob.unsqueeze(1) *
                       positive_map.unsqueeze(0)).sum(-1)

        # Compute the L1 cost between boxes
        cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1)
        assert cost_class.shape == cost_bbox.shape

        # Compute the giou cost between boxes
        cost_giou = -generalized_box_iou(box_cxcywh_to_xyxy(out_bbox),
                                         box_cxcywh_to_xyxy(tgt_bbox))

        # Final cost matrix
        C = self.cost_bbox * cost_bbox + self.cost_class * cost_class + self.cost_giou * cost_giou
        C = C.view(bs, num_queries, -1).cpu()

        sizes = [len(v["boxes"]) for v in targets]
        indices = [
            linear_sum_assignment(c[i])
            for i, c in enumerate(C.split(sizes, -1))
        ]
        return [(torch.as_tensor(i, dtype=torch.int64),
                 torch.as_tensor(j, dtype=torch.int64)) for i, j in indices]
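
The soft token-alignment cost above is a negated inner product between each query's normalized token scores and each target's positive_map. With made-up numbers:

import torch

out_prob = torch.tensor([[0.1, 0.6, 0.2, 0.1],       # 2 queries x 4 text tokens
                         [0.7, 0.1, 0.1, 0.1]])
positive_map = torch.tensor([[0.0, 0.5, 0.5, 0.0]])  # 1 target spanning tokens 1-2
cost_class = -(out_prob.unsqueeze(1) * positive_map.unsqueeze(0)).sum(-1)
print(cost_class)  # tensor([[-0.4000], [-0.1000]]): query 0 aligns better with this target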
Example #7
    def forward(self, outputs, targets):
        """ Performs the matching

        Params:
            outputs: This is a dict that contains at least these entries:
                 "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits
                 "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates

            targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing:
                 "labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth
                           objects in the target) containing the class labels
                 "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates

        Returns:
            A list of size batch_size, containing tuples of (index_i, index_j) where:
                - index_i is the indices of the selected predictions (in order)
                - index_j is the indices of the corresponding selected targets (in order)
            For each batch element, it holds:
                len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
        """
        bs, num_queries = outputs["pred_logits"].shape[:2]

        # We flatten to compute the cost matrices in a batch
        out_prob = outputs["pred_logits"].flatten(0, 1).softmax(-1)  # [batch_size * num_queries, num_classes]
        out_bbox = outputs["pred_boxes"].flatten(0, 1)  # [batch_size * num_queries, 4]

        # Also concat the target labels and boxes
        tgt_ids = torch.cat([v["labels"] for v in targets])
        tgt_bbox = torch.cat([v["boxes"] for v in targets])

        # Compute the classification cost. Contrary to the loss, we don't use the NLL,
        # but approximate it by 1 - proba[target class].
        # The 1 is a constant that doesn't change the matching, so it can be omitted.
        cost_class = -out_prob[:, tgt_ids]

        # Compute the L1 cost between boxes
        # Note: cdist with p=1 is the Manhattan (L1) distance
        # cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1)
        out_bbox_coordinates = out_bbox[:,:2]
        out_bbox_dimensions = out_bbox[:,2:]
        tgt_bbox_coordinates = tgt_bbox[:,:2]
        tgt_bbox_dimensions = tgt_bbox[:,2:]
        cost_bbox_coordinates = torch.cdist(out_bbox_coordinates, tgt_bbox_coordinates, p=1)
        cost_bbox_dimensions = torch.cdist(out_bbox_dimensions, tgt_bbox_dimensions, p=1)

        # Compute the giou cost between boxes
        cost_giou = -generalized_box_iou(box_cxcywh_to_xyxy(out_bbox), box_cxcywh_to_xyxy(tgt_bbox))

        # Final cost matrix
        C = self.cost_bbox_coordinates * cost_bbox_coordinates + self.cost_bbox_dimensions * cost_bbox_dimensions + self.cost_class * cost_class + self.cost_giou * cost_giou
        C = C.view(bs, num_queries, -1).cpu()

        sizes = [len(v["boxes"]) for v in targets]
        indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))]
        return [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) for i, j in indices]
Example #8
    def forward(self, outputs, targets):
        """ Performs the matching

        Params:
            outputs: This is a dict that contains at least these entries:
                 "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits
                 "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates

            targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing:
                 "labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth
                           objects in the target) containing the class labels
                 "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates

        Returns:
            A list of size batch_size, containing tuples of (index_i, index_j) where:
                - index_i is the indices of the selected predictions (in order)
                - index_j is the indices of the corresponding selected targets (in order)
            For each batch element, it holds:
                len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
        """
        with torch.no_grad():
            bs, num_queries = outputs["pred_logits"].shape[:2]

            # We flatten to compute the cost matrices in a batch
            out_prob = outputs["pred_logits"].flatten(0, 1).sigmoid()
            out_bbox = outputs["pred_boxes"].flatten(0, 1)  # [batch_size * num_queries, 4]

            # Also concat the target labels and boxes
            tgt_ids = torch.cat([v["labels"] for v in targets])
            tgt_bbox = torch.cat([v["boxes"] for v in targets])

            # Compute the classification cost.
            alpha = 0.25
            gamma = 2.0
            neg_cost_class = (1 - alpha) * (out_prob ** gamma) * (-(1 - out_prob + 1e-8).log())
            pos_cost_class = alpha * ((1 - out_prob) ** gamma) * (-(out_prob + 1e-8).log())
            cost_class = pos_cost_class[:, tgt_ids] - neg_cost_class[:, tgt_ids]

            # Compute the L1 cost between boxes
            cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1)

            # Compute the giou cost between boxes
            cost_giou = -generalized_box_iou(box_cxcywh_to_xyxy(out_bbox),
                                             box_cxcywh_to_xyxy(tgt_bbox))

            # Final cost matrix
            C = self.cost_bbox * cost_bbox + self.cost_class * cost_class + self.cost_giou * cost_giou
            C = C.view(bs, num_queries, -1).cpu()

            sizes = [len(v["boxes"]) for v in targets]
            indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))]
            return [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) for i, j in indices]
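
The classification cost here is the focal-loss style matching cost used in Deformable-DETR. For a single (query, target) pair with predicted probability p for the target class, it works out as follows (p chosen arbitrarily):

import torch

alpha, gamma = 0.25, 2.0
p = torch.tensor(0.7)                                       # hypothetical probability
pos = alpha * ((1 - p) ** gamma) * (-(p + 1e-8).log())      # cost if treated as a positive
neg = (1 - alpha) * (p ** gamma) * (-(1 - p + 1e-8).log())  # cost if treated as a negative
print(pos - neg)                                            # ~ -0.43: confident predictions get a low cost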
Example #9
    def get_cls_loss(self,
                     outputs,
                     targets,
                     criterion,
                     cls_losses,
                     weights=None):
        """

        """
        # TODO: make sure we are not backpropagating from here
        # (probably in the function that calls this, under torch.no_grad())

        outputs_without_aux = {
            k: v
            for k, v in outputs.items() if k != 'aux_outputs'
        }
        indices = criterion.matcher(outputs_without_aux, targets)

        src_idx = criterion._get_src_permutation_idx(indices)

        # order the labels by the indices
        target_classes = torch.cat(
            [t["labels"][J] for t, (_, J) in zip(targets, indices)])  # BOXES
        src_logits = outputs['pred_logits'][src_idx]  # (BOXES) X C

        # order the bounding boxes by the indices
        target_boxes = torch.cat(
            [t['boxes'][i] for t, (_, i) in zip(targets, indices)],
            dim=0)  # BOXES
        src_boxes = outputs['pred_boxes'][src_idx]  # BOXES

        classes = torch.unique(target_classes)
        for cls in classes:
            idx = torch.where(target_classes == cls)[0]

            loss_ce = F.cross_entropy(src_logits[idx],
                                      target_classes[idx],
                                      reduction='sum')

            loss_bbox = F.l1_loss(src_boxes[idx],
                                  target_boxes[idx],
                                  reduction='sum')

            loss_giou = (1 - torch.diag(
                box_ops.generalized_box_iou(
                    box_ops.box_cxcywh_to_xyxy(src_boxes[idx]),
                    box_ops.box_cxcywh_to_xyxy(target_boxes[idx])))).sum()

            losses = torch.tensor([len(idx), loss_ce, loss_bbox, loss_giou])
            cls_losses[cls] += losses

        return cls_losses
Example #10
    def forward(self, outputs, target_sizes):
        """ Perform the computation
        Parameters:
            outputs: raw outputs of the model
            target_sizes: tensor of dimension [batch_size x 2] containing the size of each images of the batch
                          For evaluation, this must be the original image size (before any data augmentation)
                          For visualization, this should be the image size after data augment, but before padding
        """
        out_logits, out_bbox = outputs['pred_logits'], outputs['pred_boxes']
        track_logits, track_bbox = outputs['tracking_logits'], outputs[
            'tracking_boxes']

        assert len(out_logits) == len(target_sizes)
        assert target_sizes.shape[1] == 2

        prob = out_logits.sigmoid()
        track_prob = track_logits.sigmoid()
        #         topk_values, topk_indexes = torch.topk(prob.view(out_logits.shape[0], -1), 100, dim=1)
        #         scores = topk_values
        #         topk_boxes = topk_indexes // out_logits.shape[2]
        #         labels = topk_indexes % out_logits.shape[2]
        #         boxes = box_ops.box_cxcywh_to_xyxy(out_bbox)
        #         boxes = torch.gather(boxes, 1, topk_boxes.unsqueeze(-1).repeat(1,1,4))

        scores, labels = prob[..., 1:2].max(-1)
        labels = labels + 1
        boxes = box_ops.box_cxcywh_to_xyxy(out_bbox)

        track_scores, track_labels = track_prob[..., 1:2].max(-1)
        track_labels = track_labels + 1
        track_boxes = box_ops.box_cxcywh_to_xyxy(track_bbox)

        # and from relative [0, 1] to absolute [0, height] coordinates
        img_h, img_w = target_sizes.unbind(1)
        scale_fct = torch.stack([img_w, img_h, img_w, img_h], dim=1)
        boxes = boxes * scale_fct[:, None, :]
        track_boxes = track_boxes * scale_fct[:, None, :]

        results = [{
            'scores': s,
            'labels': l,
            'boxes': b,
            'track_scores': ts,
            'track_labels': tl,
            'track_boxes': tb
        } for s, l, b, ts, tl, tb in zip(scores, labels, boxes, track_scores,
                                         track_labels, track_boxes)]

        return results
Example #11
    def inference(self, box_cls, box_pred, mask_pred, image_sizes):
        """
        Arguments:
            box_cls (Tensor): tensor of shape (batch_size, num_queries, K).
                The tensor predicts the classification probability for each query.
            box_pred (Tensor): tensors of shape (batch_size, num_queries, 4).
                The tensor predicts 4-vector (x,y,w,h) box
                regression values for every query.
            image_sizes (List[torch.Size]): the input image sizes

        Returns:
            results (List[Instances]): a list of #images elements.
        """
        assert len(box_cls) == len(image_sizes)
        results = []
        scores, labels = F.softmax(box_cls, dim=-1)[:, :, :-1].max(-1)
        for i, (scores_per_image, labels_per_image, box_pred_per_image,
                image_size) in enumerate(
                    zip(scores, labels, box_pred, image_sizes)):
            result = Instances(image_size)
            result.pred_boxes = Boxes(box_cxcywh_to_xyxy(box_pred_per_image))
            result.pred_boxes.scale(scale_x=image_size[1], scale_y=\
                image_size[0])
            if self.mask_on:
                mask = F.interpolate(mask_pred[i].unsqueeze(0), size=\
                    image_size, mode='bilinear', align_corners=False)
                mask = mask[0].sigmoid() > 0.5
                B, N, H, W = mask_pred.shape
                mask = BitMasks(mask.cpu()).crop_and_resize(
                    result.pred_boxes.tensor.cpu(), 32)
                result.pred_masks = mask.unsqueeze(1).to(mask_pred[0].device)
            result.scores = scores_per_image
            result.pred_classes = labels_per_image
            results.append(result)
        return results
Example #12
    def inference(self, box_cls, box_pred, image_sizes):
        """
        Arguments:
            box_cls (Tensor): tensor of shape (batch_size, num_queries, K).
                The tensor predicts the classification probability for each query.
            box_pred (Tensor): tensors of shape (batch_size, num_queries, 4).
                The tensor predicts 4-vector (x,y,w,h) box
                regression values for every query.
            image_sizes (List[torch.Size]): the input image sizes

        Returns:
            results (List[Instances]): a list of #images elements.
        """
        assert len(box_cls) == len(image_sizes)
        results = []

        # For each box we assign the best class or the second best if the best one is `no_object`.
        scores, labels = F.softmax(box_cls, dim=-1)[:, :, :-1].max(-1)

        for scores_per_image, labels_per_image, box_pred_per_image, image_size in zip(
                scores, labels, box_pred, image_sizes):
            result = Instances(image_size)
            result.pred_boxes = Boxes(box_cxcywh_to_xyxy(box_pred_per_image))

            result.pred_boxes.scale(scale_x=image_size[1],
                                    scale_y=image_size[0])

            result.scores = scores_per_image
            result.pred_classes = labels_per_image
            results.append(result)
        return results
Example #13
    def forward(self, outputs, target_sizes):
        """ Perform the computation
        Parameters:
            outputs: raw outputs of the model
            target_sizes: tensor of dimension [batch_size x 2] containing the size of each images of the batch
                          For evaluation, this must be the original image size (before any data augmentation)
                          For visualization, this should be the image size after data augment, but before padding
        """
        out_logits, out_bbox = outputs['pred_logits'], outputs['pred_boxes']

        assert len(out_logits) == len(target_sizes)
        assert target_sizes.shape[1] == 2

        prob = F.softmax(out_logits, -1)
        scores, labels = prob[..., :-1].max(-1)

        # convert to [x0, y0, x1, y1] format
        boxes = box_ops.box_cxcywh_to_xyxy(out_bbox)
        # and from relative [0, 1] to absolute [0, height] coordinates
        img_h, img_w = target_sizes.unbind(1)
        scale_fct = torch.stack([img_w, img_h, img_w, img_h], dim=1)
        boxes = boxes * scale_fct[:, None, :]

        results = [{
            'scores': s,
            'labels': l,
            'boxes': b
        } for s, l, b in zip(scores, labels, boxes)]

        return results
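
To make the rescaling step concrete, this is what it does to a single normalized (cx, cy, w, h) box on a hypothetical 640x480 image:

import torch

def box_cxcywh_to_xyxy(x):  # same conversion as util.box_ops, inlined for the example
    cx, cy, w, h = x.unbind(-1)
    return torch.stack([cx - 0.5 * w, cy - 0.5 * h, cx + 0.5 * w, cy + 0.5 * h], dim=-1)

box = torch.tensor([[0.5, 0.5, 0.2, 0.4]])          # normalized (cx, cy, w, h)
xyxy = box_cxcywh_to_xyxy(box)                      # [[0.4, 0.3, 0.6, 0.7]]
scale_fct = torch.tensor([640., 480., 640., 480.])  # (img_w, img_h, img_w, img_h)
print(xyxy * scale_fct)                             # approx. [[256., 144., 384., 336.]]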
Example #14
def add_mask(img_list, batched_boxes, batched_masks):
    new_img_list = []
    for im, boxes, masks in zip(img_list, batched_boxes, batched_masks):
        img_h, img_w = im.shape[-2:]
        boxes = box_cxcywh_to_xyxy(boxes)
        multiplier = torch.tensor([img_w, img_h, img_w, img_h],
                                  dtype=torch.float32).cuda()
        boxes = boxes * multiplier
        boxes = boxes.int().clamp(min=0)
        for i, (box, mask) in enumerate(zip(boxes, masks)):
            box[3] = min(img_h, box[3])
            box[2] = min(img_w, box[2])
            dh = box[3] - box[1]
            dw = box[2] - box[0]
            conv_mask = F.interpolate(mask.view((1, 1) + mask.shape),
                                      size=(dh, dw),
                                      mode='bilinear')
            th_mask = conv_mask > 0.5
            if th_mask.sum() == 0:
                continue
            try:
                im[:, box[1]:box[3],
                   box[0]:box[2]][th_mask[0].repeat(len(im), 1, 1)] = -1000
            except BaseException:
                pdb.set_trace()
        new_img_list.append(im)
    return new_img_list
Example #15
    def forward(self, track_instances: Instances, target_size) -> Instances:
        """ Perform the computation
        Parameters:
            outputs: raw outputs of the model
            target_sizes: tensor of dimension [batch_size x 2] containing the size of each images of the batch
                          For evaluation, this must be the original image size (before any data augmentation)
                          For visualization, this should be the image size after data augment, but before padding
        """
        out_logits = track_instances.pred_logits
        out_bbox = track_instances.pred_boxes

        prob = out_logits.sigmoid()
        # prob = out_logits[...,:1].sigmoid()
        scores, labels = prob.max(-1)

        # convert to [x0, y0, x1, y1] format
        boxes = box_ops.box_cxcywh_to_xyxy(out_bbox)
        # and from relative [0, 1] to absolute [0, height] coordinates
        img_h, img_w = target_size
        scale_fct = torch.Tensor([img_w, img_h, img_w, img_h]).to(boxes)
        boxes = boxes * scale_fct[None, :]

        track_instances.boxes = boxes
        track_instances.scores = scores
        track_instances.labels = labels
        track_instances.remove('pred_logits')
        track_instances.remove('pred_boxes')
        return track_instances
Example #16
File: misc.py  Project: MaratZakirov/detr
def showImage(img, target):
    from PIL import Image, ImageDraw, ImageFont
    from util.box_ops import box_xyxy_to_cxcywh, box_cxcywh_to_xyxy

    draw = ImageDraw.Draw(img)
    boxes = target['boxes']
    cl = target['labels']

    if 1:#boxes.max() <= 1:
        boxes = box_cxcywh_to_xyxy(boxes)

        print('Image:', (img.height, img.width), 'true:', target['size'], target['orig_size'])

        H, W = target['size']
        #W, H = img.width, img.height

        boxes[:, 0::2] *= W
        boxes[:, 1::2] *= H

    for i in range(len(boxes)):
        x1, y1, x2, y2 = boxes[i]
        draw.rectangle((x1, y1, x2, y2), outline=(0, 255, 0) if cl[i] >= 0 else (0, 0, 0), width=3)
        draw.text((x1, y1), str(cl[i].item()), (0, 255, 0) if cl[i] >= 0 else (0, 0, 0),
                  font=ImageFont.truetype("DejaVuSansMono.ttf", 30))

    img.show()
Example #17
    def loss_boxes(self, outputs, targets, indices, num_boxes):
        """Compute the losses related to the bounding boxes, the L1 regression loss and the GIoU loss
           targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4]
           The target boxes are expected in format (center_x, center_y, w, h), normalized by the image size.
        """
        assert 'human_pred_boxes' in outputs
        assert 'object_pred_boxes' in outputs

        idx = self._get_src_permutation_idx(indices)

        human_src_boxes = outputs['human_pred_boxes'][idx]
        human_target_boxes = torch.cat(
            [t['human_boxes'][i] for t, (_, i) in zip(targets, indices)],
            dim=0)
        object_src_boxes = outputs['object_pred_boxes'][idx]
        object_target_boxes = torch.cat(
            [t['object_boxes'][i] for t, (_, i) in zip(targets, indices)],
            dim=0)

        human_loss_bbox = F.l1_loss(human_src_boxes,
                                    human_target_boxes,
                                    reduction='none')
        object_loss_bbox = F.l1_loss(object_src_boxes,
                                     object_target_boxes,
                                     reduction='none')

        losses = dict()
        losses['human_loss_bbox'] = human_loss_bbox.sum() / num_boxes
        losses['object_loss_bbox'] = object_loss_bbox.sum() / num_boxes
        losses['loss_bbox'] = losses['human_loss_bbox'] + losses[
            'object_loss_bbox']

        human_loss_giou = 1 - torch.diag(
            box_ops.generalized_box_iou(
                box_ops.box_cxcywh_to_xyxy(human_src_boxes),
                box_ops.box_cxcywh_to_xyxy(human_target_boxes)))
        object_loss_giou = 1 - torch.diag(
            box_ops.generalized_box_iou(
                box_ops.box_cxcywh_to_xyxy(object_src_boxes),
                box_ops.box_cxcywh_to_xyxy(object_target_boxes)))
        losses['human_loss_giou'] = human_loss_giou.sum() / num_boxes
        losses['object_loss_giou'] = object_loss_giou.sum() / num_boxes

        losses['loss_giou'] = losses['human_loss_giou'] + losses[
            'object_loss_giou']
        return losses
Example #18
File: roi_heads.py  Project: L4zyy/detr
    def forward(self, features, boxes):
        b, q, _ = boxes.shape
        features = [features[f] for f in self.box_in_features]
        box_features = self.box_pooler(
            features, [Boxes(box_ops.box_cxcywh_to_xyxy(x)) for x in boxes])
        box_features = self.box_head(box_features)
        predictions = self.box_predictor(box_features)

        return predictions.view(b, q, self.num_classes + 1)
Example #19
File: matcher.py  Project: yankai317/qpic
    def forward(self, outputs, targets):
        bs, num_queries = outputs['pred_obj_logits'].shape[:2]

        out_obj_prob = outputs['pred_obj_logits'].flatten(0, 1).softmax(-1)
        out_verb_prob = outputs['pred_verb_logits'].flatten(0, 1).sigmoid()
        out_sub_bbox = outputs['pred_sub_boxes'].flatten(0, 1)
        out_obj_bbox = outputs['pred_obj_boxes'].flatten(0, 1)

        tgt_obj_labels = torch.cat([v['obj_labels'] for v in targets])
        tgt_verb_labels = torch.cat([v['verb_labels'] for v in targets])
        tgt_verb_labels_permute = tgt_verb_labels.permute(1, 0)
        tgt_sub_boxes = torch.cat([v['sub_boxes'] for v in targets])
        tgt_obj_boxes = torch.cat([v['obj_boxes'] for v in targets])

        cost_obj_class = -out_obj_prob[:, tgt_obj_labels]

        cost_verb_class = -(out_verb_prob.matmul(tgt_verb_labels_permute) / \
                            (tgt_verb_labels_permute.sum(dim=0, keepdim=True) + 1e-4) + \
                            (1 - out_verb_prob).matmul(1 - tgt_verb_labels_permute) / \
                            ((1 - tgt_verb_labels_permute).sum(dim=0, keepdim=True) + 1e-4)) / 2

        cost_sub_bbox = torch.cdist(out_sub_bbox, tgt_sub_boxes, p=1)
        cost_obj_bbox = torch.cdist(out_obj_bbox, tgt_obj_boxes, p=1) * (tgt_obj_boxes != 0).any(dim=1).unsqueeze(0)
        if cost_sub_bbox.shape[1] == 0:
            cost_bbox = cost_sub_bbox
        else:
            cost_bbox = torch.stack((cost_sub_bbox, cost_obj_bbox)).max(dim=0)[0]

        cost_sub_giou = -generalized_box_iou(box_cxcywh_to_xyxy(out_sub_bbox), box_cxcywh_to_xyxy(tgt_sub_boxes))
        cost_obj_giou = -generalized_box_iou(box_cxcywh_to_xyxy(out_obj_bbox), box_cxcywh_to_xyxy(tgt_obj_boxes)) + \
                        cost_sub_giou * (tgt_obj_boxes == 0).all(dim=1).unsqueeze(0)
        if cost_sub_giou.shape[1] == 0:
            cost_giou = cost_sub_giou
        else:
            cost_giou = torch.stack((cost_sub_giou, cost_obj_giou)).max(dim=0)[0]

        C = self.cost_obj_class * cost_obj_class + self.cost_verb_class * cost_verb_class + \
            self.cost_bbox * cost_bbox + self.cost_giou * cost_giou
        C = C.view(bs, num_queries, -1).cpu()

        sizes = [len(v['obj_labels']) for v in targets]
        indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))]
        return [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) for i, j in indices]
Example #20
File: hoi.py  Project: yankai317/qpic
    def forward(self, outputs, target_sizes):
        out_obj_logits, out_verb_logits, out_sub_boxes, out_obj_boxes = outputs['pred_obj_logits'], \
                                                                        outputs['pred_verb_logits'], \
                                                                        outputs['pred_sub_boxes'], \
                                                                        outputs['pred_obj_boxes']

        assert len(out_obj_logits) == len(target_sizes)
        assert target_sizes.shape[1] == 2

        obj_prob = F.softmax(out_obj_logits, -1)
        obj_scores, obj_labels = obj_prob[..., :-1].max(-1)

        verb_scores = out_verb_logits.sigmoid()

        img_h, img_w = target_sizes.unbind(1)
        scale_fct = torch.stack([img_w, img_h, img_w, img_h],
                                dim=1).to(verb_scores.device)
        sub_boxes = box_cxcywh_to_xyxy(out_sub_boxes)
        sub_boxes = sub_boxes * scale_fct[:, None, :]
        obj_boxes = box_cxcywh_to_xyxy(out_obj_boxes)
        obj_boxes = obj_boxes * scale_fct[:, None, :]

        results = []
        for os, ol, vs, sb, ob in zip(obj_scores, obj_labels, verb_scores,
                                      sub_boxes, obj_boxes):
            sl = torch.full_like(ol, self.subject_category_id)
            l = torch.cat((sl, ol))
            b = torch.cat((sb, ob))
            results.append({'labels': l.to('cpu'), 'boxes': b.to('cpu')})

            vs = vs * os.unsqueeze(1)

            ids = torch.arange(b.shape[0])

            results[-1].update({
                'verb_scores': vs.to('cpu'),
                'sub_ids': ids[:ids.shape[0] // 2],
                'obj_ids': ids[ids.shape[0] // 2:]
            })

        return results
Example #21
    def loss_boxes(self, outputs, targets, positive_map, indices, num_boxes):
        """Compute the losses related to the bounding boxes, the L1 regression loss and the GIoU loss
        targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4]
        The target boxes are expected in format (center_x, center_y, w, h), normalized by the image size.
        """
        assert "pred_boxes" in outputs
        idx = self._get_src_permutation_idx(indices)
        src_boxes = outputs["pred_boxes"][idx]
        target_boxes = torch.cat(
            [t["boxes"][i] for t, (_, i) in zip(targets, indices)], dim=0)

        loss_bbox = F.l1_loss(src_boxes, target_boxes, reduction="none")

        losses = {}
        losses["loss_bbox"] = loss_bbox.sum() / num_boxes

        loss_giou = 1 - torch.diag(
            box_ops.generalized_box_iou(
                box_ops.box_cxcywh_to_xyxy(src_boxes),
                box_ops.box_cxcywh_to_xyxy(target_boxes)))
        losses["loss_giou"] = loss_giou.sum() / num_boxes
        return losses
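
For context, num_boxes in these loss functions is normally the total number of target boxes in the batch (all-reduced across processes in distributed training), clamped to at least 1 so the sums above never divide by zero. A single-process sketch with a toy batch:

import torch

targets = [{"labels": torch.tensor([3, 7])}, {"labels": torch.tensor([1])}]  # toy batch
num_boxes = sum(len(t["labels"]) for t in targets)
num_boxes = torch.clamp(torch.as_tensor([num_boxes], dtype=torch.float), min=1).item()
print(num_boxes)  # 3.0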
Example #22
File: hoi.py  Project: yankai317/qpic
    def loss_sub_obj_boxes(self, outputs, targets, indices, num_interactions):
        assert 'pred_sub_boxes' in outputs and 'pred_obj_boxes' in outputs
        idx = self._get_src_permutation_idx(indices)
        src_sub_boxes = outputs['pred_sub_boxes'][idx]
        src_obj_boxes = outputs['pred_obj_boxes'][idx]
        target_sub_boxes = torch.cat(
            [t['sub_boxes'][i] for t, (_, i) in zip(targets, indices)], dim=0)
        target_obj_boxes = torch.cat(
            [t['obj_boxes'][i] for t, (_, i) in zip(targets, indices)], dim=0)

        exist_obj_boxes = (target_obj_boxes != 0).any(dim=1)

        losses = {}
        if src_sub_boxes.shape[0] == 0:
            losses['loss_sub_bbox'] = src_sub_boxes.sum()
            losses['loss_obj_bbox'] = src_obj_boxes.sum()
            losses['loss_sub_giou'] = src_sub_boxes.sum()
            losses['loss_obj_giou'] = src_obj_boxes.sum()
        else:
            loss_sub_bbox = F.l1_loss(src_sub_boxes,
                                      target_sub_boxes,
                                      reduction='none')
            loss_obj_bbox = F.l1_loss(src_obj_boxes,
                                      target_obj_boxes,
                                      reduction='none')
            losses['loss_sub_bbox'] = loss_sub_bbox.sum() / num_interactions
            losses['loss_obj_bbox'] = (loss_obj_bbox *
                                       exist_obj_boxes.unsqueeze(1)).sum() / (
                                           exist_obj_boxes.sum() + 1e-4)
            loss_sub_giou = 1 - torch.diag(
                generalized_box_iou(box_cxcywh_to_xyxy(src_sub_boxes),
                                    box_cxcywh_to_xyxy(target_sub_boxes)))
            loss_obj_giou = 1 - torch.diag(
                generalized_box_iou(box_cxcywh_to_xyxy(src_obj_boxes),
                                    box_cxcywh_to_xyxy(target_obj_boxes)))
            losses['loss_sub_giou'] = loss_sub_giou.sum() / num_interactions
            losses['loss_obj_giou'] = (loss_obj_giou * exist_obj_boxes
                                       ).sum() / (exist_obj_boxes.sum() + 1e-4)
        return losses
Example #23
    def forward(self, outputs, targets):
        """ Performs the matching

        Params:
            outputs: This is a dict that contains at least these entries:
                 "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits
                 "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates

            targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing:
                 "labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth
                           objects in the target) containing the class labels
                 "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates

        Returns:
            A list of size batch_size, containing tuples of (index_i, index_j) where:
                - index_i is the indices of the selected predictions (in order)
                - index_j is the indices of the corresponding selected targets (in order)
            For each batch element, it holds:
                len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
        """
        bs, num_queries = outputs['pred_logits'].shape[:2]
        out_prob = outputs['pred_logits'].flatten(0, 1).softmax(-1)
        out_bbox = outputs['pred_boxes'].flatten(0, 1)
        tgt_ids = torch2paddle.concat([v['labels'] for v in targets])
        tgt_bbox = torch2paddle.concat([v['boxes'] for v in targets])
        cost_class = -out_prob[:, tgt_ids]
        cost_bbox = paddle.dist(out_bbox, tgt_bbox, p=1)
        cost_giou = -generalized_box_iou(box_cxcywh_to_xyxy(out_bbox),
                                         box_cxcywh_to_xyxy(tgt_bbox))
        C = (self.cost_bbox * cost_bbox + self.cost_class * cost_class +
             self.cost_giou * cost_giou)
        C = C.view(bs, num_queries, -1).cpu()
        sizes = [len(v['boxes']) for v in targets]
        indices = [
            linear_sum_assignment(c[i])
            for i, c in enumerate(C.split(sizes, -1))
        ]
        return [(paddle.to_tensor(i, dtype=torch.int64),
                 paddle.to_tensor(j, dtype=torch.int64)) for i, j in indices]
Example #24
    def init(self, img, bbox):
        """
        args:
            img(np.ndarray): BGR image
            bbox: (x, y, w, h) bbox (opencv format for rect)
        """

        bbox_xyxy  = [bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]]
        self.center_pos = torch.Tensor([bbox[0] + bbox[2]/2, bbox[1] + bbox[3]/2])
        self.size = torch.Tensor(bbox[2:])
        channel_avg = np.mean(img, axis=(0, 1))

        # get crop
        s_z, scale_z = siamfc_like_scale(bbox_xyxy)
        template_image, _ = crop_image(img, bbox_xyxy, padding = channel_avg)

        self.rect_template_image = template_image.copy()
        init_bbox = np.array(self.size) * scale_z
        exemplar_size = get_exemplar_size()
        x1 = np.round(exemplar_size/2 - init_bbox[0]/2).astype(np.uint8)
        y1 = np.round(exemplar_size/2 - init_bbox[1]/2).astype(np.uint8)
        x2 = np.round(exemplar_size/2 + init_bbox[0]/2).astype(np.uint8)
        y2 = np.round(exemplar_size/2 + init_bbox[1]/2).astype(np.uint8)
        cv2.rectangle(self.rect_template_image, (x1, y1), (x2, y2), (0,255,0), 3)

        # get mask
        self.init_template_mask = [0, 0, template_image.shape[0], template_image.shape[1]]
        if self.center_pos[0] < s_z/2:
            self.init_template_mask[0] = (s_z/2 - self.center_pos[0]) * scale_z
        if self.center_pos[1] < s_z/2:
            self.init_template_mask[1] = (s_z/2 - self.center_pos[1]) * scale_z
        if self.center_pos[0] + s_z/2 > img.shape[1]:
            self.init_template_mask[2] = self.init_template_mask[2] - (self.center_pos[0] + s_z/2 - img.shape[1]) * scale_z
        if self.center_pos[1] + s_z/2 > img.shape[0]:
            self.init_template_mask[3] = self.init_template_mask[3] - (self.center_pos[1] + s_z/2 - img.shape[0]) * scale_z


        # normalize and convert to torch.Tensor
        self.init_template = self.image_normalize(np.round(template_image).astype(np.uint8)).cuda()
        self.first_frame = True

        self.init_best_score = 0

        # for visualize
        debug_bbox = torch.round(box_cxcywh_to_xyxy(torch.cat([torch.tensor([63.5, 63.5]),  torch.Tensor([bbox[2], bbox[3]]) * scale_z]))).int().numpy()
        debug_image = cv2.rectangle(template_image,
                                    (debug_bbox[0], debug_bbox[1]),
                                    (debug_bbox[2], debug_bbox[3]),(0,255,0),3)

        return {'template_image': debug_image}
Example #25
    def loss_boxes(self,
                   outputs,
                   targets,
                   indices,
                   num_boxes,
                   boxes,
                   visible,
                   rnn_weight=0.5):
        """Compute the losses related to the bounding boxes, the L1 regression loss and the GIoU loss
           targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4]
           The target boxes are expected in format (center_x, center_y, w, h), normalized by the image size.
        """
        assert 'pred_boxes' in outputs
        # pred_boxes needs to be associated with the final target; this is the largest distance between frames for the Transformer.
        idx = self._get_src_permutation_idx(indices)
        src_boxes = outputs['pred_boxes'][idx]
        target_boxes = torch.cat(
            [t['boxes'][i] for t, (_, i) in zip(targets, indices)], dim=0)
        loss_bbox = F.l1_loss(src_boxes, target_boxes, reduction='none')

        losses = {}
        losses['loss_bbox'] = loss_bbox.sum() / num_boxes

        losses['loss_giou_circuit'] = torch.tensor(0)
        losses['loss_iou_circuit'] = torch.tensor(0)

        giou, iou = box_ops.generalized_box_iou(
            box_ops.box_cxcywh_to_xyxy(src_boxes),
            box_ops.box_cxcywh_to_xyxy(target_boxes))
        giou = torch.diag(giou)
        iou = torch.diag(iou)
        loss_giou = 1 - giou
        losses['loss_giou'] = loss_giou.sum() / num_boxes
        losses['iou'] = iou.sum() / num_boxes
        return losses
Example #26
    def loss_boxes(self, outputs, targets, indices, num_boxes):
        """Compute the losses related to the bounding boxes, the L1 regression loss and the GIoU loss
           targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4]
           The target boxes are expected in format (center_x, center_y, w, h), normalized by the image size.
        """
        assert 'pred_boxes' in outputs
        # (batch indices, query indices)
        # both have shape (num_matched_queries1+num_matched_queries2+...)
        idx = self._get_src_permutation_idx(indices)
        # outputs['pred_boxes'] has shape (b, num_queries=100, 4)
        # src_boxes has shape (num_matched_queries1+num_matched_queries2+..., 4)
        src_boxes = outputs['pred_boxes'][idx]
        # (num_matched_objs1+num_matched_objs2+..., 4)
        # num_matched_queries1+num_matched_queries2+... equals num_matched_objs1+num_matched_objs2+...,
        # as explained in the comments on the matcher's return value in the forward part.
        target_boxes = torch.cat(
            [t['boxes'][i] for t, (_, i) in zip(targets, indices)], dim=0)

        # Below is the loss computation. Note the reduction argument: if not set explicitly, PyTorch defaults to 'mean', i.e. the mean over all elements involved in the error computation is returned.
        loss_bbox = F.l1_loss(src_boxes, target_boxes, reduction='none')

        losses = {}
        # num_boxes is the number of target objects across the batch of images
        losses['loss_bbox'] = loss_bbox.sum() / num_boxes

        # generalized_box_iou returns the GIoU of every prediction against every GT, so taking the
        # diagonal picks out the GIoU of each mutually matched prediction-GT pair. torch.diag() is
        # used because generalized_box_iou() returns all N x M GIoU values for N predictions and M
        # GTs; the matched predictions and GTs have already been permuted so that the i-th prediction
        # corresponds to the i-th GT, hence we take exactly those matched entries to compute the loss.
        loss_giou = 1 - torch.diag(
            box_ops.generalized_box_iou(
                box_ops.box_cxcywh_to_xyxy(src_boxes),
                box_ops.box_cxcywh_to_xyxy(target_boxes)))
        losses['loss_giou'] = loss_giou.sum() / num_boxes
        return losses
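
A small numeric illustration of the torch.diag step described above, with made-up GIoU values and predictions/GTs already permuted so that pair i matches pair i:

import torch

giou = torch.tensor([[0.9, 0.1, 0.0],
                     [0.2, 0.8, 0.1],
                     [0.0, 0.3, 0.7]])  # all predictions x all GTs
loss_giou = 1 - torch.diag(giou)        # keep only the matched (i, i) pairs
print(loss_giou)                        # tensor([0.1000, 0.2000, 0.3000])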
Example #27
    def forward(self, outputs, target_sizes, state='train'):
        """ Perform the computation
        Parameters:
            outputs: raw outputs of the model
            target_sizes: tensor of dimension [batch_size x 2] containing the size of each images of the batch
                          For evaluation, this must be the original image size (before any data augmentation)
                          For visualization, this should be the image size after data augment, but before padding
        """
        out_logits, out_bbox = outputs['pred_logits'], outputs['pred_boxes']

        # assert len(out_logits) == len(target_sizes)
        # assert target_sizes.shape[1] == 2

        prob = out_logits.sigmoid()
        topk_values, topk_indexes = torch.topk(prob.view(
            out_logits.shape[0], -1),
                                               100,
                                               dim=1)
        scores = topk_values
        topk_boxes = topk_indexes // out_logits.shape[2]
        labels = topk_indexes % out_logits.shape[2]
        boxes = box_ops.box_cxcywh_to_xyxy(out_bbox)
        boxes = torch.gather(boxes, 1,
                             topk_boxes.unsqueeze(-1).repeat(1, 1, 4))

        # and from relative [0, 1] to absolute [0, height] coordinates
        # img_h, img_w = target_sizes.unbind(1)
        img_h, img_w = target_sizes[0], target_sizes[1]
        img_h = torch.tensor(img_h, device='cuda')
        img_w = torch.tensor(img_w, device='cuda')
        # scale_fct = torch.stack()
        if state == 'train':
            scale_fct = torch.stack([img_w, img_h, img_w, img_h],
                                    dim=1)  # used for training
            boxes = boxes * scale_fct[:, None, :]  # used for training
        else:
            scale_fct = torch.stack([img_w, img_h, img_w, img_h],
                                    dim=0)  # used for validation
            boxes = boxes * scale_fct  # used for validation

        results = [{
            'scores': s,
            'labels': l,
            'boxes': b,
            'topk_index': i
        } for s, l, b, i in zip(scores, labels, boxes, topk_boxes)]

        return results
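
The // and % decoding of topk_indexes above recovers, for each selected score, which query it came from and which class it is. A minimal sketch with a hypothetical layout of 3 queries x 5 classes:

import torch

num_classes = 5
prob = torch.rand(1, 3, num_classes)                 # (batch=1, queries=3, classes=5)
topk_values, topk_indexes = torch.topk(prob.view(1, -1), 4, dim=1)
topk_boxes = topk_indexes // num_classes             # which query each score came from
labels = topk_indexes % num_classes                  # which class within that query
print(topk_boxes, labels)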
Example #28
    def eval(self, postprocessors, thresh=0.1):
        self._state.net.eval()
        associate = pocket.utils.BoxAssociation(min_iou=0.5)
        meter = pocket.utils.DetectionAPMeter(80, algorithm='INT', nproc=10)
        num_gt = torch.zeros(80)
        if self._train_loader.batch_size != 1:
            raise ValueError(
                f"The batch size shoud be 1, not {self._train_loader.batch_size}"
            )
        for image, target in tqdm(self._train_loader):
            image = pocket.ops.relocate_to_cuda(image)
            output = self._state.net(image)
            output = pocket.ops.relocate_to_cpu(output)
            scores, labels, boxes = postprocessors(
                output, target[0]['size'].unsqueeze(0))[0].values()
            keep = torch.nonzero(scores >= thresh).squeeze(1)
            scores = scores[keep]
            labels = labels[keep]
            boxes = boxes[keep]

            gt_boxes = target[0]['boxes']
            # Denormalise ground truth boxes
            gt_boxes = box_ops.box_cxcywh_to_xyxy(gt_boxes)
            h, w = target[0]['size']
            scale_fct = torch.stack([w, h, w, h])
            gt_boxes *= scale_fct
            gt_labels = target[0]['labels']

            for c in gt_labels:
                num_gt[c] += 1

            # Associate detections with ground truth
            binary_labels = torch.zeros(len(labels))
            unique_cls = labels.unique()
            for c in unique_cls:
                det_idx = torch.nonzero(labels == c).squeeze(1)
                gt_idx = torch.nonzero(gt_labels == c).squeeze(1)
                if len(gt_idx) == 0:
                    continue
                binary_labels[det_idx] = associate(
                    gt_boxes[gt_idx].view(-1, 4), boxes[det_idx].view(-1, 4),
                    scores[det_idx].view(-1))

            meter.append(scores, labels, binary_labels)

        meter.num_gt = num_gt.tolist()
        return meter.eval(), meter.max_rec
Example #29
    def forward(self, outputs, target_sizes):
        """ Perform the computation
        Parameters:
            outputs: raw outputs of the model
            target_sizes: tensor of dimension [batch_size x 2] containing the size of each images of the batch
                          For evaluation, this must be the original image size (before any data augmentation)
                          For visualization, this should be the image size after data augment, but before padding
        """
        out_logits, out_bbox = outputs['pred_logits'], outputs['pred_boxes']

        batchsize = outputs['batchsize']
        num_episode = outputs['num_episode']
        num_queries = outputs['num_queries']
        num_classes = outputs['num_classes']

        out_logits = out_logits.view(batchsize, num_episode * num_queries,
                                     num_classes)
        out_bbox = out_bbox.view(batchsize, num_episode * num_queries, 4)

        assert len(out_logits) == len(target_sizes)
        assert target_sizes.shape[1] == 2

        prob = out_logits.sigmoid()
        topk_values, topk_indexes = torch.topk(prob.view(
            out_logits.shape[0], -1),
                                               100,
                                               dim=1)
        scores = topk_values
        topk_boxes = topk_indexes // out_logits.shape[2]
        labels = topk_indexes % out_logits.shape[2]
        boxes = box_ops.box_cxcywh_to_xyxy(out_bbox)
        boxes = torch.gather(boxes, 1,
                             topk_boxes.unsqueeze(-1).repeat(1, 1, 4))

        # and from relative [0, 1] to absolute [0, height] coordinates
        img_h, img_w = target_sizes.unbind(1)
        scale_fct = torch.stack([img_w, img_h, img_w, img_h], dim=1)
        boxes = boxes * scale_fct[:, None, :]

        results = [{
            'scores': s,
            'labels': l,
            'boxes': b
        } for s, l, b in zip(scores, labels, boxes)]

        return results
Example #30
    def forward(self, outputs, target_sizes):
        """Perform the computation
        Parameters:
            outputs: raw outputs of the model
            target_sizes: tensor of dimension [batch_size x 2] containing the size of each images of the batch
                          For evaluation, this must be the original image size (before any data augmentation)
                          For visualization, this should be the image size after data augment, but before padding
        """
        out_logits, out_bbox = outputs["pred_logits"], outputs["pred_boxes"]

        assert len(out_logits) == len(target_sizes)
        assert target_sizes.shape[1] == 2

        prob = F.softmax(out_logits, -1)
        scores, labels = prob[..., :-1].max(-1)

        labels = torch.ones_like(labels)

        scores = 1 - prob[:, :, -1]

        # convert to [x0, y0, x1, y1] format
        boxes = box_ops.box_cxcywh_to_xyxy(out_bbox)
        # and from relative [0, 1] to absolute [0, height] coordinates
        img_h, img_w = target_sizes.unbind(1)
        scale_fct = torch.stack([img_w, img_h, img_w, img_h], dim=1)
        boxes = boxes * scale_fct[:, None, :]

        assert len(scores) == len(labels) == len(boxes)
        results = [{
            "scores": s,
            "labels": l,
            "boxes": b
        } for s, l, b in zip(scores, labels, boxes)]

        if "pred_isfinal" in outputs:
            is_final = outputs["pred_isfinal"].sigmoid()
            scores_refexp = scores * is_final.view_as(scores)
            assert len(results) == len(scores_refexp)
            for i in range(len(results)):
                results[i]["scores_refexp"] = scores_refexp[i]

        return results