Exemplo n.º 1
0
def fast_rcnn_inference_single_image(boxes, scores, image_shape, score_thresh,
                                     nms_thresh, topk_per_image):
    """
    Run single-image inference: pick each proposal's best class box, then NMS.

    For every proposal the highest-scoring class (ignoring the last score
    column) is selected together with the box regressed for that class. NMS is
    then applied once over all selected boxes, regardless of class.

    Args:
        boxes: tensor whose second dimension is ``4 * num_bbox_reg_classes``,
            i.e. one box per regression class for each proposal.
        scores: 2-D tensor of per-proposal class scores. The last column is
            dropped before taking the maximum (presumably the background
            class -- confirm against the caller).
        image_shape: forwarded to ``Boxes.clip`` and ``Instances``.
        score_thresh: accepted for interface compatibility; this
            implementation does not filter by score.
        nms_thresh: threshold forwarded to ``nms``.
        topk_per_image: if >= 0, keep at most this many detections after NMS.

    Returns:
        tuple ``(result, keep)`` where ``result`` is an ``Instances`` carrying
        ``pred_boxes``, ``scores`` and ``pred_classes`` for the kept
        detections, and ``keep`` holds the indices of those detections among
        the input proposals.
    """
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Select max scores
    max_scores, max_classes = scores.max(1)  # R x C --> R
    num_objs = boxes.size(0)
    boxes = boxes.view(-1, 4)
    # Build the row offsets on the same device as the class indices instead of
    # unconditionally calling `.cuda()`, so the function also works on CPU and
    # on non-default GPUs.
    # NOTE(review): the flat index assumes one regressed box per scored class
    # (class-specific regression) -- confirm for class-agnostic heads.
    row_ids = torch.arange(num_objs, device=max_classes.device)
    idxs = row_ids * num_bbox_reg_classes + max_classes
    max_boxes = boxes[idxs]  # Select max boxes according to the max scores.

    # Apply NMS
    keep = nms(max_boxes, max_scores, nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes, scores = max_boxes[keep], max_scores[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = max_classes[keep]

    return result, keep
Exemplo n.º 2
0
def fast_rcnn_inference_single_image(
        boxes, scores, image_shape, nms_thresh, topk_per_image
):
    """
    Single-image inference that returns full class-score distributions.

    For every proposal the highest-scoring class (ignoring the last score
    column) is selected together with the box regressed for that class, and
    non-maximum suppression (NMS) is applied once over all selected boxes.
    No score thresholding is performed.

    Args:
        boxes: tensor whose second dimension is ``4 * num_bbox_reg_classes``,
            i.e. one box per regression class for each proposal.
        scores: 2-D tensor of per-proposal class scores, including a last
            column that is excluded from the arg-max and zeroed in the output
            (treated as the background probability).
        image_shape: forwarded to ``Boxes.clip`` and ``Instances``.
        nms_thresh: threshold forwarded to ``nms``.
        topk_per_image: if >= 0, keep at most this many detections after NMS.

    Returns:
        tuple ``(result, keep)`` where ``result`` is an ``Instances`` with
        ``pred_boxes`` and ``scores`` (the complete score row of every kept
        proposal, last column set to 0), and ``keep`` holds the indices of the
        kept proposals. ``pred_classes`` is not set by this variant.
    """
    # Keep a handle on the full score matrix; the rows are copied when they
    # are indexed with `keep` below, so the caller's tensor is never modified.
    all_scores = scores
    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Select max scores
    max_scores, max_classes = scores.max(1)  # R x C --> R
    num_objs = boxes.size(0)
    boxes = boxes.view(-1, 4)
    # Create the row offsets on the device of the class indices. Checking
    # `torch.cuda.is_available()` is not enough: CUDA may be available while
    # the inputs still live on the CPU (or on a non-default GPU).
    # NOTE(review): the flat index assumes one regressed box per scored class
    # (class-specific regression) -- confirm for class-agnostic heads.
    row_ids = torch.arange(num_objs, device=max_classes.device)
    idxs = row_ids * num_bbox_reg_classes + max_classes
    max_boxes = boxes[idxs]  # Select max boxes according to the max scores.

    # Apply NMS
    keep = nms(max_boxes, max_scores, nms_thresh)
    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
    boxes = max_boxes[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    # Indexing with an index tensor returns a new tensor, so the in-place
    # write below only touches the returned distribution.
    class_distr_scores = all_scores[keep]
    # we set the background probability to 0
    class_distr_scores[:, -1] = 0.0
    result.scores = class_distr_scores

    return result, keep
Exemplo n.º 3
0
def fast_rcnn_inference_single_image(boxes,
                                     scores,
                                     image_shape,
                                     score_thresh,
                                     nms_thresh,
                                     topk_per_image,
                                     device,
                                     preferred_labels=None,
                                     tokens=None,
                                     tokenizer=None):
    """
    Single-image inference with token-aware re-ordering of the detections.

    After selecting each proposal's best class box and running NMS, the kept
    detections are re-ordered as follows:

    1. detections whose label word maps to a token id contained in ``tokens``;
    2. detections of classes not seen before, most recently encountered first;
    3. repeated detections of classes that were already seen.

    The list is then optionally truncated to ``topk_per_image`` and shuffled.

    Args:
        boxes: tensor whose second dimension is ``4 * num_bbox_reg_classes``,
            i.e. one box per regression class for each proposal.
        scores: 2-D tensor of per-proposal class scores. The last column is
            dropped before taking the maximum (presumably the background
            class -- confirm against the caller).
        image_shape: forwarded to ``Boxes.clip`` and ``Instances``.
        score_thresh: accepted for interface compatibility; this
            implementation does not filter by score.
        nms_thresh: threshold forwarded to ``nms``.
        topk_per_image: if >= 0, keep at most this many detections and
            randomly permute them.
        device: device on which the final ``keep`` index tensor is created.
        preferred_labels: forwarded to ``get_image_labels2``; defaults to an
            empty list.
        tokens: container of token ids regarded as relevant; defaults to an
            empty list.
        tokenizer: object exposing a ``vocab`` mapping that contains the key
            ``"[UNK]"``. It is required whenever at least one detection
            survives NMS.

    Returns:
        tuple ``(result, keep)`` where ``result`` is an ``Instances`` carrying
        ``pred_boxes``, ``scores`` and ``pred_classes`` for the selected
        detections, and ``keep`` holds their indices in the final order.
    """
    # Avoid shared mutable default arguments.
    preferred_labels = [] if preferred_labels is None else preferred_labels
    tokens = [] if tokens is None else tokens

    scores = scores[:, :-1]
    num_bbox_reg_classes = boxes.shape[1] // 4
    # Convert to Boxes to use the `clip` function ...
    boxes = Boxes(boxes.reshape(-1, 4))
    boxes.clip(image_shape)
    boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)  # R x C x 4

    # Select max scores
    max_scores, max_classes = scores.max(1)  # R x C --> R

    num_objs = boxes.size(0)
    boxes = boxes.view(-1, 4)
    # Build the row offsets next to the class indices rather than through
    # `.cuda(device)`, which fails for CPU devices.
    # NOTE(review): the flat index assumes one regressed box per scored class
    # (class-specific regression) -- confirm for class-agnostic heads.
    row_ids = torch.arange(num_objs, device=max_classes.device)
    idxs = row_ids * num_bbox_reg_classes + max_classes
    max_boxes = boxes[idxs]  # Select max boxes according to the max scores.

    # Apply NMS
    keep = nms(max_boxes, max_scores, nms_thresh)
    # Resolve label words for the kept detections.
    # NOTE(review): each entry is unpacked as (word, class, index); the index
    # is presumably taken from `keep` -- confirm against `get_image_labels2`.
    words = get_image_labels2(preferred_labels, max_classes[keep].tolist(),
                              keep.tolist())
    relevant = []
    others = []
    seen_classes = []

    for word, c, i in words:
        tok = tokenizer.vocab.get(word, tokenizer.vocab["[UNK]"])
        if tok in tokens:
            # Detections matching a relevant token always come first.
            relevant.append(i)
        elif c in seen_classes:
            # Repeated predictions of an already seen class go last.
            others.append(i)
        else:
            # A new class goes to the front of the non-relevant detections.
            # `list.insert` takes (position, value); the arguments used to be
            # swapped, which inserted the literal 0 at position `i`.
            seen_classes.append(c)
            others.insert(0, i)

    # An explicit integer dtype keeps the tensor usable as an index even when
    # no detection survived (an empty list would otherwise become float32).
    keep = torch.tensor(relevant + others, dtype=torch.long, device=device)

    if topk_per_image >= 0:
        keep = keep[:topk_per_image]
        # Shuffle the retained detections.
        keep = keep[torch.randperm(keep.size()[0])]
    boxes, scores = max_boxes[keep], max_scores[keep]

    result = Instances(image_shape)
    result.pred_boxes = Boxes(boxes)
    result.scores = scores
    result.pred_classes = max_classes[keep]

    return result, keep