Пример #1
0
def predict_bounding_boxes(net,
                           image,
                           min_c,
                           overlap_thres,
                           topk,
                           ctx=mx.gpu()):
    '''
    Run the SSD network on a single image and return the predicted
    bounding boxes after non-maximum suppression.

    Parameters
    ----------
    net: SSD
    The trained SSD network. Calling it must return the tuple
    (default_anchors, class_predictions, box_predictions).

    image: np.array
    A grayscale image of the handwriting passages (2-D, a channel axis is
    added here).

    min_c: float
    Forwarded to box_nms as valid_thresh (minimum score of a box to be kept).

    overlap_thres: float
    Forwarded to box_nms as overlap_thresh.

    topk: int
    Forwarded to box_nms as topk.

    ctx: mx.Context
    Context the image is copied to before the forward pass.

    Returns
    -------
    predicted_bb: [(x, y, w, h)]
    The predicted bounding boxes, one row per box that survived suppression.
    '''
    # Add a trailing channel axis, resize the short side to 350 pixels, then
    # move channels first and divide by 255 (assumes 8-bit pixel values).
    image = mx.nd.array(image).expand_dims(axis=2)
    image = mx.image.resize_short(image, 350)
    image = image.transpose([2, 0, 1]) / 255.

    # Move to the target device and add the batch axis.
    image = image.as_in_context(ctx)
    image = image.expand_dims(0)

    # NOTE: training targets are not needed for inference, so no dummy label
    # tensor is built and net.training_targets is not called here.
    default_anchors, class_predictions, box_predictions = net(image)

    cls_probs = mx.nd.SoftmaxActivation(
        mx.nd.transpose(class_predictions, (0, 2, 1)),
        mode='channel')

    predicted_bb = MultiBoxDetection(
        cls_probs, box_predictions, default_anchors,
        force_suppress=True,
        clip=False)
    predicted_bb = box_nms(predicted_bb,
                           overlap_thresh=overlap_thres,
                           valid_thresh=min_c,
                           topk=topk)
    predicted_bb = predicted_bb.asnumpy()

    # Keep only rows of the single batch element whose first column is not
    # -1 (the marker used for suppressed / invalid detections).
    predicted_bb = predicted_bb[0, predicted_bb[0, :, 0] != -1]
    # Drop the first two columns and keep the four box coordinates.
    predicted_bb = predicted_bb[:, 2:]
    # Convert (x1, y1, x2, y2) corners to (x, y, w, h).
    predicted_bb[:, 2] = predicted_bb[:, 2] - predicted_bb[:, 0]
    predicted_bb[:, 3] = predicted_bb[:, 3] - predicted_bb[:, 1]

    return predicted_bb
Пример #2
0
def predict_bounding_boxes(net, image, bb):
    '''
    Given the outputs of the dataset (image and bounding box) and the network,
    return the predicted bounding boxes together with the labeled ones.

    Relies on the module-level names ctx, overlap_thres, min_c, topk and
    transform.

    Parameters
    ----------
    net: SSD
    The trained SSD network. Calling it must return the tuple
    (default_anchors, class_predictions, box_predictions).

    image: np.array
    A grayscale image of the handwriting passages.

    bb: [(x1, y1, x2, y2)]
    The labeled bounding boxes of the image, as expected by transform.

    Returns
    -------
    predicted_bb: [(x, y, w, h)]
    The predicted bounding boxes that survived non-maximum suppression.

    labeled_bb: [(x, y, w, h)]
    The actual (labeled) bounding boxes.
    '''
    image, bb = transform(image, bb)

    # Move both tensors to the first configured device and add a batch axis.
    image = image.as_in_context(ctx[0])
    image = image.expand_dims(axis=0)

    bb = bb.as_in_context(ctx[0])
    bb = bb.expand_dims(axis=0)

    # NOTE: training targets are not needed to produce predictions, so
    # net.training_targets is not called here.
    default_anchors, class_predictions, box_predictions = net(image)
    cls_probs = nd.SoftmaxActivation(
        nd.transpose(class_predictions, (0, 2, 1)),
        mode='channel')

    predicted_bb = MultiBoxDetection(
        cls_probs, box_predictions, default_anchors,
        force_suppress=True,
        clip=False)
    predicted_bb = box_nms(predicted_bb,
                           overlap_thresh=overlap_thres,
                           valid_thresh=min_c,
                           topk=topk)
    predicted_bb = predicted_bb.asnumpy()

    # Keep only rows of the single batch element whose first column is not
    # -1 (the marker used for suppressed / invalid detections).
    predicted_bb = predicted_bb[0, predicted_bb[0, :, 0] != -1]
    # Drop the first two columns and keep the four box coordinates.
    predicted_bb = predicted_bb[:, 2:]
    # Convert (x1, y1, x2, y2) corners to (x, y, w, h).
    predicted_bb[:, 2] = predicted_bb[:, 2] - predicted_bb[:, 0]
    predicted_bb[:, 3] = predicted_bb[:, 3] - predicted_bb[:, 1]

    # Drop the first label column (presumably the class id - confirm against
    # transform) and convert the labeled corners to (x, y, w, h) as well.
    labeled_bb = bb[:, :, 1:].asnumpy()
    labeled_bb[:, :, 2] = labeled_bb[:, :, 2] - labeled_bb[:, :, 0]
    labeled_bb[:, :, 3] = labeled_bb[:, :, 3] - labeled_bb[:, :, 1]
    labeled_bb = labeled_bb[0]
    return predicted_bb, labeled_bb
Пример #3
0
def generate_output_image(box_predictions, default_anchors, cls_probs,
                          box_target, box_mask, cls_target, x, y):
    '''
    Draw the predicted and the labeled bounding boxes on a batch of images.

    Relies on the module-level names overlap_thres, min_c, topk and
    draw_boxes_on_image.

    Parameters
    ----------
    box_predictions: nd.array
        Bounding box predictions relative to the anchor boxes, output of the network

    default_anchors: nd.array
        Anchors used, output of the network

    cls_probs: nd.array
        Output of nd.SoftmaxActivation(nd.transpose(class_predictions, (0, 2, 1)), mode='channel')
        where class_predictions is the output of the network.

    box_target: nd.array
        First output of network.training_targets(default_anchors, class_predictions, y).
        Not used by this function.

    box_mask: nd.array
        Second output of network.training_targets(default_anchors, class_predictions, y).
        Not used by this function.

    cls_target: nd.array
        Third output of network.training_targets(default_anchors, class_predictions, y).
        Not used by this function.

    x: nd.array
       The input images

    y: nd.array
        The actual labels

    Returns
    -------
    output_image: np.array
        The images with the predicted and actual bounding boxes drawn on,
        with values limited to the range [0, 1]

    number_of_bbs: int
        The number of predicted bounding boxes, summed over the batch
    '''
    detections = MultiBoxDetection(cls_probs, box_predictions, default_anchors,
                                   force_suppress=True,
                                   clip=False)
    detections = box_nms(detections,
                         overlap_thresh=overlap_thres,
                         valid_thresh=min_c,
                         topk=topk)
    detections = detections.asnumpy()

    predicted_bb = []
    for sample in detections:
        # Rows whose first column is -1 are discarded; the remaining rows
        # lose their first two columns, leaving the box coordinates.
        kept = sample[sample[:, 0] != -1][:, 2:]
        # Corners (x1, y1, x2, y2) -> (x, y, w, h), in place.
        kept[:, 2] -= kept[:, 0]
        kept[:, 3] -= kept[:, 1]
        predicted_bb.append(kept)
    number_of_bbs = sum(boxes.shape[0] for boxes in predicted_bb)

    # Same corner -> width/height conversion for the labels, after dropping
    # their first column.
    labels = y[:, :, 1:].asnumpy()
    labels[:, :, 2] -= labels[:, :, 0]
    labels[:, :, 3] -= labels[:, :, 1]

    output_image = draw_boxes_on_image(predicted_bb, labels, x.asnumpy())
    # Limit the drawn image to the [0, 1] range.
    output_image[output_image > 1] = 1
    output_image[output_image < 0] = 0

    return output_image, number_of_bbs