Example #1
def _expand_bbox_targets(bbox_target_data):
    """Bounding-box regression targets are stored in a compact form in the
    roidb.

    This function expands those targets into the 4-of-4*K representation used
    by the network (i.e. only one class has non-zero targets). The loss weights
    are similarly expanded.

    Returns:
        bbox_targets (ndarray): N x 4K blob of regression targets
        bbox_inside_weights (ndarray): N x 4K blob of loss weights
    """
    num_bbox_reg_classes = cfg.MODEL.NUM_CLASSES
    if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG:
        num_bbox_reg_classes = 2  # bg and fg

    clss = bbox_target_data[:, 0]
    bbox_targets = blob_utils.zeros((clss.size, 4 * num_bbox_reg_classes))
    bbox_inside_weights = blob_utils.zeros(bbox_targets.shape)
    inds = np.where(clss > 0)[0]
    for ind in inds:
        cls = int(clss[ind])
        start = 4 * cls
        end = start + 4
        bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]
        bbox_inside_weights[ind, start:end] = (1.0, 1.0, 1.0, 1.0)
    return bbox_targets, bbox_inside_weights
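A quick standalone check of the expansion above; a minimal sketch assuming blob_utils.zeros is a float32 np.zeros and cfg.MODEL.NUM_CLASSES is 3 (the demo helper name is illustrative, not part of the original module):

import numpy as np

def _expand_bbox_targets_demo(bbox_target_data, num_classes=3):
    # Same logic as above, with blob_utils.zeros replaced by np.zeros.
    clss = bbox_target_data[:, 0]
    bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
    bbox_inside_weights = np.zeros_like(bbox_targets)
    for ind in np.where(clss > 0)[0]:
        start = 4 * int(clss[ind])
        bbox_targets[ind, start:start + 4] = bbox_target_data[ind, 1:]
        bbox_inside_weights[ind, start:start + 4] = 1.0
    return bbox_targets, bbox_inside_weights

# One class-2 roi and one bg roi: only columns 8:12 of row 0 become non-zero.
data = np.array([[2, 0.1, -0.2, 0.3, 0.0],
                 [0, 0.0, 0.0, 0.0, 0.0]], dtype=np.float32)
targets, weights = _expand_bbox_targets_demo(data)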
Example #2
def add_refine_keypoints_blobs_gaussian(blobs, roidb, fg_rois_per_image,
                                        fg_inds, im_scale, batch_idx, data):
    """Add Mask R-CNN keypoint specific blobs to the given blobs dictionary."""
    # Note: gt_inds must match how they're computed in
    # datasets.json_dataset._merge_proposal_boxes_into_roidb
    gt_inds = np.where(roidb['gt_classes'] > 0)[0]
    gt_keypoints = roidb['gt_keypoints']
    # Load the kp_fg_inds generated by keypoint_rcnn.py, so that
    # keypoint_rois and refined_keypoint_rois use the same sampled subset;
    # a mismatch between the two causes serious problems during training.
    kp_fg_inds = blobs['keypoint_fg_inds']
    if kp_fg_inds.shape[0] > 0:
        sampled_fg_rois = roidb['boxes'][kp_fg_inds]
        box_to_gt_ind_map = roidb['box_to_gt_ind_map'][kp_fg_inds]

        # Let's expand the rois
        up_scale = cfg.REFINENET.UP_SCALE
        inp_h, inp_w = data.shape[2], data.shape[3]
        pad_img_h, pad_img_w = inp_h / im_scale, inp_w / im_scale

        pad_fg_rois = box_utils.expand_boxes(sampled_fg_rois, up_scale)
        pad_fg_rois = box_utils.clip_boxes_to_image(pad_fg_rois, pad_img_h,
                                                    pad_img_w)

        num_keypoints = gt_keypoints.shape[2]
        sampled_keypoints = -np.ones(
            (len(pad_fg_rois), gt_keypoints.shape[1], num_keypoints),
            dtype=gt_keypoints.dtype)
        for ii in range(len(pad_fg_rois)):
            ind = box_to_gt_ind_map[ii]
            if ind >= 0:
                sampled_keypoints[ii, :, :] = gt_keypoints[gt_inds[ind], :, :]
                assert np.sum(sampled_keypoints[ii, 2, :]) > 0

        heats, weights = keypoint_utils.keypoints_to_gaussian_heatmap_labels(
            sampled_keypoints, pad_fg_rois, M=cfg.REFINENET.KRCNN.HEATMAP_SIZE)

    else:  # If there are no fg keypoint rois (it does happen)
        # The network cannot handle empty blobs, so we must provide a heatmap
        # We simply take the first bg roi, give it an all-zero heatmap, and
        # set its weights to zero (ignore label).
        roi_inds = np.where(roidb['gt_classes'] == 0)[0]
        # pad_fg_rois is actually one random bg roi, but that's ok because ...
        pad_fg_rois = roidb['boxes'][roi_inds[0]].reshape((1, -1))
        # We give it an all-zero heatmap blob
        M = cfg.REFINENET.KRCNN.HEATMAP_SIZE
        heats = blob_utils.zeros((1, cfg.KRCNN.NUM_KEYPOINTS, M, M))
        # We set weights to 0 (ignore label)
        weights = blob_utils.zeros((1, cfg.KRCNN.NUM_KEYPOINTS, 1))

    pad_fg_rois *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((pad_fg_rois.shape[0], 1))
    pad_fg_rois = np.hstack((repeated_batch_idx, pad_fg_rois))

    blobs['refined_keypoint_rois'] = pad_fg_rois
    blobs['refined_keypoint_heatmaps'] = heats
    blobs['refined_keypoint_weights'] = weights
Example #3
def keypoints_to_heatmap_labels(keypoints, rois):
    """Generate location of heatmap
    Each roi and each keypoint -> xy location of keypoint
    For SoftmaxWithLoss across space
    rgirdhar: Don't modify for tubes, the modification was done in
    roi_data/keypoint_rcnn.py
    """
    # Maps keypoints from the half-open interval [x1, x2) on continuous image
    # coordinates to the closed interval [0, HEATMAP_SIZE - 1] on discrete image
    # coordinates. We use the continuous <-> discrete conversion from Heckbert
    # 1990 ("What is the coordinate of a pixel?"): d = floor(c) and c = d + 0.5,
    # where d is a discrete coordinate and c is a continuous coordinate.
    assert keypoints.shape[2] == cfg.KRCNN.NUM_KEYPOINTS

    shape = (len(rois), cfg.KRCNN.NUM_KEYPOINTS)
    heatmaps = blob_utils.zeros(shape)
    weights = blob_utils.zeros(shape)

    offset_x = rois[:, 0]
    offset_y = rois[:, 1]
    # +1 added by rgirdhar, to avoid division by 0
    scale_x = cfg.KRCNN.HEATMAP_SIZE / (rois[:, 2] - rois[:, 0] + 1)
    scale_y = cfg.KRCNN.HEATMAP_SIZE / (rois[:, 3] - rois[:, 1] + 1)

    for kp in range(keypoints.shape[2]):
        vis = keypoints[:, 2, kp] > 0
        x = keypoints[:, 0, kp].astype(np.float32)
        y = keypoints[:, 1, kp].astype(np.float32)
        # Since we use floor below, if a keypoint is exactly on the roi's right
        # or bottom boundary, we shift it in by eps (conceptually) to keep it in
        # the ground truth heatmap.
        x_boundary_inds = np.where(x == rois[:, 2])[0]
        y_boundary_inds = np.where(y == rois[:, 3])[0]
        x = (x - offset_x) * scale_x
        x = np.floor(x)
        if len(x_boundary_inds) > 0:
            x[x_boundary_inds] = cfg.KRCNN.HEATMAP_SIZE - 1

        y = (y - offset_y) * scale_y
        y = np.floor(y)
        if len(y_boundary_inds) > 0:
            y[y_boundary_inds] = cfg.KRCNN.HEATMAP_SIZE - 1

        valid_loc = np.logical_and(
            np.logical_and(x >= 0, y >= 0),
            np.logical_and(x < cfg.KRCNN.HEATMAP_SIZE,
                           y < cfg.KRCNN.HEATMAP_SIZE))

        valid = np.logical_and(valid_loc, vis)
        valid = valid.astype(np.int32)

        lin_ind = y * cfg.KRCNN.HEATMAP_SIZE + x
        heatmaps[:, kp] = lin_ind * valid
        weights[:, kp] = valid

    return heatmaps, weights
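A tiny worked instance of the mapping above, assuming cfg.KRCNN.HEATMAP_SIZE is 56: a visible keypoint is shifted into roi coordinates, scaled, floored, and flattened into the linear index used as the SoftmaxWithLoss label.

import numpy as np

HEATMAP_SIZE = 56
roi = np.array([10.0, 20.0, 110.0, 220.0])  # x1, y1, x2, y2
kp_x, kp_y = 60.0, 120.0                    # keypoint in image coordinates

scale_x = HEATMAP_SIZE / (roi[2] - roi[0] + 1)  # +1 guards against /0
scale_y = HEATMAP_SIZE / (roi[3] - roi[1] + 1)
hx = np.floor((kp_x - roi[0]) * scale_x)        # discrete heatmap column: 27
hy = np.floor((kp_y - roi[1]) * scale_y)        # discrete heatmap row: 27
lin_ind = hy * HEATMAP_SIZE + hx                # label: 27 * 56 + 27 = 1539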
Example #4
def keypoints_to_gaussian_heatmap_labels(keypoints, rois, M=56):
    """Encode keypoint location in the target heatmap for use in
    MSELoss
    """
    # Maps keypoints from the half-open interval [x1, x2) on continuous image
    # coordinates to the closed interval [0, HEATMAP_SIZE - 1] on discrete image
    # coordinates. We use the continuous <-> discrete conversion from Heckbert
    # 1990 ("What is the coordinate of a pixel?"): d = floor(c) and c = d + 0.5,
    # where d is a discrete coordinate and c is a continuous coordinate.
    assert keypoints.shape[2] == cfg.KRCNN.NUM_KEYPOINTS

    shape = (len(rois), cfg.KRCNN.NUM_KEYPOINTS, M, M)
    heatmaps = blob_utils.zeros(shape)
    weights = blob_utils.zeros((len(rois), cfg.KRCNN.NUM_KEYPOINTS))

    offset_x = rois[:, 0]
    offset_y = rois[:, 1]
    scale_x = M / (rois[:, 2] - rois[:, 0])
    scale_y = M / (rois[:, 3] - rois[:, 1])

    for kp in range(keypoints.shape[2]):
        vis = keypoints[:, 2, kp] > 0
        x = keypoints[:, 0, kp].astype(np.float32)
        y = keypoints[:, 1, kp].astype(np.float32)
        # Since we use floor below, if a keypoint is exactly on the roi's right
        # or bottom boundary, we shift it in by eps (conceptually) to keep it in
        # the ground truth heatmap.
        x_boundary_inds = np.where(x == rois[:, 2])[0]
        y_boundary_inds = np.where(y == rois[:, 3])[0]
        x = (x - offset_x) * scale_x
        x = np.floor(x)
        if len(x_boundary_inds) > 0:
            x[x_boundary_inds] = M - 1

        y = (y - offset_y) * scale_y
        y = np.floor(y)
        if len(y_boundary_inds) > 0:
            y[y_boundary_inds] = M - 1

        valid_loc = np.logical_and(
            np.logical_and(x >= 0, y >= 0),
            np.logical_and(
                x < M, y < M))

        valid = np.logical_and(valid_loc, vis)
        valid = valid.astype(np.int32)
        weights[:, kp] = valid

        for i in range(len(rois)):
            if valid[i] > 0:
                heatmaps[i, kp] = draw_gaussian_heatmap(
                    heatmaps[i, kp], (x[i], y[i]), sigma=1
                )


    return heatmaps, weights
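draw_gaussian_heatmap is not shown on this page; a minimal sketch of what such a helper plausibly does, stamping an unnormalized Gaussian centered on the floored keypoint location into the M x M map (an assumption, not the original implementation). Note also that, unlike the +1-guarded scales in Example #3, scale_x and scale_y here divide by the raw box width and height, so a degenerate roi would divide by zero.

import numpy as np

def draw_gaussian_heatmap(heatmap, center, sigma=1):
    # Stamp exp(-d^2 / (2 sigma^2)) around `center`, keeping the running
    # maximum so overlapping keypoints do not erase each other.
    M = heatmap.shape[0]
    x0, y0 = center
    ys, xs = np.mgrid[0:M, 0:M]
    g = np.exp(-((xs - x0) ** 2 + (ys - y0) ** 2) / (2.0 * sigma ** 2))
    return np.maximum(heatmap, g.astype(heatmap.dtype))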
Example #5
def _gen_blobs(entry, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary."""
    M = cfg.MRCNN.RESOLUTION

    selected_inds = np.where(entry['gt_classes'] > 0)[0]

    polys = [entry['segms'][i] for i in selected_inds]

    # Class labels and bounding boxes for the polys
    mask_class_labels = entry['gt_classes'][selected_inds]
    mask_rois = np.array(entry['boxes'][selected_inds], dtype='float32')

    # add mask polys
    masks = blob_utils.zeros((selected_inds.shape[0], M**2), int32=True)
    for i in range(len(polys)):
        # Rasterize the polygon mask to an M x M class labels image
        poly_gt = polys[i]
        mask_roi = mask_rois[i]
        mask_class_label = mask_class_labels[i]
        mask = segm_utils.polys_to_mask_wrt_box(poly_gt, mask_roi, M)
        mask = mask_class_label * np.array(mask > 0, dtype=np.int32)
        masks[i, :] = np.reshape(mask, M**2)

    blob_dict = {}
    blob_dict['masks_int32'] = masks

    return blob_dict
Example #6
def add_prn_blobs(blobs_out, blobs_in):
    """ Add PRN specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    num_cls = cfg.MODEL.NUM_CLASSES
    iou_thres = cfg.PRN.IOU_THRESHOLD

    fg_inds = np.where(blobs_in['labels_int32'] > 0)[0]
    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        fg_labels = blobs_in['labels_int32'][fg_inds]
        # if the mask IoU is below the threshold, set the label to 1, otherwise 0
        prn_labels = (blobs_in['mask_ious'] < iou_thres).astype(np.int32)
        # and set roi_needs_refine same as prn_labels
        roi_needs_refine = (blobs_in['mask_ious'] < iou_thres).astype(np.int32)
        # calculate refine ratio
        refine_ratio = np.sum(roi_needs_refine,
                              keepdims=True).astype(np.float32)
        refine_ratio /= fg_inds.shape[0]
        # sometimes prn_labels might be all zero, but we still need a
        # non-all-zero roi_needs_refine, so set the first entry to 1
        if np.sum(roi_needs_refine) == 0:
            roi_needs_refine[0] = 1

    else:  # If there are no fg masks (it does happen)
        # The network cannot handle empty blobs, so we must provide a mask
        # We simply take the first bg roi, give it an all -1's mask (ignore
        # label), and label it with class zero (bg).
        bg_inds = np.where(blobs_in['labels_int32'] == 0)[0]
        # We give it a -1's blob (ignore label)
        prn_labels = -blob_utils.ones((1, ), int32=True)
        # We label it with class = 0 (background)
        fg_labels = blob_utils.zeros((1, ))
        # and set roi_needs_refine to be 1
        roi_needs_refine = blob_utils.ones((1, ), int32=True)
        # set refine_ratio to be 0
        refine_ratio = blob_utils.zeros((1, ))

    if cfg.PRN.CLS_SPECIFIC_LABEL:
        prn_labels = _expand_to_class_specific_prn_targets(
            prn_labels, fg_labels)

    blobs_out['prn_labels_int32'] = prn_labels
    blobs_out['roi_needs_refine_int32'] = roi_needs_refine
    blobs_out['refine_ratio'] = refine_ratio
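_expand_to_class_specific_prn_targets is defined elsewhere; a minimal sketch under the assumption that it mirrors _expand_bbox_targets from Example #1, writing each roi's binary PRN label into the slot of its fg class and marking every other slot as ignore (the layout and the -1 ignore value are assumptions, not the original code):

import numpy as np

def _expand_to_class_specific_prn_targets_demo(prn_labels, fg_labels,
                                               num_classes):
    # One label slot per class; -1 everywhere except the roi's own class.
    expanded = -np.ones((prn_labels.shape[0], num_classes), dtype=np.int32)
    for i, cls in enumerate(fg_labels.astype(np.int32)):
        expanded[i, cls] = prn_labels[i]
    return expanded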
Example #7
def add_semantic_segms_blobs(blobs, roidb, im_scale, batch_idx, data):
    """ Add Semantic Segmentation Net specidfic blobs to the input blob
        dictionary. Draw all gt polygons to the label
    """
    num_cls = cfg.MODEL.NUM_CLASSES
    rescale_factor = cfg.SEMANTIC_NET.RESCALE_FACTOR
    polys_gt_inds = np.where((roidb['gt_classes'] > 0)
                             & (roidb['is_crowd'] == 0))[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]

    # Define size variables
    inp_h, inp_w = data.shape[2], data.shape[3]
    out_h, out_w = int(inp_h * rescale_factor), int(inp_w * rescale_factor)

    if polys_gt_inds.shape[0] > 0:
        # class label for the mask
        gt_class_labels = roidb['gt_classes'][polys_gt_inds]
        semantic_segms = blob_utils.zeros((num_cls, out_h, out_w), int32=True)
        # narrow scale and size
        scale = im_scale * rescale_factor
        im_h, im_w = roidb['height'], roidb['width']
        im_label_h, im_label_w = int(im_h * scale), int(im_w * scale)

        # Add each gt polygon to its class's semantic label map
        for i in range(polys_gt_inds.shape[0]):
            cls_label = gt_class_labels[i]
            poly_gt = polys_gt[i]
            # Rasterize the portion of the polygon mask within the given fg roi
            # to an im_label_h x im_label_w binary image
            mask = segm_utils.polys_to_mask_scaled(poly_gt, im_h, im_w, scale)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            semantic_segms[cls_label, 0:im_label_h, 0:im_label_w] = np.maximum(
                semantic_segms[cls_label, 0:im_label_h, 0:im_label_w],
                mask,
                dtype=np.int32)

        semantic_segms = np.reshape(semantic_segms,
                                    (1, num_cls * out_h * out_w))

    else:
        # The network cannot handle empty blobs, so we must provide a mask
        # We simply take the first bg roi, give it an all -1's mask (ignore
        # label), and label it with class zero (bg).

        # We give it a -1's blob (ignore label)
        semantic_segms = -blob_utils.ones(
            (1, num_cls * out_h * out_w), int32=True)

    blobs['semantic_segms_int32'] = semantic_segms
    blobs['img_rois'] = np.array([batch_idx, 0, 0, inp_w - 1, inp_h - 1],
                                 dtype=np.float32)[np.newaxis, :]
Example #8
    def forward(self, inputs, outputs):
        data = inputs[0].data
        keypoint_probs = inputs[1].data
        keypoint_rois = inputs[2].data

        # output indicator resolution
        M = self.resolution
        up_scale = self.up_scale
        num_rois = keypoint_rois.shape[0]
        num_keypoints = keypoint_probs.shape[1]

        # first expand the keypoint rois
        height, width = data.shape[2], data.shape[3]
        pad_rois = box_utils.expand_boxes(keypoint_rois[:, 1:5], up_scale)
        pad_rois = box_utils.clip_boxes_to_image(pad_rois, height, width)

        # get keypoint predictions and their probs
        # output shape is (#rois, 3, #keypoints) and 3 means (x, y, prob)
        pred_rois = keypoint_utils.probs_to_keypoints(keypoint_probs, keypoint_rois)
        
        # map keypoint positions to the pad_rois
        # output shape is (#rois, #keypoints), with locations flattened out
        locations_on_pad_rois, _ = keypoint_utils.keypoints_to_heatmap_labels(
            pred_rois, pad_rois, M
        )
        locations_on_pad_rois = locations_on_pad_rois.astype(np.int32)

        # and now generate keypoint indicators
        keypoint_indicators = blob_utils.zeros((num_rois, num_keypoints, M**2))
        for i in range(num_rois):
            locations = locations_on_pad_rois[i] # shape (#keypoints, )
            for k in range(num_keypoints):
                keypoint_indicators[i, k, locations[k]] = pred_rois[i, 2, k]

        # and reshape to 4 dimension
        keypoint_indicators = keypoint_indicators.reshape(
            (num_rois, num_keypoints, M, M)
        )

        outputs[0].reshape(keypoint_indicators.shape)
        outputs[0].data[...] = keypoint_indicators
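probs_to_keypoints is external to this operator. The comments above pin down its contract: heatmap probabilities of shape (#rois, #keypoints, H, W), rois formatted as (batch_idx, x1, y1, x2, y2), and an output of shape (#rois, 3, #keypoints) with rows (x, y, prob). A minimal sketch of that contract (the implementation details are an assumption):

import numpy as np

def probs_to_keypoints(keypoint_probs, keypoint_rois):
    num_rois, num_kps, H, W = keypoint_probs.shape
    preds = np.zeros((num_rois, 3, num_kps), dtype=np.float32)
    for i in range(num_rois):
        x1, y1, x2, y2 = keypoint_rois[i, 1:5]
        for k in range(num_kps):
            # Argmax over the heatmap, mapped back to image coordinates
            # with the d + 0.5 pixel-center convention.
            py, px = np.unravel_index(np.argmax(keypoint_probs[i, k]), (H, W))
            preds[i, 0, k] = x1 + (px + 0.5) / W * (x2 - x1)
            preds[i, 1, k] = y1 + (py + 0.5) / H * (y2 - y1)
            preds[i, 2, k] = keypoint_probs[i, k, py, px]
    return preds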
Example #9
def im_classify_bbox(model, im, box_proposals, timers=None):
    """Generate RetinaNet detections on a single image."""
    if timers is None:
        timers = defaultdict(Timer)

    timers['im_detect_bbox'].tic()
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    # Create the input rois: scale the proposals to the network input resolution

    sampled_rois = box_proposals * inputs['im_info'][0, 2]
    repeated_batch_idx = blob_utils.zeros((sampled_rois.shape[0], 1))
    sampled_rois = np.hstack((repeated_batch_idx, sampled_rois))
    inputs['rois'] = sampled_rois
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_ROIS:
        _add_multilevel_rois(inputs)

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)

    workspace.RunNet(model.net.Proto().name)
    if cfg.MODEL.TYPE == 'region_classification':
        cls_prob = core.ScopedName('cls_prob')
    elif cfg.MODEL.TYPE == 'region_memory':
        cls_prob = core.ScopedName('final/cls_prob')
    else:
        raise NotImplementedError
    cls_scores = workspace.FetchBlob(cls_prob)

    timers['im_detect_bbox'].toc()

    # Combine predictions across all levels and retain the top scoring by class
    timers['misc_bbox'].tic()
    timers['misc_bbox'].toc()

    return cls_scores
Example #10
def dp_annot_process(ann, heatmap_size, crop_res, center, scale, IsFlipped):
    bb_xywh = np.array(ann['bbox'])

    bbox_gt = [bb_xywh[0], bb_xywh[1], bb_xywh[0] + bb_xywh[2], bb_xywh[1] + bb_xywh[3]]
    # Cropped Upper left point
    crop_ul = np.array(transform([1, 1], center, scale, [crop_res] * 2, invert=1)) - 1
    # Cropped Bottom right point
    crop_br = np.array(
        transform([crop_res + 1] * 2, center, scale, [crop_res] * 2, invert=1)) - 1
    bbox_crop = np.concatenate([crop_ul, crop_br])

    dp_dict = {}
    M = heatmap_size

    # Create blobs for densepose supervision.
    ################################################## The mask
    All_labels = blob_utils.zeros(M ** 2, int32=True)
    All_Weights = blob_utils.zeros(M ** 2, int32=True)
    ################################################# The points
    X_points = blob_utils.zeros(196, int32=False)
    Y_points = blob_utils.zeros(196, int32=False)
    Ind_points = blob_utils.zeros(196, int32=True)
    I_points = blob_utils.zeros(196, int32=True)
    U_points = blob_utils.zeros(196, int32=False)
    V_points = blob_utils.zeros(196, int32=False)
    Uv_point_weights = blob_utils.zeros(196, int32=False)
    #################################################

    Ilabel = segm_utils.GetDensePoseMask(ann['dp_masks'])
    #
    GT_I = np.array(ann['dp_I'])
    GT_U = np.array(ann['dp_U'])
    GT_V = np.array(ann['dp_V'])
    GT_x = np.array(ann['dp_x'])
    GT_y = np.array(ann['dp_y'])
    GT_weights = np.ones(GT_I.shape).astype(np.float32)
    #
    ## Do the flipping of the densepose annotation !
    if IsFlipped:
        GT_I, GT_U, GT_V, GT_x, GT_y, Ilabel = DP.get_symmetric_densepose(GT_I, GT_U, GT_V, GT_x, GT_y,
                                                                          Ilabel)
    #
    roi_fg = bbox_crop
    roi_gt = bbox_gt
    #
    x1 = roi_fg[0]
    x2 = roi_fg[2]
    y1 = roi_fg[1]
    y2 = roi_fg[3]
    #
    x1_source = roi_gt[0]
    x2_source = roi_gt[2]
    y1_source = roi_gt[1]
    y2_source = roi_gt[3]
    #
    x_targets = (np.arange(x1, x2, (x2 - x1) / float(M)) - x1_source) * (255. / (x2_source - x1_source))
    y_targets = (np.arange(y1, y2, (y2 - y1) / float(M)) - y1_source) * (255. / (y2_source - y1_source))
    #
    x_targets = x_targets[0:M]  ## Strangely sometimes it can be M+1, so make sure size is OK!
    y_targets = y_targets[0:M]
    #
    [X_targets, Y_targets] = np.meshgrid(x_targets, y_targets)
    New_Index = cv2.remap(Ilabel, X_targets.astype(np.float32), Y_targets.astype(np.float32),
                          interpolation=cv2.INTER_NEAREST, borderMode=cv2.BORDER_CONSTANT, borderValue=(0))
    # #
    All_L = np.zeros(New_Index.shape)
    All_W = np.ones(New_Index.shape)
    #
    All_L = New_Index
    #
    gt_length_x = x2_source - x1_source
    gt_length_y = y2_source - y1_source
    #
    GT_y = ((GT_y / 255. * gt_length_y) + y1_source - y1) * (float(M) / (y2 - y1))
    GT_x = ((GT_x / 255. * gt_length_x) + x1_source - x1) * (float(M) / (x2 - x1))
    #
    GT_I[GT_y < 0] = 0
    GT_I[GT_y > (M - 1)] = 0
    GT_I[GT_x < 0] = 0
    GT_I[GT_x > (M - 1)] = 0
    #
    points_inside = GT_I > 0
    GT_U = GT_U[points_inside]
    GT_V = GT_V[points_inside]
    GT_x = GT_x[points_inside]
    GT_y = GT_y[points_inside]
    GT_weights = GT_weights[points_inside]
    GT_I = GT_I[points_inside]
    #
    X_points[0:len(GT_x)] = GT_x
    Y_points[0:len(GT_y)] = GT_y
    # Ind_points[i, 0:len(GT_I)] = i
    I_points[0:len(GT_I)] = GT_I
    U_points[0:len(GT_U)] = GT_U
    V_points[0:len(GT_V)] = GT_V
    Uv_point_weights[0:len(GT_weights)] = GT_weights

    All_labels[:] = np.reshape(All_L.astype(np.int32), M ** 2)
    All_Weights[:] = np.reshape(All_W.astype(np.int32), M ** 2)

    # K = cfg.BODY_UV_RCNN.NUM_PATCHES
    K = 24
    # print(K)
    #
    U_points = np.tile(U_points, [K + 1])
    V_points = np.tile(V_points, [K + 1])
    Uv_Weight_Points = np.zeros(U_points.shape)
    #
    for jjj in range(1, K + 1):
        Uv_Weight_Points[jjj * I_points.shape[0]: (jjj + 1) * I_points.shape[0]] = (I_points == jjj).astype(
            np.float32)
        # Uv_Weight_Points[:, jjj * I_points.shape[1]: (jjj + 1) * I_points.shape[1]] = (I_points == jjj).astype(
        #     np.float32)

    ##
    dp_dict['body_uv_ann_labels'] = np.array(All_labels).astype(np.int32)
    dp_dict['body_uv_ann_weights'] = np.array(All_Weights).astype(np.float32)
    #
    ##########################
    dp_dict['body_uv_X_points'] = X_points.astype(np.float32)
    dp_dict['body_uv_Y_points'] = Y_points.astype(np.float32)
    dp_dict['body_uv_Ind_points'] = Ind_points.astype(np.float32)
    dp_dict['body_uv_I_points'] = I_points.astype(np.float32)
    dp_dict['body_uv_U_points'] = U_points.astype(
        np.float32)  #### VERY IMPORTANT :   These are switched here :
    dp_dict['body_uv_V_points'] = V_points.astype(np.float32)
    dp_dict['body_uv_point_weights'] = Uv_Weight_Points.astype(np.float32)

    return dp_dict
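The tiling at the end deserves a standalone check: U_points and V_points are repeated K + 1 times, and Uv_Weight_Points activates, for each patch index j, only the points whose part label I equals j. A small sketch with K = 2 instead of 24:

import numpy as np

K = 2                               # 24 in the code above
I_points = np.array([1, 2, 0, 1])  # per-point part labels (0 = no point)
U_points = np.tile(np.array([0.1, 0.2, 0.0, 0.3]), K + 1)
weights = np.zeros(U_points.shape)
n = I_points.shape[0]
for j in range(1, K + 1):
    weights[j * n:(j + 1) * n] = (I_points == j).astype(np.float32)
# weights -> [0 0 0 0 | 1 0 0 1 | 0 1 0 0]: block j is active only where I == j.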
Example #11
def _sample_rois(roidb, im_scale, batch_idx):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    rois_per_image = int(cfg.TRAIN.BATCH_SIZE_PER_IM)
    fg_rois_per_image = int(np.round(cfg.TRAIN.FG_FRACTION * rois_per_image))
    max_overlaps = roidb['max_overlaps']

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size)
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds,
                             size=fg_rois_per_this_image,
                             replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI)
                       & (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds,
                             size=bg_rois_per_this_image,
                             replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Label is the class each RoI has max overlap with
    sampled_labels = roidb['max_classes'][keep_inds]
    sampled_labels[fg_rois_per_this_image:] = 0  # Label bg RoIs with class 0
    sampled_boxes = roidb['boxes'][keep_inds]

    if 'bbox_targets' not in roidb:
        gt_inds = np.where(roidb['gt_classes'] > 0)[0]
        gt_boxes = roidb['boxes'][gt_inds, :]

        if not len(gt_boxes):
            num_bbox_reg_classes = cfg.MODEL.NUM_CLASSES
            clss = sampled_labels
            bbox_targets = blob_utils.zeros(
                (clss.size, 4 * num_bbox_reg_classes))
            bbox_inside_weights = blob_utils.zeros(bbox_targets.shape)
        else:
            gt_assignments = gt_inds[roidb['box_to_gt_ind_map'][keep_inds]]
            bbox_targets = _compute_targets(sampled_boxes,
                                            gt_boxes[gt_assignments, :],
                                            sampled_labels)
            bbox_targets, bbox_inside_weights = _expand_bbox_targets(
                bbox_targets)

    else:
        bbox_targets, bbox_inside_weights = _expand_bbox_targets(
            roidb['bbox_targets'][keep_inds, :])

    bbox_outside_weights = np.array(bbox_inside_weights > 0,
                                    dtype=bbox_inside_weights.dtype)

    # Scale rois and format as (batch_idx, x1, y1, x2, y2)
    sampled_rois = sampled_boxes * im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones(
        (sampled_rois.shape[0], 1))
    sampled_rois = np.hstack((repeated_batch_idx, sampled_rois))

    # Base Fast R-CNN blobs
    blob_dict = dict(labels_int32=sampled_labels.astype(np.int32, copy=False),
                     rois=sampled_rois,
                     bbox_targets=bbox_targets,
                     bbox_inside_weights=bbox_inside_weights,
                     bbox_outside_weights=bbox_outside_weights)

    # Optionally add Mask R-CNN blobs
    if cfg.MODEL.MASK_ON:
        roi_data.mask_rcnn.add_mask_rcnn_blobs(blob_dict, sampled_boxes, roidb,
                                               im_scale, batch_idx)

    # Optionally add Keypoint R-CNN blobs
    if cfg.MODEL.KEYPOINTS_ON:
        roi_data.keypoint_rcnn.add_keypoint_rcnn_blobs(blob_dict, roidb,
                                                       fg_rois_per_image,
                                                       fg_inds, im_scale,
                                                       batch_idx)

    return blob_dict
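The fg/bg sampling rule can be exercised in isolation. A sketch assuming cfg.TRAIN.BATCH_SIZE_PER_IM = 8, FG_FRACTION = 0.25 (so 2 fg rois), FG_THRESH = 0.5, and background thresholds [0.0, 0.5):

import numpy as np

rng = np.random.RandomState(0)
max_overlaps = np.array([0.9, 0.7, 0.55, 0.3, 0.2, 0.1, 0.45, 0.05])
rois_per_image, fg_rois_per_image = 8, 2

fg_inds = np.where(max_overlaps >= 0.5)[0]              # -> [0, 1, 2]
fg_inds = rng.choice(fg_inds, size=min(fg_rois_per_image, fg_inds.size),
                     replace=False)
bg_inds = np.where((max_overlaps < 0.5) & (max_overlaps >= 0.0))[0]
bg_take = min(rois_per_image - fg_inds.size, bg_inds.size)
bg_inds = rng.choice(bg_inds, size=bg_take, replace=False)
keep_inds = np.append(fg_inds, bg_inds)                 # fg first, then bg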
Example #12
def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    M = cfg.MRCNN.RESOLUTION
    polys_gt_inds = np.where(
        (roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0)
    )[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    # Keep only a subset of classes (set A in the paper) for mask training
    if cfg.TRAIN.MRCNN_FILTER_LABELS:
        keep_label_set = set(cfg.TRAIN.MRCNN_LABELS_TO_KEEP)
        labels_int32 = blobs['labels_int32']
        labels_int32_keep = np.array(
            [(l if l in keep_label_set else 0) for l in labels_int32],
            dtype=labels_int32.dtype)
    else:
        labels_int32_keep = blobs['labels_int32']
    fg_inds = np.where(labels_int32_keep > 0)[0]
    roi_has_mask = labels_int32_keep.copy()
    roi_has_mask[roi_has_mask > 0] = 1

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False)
        )
        # Map from each fg rois to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # add fg targets
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            poly_gt = polys_gt[fg_polys_ind]
            roi_fg = rois_fg[i]
            # Rasterize the portion of the polygon mask within the given fg roi
            # to an M x M binary image
            mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, :] = np.reshape(mask, M**2)
    else:  # If there are no fg masks (it does happen)
        # The network cannot handle empty blobs, so we must provide a mask
        # We simply take the first bg roi, give it an all -1's mask (ignore
        # label), and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because ...
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # We give it a -1's blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks, mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_int32'] = masks
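_expand_to_class_specific_mask_targets is defined elsewhere in the module; a minimal sketch of the usual Detectron behavior, expanding the (#rois, M**2) targets into (#rois, num_classes * M**2) where only each roi's own class slot carries its mask and every other entry is -1 (ignore). Treat the exact signature as an assumption from the call site:

import numpy as np

def _expand_to_class_specific_mask_targets_demo(masks, mask_class_labels,
                                                num_classes, M):
    mask_targets = -np.ones((masks.shape[0], num_classes * M**2),
                            dtype=np.int32)
    for i in range(masks.shape[0]):
        cls = int(mask_class_labels[i])
        if cls > 0:  # background rois keep the all -1 (ignore) row
            start = M**2 * cls
            mask_targets[i, start:start + M**2] = masks[i, :]
    return mask_targets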
Example #13
def add_rpn_blobs(blobs, im_scales, roidb):
    """Add blobs needed training RPN-only and end-to-end Faster R-CNN models."""
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        foas = []
        for lvl in range(k_min, k_max + 1):
            field_stride = 2.**lvl
            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE *
                            2.**(lvl - k_min), )
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foa = data_utils.get_field_of_anchors(field_stride, anchor_sizes,
                                                  anchor_aspect_ratios)
            foas.append(foa)
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        foa = data_utils.get_field_of_anchors(cfg.RPN.STRIDE, cfg.RPN.SIZES,
                                              cfg.RPN.ASPECT_RATIOS)
        all_anchors = foa.field_of_anchors

    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        gt_inds = np.where((entry['gt_classes'] > 0)
                           & (entry['is_crowd'] == 0))[0]
        # Added to ignore anchors that have overlap with crowd area
        ignore_inds = np.where(entry['is_crowd'][gt_inds] == 1)[0]
        if len(ignore_inds) == 0:
            ignore_inds = None

        gt_rois = entry['boxes'][gt_inds, :] * scale
        # TODO(rbg): gt_boxes is poorly named;
        # should be something like 'gt_rois_info'
        gt_boxes = blob_utils.zeros((len(gt_inds), 6))
        gt_boxes[:, 0] = im_i  # batch inds
        gt_boxes[:, 1:5] = gt_rois
        gt_boxes[:, 5] = entry['gt_classes'][gt_inds]
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        # Add RPN targets
        if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
            # RPN applied to many feature levels, as in the FPN paper
            rpn_blobs, vis_labels, vis_anchors = _get_rpn_blobs(
                im_height, im_width, foas, all_anchors, gt_rois, ignore_inds)
            for i, lvl in enumerate(range(k_min, k_max + 1)):
                for k, v in rpn_blobs[i].items():
                    blobs[k + '_fpn' + str(lvl)].append(v)
        else:
            # Classical RPN, applied to a single feature level
            rpn_blobs, vis_labels, vis_anchors = _get_rpn_blobs(
                im_height, im_width, [foa], all_anchors, gt_rois, ignore_inds)
            for k, v in rpn_blobs.items():
                blobs[k].append(v)

        if cfg.TRAIN.VIS_ANCHOR:
            im = blobs['data'][0, :, :, :].squeeze() + np.array(
                cfg.PIXEL_MEANS).transpose((2, 0, 1))
            idx = np.where(vis_labels == 1)[0]
            anchor_bboxes = vis_anchors[idx, :]
            if not osp.exists(cfg.TRAIN.VIS_ANCHOR_DIR):
                os.makedirs(cfg.TRAIN.VIS_ANCHOR_DIR)
            save_path = osp.join(
                cfg.TRAIN.VIS_ANCHOR_DIR,
                osp.splitext(os.path.basename(entry['image']))[0])
            vis2d_utils.draw_pred_and_gt_tensor(im, gt_rois, save_path,
                                                anchor_bboxes)

    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)

    if cfg.LESION.USE_POSITION or cfg.LESION.POSITION_RCNN or cfg.LESION.SHALLOW_POSITION or cfg.LESION.MM_POS:
        valid_keys = [
            'has_visible_keypoints', 'boxes', 'segms', 'seg_areas',
            'gt_classes', 'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map',
            'gt_keypoints', 'z_position'
        ]
    else:
        valid_keys = [
            'has_visible_keypoints', 'boxes', 'segms', 'seg_areas',
            'gt_classes', 'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map',
            'gt_keypoints'
        ]
    minimal_roidb = [{} for _ in range(len(roidb))]
    for i, e in enumerate(roidb):
        for k in valid_keys:
            if k in e:
                minimal_roidb[i][k] = e[k]
    # blobs['roidb'] = blob_utils.serialize(minimal_roidb)
    blobs['roidb'] = minimal_roidb

    # Always return valid=True, since RPN minibatches are valid by design
    return True
Example #14
def test_net(
    weights_file,
    dataset_name,
    proposal_file,
    output_dir,
    ind_range=None,
    gpu_id=0
):
    """Run inference on all images in a dataset or over an index range of images
    in a dataset using a single GPU.
    """
    roidb, dataset, start_ind, end_ind, total_num_images = get_roidb_and_dataset(
        dataset_name, proposal_file, ind_range
    )

    model = initialize_model_from_cfg(weights_file, gpu_id=gpu_id)
    num_images = len(roidb)
    num_classes = cfg.MODEL.NUM_CLASSES
    all_scores = empty_results(num_images)
    timers = defaultdict(Timer)
    for i, entry in enumerate(roidb):
        # just get the ground truth boxes
        box_proposals = entry['boxes'][entry['gt_classes'] > 0]
        if len(box_proposals) == 0:
            cls_scores_i = blob_utils.zeros((0, cfg.MODEL.NUM_CLASSES))
            extend_results(i, all_scores, cls_scores_i)
            continue

        im = cv2.imread(entry['image'])
        with c2_utils.NamedCudaScope(gpu_id):
            cls_scores_i = im_classify_bbox(
                model, im, box_proposals, timers
            )

        extend_results(i, all_scores, cls_scores_i)

        if i % 10 == 0:  # Reduce log file size
            ave_total_time = np.sum([t.average_time for t in timers.values()])
            eta_seconds = ave_total_time * (num_images - i - 1)
            eta = str(datetime.timedelta(seconds=int(eta_seconds)))
            det_time = (
                timers['im_detect_bbox'].average_time
            )
            misc_time = (
                timers['misc_bbox'].average_time
            )
            logger.info(
                (
                    'im_detect: range [{:d}, {:d}] of {:d}: '
                    '{:d}/{:d} {:.3f}s + {:.3f}s (eta: {})'
                ).format(
                    start_ind + 1, end_ind, total_num_images, start_ind + i + 1,
                    start_ind + num_images, det_time, misc_time, eta
                )
            )

        # if cfg.VIS:
        #     im_name = os.path.splitext(os.path.basename(entry['image']))[0]
        #     vis_utils.vis_one_image(
        #         im[:, :, ::-1],
        #         '{:d}_{:s}'.format(i, im_name),
        #         os.path.join(output_dir, 'vis'),
        #         cls_boxes_i,
        #         segms=cls_segms_i,
        #         keypoints=cls_keyps_i,
        #         thresh=cfg.VIS_TH,
        #         box_alpha=0.8,
        #         dataset=dataset,
        #         show_class=True
        #     )

    cfg_yaml = yaml.dump(cfg)
    if ind_range is not None:
        det_name = cfg.CFG_FILE + '_range_%s_%s.pkl' % tuple(ind_range)
    else:
        det_name = 'detections.pkl'
    det_file = os.path.join(output_dir, det_name)
    save_object(
        dict(
            all_scores=all_scores,
            cfg=cfg_yaml
        ), det_file
    )
    logger.info('Wrote detections to: {}'.format(os.path.abspath(det_file)))
    return all_scores
Example #15
def add_refine_local_mask_blobs(blobs, sampled_boxes, roidb, im_scale,
                                batch_idx, data):
    """Add RefineNet Mask specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    M = cfg.REFINENET.RESOLUTION
    up_scale = cfg.REFINENET.UP_SCALE
    polys_gt_inds = np.where((roidb['gt_classes'] > 0)
                             & (roidb['is_crowd'] == 0))[0]
    gt_classes = roidb['gt_classes'][polys_gt_inds]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    # Define size variables
    inp_h, inp_w = data.shape[2], data.shape[3]
    pad_img_h, pad_img_w = inp_h / im_scale, inp_w / im_scale

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        # Map from each fg rois to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # Expand the foreground rois by a factor of up_scale and
        # clip by the padded image boundary
        pad_rois_fg = box_utils.expand_boxes(rois_fg, up_scale)
        pad_rois_fg = box_utils.clip_boxes_to_image(pad_rois_fg, pad_img_h,
                                                    pad_img_w)

        if cfg.REFINENET.ONLY_USE_CROWDED_SAMPLES:
            # Only use crowded samples to train the RefineNet
            THRES = cfg.REFINENET.OVERLAP_THRESHOLD
            for i in range(rois_fg.shape[0]):
                overlap = overlaps_bbfg_bbpolys[i]
                if np.sum(overlap > THRES) > 1:
                    # if multiple instances overlap this roi, use it for training
                    fg_polys_ind = fg_polys_inds[i]
                    poly_gt = polys_gt[fg_polys_ind]
                    pad_roi_fg = pad_rois_fg[i]
                    # Rasterize the portion of the polygon mask within the given fg roi
                    # to an M x M binary image
                    mask = segm_utils.polys_to_mask_wrt_box(
                        poly_gt, pad_roi_fg, M)
                    mask = np.array(mask > 0,
                                    dtype=np.int32)  # Ensure it's binary
                    masks[i, :] = np.reshape(mask, M**2)

                else:  # Only one instance, then set label to be -1 (ignored)
                    masks[i, :] = -1
                    mask_class_labels[i] = 0
        elif cfg.REFINENET.ASSIGN_LARGER_WEIGHT_FOR_CROWDED_SAMPLES:
            loss_weights = blob_utils.ones((rois_fg.shape[0], ))
            for i in range(rois_fg.shape[0]):
                fg_polys_ind = fg_polys_inds[i]
                poly_gt = polys_gt[fg_polys_ind]
                pad_roi_fg = pad_rois_fg[i]
                class_label = mask_class_labels[i]

                # Rasterize the portion of the polygon mask within the given
                # fg roi to an M x M binary image
                mask = segm_utils.polys_to_mask_wrt_box(poly_gt, pad_roi_fg, M)
                mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
                masks[i, :] = np.reshape(mask, M**2)

                # And now determine the weight for each roi. If any other
                # instance of the same class falls inside this RoI, we expect
                # it to be a hard sample and assign it a larger weight
                for j in range(len(polys_gt)):
                    if j == fg_polys_ind:
                        continue
                    if gt_classes[
                            j] == class_label:  # only same class is valid
                        mask = segm_utils.polys_to_mask_wrt_box(
                            polys_gt[j], pad_roi_fg, M)
                        # and check if any part falls inside the bbox
                        is_inside_bbox = (np.sum(mask) > 0)
                        if is_inside_bbox:
                            loss_weights[i] = cfg.REFINENET.WEIGHT_LOSS_CROWDED
                            break  # early stop

        else:
            # add fg targets
            for i in range(rois_fg.shape[0]):
                fg_polys_ind = fg_polys_inds[i]
                poly_gt = polys_gt[fg_polys_ind]
                pad_roi_fg = pad_rois_fg[i]
                # Rasterize the portion of the polygon mask within the given fg roi
                # to an M x M binary image
                mask = segm_utils.polys_to_mask_wrt_box(poly_gt, pad_roi_fg, M)
                mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
                masks[i, :] = np.reshape(mask, M**2)

    else:  # If there are no fg masks (it does happen)
        # The network cannot handle empty blobs, so we must provide a mask
        # We simply take the first bg roi, give it an all -1's mask (ignore
        # label), and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # pad_rois_fg is actually one background roi, but that's ok because ...
        pad_rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # We give it a -1's blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks,
                                                       mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    pad_rois_fg = (pad_rois_fg.astype(np.float32)) * im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((pad_rois_fg.shape[0], 1))
    pad_rois_fg = np.hstack((repeated_batch_idx, pad_rois_fg)).astype(np.int32)

    # Update blobs dict with Refine-Net blobs
    blobs['refined_mask_rois'] = pad_rois_fg
    blobs['roi_has_refined_mask_int32'] = roi_has_mask
    blobs['refined_masks_int32'] = masks

    if cfg.REFINENET.ASSIGN_LARGER_WEIGHT_FOR_CROWDED_SAMPLES:
        blobs['loss_weights'] = loss_weights
Example #16
def add_charmask_rcnn_blobs(blobs, sampled_boxes, gt_boxes, gt_inds, roidb, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    is_e2e = cfg.MRCNN.IS_E2E
    M_HEIGHT = cfg.MRCNN.RESOLUTION_H
    M_WIDTH = cfg.MRCNN.RESOLUTION_W
    mask_rois_per_this_image = cfg.MRCNN.MASK_BATCH_SIZE_PER_IM
    polys_gt_inds = np.where(
        (roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0)
    )[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    chars_gts = roidb['charboxes']
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    if DEBUG:
        img_path = roidb['image']
        img = Image.open(img_path)
        # img = blobs['data'][0]
        # img = img.transpose((1,2,0))
        # img  += cfg.PIXEL_MEANS
        # img = img.astype(np.int8)
        # img = Image.fromarray(img)

    if is_e2e:
        fg_inds = np.where(blobs['labels_int32'] > 0)[0]
        if fg_inds.size > mask_rois_per_this_image:
            fg_inds = npr.choice(
                fg_inds, size=mask_rois_per_this_image, replace=False
            )
        roi_has_mask = np.ones((fg_inds.shape[0], ), dtype=np.int32)

        if fg_inds.shape[0] > 0:
            # Class labels for the foreground rois
            mask_class_labels = blobs['labels_int32'][fg_inds]
            masks = blob_utils.zeros((fg_inds.shape[0], 2, M_HEIGHT*M_WIDTH), int32=True)
            mask_weights = np.zeros((fg_inds.shape[0], M_HEIGHT*M_WIDTH), dtype=np.float32)
            char_boxes = np.zeros((fg_inds.shape[0], M_HEIGHT*M_WIDTH, 4), dtype=np.float32)
            char_boxes_inside_weight = np.zeros((fg_inds.shape[0], M_HEIGHT*M_WIDTH, 4), dtype=np.float32)
            char_boxes_outside_weight = np.zeros((fg_inds.shape[0], M_HEIGHT*M_WIDTH, 4), dtype=np.float32)

            # Find overlap between all foreground rois and the bounding boxes
            # enclosing each segmentation
            rois_fg = sampled_boxes[fg_inds]
            overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
                rois_fg.astype(np.float32, copy=False),
                boxes_from_polys.astype(np.float32, copy=False)
            )
            # Map from each fg rois to the index of the mask with highest overlap
            # (measured by bbox overlap)
            fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

            # add fg targets
            for i in range(rois_fg.shape[0]):
                fg_polys_ind = fg_polys_inds[i]
                poly_gt = polys_gt[fg_polys_ind]
                indexes_rec_rois_gt_chars = np.where(chars_gts[:, 9] == fg_polys_ind)
                chars_gt = chars_gts[indexes_rec_rois_gt_chars, :9]
                roi_fg = rois_fg[i]
                # Rasterize the portion of the polygon mask within the given fg roi
                # to an M_HEIGHT x M_WIDTH binary image
                mask, mask_weight, char_box, char_box_inside_weight = \
                    segm_utils.polys_to_mask_wrt_box_rec(
                        chars_gt.copy(), poly_gt, roi_fg.copy(),
                        M_HEIGHT, M_WIDTH, weight_wh=cfg.MRCNN.WEIGHT_WH)
                if DEBUG:
                    draw = ImageDraw.Draw(img)
                    draw.rectangle([(roi_fg[0],roi_fg[1]), (roi_fg[2],roi_fg[3])])
                    img.save('./tests/image.jpg')
                    _visu_global_map(mask[0,:,:].copy(), './tests/proposals_visu_global.jpg')
                    _visu_char_map(mask[1,:,:].copy(), './tests/proposals_visu_char.jpg')
                    _visu_char_box(char_box, char_box_inside_weight, './tests/char_box.jpg', M_HEIGHT, M_WIDTH)
                masks[i, 0, :] = np.reshape(mask[0,:,:], M_HEIGHT*M_WIDTH)
                masks[i, 1, :] = np.reshape(mask[1,:,:], M_HEIGHT*M_WIDTH)
                mask_weights[i, :] = np.reshape(mask_weight, M_HEIGHT*M_WIDTH)
                char_boxes[i, :, :] = np.reshape(char_box, (M_HEIGHT*M_WIDTH, 4))
                char_boxes_inside_weight[i, :, :] = np.reshape(char_box_inside_weight, (M_HEIGHT*M_WIDTH, 4))
        else:  # If there are no fg masks (it does happen)
            # The network cannot handle empty blobs, so we must provide a mask
            # We simply take the first bg roi, give it an all -1's mask (ignore
            # label), and label it with class zero (bg).
            bg_inds = np.where(blobs['labels_int32'] == 0)[0]
            # rois_fg is actually one background roi, but that's ok because ...
            rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
            # We give it a -1's blob (ignore label)
            masks = -blob_utils.ones((1, 2, M_HEIGHT*M_WIDTH), int32=True)
            mask_weights = -blob_utils.ones((1, 2, M_HEIGHT*M_WIDTH), int32=True)
            char_boxes_inside_weight = np.zeros(
                (1, M_HEIGHT*M_WIDTH, 4), dtype=np.float32)
            # We label it with class = 0 (background)
            mask_class_labels = blob_utils.zeros((1, ))
            # Mark that the first roi has a mask
            roi_has_mask[0] = 1
    else:
        fg_inds = gt_inds
        roi_has_mask = np.ones((fg_inds.shape[0], ), dtype=np.int32)

        if fg_inds.shape[0] > 0:
            # Class labels for the foreground rois
            mask_class_labels = np.ones((fg_inds.shape[0], ), dtype=np.int32)
            masks = blob_utils.zeros((fg_inds.shape[0], 2, M_HEIGHT*M_WIDTH), int32=True)
            char_boxes = np.zeros((fg_inds.shape[0], M_HEIGHT*M_WIDTH, 4), dtype=np.float32)
            char_boxes_inside_weight = np.zeros((fg_inds.shape[0], M_HEIGHT*M_WIDTH, 4), dtype=np.float32)
            char_boxes_outside_weight = np.zeros((fg_inds.shape[0], M_HEIGHT*M_WIDTH, 4), dtype=np.float32)
            # mask_weights = blob_utils.zeros((fg_inds.shape[0], 2, M_HEIGHT*M_WIDTH), int32=True)

            rois_fg = gt_boxes
            # print(gt_boxes.shape[0])
            # add fg targets
            for i in range(rois_fg.shape[0]):
                fg_polys_ind = fg_inds[i]
                poly_gt = polys_gt[fg_polys_ind]
                indexes_rec_rois_gt_chars = np.where(chars_gts[:, 9] == fg_polys_ind)
                chars_gt = chars_gts[indexes_rec_rois_gt_chars, :9]
                roi_fg = rois_fg[i]
                # Rasterize the portion of the polygon mask within the given fg roi
                # to an M_HEIGHT x M_WIDTH binary image
                mask, char_box, char_box_inside_weight = segm_utils.polys_to_mask_wrt_box_rec(chars_gt, poly_gt, roi_fg, M_HEIGHT, M_WIDTH, weight_wh=cfg.MRCNN.WEIGHT_WH)
                if DEBUG:
                    _visu_char_box(char_box, char_box_inside_weight, './tests/char_box.jpg', M_HEIGHT, M_WIDTH)
                mask = np.array(mask, dtype=np.int32)  # Ensure it's binary
                # mask_weight = np.array(mask_weight, dtype=np.int32)  # Ensure it's binary
                masks[i, 0, :] = np.reshape(mask[0,:,:], M_HEIGHT*M_WIDTH)
                masks[i, 1, :] = np.reshape(mask[1,:,:], M_HEIGHT*M_WIDTH)
                char_boxes[i, :, :] = np.reshape(char_box, (M_HEIGHT*M_WIDTH, 4))
                char_boxes_inside_weight[i, :, :] = np.reshape(char_box_inside_weight, (M_HEIGHT*M_WIDTH, 4))
        else:  # If there are no fg masks (it does happen)
            # The network cannot handle empty blobs, so we must provide a mask
            # We simply take the first bg roi, give it an all -1's mask (ignore
            # label), and label it with class zero (bg).
            bg_inds = np.where(blobs['labels_int32'] == 0)[0]
            # rois_fg is actually one background roi, but that's ok because ...
            rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
            # We give it a -1's blob (ignore label)
            masks = -blob_utils.ones((1, 2, M_HEIGHT*M_WIDTH), int32=True)
            mask_weights = -blob_utils.ones((1, 2, M_HEIGHT*M_WIDTH), int32=True)
            char_boxes = -np.ones((1, M_HEIGHT*M_WIDTH, 4), dtype=np.int32)
            char_boxes_inside_weight = np.zeros(
                (1, M_HEIGHT*M_WIDTH, 4), dtype=np.float32)
            # We label it with class = 0 (background)
            mask_class_labels = blob_utils.zeros((1, ))
            # Mark that the first roi has a mask
            roi_has_mask[0] = 1


    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    char_boxes_outside_weight = np.array(
        char_boxes_inside_weight > 0, dtype=char_boxes_inside_weight.dtype
    )

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_global_int32'] = masks[:, 0, :]
    blobs['masks_char_int32'] = masks[:, 1, :].reshape((-1, M_HEIGHT, M_WIDTH))
    blobs['masks_char_weight'] = mask_weights
    blobs['char_bbox_targets'] = char_boxes.reshape((-1,4))
    blobs['char_bbox_inside_weights'] = char_boxes_inside_weight.reshape((-1,4))
    blobs['char_bbox_outside_weights'] = char_boxes_outside_weight.reshape((-1,4))
Example #17
def add_boundary_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Boundary specific blobs to the input blob dictionary."""
    # Prepare the boundary targets by associating one gt boundary to each training roi
    # that has a fg (non-bg) class label.
    M = cfg.BOUNDARY.RESOLUTION
    polys_gt_inds = np.where((roidb['gt_classes'] > 0)
                             & (roidb['is_crowd'] == 0))[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_boundary = blobs['labels_int32'].copy()
    roi_has_boundary[roi_has_boundary > 0] = 1

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        boundary_class_labels = blobs['labels_int32'][fg_inds]
        boundarys = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        # Map from each fg rois to the index of the boundary with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # add fg targets
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            poly_gt = polys_gt[fg_polys_ind]
            roi_fg = rois_fg[i]
            # Rasterize the portion of the polygon boundary within the given fg roi
            # to an M x M binary image
            mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            boundary = get_boundary(mask)
            boundarys[i, :] = np.reshape(boundary, M**2)
    else:  # If there are no fg boundarys (it does happen)
        # The network cannot handle empty blobs, so we must provide a boundary
        # We simply take the first bg roi, give it an all -1's boundary (ignore
        # label), and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because ...
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # We give it a -1's blob (ignore label)
        boundarys = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        boundary_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a boundary
        roi_has_boundary[0] = 1

    if cfg.BOUNDARY.CLS_SPECIFIC_MASK:
        boundarys = _expand_to_class_specific_boundary_targets(
            boundarys, boundary_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Mask R-CNN blobs
    blobs['boundary_rois'] = rois_fg
    blobs['roi_has_boundary_int32'] = roi_has_boundary
    blobs['boundary_int32'] = boundarys
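get_boundary is not shown on this page; one common definition, sketched here as an assumption, takes the difference between a binary mask and its one-pixel erosion:

import numpy as np
from scipy.ndimage import binary_erosion

def get_boundary(mask):
    # Pixels that are in the mask but not in its erosion form the boundary.
    m = mask.astype(bool)
    return (m & ~binary_erosion(m, iterations=1)).astype(np.int32)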
Example #18
def add_keypoint_rcnn_blobs_sigmoid(
    blobs, roidb, fg_rois_per_image, fg_inds, im_scale, batch_idx
):
    """Add Mask R-CNN keypoint specific blobs to the given blobs dictionary."""
    # Note: gt_inds must match how they're computed in
    # datasets.json_dataset._merge_proposal_boxes_into_roidb
    gt_inds = np.where(roidb['gt_classes'] > 0)[0]
    max_overlaps = roidb['max_overlaps']
    gt_keypoints = roidb['gt_keypoints']
    M = cfg.KRCNN.HEATMAP_SIZE

    ind_kp = gt_inds[roidb['box_to_gt_ind_map']]
    within_box = _within_box(gt_keypoints[ind_kp, :, :], roidb['boxes'])
    vis_kp = gt_keypoints[ind_kp, 2, :] > 0
    is_visible = np.sum(np.logical_and(vis_kp, within_box), axis=1) > 0
    kp_fg_inds = np.where(
        np.logical_and(max_overlaps >= cfg.TRAIN.FG_THRESH, is_visible)
    )[0]

    kp_fg_rois_per_this_image = np.minimum(fg_rois_per_image, kp_fg_inds.size)
    if kp_fg_inds.size > kp_fg_rois_per_this_image:
        kp_fg_inds = np.random.choice(
            kp_fg_inds, size=kp_fg_rois_per_this_image, replace=False
        )

    if kp_fg_inds.shape[0] > 0:
        sampled_fg_rois = roidb['boxes'][kp_fg_inds]
        box_to_gt_ind_map = roidb['box_to_gt_ind_map'][kp_fg_inds]

        num_keypoints = gt_keypoints.shape[2]
        sampled_keypoints = -np.ones(
            (len(sampled_fg_rois), gt_keypoints.shape[1], num_keypoints),
            dtype=gt_keypoints.dtype
        )
        for ii in range(len(sampled_fg_rois)):
            ind = box_to_gt_ind_map[ii]
            if ind >= 0:
                sampled_keypoints[ii, :, :] = gt_keypoints[gt_inds[ind], :, :]
                assert np.sum(sampled_keypoints[ii, 2, :]) > 0

        heats, weights = keypoint_utils.keypoints_to_sigmoid_heatmap_labels(
            sampled_keypoints, sampled_fg_rois, M=cfg.KRCNN.HEATMAP_SIZE
        )

        shape = sampled_fg_rois.shape[0] * cfg.KRCNN.NUM_KEYPOINTS
        heats = heats.reshape((shape, M**2))
        weights = weights.reshape((shape, 1))

    else:  # If there are no fg keypoint rois (it does happen)
        # The network cannot handle empty blobs, so we must provide a heatmap.
        # We simply take the first bg roi, give it an all -1 heatmap (ignore
        # label), and set its weights to zero.
        roi_inds = np.where(roidb['gt_classes'] == 0)[0]
        # sampled_fg_rois is actually one random roi, but that's ok because ...
        sampled_fg_rois = roidb['boxes'][roi_inds[0]].reshape((1, -1))
        # We give it a -1's blob (ignore label)
        heats = -blob_utils.ones((1 * cfg.KRCNN.NUM_KEYPOINTS, M**2))
        # We set weights to 0 (ignore label)
        weights = blob_utils.zeros((1 * cfg.KRCNN.NUM_KEYPOINTS, 1))

    sampled_fg_rois *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones(
        (sampled_fg_rois.shape[0], 1)
    )
    sampled_fg_rois = np.hstack((repeated_batch_idx, sampled_fg_rois))

    blobs['keypoint_rois'] = sampled_fg_rois
    blobs['keypoint_locations_int32'] = heats.astype(np.int32, copy=False)
    blobs['keypoint_weights'] = weights

    # Since this function may randomly sample a subset of the boxes as rois,
    # we need to make sure the refined_keypoint_rois use the same subset,
    # so we also pass out the inds of the subset.
    blobs['keypoint_fg_inds'] = kp_fg_inds.astype(np.int32, copy=False)
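
The `_within_box` helper used above is defined elsewhere in the codebase. A minimal sketch of its expected behavior, assuming keypoints are stored as an (N, 3, num_keypoints) array of (x, y, visibility) rows and boxes as an (N, 4) array of (x1, y1, x2, y2); the name `_within_box_sketch` is illustrative:

import numpy as np

def _within_box_sketch(points, boxes):
    """Boolean (N, num_keypoints) array: keypoint (x, y) lies inside its box."""
    x_within = np.logical_and(
        points[:, 0, :] >= np.expand_dims(boxes[:, 0], axis=1),
        points[:, 0, :] <= np.expand_dims(boxes[:, 2], axis=1)
    )
    y_within = np.logical_and(
        points[:, 1, :] >= np.expand_dims(boxes[:, 1], axis=1),
        points[:, 1, :] <= np.expand_dims(boxes[:, 3], axis=1)
    )
    return np.logical_and(x_within, y_within)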
Example No. 19
def add_rpn_blobs(blobs, im_scales, roidb):
    """Add blobs needed training RPN-only and end-to-end Faster R-CNN models."""
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        # RPN applied to many feature levels, as in the FPN paper
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        foas = []
        for lvl in range(k_min, k_max + 1):
            anchor_sizes = (cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), )
            # Cap the field stride at 16 (the unmodified FPN setting would be
            # field_stride = 2.**lvl)
            field_stride = min(16., 2.**lvl)
            anchor_aspect_ratios = cfg.FPN.RPN_ASPECT_RATIOS
            foa = data_utils.get_field_of_anchors(
                field_stride, anchor_sizes, anchor_aspect_ratios
            )
            foas.append(foa)
        all_anchors = np.concatenate([f.field_of_anchors for f in foas])
    else:
        foa = data_utils.get_field_of_anchors(
            cfg.RPN.STRIDE, cfg.RPN.SIZES, cfg.RPN.ASPECT_RATIOS
        )
        all_anchors = foa.field_of_anchors

    for im_i, entry in enumerate(roidb):
        scale = im_scales[im_i]
        im_height = np.round(entry['height'] * scale)
        im_width = np.round(entry['width'] * scale)
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0)
        )[0]
        gt_rois = entry['boxes'][gt_inds, :] * scale
        # TODO(rbg): gt_boxes is poorly named;
        # should be something like 'gt_rois_info'
        gt_boxes = blob_utils.zeros((len(gt_inds), 6))
        gt_boxes[:, 0] = im_i  # batch inds
        gt_boxes[:, 1:5] = gt_rois
        gt_boxes[:, 5] = entry['gt_classes'][gt_inds]
        im_info = np.array([[im_height, im_width, scale]], dtype=np.float32)
        blobs['im_info'].append(im_info)

        # Add RPN targets
        if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
            # RPN applied to many feature levels, as in the FPN paper
            rpn_blobs = _get_rpn_blobs(
                im_height, im_width, foas, all_anchors, gt_rois
            )
            for i, lvl in enumerate(range(k_min, k_max + 1)):
                for k, v in rpn_blobs[i].items():
                    blobs[k + '_fpn' + str(lvl)].append(v)
        else:
            # Classical RPN, applied to a single feature level
            rpn_blobs = _get_rpn_blobs(
                im_height, im_width, [foa], all_anchors, gt_rois
            )
            for k, v in rpn_blobs.items():
                blobs[k].append(v)

    for k, v in blobs.items():
        if isinstance(v, list) and len(v) > 0:
            blobs[k] = np.concatenate(v)

    valid_keys = [
        'has_visible_keypoints', 'boxes', 'segms', 'seg_areas', 'gt_classes',
        'gt_overlaps', 'is_crowd', 'box_to_gt_ind_map', 'gt_keypoints'
    ]
    minimal_roidb = [{} for _ in range(len(roidb))]
    for i, e in enumerate(roidb):
        for k in valid_keys:
            if k in e:
                minimal_roidb[i][k] = e[k]
    blobs['roidb'] = blob_utils.serialize(minimal_roidb)

    # Always return valid=True, since RPN minibatches are valid by design
    return True
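
`data_utils.get_field_of_anchors` is not shown in this listing; conceptually it tiles a set of base anchors over a stride-spaced grid covering the feature map, which is what `all_anchors` then concatenates across levels. A minimal sketch of that tiling under those assumptions (names here are illustrative, not the library's API):

import numpy as np

def field_of_anchors_sketch(stride, base_anchors, field_h, field_w):
    """Tile (A, 4) base anchors over a field_h x field_w grid -> (H*W*A, 4)."""
    shift_x = np.arange(field_w) * stride
    shift_y = np.arange(field_h) * stride
    sx, sy = np.meshgrid(shift_x, shift_y)
    shifts = np.stack((sx.ravel(), sy.ravel(), sx.ravel(), sy.ravel()), axis=1)
    # Broadcast: (K, 1, 4) shifts + (1, A, 4) anchors -> (K, A, 4)
    anchors = shifts[:, np.newaxis, :] + base_anchors[np.newaxis, :, :]
    return anchors.reshape((-1, 4))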
Example No. 20
def add_refine_global_mask_blobs(blobs, sampled_boxes, roidb, im_scale,
                                 batch_idx, data):
    """Add RefineNet Mask specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    dst_scale = cfg.REFINENET.SPATIAL_SCALE
    polys_gt_inds = np.where((roidb['gt_classes'] > 0)
                             & (roidb['is_crowd'] == 0))[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    # Define size variables
    inp_h, inp_w = data.shape[2], data.shape[3]
    out_h, out_w = int(inp_h * dst_scale), int(inp_w * dst_scale)

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], out_h, out_w), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        # Map each fg roi to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # narrow scale and size
        scale = im_scale * dst_scale
        im_h, im_w = roidb['height'], roidb['width']
        im_label_h, im_label_w = int(im_h * scale), int(im_w * scale)

        # add fg targets
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            poly_gt = polys_gt[fg_polys_ind]
            roi_fg = rois_fg[i]
            # Rasterize the full-image polygon mask at the narrowed scale to an
            # im_label_h x im_label_w binary image
            mask = segm_utils.polys_to_mask_scaled(poly_gt, im_h, im_w, scale)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, 0:im_label_h, 0:im_label_w] = mask

        masks = np.reshape(masks, (-1, out_h * out_w))

    else:  # If there are no fg masks (it does happen)
        # The network cannot handle empty blobs, so we must provide a mask
        # We simply take the first bg roi, give it an all -1's mask (ignore
        # label), and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because ...
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # We give it a -1's blob (ignore label)
        masks = -blob_utils.ones((1, out_h * out_w), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks,
                                                       mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Refine-Net blobs
    blobs['refined_mask_rois'] = rois_fg
    blobs['roi_has_refined_mask_int32'] = roi_has_mask
    blobs['refined_masks_int32'] = masks
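
`_expand_to_class_specific_mask_targets` is not shown in this listing. A minimal sketch, assuming it mirrors the class-specific bbox-target expansion: each roi's mask is copied into the slot of its class inside an N x (num_classes * mask_area) blob, with -1 (ignore label) everywhere else; the function and argument names below are assumptions:

import numpy as np

def expand_masks_sketch(masks, mask_class_labels, num_classes):
    """Expand (N, A) masks into class-specific (N, num_classes * A) targets."""
    num_rois, mask_area = masks.shape
    expanded = -np.ones((num_rois, num_classes * mask_area), dtype=np.int32)
    for i in range(num_rois):
        cls = int(mask_class_labels[i])
        if cls > 0:  # bg rois keep the all -1 (ignore) target
            start = mask_area * cls
            expanded[i, start:start + mask_area] = masks[i, :]
    return expanded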