示例#1
0
def compute_predicted_bboxes(rois, pred_cls, pred_loc, image_info, cfg):
    '''
    Decode per-class box regressions into detections and apply NMS separately
    for every (image, class) pair. Class index 0 is skipped (presumably the
    background class).

    :param rois: [N, k] k>=5, batch_ix, x1, y1, x2, y2
    :param pred_cls: [N, num_classes, 1, 1]
    :param pred_loc: [N, num_classes * 4, 1, 1]
    :param image_info: indexed by batch_ix; the selected row is handed to
        bbox_helper.clip_bbox together with that image's boxes
    :param cfg: config; reads 'bbox_normalize_stats_precomputed',
        'bbox_normalize_stds', 'bbox_normalize_means', 'score_thresh',
        'nms_iou_thresh' and 'top_n'
    :return: bboxes: CUDA FloatTensor [M, 7],
        batch_ix, x1, y1, x2, y2, score, cls.
        M is 0 when there are no rois or no box survives the score threshold.
    '''
    rois, pred_cls, pred_loc = map(to_np_array, [rois, pred_cls, pred_loc])
    N, num_classes = pred_cls.shape[0:2]
    assert (N == rois.shape[0])

    empty_result = np.zeros((0, 7), dtype=np.float32)
    if N == 0:
        # no rois at all: the batch size cannot even be derived from them
        return torch.from_numpy(empty_result).float().cuda()

    B = rois[:, 0].astype(np.int32).max() + 1

    # loop-invariant de-normalisation statistics
    denormalize = cfg['bbox_normalize_stats_precomputed']
    if denormalize:
        stds = np.array(cfg['bbox_normalize_stds'])[np.newaxis, :]
        means = np.array(cfg['bbox_normalize_means'])[np.newaxis, :]

    nmsed_bboxes = []
    for cls in range(1, num_classes):
        # explicit reshape instead of squeeze(): squeeze() would also drop the
        # roi axis when N == 1 and break the 2-d indexing below
        scores = pred_cls[:, cls].reshape(N)
        deltas = pred_loc[:, cls * 4:cls * 4 + 4].reshape(N, 4)
        if denormalize:
            deltas = deltas * stds + means
        bboxes = bbox_helper.compute_loc_bboxes(rois[:, 1:1 + 4], deltas)
        bboxes = np.hstack([bboxes, scores[:, np.newaxis]])
        # for each image, do nms
        for b_ix in range(B):
            rois_ix = np.where(rois[:, 0] == b_ix)[0]
            pre_scores = scores[rois_ix]
            # fancy indexing copies, so clipping does not touch `bboxes`
            pre_bboxes = bboxes[rois_ix]
            pre_bboxes[:, :4] = bbox_helper.clip_bbox(pre_bboxes[:, :4],
                                                      image_info[b_ix])
            if cfg['score_thresh'] > 0:
                keep_ix = np.where(pre_scores > cfg['score_thresh'])[0]
                pre_scores = pre_scores[keep_ix]
                pre_bboxes = pre_bboxes[keep_ix]
            if pre_scores.size == 0:
                continue
            # boxes are handed to nms in descending score order
            order = pre_scores.argsort()[::-1]
            pre_bboxes = pre_bboxes[order, :]
            keep_index = nms(
                torch.from_numpy(pre_bboxes).float().cuda(),
                cfg['nms_iou_thresh']).numpy()
            post_bboxes = pre_bboxes[keep_index]
            batch_ix = np.full(post_bboxes.shape[0], b_ix)
            batch_cls = np.full(post_bboxes.shape[0], cls)
            post_bboxes = np.hstack([
                batch_ix[:, np.newaxis], post_bboxes, batch_cls[:, np.newaxis]
            ])
            nmsed_bboxes.append(post_bboxes)

    # np.vstack([]) raises, so fall back to an empty [0, 7] array
    if nmsed_bboxes:
        nmsed_bboxes = np.vstack(nmsed_bboxes)
    else:
        nmsed_bboxes = empty_result

    if cfg['top_n'] > 0:
        # keep the top_n highest scoring detections of every image
        top_n_bboxes = []
        for b_ix in range(B):
            bboxes = nmsed_bboxes[nmsed_bboxes[:, 0] == b_ix]
            scores = bboxes[:, -2]
            order = scores.argsort()[::-1][:cfg['top_n']]
            top_n_bboxes.append(bboxes[order])
        nmsed_bboxes = np.vstack(top_n_bboxes)
    nmsed_bboxes = (torch.from_numpy(nmsed_bboxes)).float().cuda()
    return nmsed_bboxes
示例#2
0
def compute_rpn_proposals(conv_cls, conv_loc, cfg, image_info):
    '''
    Turn RPN conv outputs into scored, clipped, size-filtered and NMS-ed
    proposals for every image in the batch.

    :argument
        cfg: configs; reads 'anchor_ratios', 'anchor_scales', 'anchor_stride',
            'pre_nms_top_n', 'roi_min_size', 'nms_iou_thresh', 'post_nms_top_n'
        conv_cls: FloatTensor, [batch, num_anchors * x, h, w], conv output of classification
        conv_loc: FloatTensor, [batch, num_anchors * 4, h, w], conv output of localization
        image_info: FloatTensor or ndarray, [batch, 3], image size
    :returns
        proposals: CPU FloatTensor, 2-dim, one row per kept proposal:
            batch_ix, followed by the decoded box columns and the score
            (i.e. [N, 6] if compute_loc_bboxes returns x1, y1, x2, y2)
    '''
    batch_size, num_anchors_4, featmap_h, featmap_w = conv_loc.shape
    # [K*A, 4]
    anchors_overplane = anchor_helper.get_anchors_over_plane(
        featmap_h, featmap_w,
        cfg['anchor_ratios'], cfg['anchor_scales'], cfg['anchor_stride'])
    B = batch_size
    A = num_anchors_4 // 4
    assert(A * 4 == num_anchors_4)
    K = featmap_h * featmap_w

    # move channels last so that every row of the view is one anchor
    cls_view = conv_cls.permute(0, 2, 3, 1).contiguous().view(B, K*A, -1).cpu().numpy()
    loc_view = conv_loc.permute(0, 2, 3, 1).contiguous().view(B, K*A, 4).cpu().numpy()
    if torch.is_tensor(image_info):
        image_info = image_info.cpu().numpy()

    batch_proposals = []
    pre_nms_top_n = cfg['pre_nms_top_n']
    for b_ix in range(B):
        # last channel, so a single-channel (sigmoid) output works too
        scores = cls_view[b_ix, :, -1]
        # `>=`: np.argpartition requires kth < len(scores), so asking for
        # exactly len(scores) candidates must take the full-sort path as well
        if pre_nms_top_n <= 0 or pre_nms_top_n >= scores.shape[0]:
            order = scores.argsort()[::-1]
        else:
            # partial selection of the top candidates, then sort only those
            inds = np.argpartition(-scores, pre_nms_top_n)[:pre_nms_top_n]
            order = inds[np.argsort(-scores[inds])]
        loc_delta = loc_view[b_ix, order, :]
        loc_anchors = anchors_overplane[order, :]
        scores = scores[order]
        boxes = bbox_helper.compute_loc_bboxes(loc_anchors, loc_delta)
        boxes = bbox_helper.clip_bbox(boxes, image_info[b_ix])
        proposals = np.hstack([boxes, scores[:, np.newaxis]])
        # drop boxes whose width or height is below roi_min_size
        widths = proposals[:, 2] - proposals[:, 0] + 1
        heights = proposals[:, 3] - proposals[:, 1] + 1
        proposals = proposals[(widths >= cfg['roi_min_size'])
                              & (heights >= cfg['roi_min_size'])]
        if proposals.shape[0] > 0:
            keep_index = nms(torch.from_numpy(proposals).float().cuda(),
                             cfg['nms_iou_thresh']).numpy()
        else:
            # nothing survived the size filter, so there is nothing to suppress
            keep_index = np.zeros(0, dtype=np.int64)
        if cfg['post_nms_top_n'] > 0:
            keep_index = keep_index[:cfg['post_nms_top_n']]
        proposals = proposals[keep_index]
        batch_ix = np.full(keep_index.shape, b_ix)
        proposals = np.hstack([batch_ix[:, np.newaxis], proposals])
        batch_proposals.append(proposals)
    batch_proposals = (torch.from_numpy(np.vstack(batch_proposals))).float()
    if batch_proposals.dim() < 2:
        # unsqueeze is not in-place; the result has to be assigned
        batch_proposals = batch_proposals.unsqueeze(dim=0)
    return batch_proposals
示例#3
0
def compute_mask_targets(proposals,
                         cfg,
                         ground_truth_bboxes,
                         ground_truth_masks,
                         image_info,
                         ignore_regions=None):
    '''
    Pick the positive rois of every image and build their mask targets.

    Args:
        proposals: tensor [N, k], k>=5(b_ix, x1,y1,x2,y2, ...); the outputs are
            placed on this tensor's device
        cfg: reads 'append_gts', 'positive_iou_thresh', 'batch_size_per_image',
            'label_h', 'label_w' and 'num_classes'
        ground_truth_bboxes: [batch_size, max_gts, k], k>=5(x1,y1,x2,y2,label)
        ground_truth_masks: [batch_size, max_gts, image_h, image_w]
        image_info: [batch_size, 3], (resized_image_h, resized_image_w, resize_scale)
        ignore_regions: accepted but not used by this function
    Return:
        batch_rois: [R, 6] (b_ix, x1,y1,x2,y2, cls)
        batch_mask_labels: [R, num_classes, label_h, label_w]; every channel is
            -1 except the channel of the roi's target class, which holds the
            output of generate_mask_labels.
        When no positive roi exists in the whole batch, a single all-zero roi
        with an all -1 label is returned instead.
    '''
    proposals_device = proposals.device
    proposals = to_np_array(proposals)
    ground_truth_bboxes = to_np_array(ground_truth_bboxes)
    ground_truth_masks = to_np_array(ground_truth_masks)
    image_info = to_np_array(image_info)

    B = ground_truth_bboxes.shape[0]
    label_shape = (cfg['num_classes'], cfg['label_h'], cfg['label_w'])
    batch_rois = []
    batch_mask_labels = []

    for b_ix in range(B):
        rois = proposals[proposals[:, 0] == b_ix][:, 1:1 + 4]
        gts = ground_truth_bboxes[b_ix]
        masks = ground_truth_masks[b_ix]
        # kick out padded gts: keep boxes with x2 > x1 + 1 and y2 > y1 + 1
        # (same filter as compute_proposal_targets)
        keep_ix = np.where((gts[:, 2] > gts[:, 0] + 1)
                           & (gts[:, 3] > gts[:, 1] + 1))[0]
        if keep_ix.size == 0:
            continue
        gts = gts[keep_ix]
        masks = masks[keep_ix]
        if cfg['append_gts']:
            rois = np.vstack([rois, gts[:, :4]])
        rois = bbox_helper.clip_bbox(rois.astype(np.int32),
                                     image_info[b_ix].astype(np.int32))
        R = rois.shape[0]
        G = gts.shape[0]
        if R == 0 or G == 0:
            continue
        # [R, G]
        overlaps = bbox_helper.bbox_iou_overlaps(rois, gts)
        # [R]
        # a roi whose best IoU exceeds positive_iou_thresh is positive and is
        # matched to the gt that gives that best IoU
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps.max(axis=1)
        pos_r_ix = np.where(max_overlaps > cfg['positive_iou_thresh'])[0]
        pos_g_ix = argmax_overlaps[pos_r_ix]

        # sampling
        num_positives = pos_r_ix.shape[0]
        if num_positives == 0:
            continue
        max_rois = cfg['batch_size_per_image']
        if max_rois > 0 and num_positives > max_rois:
            keep_ix = np.random.choice(num_positives,
                                       size=max_rois,
                                       replace=False)
            pos_r_ix = pos_r_ix[keep_ix]
            pos_g_ix = pos_g_ix[keep_ix]

        # gather positive bboxes and related masks
        pos_rois = rois[pos_r_ix]
        pos_target_classes = gts[pos_g_ix][:, 4].astype(np.int64)
        pos_target_masks = masks[pos_g_ix]
        N = pos_rois.shape[0]
        pos_mask_labels = generate_mask_labels(pos_rois, pos_target_masks,
                                               cfg['label_h'], cfg['label_w'])

        # only the target-class channel of each roi carries a real label
        mask_labels = -np.ones((N,) + label_shape)
        mask_labels[np.arange(N), pos_target_classes, ...] = pos_mask_labels

        batch_idx = np.full((N, 1), b_ix)
        pos_rois = np.hstack(
            [batch_idx, pos_rois, pos_target_classes[:, np.newaxis]])

        batch_rois.append(pos_rois)
        batch_mask_labels.append(mask_labels)

    if len(batch_rois) == 0:
        # if there's no positive rois, pad zeros; 6 columns so the layout
        # matches the regular (b_ix, x1, y1, x2, y2, cls) rows
        n = 1
        batch_rois = np.zeros((n, 6), dtype=np.float32)
        batch_mask_labels = -np.ones((n,) + label_shape, dtype=np.float32)
    else:
        batch_rois = np.vstack(batch_rois)
        batch_mask_labels = np.vstack(batch_mask_labels)

    batch_rois = torch.from_numpy(batch_rois).to(proposals_device).float()
    batch_mask_labels = torch.from_numpy(batch_mask_labels).to(
        proposals_device).float()
    return batch_rois, batch_mask_labels
示例#4
0
def compute_proposal_targets(proposals,
                             cfg,
                             ground_truth_bboxes,
                             image_info,
                             ignore_regions=None,
                             use_ohem=False):
    '''
    Sample positive/negative rois per image and build their classification
    and box-regression targets.

    :argument
        proposals:[N, k], k>=5, batch_idx, x1, y1, x2, y2
        cfg: reads 'append_gts', 'positive_iou_thresh',
            'negative_iou_thresh_hi', 'negative_iou_thresh_lo',
            'ignore_iou_thresh', 'batch_size', 'positive_percent',
            'num_classes', 'bbox_normalize_stats_precomputed',
            'bbox_normalize_means', 'bbox_normalize_stds'
        ground_truth_bboxes: [batch, max_num_gts, k], k>=5, x1,y1,x2,y2,label
        image_info: indexed by batch_idx; the row is passed to clip_bbox
        ignore_regions: optional, indexed by batch_idx, rows are x1,y1,x2,y2;
            negatives lying inside them are dropped
        use_ohem: when True, keep all positives and negatives (no sampling)
    returns (all CUDA tensors, M sampled rois in total; M may be 0):
        rois: float [M, 5], batch_idx, x1, y1, x2, y2
        labels: long [M], 0 for negatives, gt label for positives
        loc_targets, loc_weights: float [M, num_classes * 4]
    '''
    proposals, ground_truth_bboxes, image_info, ignore_regions = \
        map(to_np_array, [proposals, ground_truth_bboxes, image_info, ignore_regions])
    B = ground_truth_bboxes.shape[0]
    logger.debug('proposals.shape:%s', proposals.shape)
    logger.debug('ground_truth_bboxes.shape:%s', ground_truth_bboxes.shape)

    batch_size_per_image = cfg['batch_size']
    num_classes = cfg['num_classes']
    normalize = cfg['bbox_normalize_stats_precomputed']
    if normalize:
        means = np.array(cfg['bbox_normalize_means'])[np.newaxis, :]
        stds = np.array(cfg['bbox_normalize_stds'])[np.newaxis, :]

    batch_rois = []
    batch_labels = []
    batch_loc_targets = []
    batch_loc_weights = []
    for b_ix in range(B):
        rois = proposals[proposals[:, 0] == b_ix][:, 1:1 + 4]
        gts = ground_truth_bboxes[b_ix]
        # kick out padded empty ground truth bboxes
        gts = gts[(gts[:, 2] > gts[:, 0] + 1) & (gts[:, 3] > gts[:, 1] + 1)]
        if cfg['append_gts']:
            rois = np.vstack([rois, gts[:, :4]])
        rois = bbox_helper.clip_bbox(rois, image_info[b_ix])
        R = rois.shape[0]
        G = gts.shape[0]
        if R == 0 or G == 0:
            continue
        # [R, G]
        overlaps = bbox_helper.bbox_iou_overlaps(rois, gts)

        # a roi whose best IoU with any ground-truth box exceeds
        # positive_iou_thresh is positive, matched to that best gt
        # [R]
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps.max(axis=1)
        # np.where yields sorted, unique indices - no de-duplication needed
        pos_r_ix = np.where(max_overlaps > cfg['positive_iou_thresh'])[0]
        pos_g_ix = argmax_overlaps[pos_r_ix]

        # a roi is negative if its best IoU lies in
        # [negative_iou_thresh_lo, negative_iou_thresh_hi)
        neg_r_ix = np.where((max_overlaps < cfg['negative_iou_thresh_hi'])
                            &
                            (max_overlaps >= cfg['negative_iou_thresh_lo']))[0]

        # remove negatives which located in ignore regions
        if ignore_regions is not None:
            cur_ignore = ignore_regions[b_ix]
            # remove padded ignore regions
            cur_ignore = cur_ignore[cur_ignore[:, 2] - cur_ignore[:, 0] > 1]
            if cur_ignore.shape[0] > 0:
                iof_overlaps = bbox_helper.bbox_iof_overlaps(rois, cur_ignore)
                max_iof_overlaps = iof_overlaps.max(axis=1)
                ignore_rois_ix = np.where(
                    max_iof_overlaps > cfg['ignore_iou_thresh'])[0]
                # setdiff1d keeps an integer dtype even for an empty result
                neg_r_ix = np.setdiff1d(neg_r_ix, ignore_rois_ix)

        # remove positives from negatives
        neg_r_ix = np.setdiff1d(neg_r_ix, pos_r_ix)

        # sampling
        num_positives = len(pos_r_ix)

        # keep all pos and negs if use OHEM
        if not use_ohem:
            num_pos_sampling = int(cfg['positive_percent'] *
                                   batch_size_per_image)
            if num_pos_sampling < num_positives:
                keep_ix = np.random.choice(num_positives,
                                           size=num_pos_sampling,
                                           replace=False)
                pos_r_ix = pos_r_ix[keep_ix]
                pos_g_ix = pos_g_ix[keep_ix]
                num_positives = num_pos_sampling

            num_negatives = len(neg_r_ix)
            num_neg_sampling = batch_size_per_image - num_positives
            if num_neg_sampling < num_negatives:
                keep_ix = np.random.choice(num_negatives,
                                           size=num_neg_sampling,
                                           replace=False)
                neg_r_ix = neg_r_ix[keep_ix]

        # gather positives, matched gts, and negatives
        pos_rois = rois[pos_r_ix]
        pos_target_gts = gts[pos_g_ix]
        neg_rois = rois[neg_r_ix]
        rois_sampling = np.vstack([pos_rois, neg_rois])
        num_pos, num_neg = pos_rois.shape[0], neg_rois.shape[0]
        num_sampling = num_pos + num_neg
        if num_sampling == 0:
            # neither positives nor negatives: nothing to train on, and the
            # padding below could not draw from an empty pool
            continue

        # generate targets
        pos_labels = pos_target_gts[:, 4].astype(np.int32)
        neg_labels = np.zeros(num_neg)
        labels = np.concatenate([pos_labels, neg_labels]).astype(np.int32)

        # regression targets are class specific: only the 4 slots of the
        # matched class of a positive roi are filled and weighted
        loc_targets = np.zeros([num_sampling, num_classes, 4])
        loc_weights = np.zeros([num_sampling, num_classes, 4])
        pos_loc_targets = bbox_helper.compute_loc_targets(
            pos_rois, pos_target_gts)
        if normalize:
            pos_loc_targets = (pos_loc_targets - means) / stds
        pos_rows = np.arange(num_pos)
        loc_targets[pos_rows, pos_labels, :] = pos_loc_targets
        loc_weights[pos_rows, pos_labels, :] = 1
        loc_targets = loc_targets.reshape([num_sampling, -1])
        loc_weights = loc_weights.reshape([num_sampling, -1])

        batch_ix = np.full(rois_sampling.shape[0], b_ix)
        rois_sampling = np.hstack([batch_ix[:, np.newaxis], rois_sampling])

        # pad up to batch_size_per_image by repeating randomly chosen samples
        if rois_sampling.shape[0] < batch_size_per_image:
            rep_num = batch_size_per_image - rois_sampling.shape[0]
            rep_index = np.random.choice(rois_sampling.shape[0],
                                         size=rep_num,
                                         replace=True)
            rois_sampling = np.vstack(
                [rois_sampling, rois_sampling[rep_index]])
            labels = np.concatenate([labels, labels[rep_index]])
            loc_targets = np.vstack([loc_targets, loc_targets[rep_index]])
            loc_weights = np.vstack([loc_weights, loc_weights[rep_index]])

        batch_rois.append(rois_sampling)
        batch_labels.append(labels)
        batch_loc_targets.append(loc_targets)
        batch_loc_weights.append(loc_weights)

    if batch_rois:
        all_rois = np.vstack(batch_rois)
        all_labels = np.concatenate(batch_labels)
        all_loc_targets = np.vstack(batch_loc_targets)
        all_loc_weights = np.vstack(batch_loc_weights)
    else:
        # every image was skipped; np.vstack/np.concatenate reject empty
        # lists, so build correctly shaped empty targets instead
        all_rois = np.zeros((0, 5), dtype=np.float32)
        all_labels = np.zeros((0,), dtype=np.int64)
        all_loc_targets = np.zeros((0, num_classes * 4), dtype=np.float32)
        all_loc_weights = np.zeros((0, num_classes * 4), dtype=np.float32)

    # running pos/neg statistics kept in the module-level `history`
    pos_num = int((all_labels > 0).sum())
    neg_num = all_labels.shape[0] - pos_num
    history[0] += pos_num
    history[1] += neg_num
    history_pos, history_neg = history
    history_total = history_neg + history_pos
    pos_percent = history_pos / history_total if history_total > 0 else 0.0
    neg_percent = history_neg / history_total if history_total > 0 else 0.0
    logger.debug(
        'proposal_target(pos/neg): %d=%d+%d, history ratio:%.5f/%.5f',
        pos_num + neg_num, pos_num, neg_num, pos_percent, neg_percent)

    batch_rois = torch.from_numpy(all_rois).float().cuda().contiguous()
    batch_labels = torch.from_numpy(all_labels).long().cuda().contiguous()
    batch_loc_targets = torch.from_numpy(
        all_loc_targets).float().cuda().contiguous()
    batch_loc_weights = torch.from_numpy(
        all_loc_weights).float().cuda().contiguous()

    return batch_rois, batch_labels, batch_loc_targets, batch_loc_weights
示例#5
0
def validate_single(val_loader, model, cfg):
    '''
    Run the model over val_loader in eval mode, write one line per detection
    to `results.txt.rank0` inside args.results_dir, run the dataset specific
    evaluation and return the proposal recall.

    :param val_loader: iterable of batches; element 0 is the image tensor,
        1 the image info, 2 the ground-truth boxes and the last the filenames
    :param model: callable taking the input dict; its output's 'predict'
        entry holds (proposals, bboxes)
    :param cfg: config; passed to the model, and cfg['shared']['num_classes']
        is read here
    :return: total_rc / total_gt accumulated from bbox_helper.compute_recall,
        or 0.0 when no ground truth was counted
    '''
    logger = logging.getLogger('global')

    rank, world_size = 0, 1

    # switch to evaluate mode
    model.eval()
    total_rc = 0
    total_gt = 0

    logger.info('start validate')
    os.makedirs(args.results_dir, exist_ok=True)
    # remove result files left over from runs with more ranks
    for fname in os.listdir(args.results_dir):
        if 'results.txt.rank' in fname and int(fname.split('k')[-1]) >= world_size:
            logger.info("remove %s", fname)
            os.remove(os.path.join(args.results_dir, fname))

    is_coco = args.dataset == 'coco'
    line_fmt = 'val2017/{0}.jpg {1} {2}\n' if is_coco else '{0} {1} {2}\n'
    num_classes = cfg['shared']['num_classes']
    results_path = os.path.join(args.results_dir,
                                'results.txt.rank%d' % rank)

    # `with` guarantees the file is closed even if inference raises
    with open(results_path, 'w') as fout:
        for step, batch in enumerate(val_loader):
            img = (batch[0]).cuda()
            img_info = batch[1]
            gt_boxes = batch[2]
            filenames = batch[-1]
            x = {
                'cfg': cfg,
                'image': img,
                'image_info': img_info,
                'ground_truth_bboxes': gt_boxes,
                'ignore_regions': None
            }
            batch_size = img.shape[0]
            t0 = time.time()
            outputs = model(x)['predict']
            t2 = time.time()

            proposals = outputs[0].data.cpu().numpy()
            bboxes = outputs[1].data.cpu().numpy()
            if torch.is_tensor(gt_boxes):
                gt_boxes = gt_boxes.cpu().numpy()
            for b_ix in range(batch_size):
                # file name without directory and extension
                img_id = filenames[b_ix].rsplit('/', 1)[-1].rsplit('.', 1)[0]
                img_resize_scale = img_info[b_ix, -1]
                if is_coco:
                    img_resize_scale = img_info[b_ix, 2]
                rois_per_image = proposals[proposals[:, 0] == b_ix]
                dts_per_image = bboxes[bboxes[:, 0] == b_ix]
                gts_per_image = gt_boxes[b_ix]
                # rpn recall
                num_rc, num_gt = bbox_helper.compute_recall(
                    rois_per_image[:, 1:1 + 4], gts_per_image)
                total_gt += num_gt
                total_rc += num_rc
                # keep at most the 100 highest scoring detections per image
                order = dts_per_image[:, -2].argsort()[::-1][:100]
                dts_per_image = dts_per_image[order]

                # faster-rcnn eval
                for cls in range(1, num_classes):
                    dts_per_cls = dts_per_image[dts_per_image[:, -1] == cls]
                    # drop batch_ix and cls columns -> box columns + score
                    dts_per_cls = dts_per_cls[:, 1:-1]
                    dts_per_cls = bbox_helper.clip_bbox(dts_per_cls,
                                                        img_info[b_ix, :2])
                    if len(dts_per_cls) > 0:
                        # undo the resize applied to the input image
                        dts_per_cls[:, :4] = dts_per_cls[:, :4] / img_resize_scale
                    for bx in dts_per_cls:
                        fout.write(line_fmt.format(
                            img_id, ' '.join(map(str, bx)), cls))
                    fout.flush()
            logger.info('Test: [%d/%d] Time: %.3f %d/%d',
                        step, len(val_loader), t2 - t0, total_rc, total_gt)
            print_speed(step + 1, t2 - t0, len(val_loader))

    # guard against a validation set without any ground truth
    recall = total_rc / total_gt if total_gt > 0 else 0.0
    logger.info('rpn300 recall=%f', recall)

    # eval coco ap with official python api, anything else with Cal_MAP
    if is_coco:
        eval_coco_ap(args.results_dir, 'bbox', args.val_meta_file)
    else:
        Cal_MAP(args.results_dir, args.val_meta_file, int(num_classes))

    return recall