Example #1
def im_conv_body_only(model, im, target_scale, target_max_size):
    """Runs `model.conv_body_net` on the given image `im`."""
    im_blob, im_scale, _im_info = blob_utils.get_image_blob(
        im, target_scale, target_max_size)
    workspace.FeedBlob(core.ScopedName('data'), im_blob)
    workspace.RunNet(model.conv_body_net.Proto().name)
    return im_scale
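
Every snippet on this page leans on blob_utils.get_image_blob. As a rough sketch of what these examples assume it does, based on Detectron's conventions (the pixel means and rounding details below are assumptions, not the library's verbatim code): resize the image so its shorter side reaches target_scale without the longer side exceeding target_max_size, subtract per-channel pixel means, and pack the result into an NCHW float32 blob plus an im_info row of [height, width, scale].

import cv2
import numpy as np

# Hypothetical BGR pixel means; Detectron reads these from cfg.PIXEL_MEANS
PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]], dtype=np.float32)

def get_image_blob_sketch(im, target_scale, target_max_size):
    """Illustrative stand-in for blob_utils.get_image_blob."""
    im_size_min = min(im.shape[0], im.shape[1])
    im_size_max = max(im.shape[0], im.shape[1])
    im_scale = float(target_scale) / im_size_min
    # Cap the scale so the longer side stays within target_max_size
    if round(im_scale * im_size_max) > target_max_size:
        im_scale = float(target_max_size) / im_size_max
    im = cv2.resize(im.astype(np.float32) - PIXEL_MEANS, None,
                    fx=im_scale, fy=im_scale)
    blob = im.transpose(2, 0, 1)[np.newaxis, ...]  # HWC -> NCHW
    im_info = np.array([[blob.shape[2], blob.shape[3], im_scale]],
                       dtype=np.float32)
    return blob, im_scale, im_info

Note that some forks return only (blob, im_scale), as in Examples #5 and #6 below.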
Example #2
def im_conv_body_only(model, im, target_scale, target_max_size):
    """Runs `model.conv_body_net` on the given image `im`."""
    im_blob, im_scale, _im_info = blob_utils.get_image_blob(
        im, target_scale, target_max_size
    )
    workspace.FeedBlob(core.ScopedName('data'), im_blob)
    workspace.RunNet(model.conv_body_net.Proto().name)
    return im_scale
Example #3
def _get_blobs(im, rois, target_scale, target_max_size):
    """Convert an image and RoIs within that image into network inputs."""
    blobs = {}
    blobs['data'], im_scale, blobs['im_info'] = \
        blob_utils.get_image_blob(im, target_scale, target_max_size)
    if rois is not None:
        blobs['rois'] = _get_rois_blob(rois, im_scale)
    return blobs, im_scale
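
For context, here is a minimal sketch of the companion helper _get_rois_blob as these single-image snippets assume it behaves (a simplification; Detectron's version also handles multi-scale image pyramids): scale the RoIs into network-input coordinates and prepend the batch-index column of the (batch_idx, x1, y1, x2, y2) encoding.

import numpy as np

def get_rois_blob_sketch(im_rois, im_scale):
    """Illustrative single-scale stand-in for _get_rois_blob."""
    rois = im_rois.astype(np.float32, copy=False) * im_scale
    # Single image at inference time, so the batch index is always 0
    batch_idx = np.zeros((rois.shape[0], 1), dtype=np.float32)
    return np.hstack((batch_idx, rois))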
Example #4
def _get_blobs(im, rois, target_scale, target_max_size):
    """Convert an image and RoIs within that image into network inputs."""
    blobs = {}
    blobs['data'], im_scale, blobs['im_info'] = \
        blob_utils.get_image_blob(im, target_scale, target_max_size)
    if rois is not None:
        blobs['rois'] = _get_rois_blob(rois, im_scale)
    return blobs, im_scale
Example #5
def _get_blobs(im, rois, target_scale, target_max_size):
    """Convert an image and RoIs within that image into network inputs."""
    blobs = {}
    blobs['data'], im_scale = \
        blob_utils.get_image_blob(im, target_scale, target_max_size)
    if rois is not None:
        blobs['rois'] = _get_rois_blob(rois, im_scale)
    blobs['labels'] = np.zeros((1, cfg.MODEL.NUM_CLASSES), dtype=np.int32)
    return blobs, im_scale
Example #6
def _get_blobs(im, rois, target_scale, target_max_size):
    blobs = {}
    blobs['data'], im_scale = blob_utils.get_image_blob(im, target_scale, target_max_size)
    
    if rois is not None:
        blobs['rois'] = _get_rois_blob(rois, im_scale)
    
    blobs['labels'] = np.zeros((1, cfg.MODEL.NUM_CLASSES), dtype=np.int32)
    return blobs, im_scale
Example #7
def im_detect_mask_aug(model, im, boxes, blob_conv):
    assert not cfg.TEST.MASK_AUG.SCALE_SIZE_DEP, \
        'Size dependent scaling not implemented'

    # Collect masks computed under different transformations
    masks_ts = []

    # compute masks for the original image (identity transform)
    _, im_scale_i, _ = blob_utils.get_image_blob(im, cfg.TEST.SCALE,
                                                 cfg.TEST.MAX_SIZE)
    masks_i = im_detect_mask(model, im_scale_i, boxes, blob_conv)
    masks_ts.append(masks_i)

    if cfg.TEST.MASK_AUG.H_FLIP:
        masks_hf = im_detect_mask_hflip(model, im, cfg.TEST.SCALE,
                                        cfg.TEST.MAX_SIZE, boxes, blob_conv)
        masks_ts.append(masks_hf)

    ## Compute masks at different scales
    #for scale in cfg.TEST.MASK_AUG.SCALES:
    #    max_size = cfg.TEST.MASK_AUG.MAX_SIZE
    #    masks_scl = im_detect_mask_scale(model, im, scale, max_size, boxes, blob_conv)
    #    masks_ts.append(masks_scl)

    #    if cfg.TEST.MASK_AUG.SCALE_H_FLIP:
    #        masks_scl_hf = im_detect_mask_scale(model, im, scale, max_size, boxes, blob_conv, hflip=True)
    #        masks_ts.append(masks_scl_hf)

    ## Compute masks at different aspect ratios
    #for aspect_ratio in cfg.TEST.MASK_AUG.ASPECT_RATIOS:
    #    masks_ar = im_detect_mask_aspect_ratio(model, im, aspect_ratio, boxes, blob_conv)
    #    masks_ts.append(masks_ar)

    #    if cfg.TEST.MASK_AUG.ASPECT_RATIO_H_FLIP:
    #        masks_ar_hf = im_detect_mask_aspect_ratio(model, im, aspect_ratio, boxes, blob_conv, hflip=True)
    #        masks_ts.append(masks_ar_hf)

    # Combine the predicted soft masks:
    if cfg.TEST.MASK_AUG.HEUR == 'SOFT_AVG':
        masks_c = np.mean(masks_ts, axis=0)
    elif cfg.TEST.MASK_AUG.HEUR == 'SOFT_MAX':
        masks_c = np.amax(masks_ts, axis=0)
    elif cfg.TEST.MASK_AUG.HEUR == 'LOGIT_AVG':

        def logit(y):
            return -1.0 * np.log((1.0 - y) / np.maximum(y, 1e-20))

        logit_masks = [logit(y) for y in masks_ts]
        logit_masks = np.mean(logit_masks, axis=0)
        masks_c = 1.0 / (1.0 + np.exp(-logit_masks))
    else:
        raise NotImplementedError('Heuristic {} not supported'.format(
            cfg.TEST.MASK_AUG.HEUR))
    return masks_c
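
To make the three combination heuristics concrete, here is a self-contained numpy sketch on two toy soft-mask predictions (the values are illustrative only):

import numpy as np

masks_ts = [np.array([[0.9, 0.2]]), np.array([[0.7, 0.4]])]

soft_avg = np.mean(masks_ts, axis=0)  # SOFT_AVG -> [[0.8, 0.3]]
soft_max = np.amax(masks_ts, axis=0)  # SOFT_MAX -> [[0.9, 0.4]]

def logit(y):
    return -1.0 * np.log((1.0 - y) / np.maximum(y, 1e-20))

# LOGIT_AVG: average in logit space, then map back through a sigmoid;
# this weights confident predictions more heavily than a plain mean
logit_avg = 1.0 / (1.0 + np.exp(-np.mean([logit(y) for y in masks_ts], axis=0)))
# -> approximately [[0.82, 0.29]]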
Example #8
def im_proposals(model, im):
    """Generate RPN proposals on a single image."""
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)

    if cfg.PYTORCH_VERSION_LESS_THAN_040:
        inputs['data'] = [
            Variable(torch.from_numpy(inputs['data']), volatile=True)
        ]
        inputs['im_info'] = [
            Variable(torch.from_numpy(inputs['im_info']), volatile=True)
        ]
    else:
        inputs['data'] = [torch.from_numpy(inputs['data'])]
        inputs['im_info'] = [torch.from_numpy(inputs['im_info'])]

    return_dict = model(**inputs)

    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        rois_names = ['rpn_rois_fpn' + str(l) for l in range(k_min, k_max + 1)]
        # Note the spelling: Facebook Detectron uses rpn_roi_probs_fpn.
        # This name is not used during training, so the difference is harmless.
        score_names = [
            'rpn_rois_prob_fpn' + str(l) for l in range(k_min, k_max + 1)
        ]
        # Combine predictions across all levels and retain the top scoring
        boxes = np.concatenate(
            [return_dict[roi_name].cpu().numpy() for roi_name in rois_names])
        scores = np.concatenate([
            return_dict[score_name].cpu().numpy() for score_name in score_names
        ]).squeeze()
        # Discussion: one could do NMS again after combining predictions from
        # the different FPN levels. Conceptually, it's probably the right thing
        # to do. For arbitrary reasons, the original FPN RPN implementation did
        # not do another round of NMS.
        inds = np.argsort(-scores)[:cfg.TEST.RPN_POST_NMS_TOP_N]
        scores = scores[inds]
        boxes = boxes[inds, :]
    else:
        boxes, scores = return_dict['rpn_rois'].cpu().numpy(),\
                        return_dict['rpn_roi_probs'].cpu().numpy()
        scores = scores.squeeze()

    # Column 0 is the batch index in the (batch ind, x1, y1, x2, y2) encoding,
    # so we remove it since we just want to return boxes
    # Scale proposals back to the original input image scale
    boxes = boxes[:, 1:] / im_scale
    return boxes, scores
Example #9
def im_detect_mask_scale(model,
                         im,
                         target_scale,
                         target_max_size,
                         boxes,
                         blob_conv,
                         hflip=False):
    """Computes masks at the given scale"""
    if hflip:
        masks_scl = im_detect_mask_hflip(model, im, target_scale,
                                         target_max_size, boxes, blob_conv)
    else:
        _, im_scale, _ = blob_utils.get_image_blob(im, target_scale,
                                                   target_max_size)
        masks_scl = im_detect_mask(model, im_scale, boxes, blob_conv)
    return masks_scl
Example #10
def im_detect_mask_hflip(model, im, target_scale, target_max_size, boxes,
                         blob_conv):
    """Performs mask detection on the horizontally flipped image.
    Function signature is the same as for im_detect_mask_aug.
    """
    # Compute the masks for the flipped image
    im_hf = im[:, ::-1, :]
    boxes_hf = box_utils.flip_boxes(boxes, im.shape[1])

    _, im_scale, _ = blob_utils.get_image_blob(im_hf, target_scale,
                                               target_max_size)
    # im_scale = im_conv_body_only(model, im_hf, target_scale, target_max_size)
    masks_hf = im_detect_mask(model, im_scale, boxes_hf, blob_conv)

    # Invert the predicted soft masks
    masks_inv = masks_hf[:, :, :, ::-1]

    return masks_inv
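
The flip/invert pair above relies on box_utils.flip_boxes mirroring boxes about the vertical image axis. A minimal sketch of that behavior, assuming [x1, y1, x2, y2] boxes in zero-indexed pixel coordinates (Detectron's convention):

import numpy as np

def flip_boxes_sketch(boxes, im_width):
    """Mirror [x1, y1, x2, y2] boxes horizontally in an image of width im_width."""
    flipped = boxes.copy()
    flipped[:, 0] = im_width - boxes[:, 2] - 1  # new x1 from old x2
    flipped[:, 2] = im_width - boxes[:, 0] - 1  # new x2 from old x1
    return flipped

Flipping the predicted soft masks back along their last axis (masks_hf[:, :, :, ::-1]) then realigns them with the original, unflipped image.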
Example #11
def im_proposals(model, im):
    """Generate RPN proposals on a single image."""
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))
    workspace.RunNet(model.net.Proto().name)

    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        rois_names = [
            core.ScopedName('rpn_rois_fpn' + str(l))
            for l in range(k_min, k_max + 1)
        ]
        score_names = [
            core.ScopedName('rpn_roi_probs_fpn' + str(l))
            for l in range(k_min, k_max + 1)
        ]
        blobs = workspace.FetchBlobs(rois_names + score_names)
        # Combine predictions across all levels and retain the top scoring
        boxes = np.concatenate(blobs[:len(rois_names)])
        scores = np.concatenate(blobs[len(rois_names):]).squeeze()
        # Discussion: one could do NMS again after combining predictions from
        # the different FPN levels. Conceptually, it's probably the right thing
        # to do. For arbitrary reasons, the original FPN RPN implementation did
        # not do another round of NMS.
        inds = np.argsort(-scores)[:cfg.TEST.RPN_POST_NMS_TOP_N]
        scores = scores[inds]
        boxes = boxes[inds, :]
    else:
        boxes, scores = workspace.FetchBlobs(
            [core.ScopedName('rpn_rois'),
             core.ScopedName('rpn_roi_probs')]
        )
        scores = scores.squeeze()

    # Column 0 is the batch index in the (batch ind, x1, y1, x2, y2) encoding,
    # so we remove it since we just want to return boxes
    # Scale proposals back to the original input image scale
    boxes = boxes[:, 1:] / im_scale
    return boxes, scores
Example #12
def im_proposals(model, im):
    """Generate RPN proposals on a single image."""
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32,
                                                        copy=False))
    workspace.RunNet(model.net.Proto().name)

    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        rois_names = [
            core.ScopedName('rpn_rois_fpn' + str(l))
            for l in range(k_min, k_max + 1)
        ]
        score_names = [
            core.ScopedName('rpn_roi_probs_fpn' + str(l))
            for l in range(k_min, k_max + 1)
        ]
        blobs = workspace.FetchBlobs(rois_names + score_names)
        # Combine predictions across all levels and retain the top scoring
        boxes = np.concatenate(blobs[:len(rois_names)])
        scores = np.concatenate(blobs[len(rois_names):]).squeeze()
        # Discussion: one could do NMS again after combining predictions from
        # the different FPN levels. Conceptually, it's probably the right thing
        # to do. For arbitrary reasons, the original FPN RPN implementation did
        # not do another round of NMS.
        inds = np.argsort(-scores)[:cfg.TEST.RPN_POST_NMS_TOP_N]
        scores = scores[inds]
        boxes = boxes[inds, :]
    else:
        boxes, scores = workspace.FetchBlobs(
            [core.ScopedName('rpn_rois'),
             core.ScopedName('rpn_roi_probs')])
        scores = scores.squeeze()

    # Column 0 is the batch index in the (batch ind, x1, y1, x2, y2) encoding,
    # so we remove it since we just want to return boxes
    # Scale proposals back to the original input image scale
    boxes = boxes[:, 1:] / im_scale
    return boxes, scores
Example #13
def im_detect_mask_aspect_ratio(model,
                                im,
                                aspect_ratio,
                                boxes,
                                blob_conv,
                                hflip=False):
    """Computes mask detections at the given width-relative aspect ratio"""

    # perform mask detection on the transformed image
    im_ar = image_utils.aspect_ratio_rel(im, aspect_ratio)
    boxes_ar = box_utils.aspect_ratio(boxes, aspect_ratio)

    if hflip:
        masks_ar = im_detect_mask_hflip(model, im_ar, cfg.TEST.SCALE,
                                        cfg.TEST.MAX_SIZE, boxes_ar, blob_conv)
    else:
        # Compute the scale on the transformed image so masks align with im_ar
        _, im_scale, _ = blob_utils.get_image_blob(im_ar, cfg.TEST.SCALE,
                                                   cfg.TEST.MAX_SIZE)
        masks_ar = im_detect_mask(model, im_scale, boxes_ar, blob_conv)
    return masks_ar
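
A sketch of the width-relative transform these helpers are assumed to apply, following Detectron's image_utils.aspect_ratio_rel and box_utils.aspect_ratio: the image's x axis is rescaled by aspect_ratio while the y axis is left untouched, and the boxes' x coordinates are rescaled to match.

import cv2
import numpy as np

def aspect_ratio_rel_sketch(im, aspect_ratio):
    """Rescale only the image width by aspect_ratio."""
    return cv2.resize(im, None, fx=aspect_ratio, fy=1.0)

def boxes_aspect_ratio_sketch(boxes, aspect_ratio):
    """Rescale the x coordinates of [x1, y1, x2, y2] boxes to match."""
    boxes_ar = boxes.copy()
    boxes_ar[:, 0::4] *= aspect_ratio  # x1
    boxes_ar[:, 2::4] *= aspect_ratio  # x2
    return boxes_ar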
Example #14
def im_proposals(model, im, roidb=None):
    """Generate RPN proposals on a single image."""
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    inputs['data'] = [torch.from_numpy(inputs['data'])]
    inputs['im_info'] = [torch.from_numpy(inputs['im_info'])]
    if roidb is not None:
        inputs['roidb'] = [[roidb]]
    return_dict = model(**inputs)

    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        rois = [
            return_dict['rpn_rois_fpn' + str(l)]
            for l in range(k_min, k_max + 1)
        ]
        scores = [
            return_dict['rpn_rois_prob_fpn' + str(l)]
            for l in range(k_min, k_max + 1)
        ]
        # Combine predictions across all levels and retain the top scoring
        boxes = np.concatenate(rois)
        scores = np.concatenate(scores).squeeze()
        # Discussion: one could do NMS again after combining predictions from
        # the different FPN levels. Conceptually, it's probably the right thing
        # to do. For arbitrary reasons, the original FPN RPN implementation did
        # not do another round of NMS.
        inds = np.argsort(-scores)[:cfg.TEST.RPN_POST_NMS_TOP_N]
        scores = scores[inds]
        boxes = boxes[inds, :]
    else:
        boxes = return_dict['rpn_rois'].data.cpu().numpy()
        scores = return_dict['rpn_roi_probs'].data.cpu().numpy().squeeze()

    # Column 0 is the batch index in the (batch ind, x1, y1, x2, y2) encoding,
    # so we remove it since we just want to return boxes
    # Scale proposals back to the original input image scale
    boxes = boxes[:, 1:] / im_scale
    return boxes, scores
Example #15
def im_classify_bbox(model, im, box_proposals, timers=None):
    """Generate RetinaNet detections on a single image."""
    if timers is None:
        timers = defaultdict(Timer)

    timers['im_detect_bbox'].tic()
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    # Construct the network's RoI input from the box proposals

    sampled_rois = box_proposals * inputs['im_info'][0, 2]
    repeated_batch_idx = blob_utils.zeros((sampled_rois.shape[0], 1))
    sampled_rois = np.hstack((repeated_batch_idx, sampled_rois))
    inputs['rois'] = sampled_rois
    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_ROIS:
        _add_multilevel_rois(inputs)

    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v)

    workspace.RunNet(model.net.Proto().name)
    if cfg.MODEL.TYPE == 'region_classification':
        cls_prob = core.ScopedName('cls_prob')
    elif cfg.MODEL.TYPE == 'region_memory':
        cls_prob = core.ScopedName('final/cls_prob')
    else:
        raise NotImplementedError
    cls_scores = workspace.FetchBlob(cls_prob)

    timers['im_detect_bbox'].toc()

    timers['misc_bbox'].tic()
    timers['misc_bbox'].toc()

    return cls_scores
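
The _add_multilevel_rois step above distributes the RoIs across FPN levels. A hedged sketch of the level-assignment heuristic it is assumed to use (the FPN paper's rule; Detectron exposes the canonical level and scale as cfg.FPN.ROI_CANONICAL_LEVEL and cfg.FPN.ROI_CANONICAL_SCALE):

import numpy as np

def map_rois_to_fpn_levels_sketch(rois, k_min, k_max, lvl0=4, s0=224):
    """Assign each [x1, y1, x2, y2] RoI to an FPN level by its scale."""
    areas = ((rois[:, 2] - rois[:, 0] + 1) *
             (rois[:, 3] - rois[:, 1] + 1))
    # FPN paper heuristic: lvl = floor(lvl0 + log2(sqrt(area) / s0))
    lvls = np.floor(lvl0 + np.log2(np.sqrt(areas) / s0 + 1e-6))
    return np.clip(lvls, k_min, k_max).astype(np.int32)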
Example #16
def get_model(cfg_file, weights_file):
    """Build and initialize a model from the given config and weights files."""
    merge_cfg_from_file(cfg_file)
    cfg.TRAIN.WEIGHTS = ''  # NOTE: do not download pretrained model weights
    cfg.TEST.WEIGHTS = weights_file
    cfg.NUM_GPUS = 1
    assert_and_infer_cfg()
    # Build the model according to the cfg
    model = initialize_model_from_cfg(weights_file)
    return model


if __name__ == '__main__':
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    args = parse_args()
    model = get_model(args.cfg, args.wts)
    img = cv2.imread(args.img)
    #im_scale = im_conv_body_only(model,img,cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    im_blob, im_scale, _im_info = blob_utils.get_image_blob(
        img, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    with c2_utils.NamedCudaScope(0):
        #  workspace.FeedBlob(core.ScopedName('data'), im_blob)
        #  workspace.RunNet(model.net.Proto().name)
        #  blob = workspace.FetchBlob('rois')
        cls_b, _, _ = infer_engine.im_detect_all(model, img, None)
        blobs = workspace.Blobs()
        print(blobs)
        mask_logits = workspace.FetchBlob(core.ScopedName('mask_logits'))
        # print(mask_logits)
        print(mask_logits.shape)
    np.save('/data1/shuai/adas/code/mask_logits.npy', mask_logits)
def im_detect_bbox(model, im, timers=None):
    """Generate RetinaNet detections on a single image."""
    if timers is None:
        timers = defaultdict(Timer)
    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    #     cls_probs, box_preds = [], []
    #     for lvl in range(k_min, k_max + 1):
    #         suffix = 'fpn{}'.format(lvl)
    #         cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
    #         box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    #     for k, v in inputs.items():
    #         workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))

    #     workspace.RunNet(model.net.Proto().name)
    #     cls_probs = workspace.FetchBlobs(cls_probs)
    #     box_preds = workspace.FetchBlobs(box_preds)
    return_dict = model(**inputs)

    cls_probs = return_dict['cls_score']
    box_preds = return_dict['bbox_pred']
    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2.**lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        cls_prob = cls_prob.reshape((  # [1, 9, 80, 112, 160]
            cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
            cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape((  # [1, 9, 4, 112, 160]
            box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            cls_prob = cls_prob[:, :, 1:, :, :]  # drop the background channel

        cls_prob_ravel = cls_prob.data.cpu().numpy().squeeze().ravel()
        box_pred = box_pred.data.cpu().numpy()
        # In some cases (especially for very small image sizes), it's possible
        # that candidate_inds is empty if we impose the 0.05 threshold at all
        # levels, which would leave no detections for this image. Hence, for
        # the top level, which has the smallest spatial resolution, we drop
        # the threshold to 0.0.
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if (len(candidate_inds) == 0):
            continue

        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))

        inds = np.argpartition(cls_prob_ravel[candidate_inds], -pre_nms_topn)[
            -pre_nms_topn:]  # keep the top pre_nms_topn of the above-threshold scores
        inds = candidate_inds[inds]

        # Map the flat indices back to 5-d indices into cls_prob:
        # (batch, anchor_id, class, y, x)
        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        scores = cls_prob[:, anchor_ids, classes, y, x]

        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            box_cls_inds = classes * 4
            box_deltas = np.vstack([
                box_pred[0, ind:ind + 4, yi, xi]
                for ind, yi, xi in zip(box_cls_inds, y, x)
            ])
        pred_boxes = (box_utils.bbox_transform(boxes, box_deltas)
                      if cfg.TEST.BBOX_REG else boxes)
        pred_boxes /= im_scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])
    timers['im_detect_bbox'].toc()

    # Combine predictions across all levels and retain the top scoring by class
    timers['misc_bbox'].tic()
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # do class specific nms here
        keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    # detections (N, 6) format:
    #   detections[:, :4] - boxes
    #   detections[:, 4] - scores
    #   detections[:, 5] - classes
    detections = np.vstack(detections)
    # sort all again
    inds = np.argsort(-detections[:, 4])
    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

    # Convert the detections to image cls_ format (see core/test_engine.py)
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]
    for c in range(1, num_classes):
        inds = np.where(detections[:, 5] == c)[0]
        cls_boxes[c] = detections[inds, :5]
    timers['misc_bbox'].toc()

    return cls_boxes
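
The box decoding in im_detect_bbox above goes through box_utils.bbox_transform. As a sketch of the standard Faster R-CNN parameterization it implements (with the delta weights, all 1.0 by default, omitted for brevity):

import numpy as np

def bbox_transform_sketch(boxes, deltas):
    """Decode (dx, dy, dw, dh) deltas against [x1, y1, x2, y2] base boxes."""
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx, dy, dw, dh = deltas[:, 0], deltas[:, 1], deltas[:, 2], deltas[:, 3]
    pred_ctr_x = dx * widths + ctr_x
    pred_ctr_y = dy * heights + ctr_y
    pred_w = np.exp(dw) * widths
    pred_h = np.exp(dh) * heights

    pred = np.zeros_like(deltas, dtype=np.float32)
    pred[:, 0] = pred_ctr_x - 0.5 * pred_w        # x1
    pred[:, 1] = pred_ctr_y - 0.5 * pred_h        # y1
    pred[:, 2] = pred_ctr_x + 0.5 * pred_w - 1.0  # x2
    pred[:, 3] = pred_ctr_y + 0.5 * pred_h - 1.0  # y2
    return pred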