示例#1
0
def _get_blobs(im,
               rois,
               target_scale,
               target_max_size,
               pose_model=None,
               entry=None):
    """Convert an image and RoIs within that image into network inputs."""
    blobs = {}
    if entry is not None:  # add pose to input
        blobs['data'], im_scale, blobs['im_info'], pose_blob, pose_line = \
            blob_utils.get_image_pose_blob(im, target_scale, target_max_size, entry)
        if 'ATR' in cfg.TEST.DATASETS[0]:
            blobs['pose_pred_4'], blobs['pose_pred_8'], blobs[
                'pose_pred_16'], blobs['pose_pred_32'] = _resize_pose_blob(
                    pose_blob, 26)
        else:
            blobs['pose_pred_4'], blobs['pose_pred_8'], blobs[
                'pose_pred_16'], blobs['pose_pred_32'] = _resize_pose_blob(
                    pose_blob, 26)
        blobs['pose_line_8'], blobs['pose_line_16'] = _resize_poseline_blob(
            pose_line)
    else:  # no pose input
        blobs['data'], im_scale, blobs['im_info'] = \
            blob_utils.get_image_blob(im, target_scale, target_max_size)
    if rois is not None:
        blobs['rois'] = _get_rois_blob(rois, im_scale)
    if pose_model is not None:
        blobs['pose_pred'] = np.asarray(pose_model.pred_pose_one_img(im),
                                        dtype=np.float32)

    return blobs, im_scale
def im_conv_body_only(model, im, target_scale, target_max_size):
    """Runs `model.conv_body_net` on the given image `im`."""
    im_blob, im_scale, _im_info = blob_utils.get_image_blob(
        im, target_scale, target_max_size)
    workspace.FeedBlob(core.ScopedName('data'), im_blob)
    workspace.RunNet(model.conv_body_net.Proto().name)
    return im_scale
示例#3
0
def im_extract_features(model, im, ffpn_levels, timers=None):
    """
    Extracts high level features listed in levels from model for the image im.
    """
    if timers is None:
        timers = defaultdict(Timer)
    timers['im_extract_features'].tic()

    # Get inputs to the caffe2 model
    blobs = {}
    blobs['data'], im_scale, blobs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)

    for k, v in blobs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32,
                                                        copy=False))

    # Run the net forward
    workspace.RunNet(model.backbone.Proto().name)

    # Extract the features
    features = {}
    for feat in ffpn_levels:
        features[feat] = workspace.FetchBlob(core.ScopedName(feat))
    im_info = blobs['im_info']

    return features, im_info, im_scale, im.shape
示例#4
0
def _get_blobs(im, rois, target_scale, target_max_size):
    """Convert an image and RoIs within that image into network inputs."""
    blobs = {}
    blobs['data'], im_scale, blobs['im_info'] = \
        blob_utils.get_image_blob(im, target_scale, target_max_size)
    if rois is not None:
        blobs['rois'] = _get_rois_blob(rois, im_scale)
    return blobs, im_scale
示例#5
0
def _get_blobs(im, rois, target_scale, target_max_size):
    """Convert an image and RoIs within that image into network inputs."""
    blobs = {}
    blobs['data'], im_scale, blobs['im_info'] = \
        blob_utils.get_image_blob(im, target_scale, target_max_size)
    if rois is not None:
        blobs['rois'] = _get_rois_blob(rois, im_scale)
    return blobs, im_scale
示例#6
0
def im_conv_body_only(model, im, target_scale, target_max_size):
    """Runs `model.conv_body_net` on the given image `im`."""
    im_blob, im_scale, _im_info = blob_utils.get_image_blob(
        im, target_scale, target_max_size
    )
    workspace.FeedBlob(core.ScopedName('data'), im_blob)
    workspace.RunNet(model.conv_body_net.Proto().name)
    return im_scale
def im_proposals(model, im):
    """Generate RPN proposals on a single image."""
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32,
                                                        copy=False))
    workspace.RunNet(model.net.Proto().name)

    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        rois_names = [
            core.ScopedName('rpn_rois_fpn' + str(l))
            for l in range(k_min, k_max + 1)
        ]
        score_names = [
            core.ScopedName('rpn_roi_probs_fpn' + str(l))
            for l in range(k_min, k_max + 1)
        ]
        blobs = workspace.FetchBlobs(rois_names + score_names)
        # Combine predictions across all levels and retain the top scoring
        boxes = np.concatenate(blobs[:len(rois_names)])
        scores = np.concatenate(blobs[len(rois_names):]).squeeze()
        # Discussion: one could do NMS again after combining predictions from
        # the different FPN levels. Conceptually, it's probably the right thing
        # to do. For arbitrary reasons, the original FPN RPN implementation did
        # not do another round of NMS.
        inds = np.argsort(-scores)[:cfg.TEST.RPN_POST_NMS_TOP_N]
        scores = scores[inds]
        boxes = boxes[inds, :]
    else:
        boxes, scores = workspace.FetchBlobs(
            [core.ScopedName('rpn_rois'),
             core.ScopedName('rpn_roi_probs')])
        scores = scores.squeeze()

    # Column 0 is the batch index in the (batch ind, x1, y1, x2, y2) encoding,
    # so we remove it since we just want to return boxes
    # Scale proposals back to the original input image scale
    boxes = boxes[:, 1:] / im_scale
    return boxes, scores
示例#8
0
def im_proposals(model, im):
    """Generate RPN proposals on a single image."""
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))
    workspace.RunNet(model.net.Proto().name)

    if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_RPN:
        k_max = cfg.FPN.RPN_MAX_LEVEL
        k_min = cfg.FPN.RPN_MIN_LEVEL
        rois_names = [
            core.ScopedName('rpn_rois_fpn' + str(l))
            for l in range(k_min, k_max + 1)
        ]
        score_names = [
            core.ScopedName('rpn_roi_probs_fpn' + str(l))
            for l in range(k_min, k_max + 1)
        ]
        blobs = workspace.FetchBlobs(rois_names + score_names)
        # Combine predictions across all levels and retain the top scoring
        boxes = np.concatenate(blobs[:len(rois_names)])
        scores = np.concatenate(blobs[len(rois_names):]).squeeze()
        # Discussion: one could do NMS again after combining predictions from
        # the different FPN levels. Conceptually, it's probably the right thing
        # to do. For arbitrary reasons, the original FPN RPN implementation did
        # not do another round of NMS.
        inds = np.argsort(-scores)[:cfg.TEST.RPN_POST_NMS_TOP_N]
        scores = scores[inds]
        boxes = boxes[inds, :]
    else:
        boxes, scores = workspace.FetchBlobs(
            [core.ScopedName('rpn_rois'),
             core.ScopedName('rpn_roi_probs')]
        )
        scores = scores.squeeze()

    # Column 0 is the batch index in the (batch ind, x1, y1, x2, y2) encoding,
    # so we remove it since we just want to return boxes
    # Scale proposals back to the original input image scale
    boxes = boxes[:, 1:] / im_scale
    return boxes, scores
示例#9
0
def im_conv_body_only(model, im, target_scale, target_max_size):
    """Runs `model.conv_body_net` on the given image `im`."""
    im_blob, im_scale, _ = blob_utils.get_image_blob(im, target_scale,
                                                     target_max_size)
    workspace.FeedBlob(core.ScopedName('data'), im_blob)
    if os.environ.get('INT8INFO') == "1":
        algorithm = AbsmaxCalib()
        kind = os.environ.get('INT8CALIB')
        if kind == "moving_average":
            ema_alpha = 0.5
            algorithm = EMACalib(ema_alpha)
        elif kind == "kl_divergence":
            kl_iter_num_for_range = int(os.environ.get('INT8KLNUM'))
            if not kl_iter_num_for_range:
                kl_iter_num_for_range = 100
            algorithm = KLCalib(kl_iter_num_for_range)
        calib = Calibrator(algorithm)
        calib.RunCalibIter(workspace, model.conv_body_net.Proto())
    else:
        workspace.RunNet(model.conv_body_net.Proto().name)
    return im_scale
def im_detect_bbox(model, im, timers=None):
    """Generate RetinaNet detections on a single image."""
    if timers is None:
        timers = defaultdict(Timer)
    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32,
                                                        copy=False))

    workspace.RunNet(model.net.Proto().name)
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2.**lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        cls_prob = cls_prob.reshape(
            (cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
             cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape(
            (box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            cls_prob = cls_prob[:, :, 1::, :, :]

        cls_prob_ravel = cls_prob.ravel()
        # In some cases [especially for very small img sizes], it's possible that
        # candidate_ind is empty if we impose threshold 0.05 at all levels. This
        # will lead to errors since no detections are found for this image. Hence,
        # for lvl 7 which has small spatial resolution, we take the threshold 0.0
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if (len(candidate_inds) == 0):
            continue

        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
        inds = np.argpartition(cls_prob_ravel[candidate_inds],
                               -pre_nms_topn)[-pre_nms_topn:]
        inds = candidate_inds[inds]

        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        scores = cls_prob[:, anchor_ids, classes, y, x]

        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            box_cls_inds = classes * 4
            box_deltas = np.vstack([
                box_pred[0, ind:ind + 4, yi, xi]
                for ind, yi, xi in zip(box_cls_inds, y, x)
            ])
        pred_boxes = (box_utils.bbox_transform(boxes, box_deltas)
                      if cfg.TEST.BBOX_REG else boxes)
        pred_boxes /= im_scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])
    timers['im_detect_bbox'].toc()

    # Combine predictions across all levels and retain the top scoring by class
    timers['misc_bbox'].tic()
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # do class specific nms here
        keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    # detections (N, 6) format:
    #   detections[:, :4] - boxes
    #   detections[:, 4] - scores
    #   detections[:, 5] - classes
    detections = np.vstack(detections)
    # sort all again
    inds = np.argsort(-detections[:, 4])
    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

    # Convert the detections to image cls_ format (see core/test_engine.py)
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]
    for c in range(1, num_classes):
        inds = np.where(detections[:, 5] == c)[0]
        cls_boxes[c] = detections[inds, :5]
    timers['misc_bbox'].toc()

    return cls_boxes
示例#11
0
def im_detect_bbox(model, im, timers=None):
    """Generate RetinaNet detections on a single image."""
    if timers is None:
        timers = defaultdict(Timer)
    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))

    workspace.RunNet(model.net.Proto().name)
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2. ** lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        cls_prob = cls_prob.reshape((
            cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
            cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape((
            box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            cls_prob = cls_prob[:, :, 1::, :, :]

        cls_prob_ravel = cls_prob.ravel()
        # In some cases [especially for very small img sizes], it's possible that
        # candidate_ind is empty if we impose threshold 0.05 at all levels. This
        # will lead to errors since no detections are found for this image. Hence,
        # for lvl 7 which has small spatial resolution, we take the threshold 0.0
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if (len(candidate_inds) == 0):
            continue

        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
        inds = np.argpartition(
            cls_prob_ravel[candidate_inds], -pre_nms_topn)[-pre_nms_topn:]
        inds = candidate_inds[inds]

        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        scores = cls_prob[:, anchor_ids, classes, y, x]

        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            box_cls_inds = classes * 4
            box_deltas = np.vstack(
                [box_pred[0, ind:ind + 4, yi, xi]
                 for ind, yi, xi in zip(box_cls_inds, y, x)]
            )
        pred_boxes = (
            box_utils.bbox_transform(boxes, box_deltas)
            if cfg.TEST.BBOX_REG else boxes)
        pred_boxes /= im_scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])
    timers['im_detect_bbox'].toc()

    # Combine predictions across all levels and retain the top scoring by class
    timers['misc_bbox'].tic()
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # do class specific nms here
        keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    # detections (N, 6) format:
    #   detections[:, :4] - boxes
    #   detections[:, 4] - scores
    #   detections[:, 5] - classes
    detections = np.vstack(detections)
    # sort all again
    inds = np.argsort(-detections[:, 4])
    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

    # Convert the detections to image cls_ format (see core/test_engine.py)
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]
    for c in range(1, num_classes):
        inds = np.where(detections[:, 5] == c)[0]
        cls_boxes[c] = detections[inds, :5]
    timers['misc_bbox'].toc()

    return cls_boxes
示例#12
0
def im_detect_bbox(model, im, timers=None, model1=None):
    """Generate RetinaNet detections on a single image."""
    if timers is None:
        timers = defaultdict(Timer)

    if model1 is None and os.environ.get('COSIM'):
        print("cosim must has model1")

    fp32_ws_name = "__fp32_ws__"
    int8_ws_name = "__int8_ws__"
    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    timers['data1'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], im_scale, inputs['im_info'] = \
        blob_utils.get_image_blob(im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, cfg.TEST.SIZEFIX)
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        if os.environ.get('COSIM'):
            workspace.SwitchWorkspace(int8_ws_name, True)
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32,
                                                        copy=False))
        if os.environ.get('COSIM'):
            workspace.SwitchWorkspace(fp32_ws_name, True)
            workspace.FeedBlob(core.ScopedName(k),
                               v.astype(np.float32, copy=False))
    timers['data1'].toc()
    if os.environ.get('EPOCH2OLD') == "1":
        workspace.RunNet(model.net.Proto().name)
    timers['run'].tic()
    if os.environ.get('INT8INFO') == "1":
        algorithm = AbsmaxCalib()
        kind = os.environ.get('INT8CALIB')
        if kind == "moving_average":
            ema_alpha = 0.5
            algorithm = EMACalib(ema_alpha)
        elif kind == "kl_divergence":

            kl_iter_num_for_range = os.environ.get('INT8KLNUM')
            if not kl_iter_num_for_range:
                kl_iter_num_for_range = 100
            kl_iter_num_for_range = int(kl_iter_num_for_range)
            algorithm = KLCalib(kl_iter_num_for_range)
        calib = Calibrator(algorithm)
        calib.RunCalibIter(workspace, model.net.Proto())
    else:
        if os.environ.get('COSIM'):
            with open("int8.txt", "wb") as p:
                p.write(str(model.net.Proto()))
            with open("fp32.txt", "wb") as p:
                p.write(str(model1.net.Proto()))
            for i in range(len(model.net.Proto().op)):
                workspace.SwitchWorkspace(int8_ws_name)
                int8_inputs = []
                for inp in model.net.Proto().op[i].input:
                    int8_inputs.append(workspace.FetchBlob(str(inp)))
                logging.warning(" opint8[{0}] is  {1}".format(
                    i,
                    model.net.Proto().op[i]))
                workspace.RunOperatorOnce(model.net.Proto().op[i])
                int8_results = []
                for res in model.net.Proto().op[i].output:
                    int8_results.append(workspace.FetchBlob(str(res)))
                workspace.SwitchWorkspace(fp32_ws_name)
                fp32_inputs = []
                for inp1 in model1.net.Proto().op[i].input:
                    fp32_inputs.append(workspace.FetchBlob(str(inp1)))
                logging.warning(" opfp32[{0}] is  {1}".format(
                    i,
                    model1.net.Proto().op[i]))
                workspace.RunOperatorOnce(model1.net.Proto().op[i])
                fp32_results = []
                for res1 in model1.net.Proto().op[i].output:
                    fp32_results.append(workspace.FetchBlob(str(res1)))
                if len(int8_inputs) != len(fp32_inputs):
                    logging.error("Wrong number of inputs")
                    return
                if len(int8_results) != len(fp32_results):
                    logging.error("Wrong number of outputs")
                    return
                logging.warning("begin to check op[{}] {} input".format(
                    i,
                    model.net.Proto().op[i].type))
                for k in range(len(int8_inputs)):
                    if model.net.Proto().op[i].input[k][0] == '_':
                        continue
                    #assert_allclose(int8_inputs[k], fp32_inputs[k], **tol)
                logging.warning("pass checking op[{0}] {1} input".format(
                    i,
                    model.net.Proto().op[i].type))
                logging.warning("begin to check op[{0}] {1} output".format(
                    i,
                    model.net.Proto().op[i].type))
                for j, int8_result in enumerate(int8_results):
                    if model.net.Proto().op[i].output[j][0] == '_':
                        continue
                    #logging.warning("int8_outputis {} and fp32 output is {} ".format(int8_results[j], fp32_results[j]))
                    #if not compare_utils.assert_allclose(int8_results[j], fp32_results[j], **tol):
                    if not compare_utils.assert_compare(
                            int8_result, fp32_results[j], 1e-01,
                            os.environ.get('COSIM')):
                        for k, int8_input in enumerate(int8_inputs):
                            logging.warning("int8_input[{}] is {}".format(
                                k, int8_input))
                            logging.warning("fp32_input[{}] is {}".format(
                                k, fp32_inputs[k]))

                logging.warning("pass checking op[{0}] {1} output".format(
                    i,
                    model.net.Proto().op[i].type))
        else:
            workspace.RunNet(model.net.Proto().name)
    timers['run'].toc()
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    batch_size = cls_probs[0].shape[0]
    boxes_all_list = [boxes_all] * batch_size
    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2.**lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        cls_prob = cls_prob.reshape(
            (cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
             cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape(
            (box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            cls_prob = cls_prob[:, :, 1::, :, :]

        for i in range(batch_size):
            cls_prob_ravel = cls_prob[i, :].ravel()

            # In some cases [especially for very small img sizes], it's possible that
            # candidate_ind is empty if we impose threshold 0.05 at all levels. This
            # will lead to errors since no detections are found for this image. Hence,
            # for lvl 7 which has small spatial resolution, we take the threshold 0.0
            th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
            candidate_inds = np.where(cls_prob_ravel > th)[0]
            if (len(candidate_inds) == 0):
                continue

            pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N,
                               len(candidate_inds))
            inds = np.argpartition(cls_prob_ravel[candidate_inds],
                                   -pre_nms_topn)[-pre_nms_topn:]
            inds = candidate_inds[inds]

            inds_4d = np.array(np.unravel_index(
                inds, (cls_prob[i, :]).shape)).transpose()
            classes = inds_4d[:, 1]
            anchor_ids, y, x = inds_4d[:, 0], inds_4d[:, 2], inds_4d[:, 3]
            scores = cls_prob[i, anchor_ids, classes, y, x]
            boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
            boxes *= stride
            boxes += cell_anchors[anchor_ids, :]

            if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
                box_deltas = box_pred[i, anchor_ids, :, y, x]
            else:
                box_cls_inds = classes * 4
                box_deltas = np.vstack([
                    box_pred[i, ind:ind + 4, yi, xi]
                    for ind, yi, xi in zip(box_cls_inds, y, x)
                ])
            pred_boxes = (box_utils.bbox_transform(boxes, box_deltas)
                          if cfg.TEST.BBOX_REG else boxes)
            pred_boxes /= im_scale
            pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im[0].shape)
            box_scores = np.zeros((pred_boxes.shape[0], 5))
            box_scores[:, 0:4] = pred_boxes
            box_scores[:, 4] = scores

            for cls in range(1, cfg.MODEL.NUM_CLASSES):
                inds = np.where(classes == cls - 1)[0]
                if len(inds) > 0:
                    boxes_all_list[i][cls].extend(box_scores[inds, :])

    timers['im_detect_bbox'].toc()

    cls_boxes_list = []
    for i in range(batch_size):
        boxes_all = boxes_all_list[i]
        # Combine predictions across all levels and retain the top scoring by class
        timers['misc_bbox'].tic()
        detections = []
        for cls, boxes in boxes_all.items():
            cls_dets = np.vstack(boxes).astype(dtype=np.float32)
            # do class specific nms here
            keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            out = np.zeros((len(keep), 6))
            out[:, 0:5] = cls_dets
            out[:, 5].fill(cls)
            detections.append(out)

        # detections (N, 6) format:
        #   detections[:, :4] - boxes
        #   detections[:, 4] - scores
        #   detections[:, 5] - classes
        detections = np.vstack(detections)
        # sort all again
        inds = np.argsort(-detections[:, 4])
        detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

        # Convert the detections to image cls_ format (see core/test_engine.py)
        num_classes = cfg.MODEL.NUM_CLASSES
        cls_boxes = [[] for _ in range(cfg.MODEL.NUM_CLASSES)]
        for c in range(1, num_classes):
            inds = np.where(detections[:, 5] == c)[0]
            cls_boxes[c] = detections[inds, :5]
        cls_boxes_list.append(cls_boxes)

    timers['misc_bbox'].toc()

    return cls_boxes_list