Exemplo n.º 1
0
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Run detection on a batch whose outputs pair a current and a reference frame.

    For every device output the concatenated RoIs are split into two equal
    halves, class scores and box deltas are taken from index 0 (current) and
    index 1 (reference) of the network outputs, and the boxes are decoded,
    clipped to the last two dims of the input tensor and divided by ``scale``.

    :param predictor: object whose ``predict(data_batch)`` yields one output
        dict per entry of ``data_batch.data``
    :param data_batch: batch object exposing ``data``
    :param data_names: names zipped with each entry of ``data_batch.data``
    :param scales: one scale factor per output
    :param cfg: config; only ``cfg.TEST.HAS_RPN`` is read here
    :return: ``(scores_all, pred_boxes_all, data_dict_all)``. The reference
        frame results are computed but, as before, not returned.
    """
    output_all = predictor.predict(data_batch)

    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]
    scores_all = []
    pred_boxes_all = []
    ref_scores_all = []
    ref_pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN:
            concat_rois = output['concat_rois_output'].asnumpy()[:, 1:]
        else:
            # Fix: this branch used to assign `rois`, leaving `concat_rois`
            # unbound and crashing at the split below.
            # NOTE(review): presumably the precomputed rois use the same
            # "current half, then reference half" layout - verify against
            # the data loader.
            concat_rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = data_dict['data'].shape

        # First half of the rows -> current frame, second half -> reference.
        rois, ref_rois = np.split(concat_rois, 2)

        # Convert each output once and index it for both frames.
        cls_prob = output['cls_prob_reshape_output'].asnumpy()
        bbox_deltas_all = output['bbox_pred_reshape_output'].asnumpy()
        scores, ref_scores = cls_prob[0], cls_prob[1]
        bbox_deltas, ref_bbox_deltas = bbox_deltas_all[0], bbox_deltas_all[1]

        # post processing
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])
        pred_boxes /= scale

        ref_pred_boxes = bbox_pred(ref_rois, ref_bbox_deltas)
        ref_pred_boxes = clip_boxes(ref_pred_boxes, im_shape[-2:])
        ref_pred_boxes /= scale

        pred_boxes_all.append(pred_boxes)
        scores_all.append(scores)
        ref_pred_boxes_all.append(ref_pred_boxes)
        ref_scores_all.append(ref_scores)
    return scores_all, pred_boxes_all, data_dict_all
Exemplo n.º 2
0
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Predict on a batch and return per-output scores and rescaled boxes.

    When ``cfg.TEST.LEARN_NMS`` is set, boxes and scores are read directly
    from the ``learn_nms_sorted_bbox`` / ``nms_final_score_output`` outputs;
    otherwise boxes are decoded from RoIs and deltas and clipped to the last
    two dims of the input tensor. In both cases boxes are divided by the
    per-output scale.

    :param predictor: object exposing ``predict(data_batch)``
    :param data_batch: batch object exposing ``data``
    :param data_names: names zipped with each entry of ``data_batch.data``
    :param scales: one scale factor per output
    :param cfg: config providing ``TEST.HAS_RPN``, ``TEST.LEARN_NMS`` and
        ``network.ROIDispatch``
    :return: ``(scores_all, pred_boxes_all, data_dict_all)``
    """
    net_outputs = predictor.predict(data_batch)
    data_dict_all = [dict(zip(data_names, blobs)) for blobs in data_batch.data]

    scores_all, pred_boxes_all = [], []
    for net_out, blob_dict, im_scale in zip(net_outputs, data_dict_all, scales):
        # RoIs come from the network output or from the input blobs; the
        # leading column is dropped either way.
        rois_from_net = cfg.TEST.HAS_RPN or cfg.network.ROIDispatch
        if rois_from_net:
            rois = net_out['rois_output'].asnumpy()[:, 1:]
        else:
            rois = blob_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = blob_dict['data'].shape

        if cfg.TEST.LEARN_NMS:
            # The network already emits final boxes and scores.
            boxes = net_out['learn_nms_sorted_bbox'].asnumpy()
            scores = net_out['nms_final_score_output'].asnumpy()
        else:
            scores = net_out['cls_prob_reshape_output'].asnumpy()[0]
            deltas = net_out['bbox_pred_reshape_output'].asnumpy()[0]
            # Decode, then keep the boxes inside the input tensor.
            boxes = clip_boxes(bbox_pred(rois, deltas), im_shape[-2:])

        # Undo the resize applied to the input image.
        boxes = boxes / im_scale

        scores_all.append(scores)
        pred_boxes_all.append(boxes)
    return scores_all, pred_boxes_all, data_dict_all
Exemplo n.º 3
0
    def detect(self, batch, scales):
        """Forward a batch and decode per-image boxes for every device output.

        For each device output the RoIs are grouped by the image index stored
        in their first column, decoded with the matching deltas, clipped to
        that image's tensor size and divided by its scale.

        :param batch: batch object exposing ``data``; forwarded via ``self.forward``
        :param scales: per-device sequences of per-image scale factors
        :return: ``(scores, preds, data, im_ids)``
        """
        data = dict(zip(self.data_names, batch.data))
        outputs = self.forward(batch)
        scores, preds = [], []
        # Last two dims of every input image, grouped per device.
        im_shapes = np.array([im.shape[-2:] for im in data['data']]).reshape(-1, self.batch_size, 2)
        im_ids = np.array([], dtype=int)

        for dev_out, dev_scales, dev_shapes in zip(outputs, scales, im_shapes):
            dev_rois = dev_out[self.rpn_output_names['rois']].asnumpy()
            # Every image on a device is expected to own the same number of RoIs.
            rois_per_image = dev_rois.shape[0] / self.batch_size
            dev_scores = dev_out[self.rcnn_output_names['cls']].asnumpy()
            dev_deltas = dev_out[self.rcnn_output_names['bbox']].asnumpy()
            dev_ids = dev_out[self.rcnn_output_names['im_ids']].asnumpy().astype(int)
            im_ids = np.hstack((im_ids, dev_ids))

            for img in range(self.batch_size):
                # Column 0 of the RoIs holds the image index within the device batch.
                rows = np.where(dev_rois[:, 0] == img)[0]
                assert len(rows) == rois_per_image, 'The number of rois per GPU should be fixed!'

                # Decode, clip to the image, then undo the input scaling.
                boxes = bbox_pred(dev_rois[rows, 1:], dev_deltas[img])
                boxes = clip_boxes(boxes, dev_shapes[img])
                boxes = boxes / dev_scales[img]

                scores.append(dev_scores[img])
                preds.append(boxes)
        return scores, preds, data, im_ids
Exemplo n.º 4
0
    def forward(self, is_train, req, in_data, out_data, aux):
        """Decode box deltas into clipped proposals and write them to ``out_data[0]``.

        Inputs read: ``in_data[0]`` rois (first column dropped), ``in_data[1]``
        class probabilities, ``in_data[2]`` box deltas and ``in_data[3]``
        im_info (only row 0 is used; its first two values bound the clipping).
        The output blob is a zero first column followed by the clipped
        proposal coordinates, as float32.

        :raises AssertionError: if ``self._cfg.CLASS_AGNOSTIC`` is falsy
        """
        rois = in_data[0].asnumpy()[:, 1:]
        cls_prob = in_data[1].asnumpy()
        assert self._cfg.CLASS_AGNOSTIC, 'Currently only support class agnostic'
        if self._cfg.CLASS_AGNOSTIC:
            # Class-agnostic deltas: columns 4..7 are used.
            bbox_deltas = in_data[2].asnumpy()[:, 4:8]
        else:
            # Only reachable when assertions are disabled (python -O).
            fg_cls_prob = cls_prob[:, 1:]
            # Fix: np.int was removed in NumPy 1.24; it was an alias of the
            # builtin int, so this is behavior-identical where it still ran.
            fg_cls_idx = np.argmax(fg_cls_prob, axis=1).astype(int)
            batch_idx_array = np.arange(fg_cls_idx.shape[0], dtype=int)
            in_data2 = in_data[2].asnumpy()
            # Gather the four delta columns of the selected class per row.
            # NOTE(review): fg_cls_idx is an index into the foreground-only
            # slice cls_prob[:, 1:]; confirm the column offset is intended.
            bbox_deltas = np.hstack([
                in_data2[batch_idx_array, fg_cls_idx * 4 + offset].reshape(-1, 1)
                for offset in range(4)
            ])
        im_info = in_data[3].asnumpy()[0, :]

        # NOTE: no BBOX_STDS / BBOX_MEANS de-normalization is applied to the
        # deltas here; the code that did so was commented out.
        proposals = bbox_pred(rois, bbox_deltas)
        proposals = clip_boxes(proposals, im_info[:2])

        # Prepend a zero batch-index column.
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))

        self.assign(out_data[0], req[0], blob)
0
def get_image(roidb, config):
    """Load, flip, resize and normalize every image referenced by ``roidb``.

    A scale is drawn at random from ``config.SCALES`` for each record. Each
    returned record is a shallow copy of the input with ``boxes`` scaled,
    rounded and clipped, plus a new ``im_info`` entry
    ``[tensor.shape[2], tensor.shape[3], im_scale]``.

    :param roidb: list of records with ``image``, ``flipped`` and ``boxes``
    :param config: config providing ``SCALES``, ``network.IMAGE_STRIDE``,
        ``network.PIXEL_MEANS`` and ``network.PIXEL_STDS``
    :return: ``(processed_ims, processed_roidb)``
    :raises AssertionError: if an image path does not exist
    """
    processed_ims = []
    processed_roidb = []
    for roi_rec in roidb:
        image_path = roi_rec['image']
        # Fix: the message mixed a '%s' placeholder with str.format(), so the
        # path was never interpolated into the error text.
        assert os.path.exists(image_path), '{} does not exist'.format(
            image_path)
        im = cv2.imread(image_path,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if roi_rec['flipped']:
            # Reverse the second axis (horizontal mirror).
            im = im[:, ::-1, :]
        new_rec = roi_rec.copy()

        # Draw one (target_size, max_size) pair at random.
        scale_ind = random.randrange(len(config.SCALES))
        target_size = config.SCALES[scale_ind][0]
        max_size = config.SCALES[scale_ind][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS,
                              config.network.PIXEL_STDS)
        processed_ims.append(im_tensor)

        im_info = [im_tensor.shape[2], im_tensor.shape[3], im_scale]
        new_rec['boxes'] = clip_boxes(
            np.round(roi_rec['boxes'].copy() * im_scale), im_info[:2])
        new_rec['im_info'] = im_info
        processed_roidb.append(new_rec)
    return processed_ims, processed_roidb
Exemplo n.º 6
0
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Predict on a batch and decode class scores and boxes per output.

    :param predictor: object exposing ``predict(data_batch)``
    :param data_batch: batch object exposing ``data``
    :param data_names: names zipped with each entry of ``data_batch.data``
    :param scales: one scale factor per output
    :param cfg: config; only ``cfg.TEST.HAS_RPN`` is read here
    :return: ``(scores_all, pred_boxes_all, data_dict_all)``
    """
    net_outputs = predictor.predict(data_batch)
    data_dict_all = [dict(zip(data_names, blobs)) for blobs in data_batch.data]

    scores_all, pred_boxes_all = [], []
    for net_out, blob_dict, im_scale in zip(net_outputs, data_dict_all, scales):
        # Without an RPN the RoIs are taken from the input blobs; the leading
        # column is dropped in both cases.
        if not cfg.TEST.HAS_RPN:
            rois = blob_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        else:
            rois = net_out['rois_output'].asnumpy()[:, 1:]
        tensor_shape = blob_dict['data'].shape

        scores = net_out['cls_prob_reshape_output'].asnumpy()[0]
        deltas = net_out['bbox_pred_reshape_output'].asnumpy()[0]

        # Decode the deltas and keep boxes inside the input tensor.
        boxes = clip_boxes(bbox_pred(rois, deltas), tensor_shape[-2:])

        # Map the boxes back to the unscaled image.
        boxes = boxes / im_scale

        scores_all.append(scores)
        pred_boxes_all.append(boxes)
    return scores_all, pred_boxes_all, data_dict_all
Exemplo n.º 7
0
def get_image(roidb, config):
    """
    preprocess image and return processed roidb
    :param roidb: a list of roidb records (each with 'image', 'flipped', 'boxes')
    :param config: config providing SCALES, network.IMAGE_STRIDE and
        network.PIXEL_MEANS
    :return: (list of img as in mxnet format, list of processed roidb records)
    roidb add new item['im_info'] = [tensor.shape[2], tensor.shape[3], im_scale]
    0 --- x (width, second dim of im)
    |
    y (height, first dim of im)
    :raises AssertionError: if an image path does not exist
    """
    processed_ims = []
    processed_roidb = []
    for roi_rec in roidb:
        # Fix: '%s' is not a str.format() placeholder, so the old message
        # never contained the offending path.
        assert os.path.exists(roi_rec['image']), '{} does not exist'.format(roi_rec['image'])
        im = cv2.imread(roi_rec['image'], cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if roi_rec['flipped']:
            # mirror along the second (x) axis
            im = im[:, ::-1, :]
        new_rec = roi_rec.copy()
        # pick one (target_size, max_size) pair at random
        scale_ind = random.randrange(len(config.SCALES))
        target_size = config.SCALES[scale_ind][0]
        max_size = config.SCALES[scale_ind][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        processed_ims.append(im_tensor)
        im_info = [im_tensor.shape[2], im_tensor.shape[3], im_scale]
        # boxes follow the image resize, then are clipped to the tensor size
        new_rec['boxes'] = clip_boxes(np.round(roi_rec['boxes'].copy() * im_scale), im_info[:2])
        new_rec['im_info'] = im_info
        processed_roidb.append(new_rec)
    return processed_ims, processed_roidb
Exemplo n.º 8
0
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Predict on a batch, log the batch sizes, and decode scores and boxes.

    :param predictor: object exposing ``predict(data_batch)``
    :param data_batch: batch object exposing ``data``
    :param data_names: names zipped with each entry of ``data_batch.data``
    :param scales: one scale factor per output
    :param cfg: config; only ``cfg.TEST.HAS_RPN`` is read here
    :return: ``(scores_all, pred_boxes_all, data_dict_all)``
    """
    predictions = predictor.predict(data_batch)
    # Diagnostic output: number of outputs vs. number of data entries.
    print('output length: {}'.format(len(predictions)))
    print('data batch length: {}'.format(len(data_batch.data)))

    data_dict_all = [dict(zip(data_names, entry)) for entry in data_batch.data]
    scores_all = []
    pred_boxes_all = []
    for prediction, inputs, factor in zip(predictions, data_dict_all, scales):
        # Choose where the RoIs come from, then drop their first column.
        if cfg.TEST.HAS_RPN:
            roi_source = prediction['rois_output'].asnumpy()
        else:
            roi_source = inputs['rois'].asnumpy().reshape((-1, 5))
        rois = roi_source[:, 1:]
        im_shape = inputs['data'].shape

        cls_scores = prediction['cls_prob_reshape_output'].asnumpy()[0]
        box_deltas = prediction['bbox_pred_reshape_output'].asnumpy()[0]

        # Decode -> clip to the input tensor -> undo the image scaling.
        decoded = bbox_pred(rois, box_deltas)
        clipped = clip_boxes(decoded, im_shape[-2:])
        rescaled = clipped / factor

        scores_all.append(cls_scores)
        pred_boxes_all.append(rescaled)
    return scores_all, pred_boxes_all, data_dict_all
Exemplo n.º 9
0
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Predict on a batch; return scores, boxes, inputs and an optional feature map.

    :param predictor: object exposing ``predict(data_batch)``
    :param data_batch: batch object exposing ``data``
    :param data_names: names zipped with each entry of ``data_batch.data``
    :param scales: one scale factor per output
    :param cfg: config; only ``cfg.TEST.HAS_RPN`` is read here
    :return: ``(scores_all, pred_boxes_all, data_dict_all, feat)`` where
        ``feat`` is the first output's ``feat_conv_3x3_relu_output`` entry,
        or ``None`` when that key (or any output at all) is missing
    """
    output_all = predictor.predict(data_batch)

    # Fix: iterate directly instead of the Python-2-only xrange(len(...)).
    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]
    scores_all = []
    pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN:
            rois = output['rois_output'].asnumpy()[:, 1:]
        else:
            rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = data_dict['data'].shape

        # save output
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

        # post processing
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

        # we used scaled image & roi to train, so it is necessary to transform them back
        pred_boxes = pred_boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)

    # Fix: dict.has_key() does not exist on Python 3; `in` works on both.
    # The emptiness guard avoids an IndexError when nothing was predicted.
    feat = None
    if output_all and 'feat_conv_3x3_relu_output' in output_all[0]:
        feat = output_all[0]['feat_conv_3x3_relu_output']
    return scores_all, pred_boxes_all, data_dict_all, feat
Exemplo n.º 10
0
def im_batch_detect(predictor, data_batch, data_names, scales, cfg):
    """Predict on a batch holding several images per device and decode boxes.

    RoIs are grouped by the image index stored in their first column; each
    group is decoded, clipped to that image's ``im_info[:2]`` and divided by
    that image's scale.

    :param predictor: object exposing ``predict(data_batch)``
    :param data_batch: batch object exposing ``data``
    :param data_names: names zipped with each entry of ``data_batch.data``
    :param scales: per-output sequences of per-image scale factors
    :param cfg: unused here; kept for interface compatibility
    :return: ``(scores_all, pred_boxes_all, data_dict_all)`` with one
        scores / boxes entry per image
    """
    output_all = predictor.predict(data_batch)

    # Fix: direct iteration replaces the Python-2-only xrange(len(...)).
    data_dict_all = [
        dict(zip(data_names, idata)) for idata in data_batch.data
    ]
    scores_all = []
    pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        im_infos = data_dict['im_info'].asnumpy()
        # save output
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
        rois = output['rois_output'].asnumpy()
        for im_idx in range(im_infos.shape[0]):
            # rows whose first column equals this image's index
            bb_idxs = np.where(rois[:, 0] == im_idx)[0]
            # Fix: np.int was removed in NumPy 1.24; builtin int is equivalent.
            im_shape = im_infos[im_idx, :2].astype(int)

            # post processing
            pred_boxes = bbox_pred(rois[bb_idxs, 1:], bbox_deltas[bb_idxs, :])
            pred_boxes = clip_boxes(pred_boxes, im_shape)

            # we used scaled image & roi to train, so it is necessary to transform them back
            pred_boxes = pred_boxes / scale[im_idx]

            scores_all.append(scores[bb_idxs, :])
            pred_boxes_all.append(pred_boxes)

    return scores_all, pred_boxes_all, data_dict_all
Exemplo n.º 11
0
def im_batch_detect(predictor, data_batch, data_names, scales, cfg):
    """Run the predictor and decode detections image by image.

    Each output may cover several images: rows of ``rois_output`` are
    assigned to an image through their first column, and the scores / deltas
    rows at the same positions are used for that image.

    :param predictor: object exposing ``predict(data_batch)``
    :param data_batch: batch object exposing ``data``
    :param data_names: names zipped with each entry of ``data_batch.data``
    :param scales: per-output sequences of per-image scale factors
    :param cfg: unused here; kept for interface compatibility
    :return: ``(scores_all, pred_boxes_all, data_dict_all)``
    """
    output_all = predictor.predict(data_batch)

    # Fix: xrange is Python-2-only; iterate over the entries directly.
    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]
    scores_all = []
    pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        im_infos = data_dict['im_info'].asnumpy()
        # save output
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
        rois = output['rois_output'].asnumpy()
        for im_idx, im_info in enumerate(im_infos):
            bb_idxs = np.where(rois[:, 0] == im_idx)[0]
            # Fix: np.int (removed in NumPy 1.24) -> builtin int.
            im_shape = im_info[:2].astype(int)

            # post processing
            pred_boxes = bbox_pred(rois[bb_idxs, 1:], bbox_deltas[bb_idxs, :])
            pred_boxes = clip_boxes(pred_boxes, im_shape)

            # we used scaled image & roi to train, so it is necessary to transform them back
            pred_boxes = pred_boxes / scale[im_idx]

            scores_all.append(scores[bb_idxs, :])
            pred_boxes_all.append(pred_boxes)

    return scores_all, pred_boxes_all, data_dict_all
Exemplo n.º 12
0
def get_image(roidb, config):
    """
    preprocess image and return processed roidb
    :param roidb: a list of roidb
    :param config: config object; SCALES, network.IMAGE_STRIDE and
        network.PIXEL_MEANS are read
    :return: list of img as in mxnet format, and the list of processed records
    roidb add new item['im_info']
    0 --- x (width, second dim of im)
    |
    y (height, first dim of im)
    :raises AssertionError: when a record's image file is missing
    """
    processed_ims = []
    processed_roidb = []
    for roi_rec in roidb:
        path = roi_rec['image']
        # Fix: the original '%s ...'.format(path) left a literal '%s' in the
        # message; use a real format field so the path is reported.
        assert os.path.exists(path), '{} does not exist'.format(path)
        im = cv2.imread(path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if roi_rec['flipped']:
            im = im[:, ::-1, :]
        new_rec = roi_rec.copy()

        # random choice among the configured (target_size, max_size) pairs
        target_size, max_size = config.SCALES[random.randrange(len(config.SCALES))][:2]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        processed_ims.append(im_tensor)

        im_info = [im_tensor.shape[2], im_tensor.shape[3], im_scale]
        new_rec['boxes'] = clip_boxes(np.round(roi_rec['boxes'].copy() * im_scale), im_info[:2])
        new_rec['im_info'] = im_info
        processed_roidb.append(new_rec)
    return processed_ims, processed_roidb
Exemplo n.º 13
0
    def detect(self, batch, scales):
        """Forward ``batch`` and collect per-image class scores and boxes.

        :param batch: batch object exposing ``data``
        :param scales: per-device sequences of per-image scale factors
        :return: ``(scores, preds, data, im_ids)``: lists of per-image scores
            and rescaled boxes, the name -> input mapping, and the
            concatenated integer image ids reported by the network
        """
        data = dict(zip(self.data_names, batch.data))
        outputs = self.forward(batch)
        scores = []
        preds = []
        # Trailing two dims of each input image, reshaped to (device, image, 2).
        shapes = [im.shape[-2:] for im in data['data']]
        im_shapes = np.array(shapes).reshape(-1, self.batch_size, 2)
        im_ids = np.array([], dtype=int)

        for out, out_scales, out_shapes in zip(outputs, scales, im_shapes):
            all_rois = out[self.rpn_output_names['rois']].asnumpy()
            expected = all_rois.shape[0] / self.batch_size
            all_scores = out[self.rcnn_output_names['cls']].asnumpy()
            all_deltas = out[self.rcnn_output_names['bbox']].asnumpy()
            im_ids = np.hstack((im_ids, out[self.rcnn_output_names['im_ids']].asnumpy().astype(int)))

            for k in range(self.batch_size):
                # Select the RoIs tagged with image index k in column 0.
                selected = np.where(all_rois[:, 0] == k)[0]
                assert len(selected) == expected, 'The number of rois per GPU should be fixed!'
                image_rois = all_rois[selected, 1:]
                image_scores = all_scores[k]
                image_deltas = all_deltas[k]

                # Apply deltas, clip to the image, and re-scale.
                image_boxes = clip_boxes(bbox_pred(image_rois, image_deltas), out_shapes[k])
                image_boxes = image_boxes / out_scales[k]

                scores.append(image_scores)
                preds.append(image_boxes)
        return scores, preds, data, im_ids
Exemplo n.º 14
0
    def forward(self, is_train, req, in_data, out_data, aux):
        '''
        Turn rois plus box deltas into clipped proposals written to out_data[0].

        :param is_train: unused here
        :param req: write request; req[0] is forwarded to self.assign
        :param in_data: in_data[0] rois: (128, 5) First col are all 0's. True coordinate.
                        in_data[1] bbox_deltas: (128, 8)
                        in_data[2] im_info: im.shape = (im_info[0], im_info[1])
        :param out_data: out_data[0] receives the proposals: a zero first
                         column followed by the last 4 columns of the clipped boxes
        :param aux: unused here
        :return: None
        '''
        rois = in_data[0].asnumpy()[:, 1:]  # (128, 4) Move 0's in first col.
        bbox_deltas = in_data[1].asnumpy()
        im_info = in_data[2].asnumpy()[0, :]

        # Fix: the debug output used Python-2 print statements, which are a
        # SyntaxError on Python 3. Single-argument print(...) calls behave
        # the same on both versions.
        if DEBUG:
            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print('scale: {}'.format(im_info[2]))

        # 1. Convert anchors into proposals via bbox transformations
        proposals = bbox_pred(rois, bbox_deltas)
        # 2. clip predicted boxes to image
        proposals = clip_boxes(
            proposals, im_info[:2]
        )  # (128, 8) First 4 cols: background, last 4 cols: object
        proposals = proposals[:, 4:]  # (128, 4)
        # 3. prepend a zero column, matching the layout of the input rois
        zeros = np.zeros((proposals.shape[0], 1), dtype=proposals.dtype)
        proposals = np.hstack((zeros, proposals))
        self.assign(out_data[0], req[0], proposals)

        if DEBUG:
            print(proposals)
0
def det(mod, fn):
    """Run the detector on one image file and show the class-1 detections.

    The image is read, padded at the bottom when it is wider than tall,
    resized, transformed and forwarded through ``mod``. Boxes with a class
    score above 0.1 are kept, filtered by NMS (threshold 0.3), drawn on the
    resized image and displayed in a window until a key is pressed.

    :param mod: module exposing ``forward``, ``output_names`` and ``get_outputs``
    :param fn: path of the image file
    :raises IOError: if the image cannot be read
    """
    raw_img = cv2.imread(fn)
    if raw_img is None:
        # Fix: cv2.imread reports failure by returning None; fail with a
        # clear message instead of an AttributeError on `.shape` below.
        raise IOError('could not read image: {}'.format(fn))

    # Pad the bottom of landscape images so height matches width.
    if raw_img.shape[0] < raw_img.shape[1]:
        raw_img = cv2.copyMakeBorder(
            raw_img, 0, raw_img.shape[1] - raw_img.shape[0], 0, 0,
            cv2.BORDER_CONSTANT)

    im_shape = [IMG_H, IMG_W]  # reverse order
    # NOTE(review): cv2.resize takes dsize as (width, height); passing
    # (IMG_H, IMG_W) only matches im_shape when IMG_H == IMG_W - confirm.
    img = cv2.resize(raw_img, (IMG_H, IMG_W))

    im_tensor = image.transform(img, [124, 117, 104], 0.0167)

    # NOTE(review): the third im_info value is a hard-coded constant, not
    # derived from the actual resize - presumably a scale factor; confirm.
    im_info = np.array([[IMG_H, IMG_W, 4.18300658e-01]])

    batch = mx.io.DataBatch([mx.nd.array(im_tensor), mx.nd.array(im_info)])

    start = time.time()
    mod.forward(batch)

    output_names = mod.output_names
    output_tensor = mod.get_outputs()
    # Wait on the first output before stopping the timer.
    mod.get_outputs()[0].wait_to_read()
    print("time", time.time() - start, "secs.")

    output = dict(zip(output_names, output_tensor))

    rois = output['rois_output'].asnumpy()[:, 1:]
    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

    pred_boxes = bbox_pred(rois, bbox_deltas)
    pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

    num_classes = 2

    all_cls_dets = [[] for _ in range(num_classes)]

    # Class 0 is skipped; keep boxes whose class score exceeds 0.1.
    for j in range(1, num_classes):
        indexes = np.where(scores[:, j] > 0.1)[0]
        cls_scores = scores[indexes, j, np.newaxis]
        cls_boxes = pred_boxes[indexes, j * 4:(j + 1) * 4]
        all_cls_dets[j] = np.hstack((cls_boxes, cls_scores)).copy()

    # The NMS callable does not depend on the class, so build it once.
    nms = py_nms_wrapper(0.3)
    for idx_class in range(1, num_classes):
        keep = nms(all_cls_dets[idx_class])
        all_cls_dets[idx_class] = all_cls_dets[idx_class][keep, :]

    # Draw every surviving class-1 box in red (BGR) with a 1-pixel outline.
    for det_row in all_cls_dets[1]:
        cv2.rectangle(img, (int(det_row[0]), int(det_row[1])),
                      (int(det_row[2]), int(det_row[3])), (0, 0, 255), 1)

    cv2.imshow("w", img)
    cv2.waitKey()
0
def get_image_m(scenedb, config):
    """
    preprocess multi-view images and return processed roidb

    The 'top', 'left' and 'right' views of every scene are loaded and
    concatenated along the channel axis; the side views are transposed and
    flipped first. An optional per-record 'aug_gen' callable may augment the
    stacked image and the top-view boxes before resizing.

    :param scenedb: a list of scenedb
    :param config: config providing SCALES, network.IMAGE_STRIDE and
        network.PIXEL_MEANS
    :return: list of img as in mxnet format, list of processed records, and
        the list of every image file name that was read
    roidb add new item['im_info']
    0 --- x (width, second dim of im)
    |
    y (height, first dim of im)
    :raises AssertionError: if a view image does not exist
    """
    views_list = ['top', 'left', 'right']
    processed_ims = []
    processed_roidb = []
    processed_img_names = []
    for roi_rec in scenedb:
        im = None
        for view in views_list:
            img_fname = roi_rec['image_views'][view]
            assert os.path.exists(img_fname), '{0} does not exist'.format(
                img_fname)
            im_view = cv2.imread(
                img_fname, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
            processed_img_names.append(img_fname)
            if view in ['left', 'right']:
                # transpose+flip = rotate counterclockwise 90 deg
                im_view = im_view.transpose(1, 0, 2)
                im_view = im_view[::-1, :, :]  # flip about hor. axis
            if roi_rec['flipped']:
                im_view = im_view[:, ::-1, :]
            # Stack the views along the channel axis.
            im = np.concatenate(
                (im, im_view), axis=2) if im is not None else im_view
        new_rec = roi_rec.copy()

        # Leonid, adding an ability for externally controlled augmentation to take place here
        # Fix: dict.has_key() was removed in Python 3; `in` works on both.
        # NOTE(review): new_rec is a shallow copy, so the in-place write
        # below also changes the boxes held by the scenedb record - confirm
        # that this is intended.
        if 'aug_gen' in new_rec and (new_rec['aug_gen'] is not None):
            im_, boxes_ = new_rec['aug_gen'](im, new_rec['boxes_views']['top'])
            im = im_
            new_rec['boxes_views']['top'][:, :4] = boxes_

        scale_ind = random.randrange(len(config.SCALES))
        target_size = config.SCALES[scale_ind][0]
        max_size = config.SCALES[scale_ind][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform3(im, config.network.PIXEL_MEANS)
        processed_ims.append(im_tensor)

        im_info = [im_tensor.shape[2], im_tensor.shape[3], im_scale]
        new_rec['boxes'] = clip_boxes(
            np.round(roi_rec['boxes_views']['top'].copy() * im_scale),
            im_info[:2])
        new_rec['im_info'] = im_info
        processed_roidb.append(new_rec)
    return processed_ims, processed_roidb, processed_img_names
Exemplo n.º 17
0
def coco_results_one_category_kernel(data_pack):
    """Build the COCO-style result entries of one category.

    :param data_pack: dict with ``cat_id``, ``ann_type`` ('bbox', 'segm' or
        'keypoints'), ``binary_thresh``, ``all_im_info`` (one dict per image
        with ``index``, plus ``width`` / ``height`` for 'segm'), ``boxes``
        (one detections array per image, score in the last column) and,
        depending on ``ann_type``, ``masks`` or ``keypoints``
    :return: list of result dicts; images without detections contribute
        nothing, and an unsupported ``ann_type`` yields an empty list
    """
    cat_id = data_pack['cat_id']
    ann_type = data_pack['ann_type']
    binary_thresh = data_pack['binary_thresh']
    all_im_info = data_pack['all_im_info']
    boxes = data_pack['boxes']
    if ann_type == 'segm':
        masks = data_pack['masks']
    elif ann_type == 'keypoints':
        keypoints = data_pack['keypoints']
    elif ann_type != 'bbox':
        # Fix: was a Python-2 print statement. The function also used to fall
        # through and crash on an unbound `result` at the first non-empty
        # image; report the problem and return no results instead.
        print('unimplemented ann_type: ' + ann_type)
        return []

    cat_results = []
    for im_ind, im_info in enumerate(all_im_info):
        index = im_info['index']
        # Fix: np.float was removed in NumPy 1.24; builtin float is equivalent.
        dets = boxes[im_ind].astype(float)
        if dets.size == 0:
            continue
        scores = dets[:, -1]
        if ann_type == 'bbox':
            # Convert corner boxes to [x, y, width, height] (inclusive pixels).
            xs = dets[:, 0]
            ys = dets[:, 1]
            ws = dets[:, 2] - xs + 1
            hs = dets[:, 3] - ys + 1
            result = [{
                'image_id': index,
                'category_id': cat_id,
                'bbox': [xs[k], ys[k], ws[k], hs[k]],
                'score': scores[k]
            } for k in range(dets.shape[0])]
        elif ann_type == 'segm':
            width = im_info['width']
            height = im_info['height']
            dets[:, :4] = clip_boxes(dets[:, :4], [height, width])
            mask_encode = mask_voc2coco(masks[im_ind], dets[:, :4], height,
                                        width, binary_thresh)
            result = [{
                'image_id': index,
                'category_id': cat_id,
                'segmentation': mask_encode[k],
                'score': scores[k]
            } for k in range(len(mask_encode))]
        else:  # 'keypoints'
            result = [{
                'image_id': index,
                'category_id': cat_id,
                'keypoints': keypoints[im_ind][k, :].astype(np.uint16).tolist(),
                'score': scores[k]
            } for k in range(dets.shape[0])]

        cat_results.extend(result)
    return cat_results
Exemplo n.º 18
0
def double_im_detect(predictor, data_batch, data_names, cfg):
    """Predict on image / reference-image pairs and decode boxes for both.

    Index 0 of ``cls_prob_output`` / ``bbox_pred_output`` is used for the
    image and index 1 for the reference image. Boxes are clipped to the last
    two dims of the respective input tensor and divided by the scale read
    from ``im_info[0, 2]``.

    :param predictor: object exposing ``predict(data_batch)``
    :param data_batch: batch object exposing ``data``
    :param data_names: names zipped with each entry of ``data_batch.data``
    :param cfg: unused here; kept for interface compatibility
    :return: ``(scores_all, pred_boxes_all, ref_scores_all,
        ref_pred_boxes_all, data_dict_all)``
    """
    output_all = predictor.predict(data_batch)

    # Fix: direct iteration replaces the Python-2-only xrange(len(...)).
    data_dict_all = [
        dict(zip(data_names, idata)) for idata in data_batch.data
    ]
    scores_all = []
    pred_boxes_all = []
    ref_scores_all = []
    ref_pred_boxes_all = []
    for output, data_dict in zip(output_all, data_dict_all):

        # NOTE(review): im_info is indexed without .asnumpy(), unlike the
        # other inputs - confirm the resulting type divides the boxes as
        # intended.
        scale = data_dict['im_info'][0, 2]

        rois = output['rois_output'].asnumpy()[:, 1:]
        ref_rois = output['ref_rois_output'].asnumpy()[:, 1:]
        im_shape = data_dict['data'].shape
        # Fix: `.shape` was missing, so the reference image *data* (not its
        # shape) was sliced and handed to clip_boxes below.
        ref_im_shape = data_dict['ref_data'].shape

        # save output
        scores = output['cls_prob_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_output'].asnumpy()[0]
        ref_scores = output['cls_prob_output'].asnumpy()[1]
        ref_bbox_deltas = output['bbox_pred_output'].asnumpy()[1]

        # post processing
        pred_boxes = bbox_pred(rois, bbox_deltas)
        ref_pred_boxes = bbox_pred(ref_rois, ref_bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])
        ref_pred_boxes = clip_boxes(ref_pred_boxes, ref_im_shape[-2:])

        # we used scaled image & roi to train, so it is necessary to transform them back
        pred_boxes = pred_boxes / scale
        ref_pred_boxes = ref_pred_boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)

        ref_scores_all.append(ref_scores)
        ref_pred_boxes_all.append(ref_pred_boxes)

    return scores_all, pred_boxes_all, ref_scores_all, ref_pred_boxes_all, data_dict_all
Exemplo n.º 19
0
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Predict on a batch and decode boxes using size-dependent output rows.

    RoIs are bucketed by ``sqrt(width * height)`` into three overlapping
    ranges (<= 90, 30..160, >= 90). The scores / deltas rows are picked at
    offsets 0, N and 2N (N = number of RoIs) for the three ranges, so a RoI
    that falls into several ranges appears once per range.

    :param predictor: object exposing ``predict(data_batch)``
    :param data_batch: batch object exposing ``data``
    :param data_names: names zipped with each entry of ``data_batch.data``
    :param scales: one scale factor per output
    :param cfg: config; only ``cfg.TEST.HAS_RPN`` is read here
    :return: ``(scores_all, pred_boxes_all, data_dict_all)``
    """
    net_outputs = predictor.predict(data_batch)
    data_dict_all = [dict(zip(data_names, blobs)) for blobs in data_batch.data]

    scores_all, pred_boxes_all = [], []
    for net_out, blob_dict, im_scale in zip(net_outputs, data_dict_all, scales):
        if cfg.TEST.HAS_RPN:
            rois = net_out['rois_output'].asnumpy()[:, 1:]
        else:
            rois = blob_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = blob_dict['data'].shape

        scores = net_out['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = net_out['bbox_pred_reshape_output'].asnumpy()[0]

        # Geometric-mean side length of every RoI (inclusive pixel extents).
        roi_w = rois[:, 2] - rois[:, 0] + 1
        roi_h = rois[:, 3] - rois[:, 1] + 1
        roi_size = np.sqrt(roi_w * roi_h)
        num_rois = rois.shape[0]

        small = np.where(roi_size <= 90)[0]
        medium = np.where((roi_size >= 30) & (roi_size <= 160))[0]
        large = np.where(roi_size >= 90)[0]

        # RoI rows to keep, and the matching rows in the network outputs
        # (medium rows are shifted by N, large rows by 2N).
        roi_rows = np.hstack((small, medium, large))
        out_rows = np.hstack((small, medium + num_rois, large + num_rois * 2))
        rois = rois[roi_rows, :]
        scores = scores[out_rows, :]
        bbox_deltas = bbox_deltas[out_rows, :]

        # Decode, clip to the input tensor, then undo the image scaling.
        boxes = clip_boxes(bbox_pred(rois, bbox_deltas), im_shape[-2:])
        boxes = boxes / im_scale

        scores_all.append(scores)
        pred_boxes_all.append(boxes)
    return scores_all, pred_boxes_all, data_dict_all
Exemplo n.º 20
0
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """Predict on a multi-image batch; optionally decode keypoints as well.

    Per output, up to ``cfg.TEST.BATCH_IMAGES`` images are processed. The
    scale of image ``i`` is read from ``im_info[i, 2]``, and processing of an
    output stops at the first image whose scale is below 1e-6.

    :param predictor: object exposing ``predict(data_batch)``
    :param data_batch: batch object exposing ``data``
    :param data_names: names zipped with each entry of ``data_batch.data``
    :param scales: unused here; scales are read from ``im_info``
    :param cfg: config providing ``TEST.HAS_RPN``, ``TEST.BATCH_IMAGES`` and
        ``network.PREDICT_KEYPOINTS``
    :return: ``(scores_all, pred_boxes_all, data_dict_all)``, or
        ``(scores_all, pred_boxes_all, pred_kps_all, data_dict_all)`` when
        ``cfg.network.PREDICT_KEYPOINTS`` is set
    :raises AssertionError: if keypoints are requested with
        ``cfg.TEST.BATCH_IMAGES != 1``
    """
    output_all = predictor.predict(data_batch)

    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]
    scores_all = []
    pred_boxes_all = []
    pred_kps_all = []
    for output, data_dict in zip(output_all, data_dict_all):
        if cfg.TEST.HAS_RPN:
            batch_rois = output['rois_output'].asnumpy()
        else:
            # Fix: this branch used to assign `rois` only, leaving
            # `batch_rois` unbound for the per-image selection below.
            # NOTE(review): assumes column 0 of the precomputed rois is the
            # image index within the batch, as in rois_output - verify.
            batch_rois = data_dict['rois'].asnumpy().reshape((-1, 5))
        im_shape = data_dict['data'].shape

        # save output
        batch_scores = output['cls_prob_reshape_output'].asnumpy()
        batch_bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()
        batch_im_info = data_dict['im_info'].asnumpy()

        for i in range(cfg.TEST.BATCH_IMAGES):
            scale = batch_im_info[i, 2]
            if scale < 1e-6:
                # Stop at the first image with a (near-)zero scale.
                break
            indices = np.where(batch_rois[:, 0] == i)[0]
            rois = batch_rois[indices, 1:]
            scores = batch_scores[i]
            bbox_deltas = batch_bbox_deltas[i]

            # post processing
            pred_boxes = bbox_pred(rois, bbox_deltas)
            pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

            # we used scaled image & roi to train, so it is necessary to transform them back
            pred_boxes = pred_boxes / scale

            scores_all.append(scores)
            pred_boxes_all.append(pred_boxes)

        if cfg.network.PREDICT_KEYPOINTS:
            assert cfg.TEST.BATCH_IMAGES == 1, "only support batch_size=1"
            kps_deltas = output['kps_pos_pred_reshape_output'].asnumpy(
            )  # [N, 2*K, G, G]
            kps_probs = output['kps_prob_output'].asnumpy()  # [N*K, G*G]
            # Uses the rois / scale of the (single) image handled above.
            pred_kps = predict_keypoints(rois,
                                         kps_probs,
                                         kps_deltas,
                                         scale=scale)
            pred_kps_all.append(pred_kps)

    if cfg.network.PREDICT_KEYPOINTS:
        return scores_all, pred_boxes_all, pred_kps_all, data_dict_all
    return scores_all, pred_boxes_all, data_dict_all
Exemplo n.º 21
0
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """
    Run the predictor on one batch and decode the detections.

    Box deltas are multiplied by cfg.TRAIN.BBOX_STDS (tiled once per class)
    before decoding; decoded boxes are clipped to the network input size and
    divided by the image scale.

    When the module-level DEBUG flag is set, the 30 highest-scoring
    non-background detections are drawn on the de-normalised input image and
    written under ./det_images/.

    :param predictor: object exposing predict(data_batch)
    :param data_batch: batch whose .data is zipped with data_names
    :param data_names: names of the entries of data_batch.data
    :param scales: per-image resize factors
    :param cfg: config providing TRAIN.BBOX_STDS and dataset.NUM_CLASSES
    :return: (scores_all, pred_boxes_all, data_dict_all)
    """
    output_all = predictor.predict(data_batch)
    data_dict_all = [dict(zip(data_names, data_batch.data))]
    scores_all, pred_boxes_all = [], []
    for net_out, blobs, im_scale in zip(output_all, data_dict_all, scales):
        # drop the leading batch-index column of the rois
        proposals = net_out['rois_output'].asnumpy()[:, 1:]
        input_shape = blobs['data'].shape
        cls_scores = net_out['cls_prob_reshape_output'].asnumpy()[0]

        # un-normalise the regression deltas, then decode / clip / rescale
        delta_stds = np.tile(np.array(cfg.TRAIN.BBOX_STDS),
                             cfg.dataset.NUM_CLASSES)
        deltas = net_out['bbox_pred_reshape_output'].asnumpy()[0] * delta_stds
        boxes = clip_boxes(bbox_pred(proposals, deltas), input_shape[-2:])
        boxes = boxes / im_scale

        scores_all.append(cls_scores)
        pred_boxes_all.append(boxes)

        if not DEBUG:
            continue

        # ---- debug visualisation only below this line ----
        print("im shape: ", input_shape)
        print(boxes.shape)
        print(cls_scores.shape)
        best_cls = cls_scores.argmax(axis=1)
        best_val = cls_scores[np.arange(boxes.shape[0]), best_cls]
        # keep rois whose best class is not index 0
        foreground = np.where(best_cls > 0)[0]
        best_cls = best_cls[foreground]
        print(boxes)
        # back to network-input coordinates for drawing
        draw_boxes = boxes.copy()[foreground] * im_scale
        best_val = best_val[foreground]

        # NCHW -> HWC, undo the mean/std normalisation, convert to uint8
        img = blobs['data'].asnumpy().transpose((0, 2, 3, 1))[0]
        img = (img * np.array([[[0.229, 0.224, 0.225]]])
               + np.array([[[0.485, 0.456, 0.406]]])) * 255
        img = np.clip(img, 0, 255)
        img = img.astype(np.uint8)
        print(type(img))
        image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        print(img.shape)
        print(best_val)

        top_ids = best_val.argsort()[-30:]
        for i, all_cls_box in enumerate(draw_boxes):
            if i not in top_ids:
                continue
            cls = best_cls[i]
            box = all_cls_box[cls * 4:(cls + 1) * 4]
            box = box.astype(np.int64)
            print(box)
            cv2.rectangle(image, tuple(box[:2]), tuple(box[2:]),
                          (255, 0, 0), 1)
            cv2.putText(image, names[cls] + " " + str(best_val[i]),
                        tuple(box[:2]), cv2.FONT_HERSHEY_COMPLEX, 1,
                        (0, 0, 255), 1)
        cv2.imwrite(
            "./det_images/det_img_{:3f}.png".format(np.random.randn()), image)

    return scores_all, pred_boxes_all, data_dict_all
Exemplo n.º 22
0
Arquivo: demo.py Projeto: mrlooi/FCIS
def inference(predictor, data_batch, data_names, num_classes, BINARY_THRESH=0.4, CONF_THRESH=0.7, gpu_id=0):
    """
    Detect instances in a batch and return per-class boxes and masks.

    Only the first image of the batch is post-processed. Depending on
    config.TEST.USE_MASK_MERGE the detections are either filtered by score
    and NMS per class, or merged with gpu_mask_voting. In both cases the
    result is finally thresholded by CONF_THRESH.

    :param predictor: forwarded to im_detect
    :param data_batch: batch whose .data[i][0] is the image blob and
        .data[i][1] the im_info blob
    :param data_names: forwarded to im_detect
    :param num_classes: number of classes including index 0
    :param BINARY_THRESH: mask binarisation threshold passed to gpu_mask_voting
    :param CONF_THRESH: minimum score for a detection to be kept
    :param gpu_id: device id passed to gpu_mask_voting
    :return: (dets, masks), each a list with one entry per class 1..num_classes-1
    """
    scales = [blobs[1].asnumpy()[0, 2] for blobs in data_batch.data]
    im_shapes = [blobs[0].shape[2:4] for blobs in data_batch.data]

    scores, boxes, masks, data_dict = im_detect(predictor, data_batch, data_names, scales, config)

    if config.TEST.USE_MASK_MERGE:
        fg_masks = masks[0][:, 1:, :, :]
        # size of the un-resized image
        im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
        im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
        clipped = clip_boxes(boxes[0], (im_height, im_width))
        result_masks, result_dets = gpu_mask_voting(
            fg_masks, clipped, scores[0], num_classes,
            100, im_width, im_height,
            config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
            BINARY_THRESH, gpu_id)

        dets = [result_dets[j] for j in range(1, num_classes)]
        masks = [result_masks[j][:, 0, :, :] for j in range(1, num_classes)]
    else:
        nms = py_nms_wrapper(config.TEST.NMS)
        dets = []
        kept_masks = []
        for cls in range(1, num_classes):
            selected = np.where(scores[0][:, cls] > CONF_THRESH)[0]
            cls_scores = scores[0][selected, cls, np.newaxis]
            cls_masks = masks[0][selected, 1, :, :]
            if config.CLASS_AGNOSTIC:
                cls_boxes = boxes[0][selected, :]
            else:
                cls_boxes = boxes[0][selected, cls * 4:(cls + 1) * 4]

            cls_dets = np.hstack((cls_boxes, cls_scores))
            survivors = nms(cls_dets)
            dets.append(cls_dets[survivors, :])
            kept_masks.append(cls_masks[survivors, :])
        masks = kept_masks

    # final confidence filter, applied to both code paths
    for i, cls_dets in enumerate(dets):
        confident = np.where(cls_dets[:, -1] > CONF_THRESH)
        dets[i] = cls_dets[confident]
        masks[i] = masks[i][confident]

    return dets, masks
Exemplo n.º 23
0
def im_detect(predictor,
              data_batch,
              data_names,
              scales,
              cfg,
              aggr_feats=False):
    """
    Run the predictor and decode boxes, optionally returning aggregated features.

    If an output dict contains 'blockgrad0_output', its 'blockgrad{i}_output'
    entries are aliased to the regular output names before decoding.

    :param predictor: object exposing predict(data_batch)
    :param data_batch: batch whose .data holds one blob list per device
    :param data_names: names zipped with every blob list
    :param scales: per-image resize factors
    :param cfg: config providing TEST.HAS_RPN and TEST.KEY_FRAME_INTERVAL
    :param aggr_feats: when true, also collect the '_plus{N}_output' blobs
    :return: zip(scores, boxes, data_dicts), or a pair of that zip and the
        collected feature list when aggr_feats is true
    """

    def feature_key():
        # evaluated lazily: cfg.TEST.KEY_FRAME_INTERVAL is only read when needed
        return '_plus{}_output'.format(cfg.TEST.KEY_FRAME_INTERVAL * 2 - 1)

    output_all = predictor.predict(data_batch)
    data_dict_all = [dict(zip(data_names, blobs)) for blobs in data_batch.data]
    scores_all, pred_boxes_all, aggr_feats_all = [], [], []

    for net_out, blobs, im_scale in zip(output_all, data_dict_all, scales):
        if 'blockgrad0_output' in net_out:
            aliases = ('_', 'rois_output', 'cls_prob_reshape_output',
                       'bbox_pred_reshape_output', feature_key())
            for idx, alias in enumerate(aliases):
                net_out[alias] = net_out['blockgrad{}_output'.format(idx)]

        if aggr_feats:
            aggr_feats_all.append(net_out[feature_key()])

        if cfg.TEST.HAS_RPN:
            proposals = net_out['rois_output'].asnumpy()[:, 1:]
        else:
            proposals = blobs['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        input_shape = blobs['data'].shape

        cls_scores = net_out['cls_prob_reshape_output'].asnumpy()[0]
        deltas = net_out['bbox_pred_reshape_output'].asnumpy()[0]

        # decode, clip to the network input, then map back to original scale
        boxes = clip_boxes(bbox_pred(proposals, deltas), input_shape[-2:])
        boxes = boxes / im_scale

        scores_all.append(cls_scores)
        pred_boxes_all.append(boxes)

    detections = zip(scores_all, pred_boxes_all, data_dict_all)
    if aggr_feats:
        return detections, aggr_feats_all
    return detections
Exemplo n.º 24
0
def get_image(roidb, config):
    """
    preprocess image and return processed roidb
    :param roidb: a list of roidb records; every record provides 'image',
        'flipped' and 'boxes', and may provide an 'aug_gen' callable
        taking (image, boxes) and returning (image, boxes[:, :4])
    :param config: configuration; SCALES, network.IMAGE_STRIDE and
        network.PIXEL_MEANS are read
    :return: (list of img as in mxnet format, list of processed roidb
        records, list of image paths)
    roidb add new item['im_info']
    0 --- x (width, second dim of im)
    |
    y (height, first dim of im)

    The input records are not modified: boxes are copied before the optional
    augmentation writes into them.
    """
    processed_ims = []
    processed_roidb = []
    processed_img_names = []
    for roi_rec in roidb:
        img_name = roi_rec['image']
        assert os.path.exists(img_name), '{0} does not exist'.format(img_name)
        im = cv2.imread(img_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if roi_rec['flipped']:
            im = im[:, ::-1, :]
        new_rec = roi_rec.copy()

        # dict.copy() is shallow, so new_rec['boxes'] is the very same array
        # as roi_rec['boxes']; work on a private copy so the augmentation
        # below cannot leak into the caller's roidb.
        boxes = roi_rec['boxes'].copy()

        # Leonid, adding an ability for externally controlled augmentation to take place here
        aug_gen = new_rec.get('aug_gen')
        if aug_gen is not None:
            im, aug_boxes = aug_gen(im, boxes)
            boxes[:, :4] = aug_boxes

        # pick one of the configured (target_size, max_size) pairs at random
        scale_ind = random.randrange(len(config.SCALES))
        target_size = config.SCALES[scale_ind][0]
        max_size = config.SCALES[scale_ind][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        processed_ims.append(im_tensor)
        processed_img_names.append(img_name)
        im_info = [im_tensor.shape[2], im_tensor.shape[3], im_scale]
        new_rec['boxes'] = clip_boxes(np.round(boxes * im_scale), im_info[:2])
        new_rec['im_info'] = im_info
        processed_roidb.append(new_rec)
    return processed_ims, processed_roidb, processed_img_names
Exemplo n.º 25
0
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """
    Run the predictor on one batch and decode the detections.

    :param predictor: object exposing predict(data_batch)
    :param data_batch: batch whose .data is zipped with data_names
    :param data_names: names of the entries of data_batch.data
    :param scales: per-image resize factors
    :param cfg: unused here; kept for interface parity with the other
        im_detect variants
    :return: (scores_all, pred_boxes_all, data_dict_all)
    """
    output_all = predictor.predict(data_batch)
    # dict() accepts a single iterable of pairs, so names and blobs must be zipped
    data_dict_all = [dict(zip(data_names, data_batch.data))]
    scores_all = []
    pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        # drop the leading batch-index column of the rois
        rois = output['rois_output'].asnumpy()[:, 1:]

        im_shape = data_dict['data'].shape
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

        # decode, clip to the network input, then map back to original scale
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])
        pred_boxes = pred_boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)
    return scores_all, pred_boxes_all, data_dict_all
Exemplo n.º 26
0
    def detect(self, batch, scales):
        """
        Forward a batch and decode per-image detections for every device.

        :param batch: batch whose .data is zipped with self.data_names
        :param scales: per-device sequences of per-image resize factors
        :return: (scores, preds, data, im_ids, maps, chip_ids); maps stays
            empty when the network has no 'scale_map' output
        """
        data = dict(zip(self.data_names, batch.data))
        outputs = self.forward(batch)
        rcnn_names = self.rcnn_output_names
        has_focus_maps = rcnn_names['scale_map'] in outputs[0]

        scores = []
        preds = []
        maps = []
        im_ids = np.array([], dtype=int)
        chip_ids = np.array([], dtype=int)

        for gpu_out, gpu_scales in zip(outputs, scales):
            gpu_rois = gpu_out[self.rpn_output_names['rois']].asnumpy()
            # expected number of rois for each image on this device
            nper_gpu = gpu_rois.shape[0] / self.batch_size
            if has_focus_maps:
                scale_prob = gpu_out[rcnn_names['scale_map']].asnumpy()
            gpu_scores = gpu_out[rcnn_names['cls']].asnumpy()
            gpu_deltas = gpu_out[rcnn_names['bbox']].asnumpy()
            gpu_infos = gpu_out[rcnn_names['im_info']].asnumpy()
            gpu_shapes = gpu_infos[:, :2]

            new_im_ids = gpu_out[rcnn_names['im_ids']].asnumpy().astype(int)
            im_ids = np.hstack((im_ids, new_im_ids))
            new_chip_ids = gpu_out[rcnn_names['chip_ids']].asnumpy().astype(int)
            chip_ids = np.hstack((chip_ids, new_chip_ids))

            for idx in range(self.batch_size):
                # rows of the roi blob that belong to image `idx`
                cids = np.where(gpu_rois[:, 0] == idx)[0]
                assert len(cids) == nper_gpu, \
                    'The number of rois per GPU should be fixed!'

                # apply deltas, clip to the image, undo the resize
                cboxes = bbox_pred(gpu_rois[cids, 1:], gpu_deltas[idx])
                cboxes = clip_boxes(cboxes, gpu_shapes[idx])
                cboxes = cboxes / gpu_scales[idx]

                scores.append(gpu_scores[idx])
                preds.append(cboxes)
                if has_focus_maps:
                    maps.append(scale_prob[idx])

        return scores, preds, data, im_ids, maps, chip_ids
Exemplo n.º 27
0
def get_image(roidb, target_size, max_size, stride=0):
    """
    preprocess image and return processed roidb
    :param roidb: a list of roidb records; every record provides 'image',
        'flipped' and 'boxes', and may provide 'keypoints'
    :param target_size: forwarded to resize()
    :param max_size: forwarded to resize()
    :param stride: forwarded to resize()
    :return: (list of resized images, list of processed roidb records)
    roidb add new item['im_info']
    0 --- x (width, second dim of im)
    |
    y (height, first dim of im)
    """
    processed_ims = []
    processed_roidb = []
    for roi_rec in roidb:
        # use a str.format placeholder so the path really shows up in the message
        assert os.path.exists(roi_rec['image']), '{0} does not exist'.format(
            roi_rec['image'])
        # 128 is the numeric value of cv2.IMREAD_IGNORE_ORIENTATION
        im = cv2.imread(roi_rec['image'], cv2.IMREAD_COLOR | 128)
        if roi_rec['flipped']:
            im = im[:, ::-1, :]
        new_rec = roi_rec.copy()
        im, im_scale = resize(im, target_size, max_size, stride=stride)
        processed_ims.append(im)
        im_info = [im.shape[0], im.shape[1], im_scale]
        new_rec['boxes'] = clip_boxes(
            np.round(roi_rec['boxes'].copy() * im_scale), im_info[:2])
        new_rec['im_info'] = im_info
        if 'keypoints' in roi_rec:
            # every third column starting at 0 / 1 is scaled like the boxes;
            # the remaining column is copied through untouched
            new_rec['keypoints'] = roi_rec['keypoints'].copy()
            new_rec['keypoints'][:, 0::3] = np.round(
                roi_rec['keypoints'][:, 0::3] * im_scale)
            new_rec['keypoints'][:, 1::3] = np.round(
                roi_rec['keypoints'][:, 1::3] * im_scale)

        DEBUG = False  ###
        if DEBUG:
            # dump the resized image and its record for offline inspection
            try:
                import cPickle as pickle
            except ImportError:
                import pickle
            out_dir = '/tmp/rcnn-debug/'
            rand_id = random.randint(100, 999)
            cv2.imwrite(out_dir + str(rand_id) + '.jpg', im)
            with open(out_dir + str(rand_id) + '.pkl', "wb") as dump_file:
                pickle.dump(new_rec, dump_file)  ###

        processed_roidb.append(new_rec)
    return processed_ims, processed_roidb
Exemplo n.º 28
0
def im_detect_feats_stats(predictor,
                          data_batch,
                          data_names,
                          scales,
                          cfg,
                          stat_type,
                          scores_field='cls_prob_reshape'):
    """
    Run the predictor and return detections plus roi embeddings and statistics.

    :param predictor: object exposing predict(data_batch)
    :param data_batch: batch whose .data holds one blob list per device
    :param data_names: names zipped with every blob list
    :param scales: per-image resize factors
    :param cfg: config providing TEST.HAS_RPN
    :param stat_type: 'ratio_val' or 'feat_pred' selects which extra output is
        collected; any other value collects nothing
    :param scores_field: output name (without the '_output' suffix) to read
        the class scores from
    :return: (scores_all, pred_boxes_all, data_dict_all, rois_feats_all,
        stats_all)
    """
    # stat_type value -> name of the network output carrying that statistic
    stat_blobs = (('ratio_val', 'fc_val_reg_2_output'),
                  ('feat_pred', 'fc_score_hist_3_output'))

    output_all = predictor.predict(data_batch)
    data_dict_all = [dict(zip(data_names, blobs)) for blobs in data_batch.data]

    scores_all, pred_boxes_all = [], []
    rois_feats_all, stats_all = [], []

    for net_out, blobs, im_scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN:
            proposals = net_out['rois_output'].asnumpy()[:, 1:]
        else:
            proposals = blobs['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        input_shape = blobs['data'].shape

        cls_scores = net_out[scores_field + '_output'].asnumpy()[0]
        deltas = net_out['bbox_pred_reshape_output'].asnumpy()[0]
        # shape: [#rois, Embed_dim]
        rois_feats_all.append(net_out['psp_final_embed_output'].asnumpy())

        for stat_name, blob_name in stat_blobs:
            if stat_type == stat_name:
                stats_all.append(net_out[blob_name].asnumpy())

        # decode, clip to the network input, then map back to original scale
        boxes = clip_boxes(bbox_pred(proposals, deltas), input_shape[-2:])
        boxes = boxes / im_scale

        scores_all.append(cls_scores)
        pred_boxes_all.append(boxes)

    return scores_all, pred_boxes_all, data_dict_all, rois_feats_all, stats_all
Exemplo n.º 29
0
    def forward(self, is_train, req, in_data, out_data, aux):
        """
        Refine the input rois with the predicted box deltas.

        in_data holds, in order: rois (first column is kept as-is), bbox
        deltas, class probabilities and im_info. One 4-tuple of deltas is
        picked per roi, de-normalised with self._bbox_stds / self._bbox_means,
        applied to the roi and optionally clipped to the first image's size.
        The refined rois are written to out_data[0].
        """
        bottom_rois = in_data[0].asnumpy()
        bbox_delta = in_data[1].asnumpy()
        cls_prob = in_data[2].asnumpy()
        im_info = in_data[3].asnumpy()
        num_rois = bottom_rois.shape[0]

        # pick the class whose deltas are used: class 1 for every roi when the
        # regressor is class-agnostic, otherwise the best non-background class
        if self._bbox_class_agnostic:
            chosen_cls = np.ones((num_rois))
        else:
            chosen_cls = np.argmax(cls_prob[:, 1:], axis=1) + 1

        # columns 4*c .. 4*c+3 hold the deltas of class c
        first_col = chosen_cls[:, np.newaxis] * 4
        delta_cols = np.hstack([first_col + offset for offset in range(4)])
        row_ids = np.arange(num_rois, dtype=np.intp)[:, np.newaxis]
        bbox_delta = bbox_delta[row_ids, delta_cols.astype(np.intp)]

        # undo the target normalisation: delta * std + mean, per coordinate
        means = np.array(self._bbox_means)
        stds = np.array(self._bbox_stds)
        bbox_delta = np.hstack([
            (bbox_delta[:, k] * stds[k] + means[k])[:, np.newaxis]
            for k in range(4)
        ])

        # decode and optionally clip
        proposal = bbox_pred(bottom_rois[:, 1:], bbox_delta)
        if self._b_clip_boxes:
            proposal = clip_boxes(proposal, im_info[0, :2])

        # reuse the roi array: column 0 is left untouched
        bottom_rois[:, 1:] = proposal
        self.assign(out_data[0], req[0], bottom_rois)
Exemplo n.º 30
0
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """
    Run the predictor and return detections together with roi scores/features.

    :param predictor: object exposing predict(data_batch)
    :param data_batch: batch whose .data holds one blob list per device
    :param data_names: names zipped with every blob list
    :param scales: per-image resize factors
    :param cfg: config providing TEST.HAS_RPN
    :return: (scores_all, pred_boxes_all, roi_score_all, rois_all,
        roi_feat_all, data_dict_all)
    """
    output_all = predictor.predict(data_batch)
    data_dict_all = [
        dict(list(zip(data_names, blobs))) for blobs in data_batch.data
    ]

    scores_all, pred_boxes_all = [], []
    roi_score_all, rois_all, roi_feat_all = [], [], []

    for net_out, blobs, im_scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN:
            indexed_rois = net_out['rois_output'].asnumpy()
            # every roi must carry batch index 0
            assert np.all(indexed_rois[:, 0] == 0.)
            proposals = indexed_rois[:, 1:]
        else:
            proposals = blobs['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        input_shape = blobs['data'].shape

        cls_scores = net_out['cls_prob_reshape_output'].asnumpy()[0]
        deltas = net_out['bbox_pred_reshape_output'].asnumpy()[0]

        # order rois, their scores and their features by descending roi score
        # NOTE(review): cls_scores and deltas are NOT reordered here - this is
        # only consistent if the rois already arrive in that order; confirm.
        roi_score = net_out['rois_score'].asnumpy()
        by_score = roi_score.flatten().argsort()[::-1]
        roi_score = roi_score[by_score]
        roi_feat = net_out['roi_feat_output_output'].asnumpy()[by_score]
        proposals = proposals[by_score]

        # decode, clip to the network input, then map back to original scale
        boxes = clip_boxes(bbox_pred(proposals, deltas), input_shape[-2:])
        boxes = boxes / im_scale
        proposals = proposals / im_scale

        scores_all.append(cls_scores)
        pred_boxes_all.append(boxes)
        roi_score_all.append(roi_score)
        rois_all.append(proposals)
        roi_feat_all.append(roi_feat)

    return (scores_all, pred_boxes_all, roi_score_all, rois_all,
            roi_feat_all, data_dict_all)
Exemplo n.º 31
0
    def forward(self, is_train, req, in_data, out_data, aux):
        """
        Decode proposals and keep the top-scoring fraction of them.

        in_data holds, in order: rois, bbox deltas (columns 4:8 are used),
        im_info and class probabilities. out_data[0] receives the kept
        proposals prefixed with a zero batch index, out_data[1] the indices of
        the kept rois. The kept fraction is self._top.
        """
        rois = in_data[0].asnumpy()[:, 1:]
        bbox_deltas = in_data[1].asnumpy()[:, 4:8]
        im_info = in_data[2].asnumpy()[0, :]
        fg_prob = in_data[3].asnumpy()[:, 1:]  # ignore bg

        # rank rois by their best foreground score, highest first
        keep_count = int(rois.shape[0] * self._top)
        best_fg = fg_prob.max(axis=1)
        keep_index = (-best_fg).argsort()[:keep_count]

        proposals = clip_boxes(bbox_pred(rois, bbox_deltas), im_info[:2])

        # prepend a zero batch-index column
        zero_col = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((zero_col, proposals.astype(np.float32, copy=False)))

        self.assign(out_data[0], req[0], blob[keep_index, :])
        self.assign(out_data[1], req[1], keep_index)
Exemplo n.º 32
0
def coco_results_one_category_kernel(data_pack):
    """
    Convert the detections of one category into COCO result dicts.

    :param data_pack: dict with keys 'cat_id', 'ann_type' ('bbox' or 'segm'),
        'binary_thresh', 'all_im_info' (list of dicts with 'index', and for
        'segm' also 'width' / 'height'), 'boxes' (one array per image whose
        last column is the score) and, for 'segm', 'masks'
    :return: list of result dicts; empty when ann_type is not supported
    """
    cat_id = data_pack['cat_id']
    ann_type = data_pack['ann_type']
    binary_thresh = data_pack['binary_thresh']
    all_im_info = data_pack['all_im_info']
    boxes = data_pack['boxes']
    if ann_type == 'bbox':
        masks = []
    elif ann_type == 'segm':
        masks = data_pack['masks']
    else:
        # nothing can be produced for an unknown type; bail out instead of
        # running into an unbound `result` inside the loop below
        print('unimplemented ann_type: ' + ann_type)
        return []

    cat_results = []
    for im_ind, im_info in enumerate(all_im_info):
        index = im_info['index']
        # builtin float is what the removed np.float alias pointed to
        dets = boxes[im_ind].astype(float)
        if len(dets) == 0:
            continue
        scores = dets[:, -1]
        if ann_type == 'bbox':
            # corner format (x1, y1, x2, y2) -> (x, y, w, h) with inclusive pixels
            xs = dets[:, 0]
            ys = dets[:, 1]
            ws = dets[:, 2] - xs + 1
            hs = dets[:, 3] - ys + 1
            result = [{'image_id': index,
                       'category_id': cat_id,
                       'bbox': [xs[k], ys[k], ws[k], hs[k]],
                       'score': scores[k]} for k in range(dets.shape[0])]
        else:  # ann_type == 'segm'
            width = im_info['width']
            height = im_info['height']
            dets[:, :4] = clip_boxes(dets[:, :4], [height, width])
            mask_encode = mask_voc2coco(masks[im_ind], dets[:, :4], height, width, binary_thresh)
            result = [{'image_id': index,
                       'category_id': cat_id,
                       'segmentation': mask_encode[k],
                       'score': scores[k]} for k in range(len(mask_encode))]
        cat_results.extend(result)
    return cat_results
Exemplo n.º 33
0
def im_detect(predictor, data_batch, data_names, scales, cfg):
    """
    Run the predictor and decode detections, re-weighting scores with DCR.

    When cfg.DCR.top == 1 the class scores are multiplied element-wise by the
    DCR scores. For any other positive value only the rows listed in the
    'keep_index_reshape_output' blob are re-weighted; the rest keep their
    score. With cfg.DCR.top <= 0 the scores are returned unchanged.

    :param predictor: object exposing predict(data_batch)
    :param data_batch: batch whose .data holds one blob list per device
    :param data_names: names zipped with every blob list
    :param scales: per-image resize factors
    :param cfg: config providing TEST.HAS_RPN and DCR.top
    :return: (scores_all, pred_boxes_all, data_dict_all)
    """
    output_all = predictor.predict(data_batch)

    data_dict_all = [dict(zip(data_names, idata)) for idata in data_batch.data]
    scores_all = []
    pred_boxes_all = []
    for output, data_dict, scale in zip(output_all, data_dict_all, scales):
        if cfg.TEST.HAS_RPN:
            rois = output['rois_output'].asnumpy()[:, 1:]
        else:
            rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
        im_shape = data_dict['data'].shape

        # save output
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        if cfg.DCR.top > 0:
            dcr_scores = output['dcr_prob_reshape_output'].asnumpy()[0]
            if cfg.DCR.top == 1:
                scores = scores * dcr_scores
            else:
                # builtin int is what the removed np.int alias pointed to
                keep_index = output['keep_index_reshape_output'].asnumpy(
                ).astype(int)[0]
                # rows that were not kept get a neutral weight of 1
                dcr_final_scores = np.ones_like(scores)
                dcr_final_scores[keep_index, :] = dcr_scores
                scores = scores * dcr_final_scores
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

        # post processing
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

        # we used scaled image & roi to train, so it is necessary to transform them back
        pred_boxes = pred_boxes / scale

        scores_all.append(scores)
        pred_boxes_all.append(pred_boxes)
    return scores_all, pred_boxes_all, data_dict_all
    def forward(self, is_train, req, in_data, out_data, aux):
        """
        Generate region proposals from score / delta maps at several strides.

        For every stride in self._feat_stride the anchors are shifted over the
        feature grid, decoded with the predicted deltas, clipped to the image
        and filtered by minimum size. The per-stride results are stacked,
        sorted by score, passed through GPU NMS and finally cut / padded
        according to self._rpn_post_nms_top_n.

        Inputs, as indexed below: in_data[0..4] are the class-probability maps
        for strides 4, 8, 16, 32, 64, in_data[5..9] the matching bbox-delta
        maps and in_data[-1] is im_info.

        Outputs: out_data[0] receives the rois (a zero batch index followed by
        four box coordinates); out_data[1] receives the scores when
        self._output_score is set.

        Raises ValueError when the first dimension of in_data[0] is > 1.
        """
        nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)

        batch_size = in_data[0].shape[0]
        if batch_size > 1:
            raise ValueError("Sorry, multiple images each device is not implemented")

        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        # map 'stride<N>' -> input blob; the lookup key below is built as
        # 'stride' + str(s) for every s in self._feat_stride
        cls_prob_dict = {
            'stride64': in_data[4],
            'stride32': in_data[3],
            'stride16': in_data[2],
            'stride8': in_data[1],
            'stride4': in_data[0],
        }
        bbox_pred_dict = {
            'stride64': in_data[9],
            'stride32': in_data[8],
            'stride16': in_data[7],
            'stride8': in_data[6],
            'stride4': in_data[5],
        }

        pre_nms_topN = self._rpn_pre_nms_top_n
        post_nms_topN = self._rpn_post_nms_top_n
        min_size = self._rpn_min_size

        proposal_list = []
        score_list = []
        for s in self._feat_stride:
            stride = int(s)
            sub_anchors = generate_anchors(base_size=stride, scales=self._scales, ratios=self._ratios)
            # keep only the channels from index self._num_anchors onwards
            # presumably the first self._num_anchors channels are background scores - TODO confirm
            scores = cls_prob_dict['stride' + str(s)].asnumpy()[:, self._num_anchors:, :, :]
            bbox_deltas = bbox_pred_dict['stride' + str(s)].asnumpy()
            # first row of im_info; [0] and [1] are used as the image size, [2] as its scale
            im_info = in_data[-1].asnumpy()[0, :]
            # 1. Generate proposals from bbox_deltas and shifted anchors
            # use real image size instead of padded feature map sizes
            height, width = int(im_info[0] / stride), int(im_info[1] / stride)

            # Enumerate all shifts
            shift_x = np.arange(0, width) * stride
            shift_y = np.arange(0, height) * stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()

            # Enumerate all shifted anchors:
            #
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = self._num_anchors
            K = shifts.shape[0]
            anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
            anchors = anchors.reshape((K * A, 4))

            # Transpose and reshape predicted bbox transformations to get them
            # into the same order as the anchors:
            #
            # bbox deltas will be (1, 4 * A, H, W) format
            # transpose to (1, H, W, 4 * A)
            # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
            # in slowest to fastest order
            bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

            # Same story for the scores:
            #
            # scores are (1, A, H, W) format
            # transpose to (1, H, W, A)
            # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
            scores = self._clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

            # Convert anchors into proposals via bbox transformations
            proposals = bbox_pred(anchors, bbox_deltas)

            # 2. clip predicted boxes to image
            proposals = clip_boxes(proposals, im_info[:2])

            # 3. remove predicted boxes with either height or width < threshold
            # (NOTE: convert min_size to input image scale stored in im_info[2])
            keep = self._filter_boxes(proposals, min_size * im_info[2])
            proposals = proposals[keep, :]
            scores = scores[keep]

            proposal_list.append(proposals)
            score_list.append(scores)

        # merge the candidates of all strides into one pool
        proposals = np.vstack(proposal_list)
        scores = np.vstack(score_list)

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        det = np.hstack((proposals, scores)).astype(np.float32)
        keep = nms(det)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        # pad to ensure output size remains unchanged
        # (the padding re-uses randomly drawn indices that survived NMS)
        # NOTE(review): npr.choice presumably raises when `keep` is empty - confirm
        if len(keep) < post_nms_topN:
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
            keep = np.hstack((keep, pad))
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois array
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        # if is_train:
        self.assign(out_data[0], req[0], blob)
        if self._output_score:
            self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
Exemplo n.º 35
0
 def _cgenerate(boxes, width, height, chipsize, stride):
     """Clip boxes to the image and delegate chip generation to chips.generate."""
     image_bounds = np.array([height - 1, width - 1])
     clipped = clip_boxes(boxes, image_bounds)
     # the extension is handed a contiguous float32 buffer
     packed = np.ascontiguousarray(clipped, dtype=np.float32)
     return chips.generate(packed, width, height, chipsize, stride)
Exemplo n.º 36
0
    def _pygenerate(boxes, width, height, chipsize, stride):
        """
        Greedily pick chips (square crops) that together cover the given boxes.

        Candidate chips are three fixed corner chips, a regular grid with the
        given stride and one extra column / row aligned with the right / bottom
        image border. The candidates are shuffled, then the chip matching the
        most still-uncovered boxes is selected repeatedly until no candidate
        matches any remaining box.

        :param boxes: boxes to cover; clipped to the image before matching
        :param width: image width
        :param height: image height
        :param chipsize: side length of a chip
        :param stride: step of the candidate grid
        :return: list of selected chips, each an array [x1, y1, x2, y2]
        """
        chips = []
        boxes = clip_boxes(boxes, np.array([height - 1, width - 1]))
        # ensure coverage of image for worst case
        # corners
        chips.append([max(width - chipsize, 0), 0, width - 1, min(chipsize, height - 1)])
        chips.append([0, max(height - chipsize, 0), min(chipsize, width - 1), height - 1])
        chips.append([max(width - chipsize, 0), max(height - chipsize, 0), width - 1, height - 1])

        # regular grid of chips
        for i in range(0, width - int(chipsize), stride):
            for j in range(0, height - int(chipsize), stride):
                chips.append([i, j, i + chipsize - 1, j + chipsize - 1])

        # extra column flush with the right border
        for j in range(0, height - int(chipsize), stride):
            chips.append([max(width - chipsize - 1, 0), j,
                          width - 1, j + chipsize - 1])

        # extra row flush with the bottom border
        for i in range(0, width - int(chipsize), stride):
            chips.append([i, max(height - chipsize - 1, 0),
                          i + chipsize - 1, height - 1])

        # builtin float is what the removed np.float alias pointed to
        chips = np.array(chips).astype(float)

        # shuffle the candidates before the greedy selection
        p = np.random.permutation(chips.shape[0])
        chips = chips[p]

        # overlaps[j, k] == 1 is treated as "box k is matched by chip j"
        overlaps = ignore_overlaps(chips, boxes.astype(float))
        chip_matches = []
        num_matches = []
        for j in range(len(chips)):
            nvids = np.where(overlaps[j, :] == 1)[0]
            chip_matches.append(set(nvids.tolist()))
            num_matches.append(len(nvids))

        fchips = []
        while True:
            # stop as soon as no chip matches any remaining box
            if max(num_matches) == 0:
                break
            maxid = np.argmax(np.array(num_matches))
            bestchip = chip_matches[maxid]
            fchips.append(chips[maxid])

            # now remove all rois in bestchip
            for j in range(len(num_matches)):
                chip_matches[j] = chip_matches[j] - bestchip
                num_matches[j] = len(chip_matches[j])

        return fchips
Exemplo n.º 37
0
    def forward(self, is_train, req, in_data, out_data, aux):
        """Decode RPN outputs for one image into scored region proposals.

        Pipeline: place the base anchors at every feature-map cell, apply the
        predicted box deltas, clip to the image, drop boxes below the minimum
        size, keep the best-scoring candidates, run NMS and pad the survivors
        up to the post-NMS quota.

        Args:
            is_train: not used here.
            req: write requests forwarded to ``self.assign``.
            in_data: ``[class probabilities, bbox deltas, im_info]``; the
                first row of ``im_info`` is read as (height, width, scale).
            out_data: ``out_data[0]`` receives the RoI blob (a zero batch
                index column followed by the proposal boxes);
                ``out_data[1]`` receives the scores when
                ``self._output_score`` is set.
            aux: not used here.

        Raises:
            ValueError: if the batch holds more than one image.
        """
        nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)

        if in_data[0].shape[0] > 1:
            raise ValueError(
                "Sorry, multiple images each device is not implemented")

        top_n_before_nms = self._rpn_pre_nms_top_n
        top_n_after_nms = self._rpn_post_nms_top_n
        smallest_side = self._rpn_min_size
        anchors_per_cell = self._num_anchors
        stride = self._feat_stride

        # The leading A channels are background probabilities; only the
        # remaining (foreground) channels are used as proposal scores.
        fg_scores = in_data[0].asnumpy()[:, anchors_per_cell:, :, :]
        deltas = in_data[1].asnumpy()
        im_info = in_data[2].asnumpy()[0, :]

        if DEBUG:
            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print('scale: {}'.format(im_info[2]))

        # Feature-map extent derived from the real image size rather than
        # from the (possibly padded) score map.
        feat_h = int(im_info[0] / stride)
        feat_w = int(im_info[1] / stride)

        if DEBUG:
            print('score map size: {}'.format(fg_scores.shape))
            residual = (fg_scores.shape[2] - feat_h,
                        fg_scores.shape[3] - feat_w)
            print("resudial: {}".format(residual))

        # One (x, y, x, y) offset per feature-map cell.
        grid_x, grid_y = np.meshgrid(np.arange(0, feat_w) * stride,
                                     np.arange(0, feat_h) * stride)
        flat_x = grid_x.ravel()
        flat_y = grid_y.ravel()
        cell_shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()

        # Broadcast the (1, A, 4) base anchors against the (K, 1, 4) shifts
        # and flatten the result to (K * A, 4).
        num_cells = cell_shifts.shape[0]
        base = self._anchors.reshape((1, anchors_per_cell, 4))
        offsets = cell_shifts.reshape((1, num_cells, 4)).transpose((1, 0, 2))
        all_anchors = (base + offsets).reshape(
            (num_cells * anchors_per_cell, 4))

        # Pass both maps through self._clip_pad with the real extent, then
        # move the channel axis last so rows follow (h, w, a) order and line
        # up with the anchors.
        deltas = self._clip_pad(deltas, (feat_h, feat_w))
        deltas = deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
        fg_scores = self._clip_pad(fg_scores, (feat_h, feat_w))
        fg_scores = fg_scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Anchors + deltas -> boxes, limited to the image bounds.
        boxes = clip_boxes(bbox_pred(all_anchors, deltas), im_info[:2])

        # Minimum size is expressed at the input-image scale (im_info[2]).
        valid = self._filter_boxes(boxes, smallest_side * im_info[2])
        boxes = boxes[valid, :]
        fg_scores = fg_scores[valid]

        # Highest score first, optionally truncated before NMS.
        ranking = fg_scores.ravel().argsort()[::-1]
        if top_n_before_nms > 0:
            ranking = ranking[:top_n_before_nms]
        boxes = boxes[ranking, :]
        fg_scores = fg_scores[ranking]

        # NMS, optional truncation, then padding with re-sampled survivors
        # so the number of output rows stays fixed.
        selected = nms(np.hstack((boxes, fg_scores)).astype(np.float32))
        if top_n_after_nms > 0:
            selected = selected[:top_n_after_nms]
        shortfall = top_n_after_nms - len(selected)
        if shortfall > 0:
            selected = np.hstack(
                (selected, npr.choice(selected, size=shortfall)))
        boxes = boxes[selected, :]
        fg_scores = fg_scores[selected]

        # Only one image per batch is supported, so every batch index is 0.
        rois = np.hstack((np.zeros((boxes.shape[0], 1), dtype=np.float32),
                          boxes.astype(np.float32, copy=False)))
        self.assign(out_data[0], req[0], rois)

        if self._output_score:
            self.assign(out_data[1], req[1],
                        fg_scores.astype(np.float32, copy=False))
Exemplo n.º 38
0
    def forward(self, is_train, req, in_data, out_data, aux):
        """Generate RoIs from RPN outputs gathered over several feature strides.

        For every stride in ``self._feat_stride`` the method builds shifted
        anchors, applies the predicted deltas, clips the boxes and filters out
        small ones.  Candidates from all strides are then stacked, sorted by
        score, passed through NMS and padded to ``post_nms_topN`` rows before
        being written to ``out_data[0]`` (and the scores to ``out_data[1]``
        when ``self._output_score`` is set).

        Side effect: the stride of each kept proposal is written to
        ``channels.txt`` on every call.

        Raises ValueError when ``in_data[0]`` has a batch dimension above 1.
        """
        # NOTE(review): the before_*/after_* timestamps in this method are
        # assigned but, within the visible code, only referenced by
        # commented-out prints.
        before_pyramid_proposal = datetime.now()
        nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)

        batch_size = in_data[0].shape[0]
        if batch_size > 1:
            raise ValueError(
                "Sorry, multiple images each device is not implemented")

        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)
        # NOTE(review): the value derived from len(in_data) is immediately
        # overwritten with the literal 11, so only the `LAYER_NUM == 11`
        # branch below can run; every other branch is currently dead code.
        LAYER_NUM = len(in_data) / 2
        LAYER_NUM = 11
        # Each branch maps 'stride<N>' keys to the class-probability and
        # bbox-delta inputs for that stride.
        if LAYER_NUM == 7:
            cls_prob_dict = {
                'stride64': in_data[6],
                'stride32': in_data[5],
                'stride16': in_data[4],
                'stride8': in_data[3],
                'stride4': in_data[2],
                'stride2': in_data[1],
                'stride1': in_data[0],
            }
            bbox_pred_dict = {
                'stride64': in_data[13],
                'stride32': in_data[12],
                'stride16': in_data[11],
                'stride8': in_data[10],
                'stride4': in_data[9],
                'stride2': in_data[8],
                'stride1': in_data[7],
            }

        elif LAYER_NUM == 6:
            cls_prob_dict = {
                'stride64': in_data[5],
                'stride32': in_data[4],
                'stride16': in_data[3],
                'stride8': in_data[2],
                'stride4': in_data[1],
                'stride2': in_data[0],
            }
            bbox_pred_dict = {
                'stride64': in_data[11],
                'stride32': in_data[10],
                'stride16': in_data[9],
                'stride8': in_data[8],
                'stride4': in_data[7],
                'stride2': in_data[6],
            }

        elif LAYER_NUM == 5:
            cls_prob_dict = {
                'stride64': in_data[4],
                'stride32': in_data[3],
                'stride16': in_data[2],
                'stride8': in_data[1],
                'stride4': in_data[0],
            }
            bbox_pred_dict = {
                'stride64': in_data[9],
                'stride32': in_data[8],
                'stride16': in_data[7],
                'stride8': in_data[6],
                'stride4': in_data[5],
            }
        # NOTE(review): the indices in this 2-level branch are the same as in
        # the 5-level branch above, which looks inconsistent with a 2-level
        # input list -- confirm before enabling.
        elif LAYER_NUM == 2:
            cls_prob_dict = {
                'stride64': in_data[4],
                'stride32': in_data[3],
            }
            bbox_pred_dict = {
                'stride64': in_data[9],
                'stride32': in_data[8],
            }
        # The branch that actually runs: a single level keyed as 'stride64'.
        # Any entry of self._feat_stride that does not stringify to '64' will
        # raise KeyError in the loop below.
        elif LAYER_NUM == 11:
            cls_prob_dict = {
                'stride64': in_data[0],
            }
            bbox_pred_dict = {
                'stride64': in_data[1],
            }
        elif LAYER_NUM == 1:
            cls_prob_dict = {
                'stride1': in_data[0],
            }
            bbox_pred_dict = {
                'stride1': in_data[1],
            }
        elif LAYER_NUM == 3:
            cls_prob_dict = {
                'stride64': in_data[2],
                'stride32': in_data[1],
                'stride1': in_data[0],
            }
            bbox_pred_dict = {
                'stride64': in_data[5],
                'stride32': in_data[4],
                'stride1': in_data[3],
            }
        # The two triple-quoted blocks below are bare string expressions
        # holding disabled alternative mappings; they have no runtime effect.
        '''
        cls_prob_dict = {
            'stride8': in_data[3],
            'stride4': in_data[2],
            'stride2': in_data[1],
            'stride1': in_data[0],
        }
        bbox_pred_dict = {
            'stride8': in_data[7],
            'stride4': in_data[6],
            'stride2': in_data[5],
            'stride1': in_data[4],
        }
        '''
        '''
        cls_prob_dict = {
            'stride2': in_data[1],
            'stride1': in_data[0],
        }
        bbox_pred_dict = {
            'stride2': in_data[3],
            'stride1': in_data[2],
        }        
        '''
        pre_nms_topN = self._rpn_pre_nms_top_n
        post_nms_topN = self._rpn_post_nms_top_n
        min_size = self._rpn_min_size

        # Per-stride results, stacked after the loop.
        proposal_list = []
        score_list = []

        # Per-proposal stride tag, kept aligned with proposals/scores.
        channel_list = []

        before_feat = datetime.now()

        for s in self._feat_stride:
            stride = int(s)
            # Base anchors for this level, sized by the stride.
            sub_anchors = generate_anchors(base_size=stride,
                                           scales=self._scales,
                                           ratios=self._ratios)
            #print "cls_prob_dict['stride' + str(s)].shape:"+str(cls_prob_dict['stride' + str(s)].shape)
            # Drop the first self._num_anchors channels and keep the rest as
            # the proposal scores.
            scores = cls_prob_dict['stride' +
                                   str(s)].asnumpy()[:,
                                                     self._num_anchors:, :, :]

            if DEBUG:
                scores1 = cls_prob_dict['stride' + str(s)].asnumpy()
                print "scores.shape:" + str(scores.shape)
                print "scores1.shape:" + str(scores1.shape)

            #print "scores.shape:"+str(scores.shape)
            bbox_deltas = bbox_pred_dict['stride' + str(s)].asnumpy()
            #print "bbox_deltas.shape:"+str(bbox_deltas.shape)
            # Image info comes from the last input; it does not depend on the
            # stride, so it is re-read identically on every iteration.
            im_info = in_data[-1].asnumpy()[0, :]
            # 1. Generate proposals from bbox_deltas and shifted anchors
            # use real image size instead of padded feature map sizes
            height, width = int(im_info[0] / stride), int(im_info[1] / stride)

            # Enumerate all shifts

            shift_x = np.arange(0, width) * stride
            shift_y = np.arange(0, height) * stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()

            # Enumerate all shifted anchors:
            #
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            before_enume = datetime.now()
            A = self._num_anchors
            K = shifts.shape[0]
            anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape(
                (1, K, 4)).transpose((1, 0, 2))
            anchors = anchors.reshape((K * A, 4))
            after_enume = datetime.now()
            #print "enume time:"+str((after_enume-before_enume).seconds)
            # Transpose and reshape predicted bbox transformations to get them
            # into the same order as the anchors:
            #
            # bbox deltas will be (1, 4 * A, H, W) format
            # transpose to (1, H, W, 4 * A)
            # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
            # in slowest to fastest order

            bbox_deltas = self._clip_pad(bbox_deltas, (height, width))

            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

            # Same story for the scores:
            #
            # scores are (1, A, H, W) format
            # transpose to (1, H, W, A)
            # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
            scores = self._clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
            if DEBUG:
                print "scores[:100]:" + str(scores[:50])
            # Tag every candidate with the stride it came from.
            channels = np.ones((scores.shape)) * stride

            # Convert anchors into proposals via bbox transformations
            before_pred = datetime.now()
            proposals = bbox_pred(anchors, bbox_deltas)
            after_pred = datetime.now()
            #print "pred_time:"
            #print (after_pred-before_pred).seconds
            # 2. clip predicted boxes to image
            proposals = clip_boxes(proposals, im_info[:2])
            # 3. remove predicted boxes with either height or width < threshold
            # (NOTE: convert min_size to input image scale stored in im_info[2])
            if DEBUG:
                print str(min_size)
                print str(im_info[2])
            keep = self._filter_boxes(proposals, min_size * im_info[2])
            proposals = proposals[keep, :]
            if DEBUG:
                print "proposals3:" + str(proposals[0:10])
            scores = scores[keep]

            channels = channels[keep]

            proposal_list.append(proposals)
            score_list.append(scores)
            channel_list.append(channels)
        after_feat = datetime.now()
        #print "feat time:"
        #print (after_feat-before_feat).seconds

        # Merge the candidates of all strides into single arrays.
        proposals = np.vstack(proposal_list)
        scores = np.vstack(score_list)
        channels = np.vstack(channel_list)
        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        before_sort = datetime.now()
        order = scores.ravel().argsort()[::-1]
        after_sort = datetime.now()
        #print "sort time:"
        #print (after_sort-before_sort).seconds
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]
        channels = channels[order]
        if DEBUG:
            print '-------1-------'
            print channels.shape
            for s in self._feat_stride:
                print "stride:" + str(s)
                print len(np.where(channels == float(s))[0])
            print "proposals:" + str(proposals[0:20])
        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        det = np.hstack((proposals, scores)).astype(np.float32)

        keep = nms(det)

        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        # pad to ensure output size remains unchanged
        if len(keep) < post_nms_topN:
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
            keep = np.hstack((keep, pad))
        proposals = proposals[keep, :]
        scores = scores[keep]
        channels = channels[keep]
        if DEBUG:
            print '-------2-------'
            print channels.shape
            for s in self._feat_stride:
                print "stride:" + str(s)
                print len(np.where(channels == float(s))[0])
            print "proposals:" + str(proposals[0:20])
            print "scores:" + str(scores[0:20])
        # NOTE(review): this dump is not guarded by DEBUG -- 'channels.txt' is
        # opened with mode 'w' (so overwritten) on every forward call, and the
        # handle is not closed if a write raises.
        f_chan = open('channels.txt', 'w')
        for ii in range(channels.shape[0]):
            f_chan.write(str(channels[ii][0]) + ' ')
        f_chan.close()

        # Output rois array
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))
        # if is_train:
        self.assign(out_data[0], req[0], blob)
        #print "out_data[0].shape"+str(out_data[0].shape)
        if self._output_score:
            self.assign(out_data[1], req[1],
                        scores.astype(np.float32, copy=False))
        after_pyramid_proposal = datetime.now()