Example #1
File: imdb.py Project: aelleon/SNIPER
    def load_rpn_data(self, proposal_path='data/proposals', full=False):
        rpn_file = os.path.join(proposal_path, self.name + '_rpn.pkl')
        nms_cache_file = os.path.join(proposal_path, self.name+'_rpn_after_nms.pkl')

        if os.path.isfile(nms_cache_file):
            print('Reading cached proposals after NMS from {}'.format(nms_cache_file))
            with open(nms_cache_file, 'rb') as f:
                boxes, maps = cPickle.load(f)

            print('Done!')
        else:
            print 'Loading {}...'.format(rpn_file)
            assert os.path.exists(rpn_file), 'rpn data not found at {}'.format(rpn_file)
            with open(rpn_file, 'rb') as f:
                box_list = cPickle.load(f)
            print 'Done!'
            ttboxes = []
            boxes = []
            maps = []  # left empty here; kept so the cached [boxes, maps] layout stays consistent
            print 'Applying NMS...'
            nms = py_nms_wrapper(0.7)  # note: unused here; NMS itself runs in the nmsp pool workers below

            for i in range(len(box_list)):
                tboxes = np.array(box_list[i])
                ttboxes.append(tboxes)

            p = Pool(32)
            keeps = p.map(nmsp, ttboxes)
            print('Done!')
            for i in range(len(box_list)):
                boxes.append(ttboxes[i][keeps[i]])
            p.close()
            print('Caching proposals after NMS to {}'.format(nms_cache_file))
            with open(nms_cache_file, 'wb') as f:
                cPickle.dump([boxes, maps], f, cPickle.HIGHEST_PROTOCOL)
            print('Done!')
        return boxes, maps
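
nmsp is not defined in this snippet; since multiprocessing.Pool.map can only dispatch picklable module-level functions, it is presumably a top-level wrapper around the same NMS routine. A minimal sketch of such a helper, assuming the (x1, y1, x2, y2, score) box layout and the 0.7 IoU threshold used above:

# Hypothetical module-level helper so Pool.map can pickle it.
# Assumes py_nms_wrapper(thresh) returns a function mapping an
# N x 5 dets array (x1, y1, x2, y2, score) to kept row indices.
_nms_worker = py_nms_wrapper(0.7)

def nmsp(dets):
    if len(dets) == 0:
        return []
    return _nms_worker(dets)
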
Example #2
def pred_eval(predictor, test_data, imdb, cfg, vis=False, thresh=1e-3, logger=None, ignore_cache=True):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all threshold are set by hand
    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param vis: controls visualization
    :param thresh: valid detection threshold
    :return:
    """

    det_file = os.path.join(imdb.result_path, imdb.name + '_detections.pkl')
    if os.path.exists(det_file) and not ignore_cache:
        with open(det_file, 'rb') as fid:
            all_boxes = cPickle.load(fid)
        info_str = imdb.evaluate_detections(all_boxes)
        if logger:
            logger.info('evaluate detections: \n{}'.format(info_str))
        return

    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data[0]]

    if not isinstance(test_data, PrefetchingIter):
        test_data = PrefetchingIter(test_data)

    nms = py_nms_wrapper(cfg.TEST.NMS)

    # limit detections to max_per_image over all classes
    max_per_image = cfg.TEST.max_per_image

    num_images = imdb.num_images
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    idx = 0
    data_time, net_time, post_time = 0.0, 0.0, 0.0
    t = time.time()
    for im_info, data_batch in test_data:
        t1 = time.time() - t
        t = time.time()

        scales = [iim_info[0, 2] for iim_info in im_info]
        scores_all, boxes_all, data_dict_all = im_detect(predictor, data_batch, data_names, scales, cfg)

        t2 = time.time() - t
        t = time.time()
        for delta, (scores, boxes, data_dict) in enumerate(zip(scores_all, boxes_all, data_dict_all)):
            for j in range(1, imdb.num_classes):
                indexes = np.where(scores[:, j] > thresh)[0]
                cls_scores = scores[indexes, j, np.newaxis]
                cls_boxes = boxes[indexes, 4:8] if cfg.CLASS_AGNOSTIC else boxes[indexes, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j][idx+delta] = cls_dets[keep, :]

            if max_per_image > 0:
                image_scores = np.hstack([all_boxes[j][idx+delta][:, -1]
                                          for j in range(1, imdb.num_classes)])
                if len(image_scores) > max_per_image:
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in range(1, imdb.num_classes):
                        keep = np.where(all_boxes[j][idx+delta][:, -1] >= image_thresh)[0]
                        all_boxes[j][idx+delta] = all_boxes[j][idx+delta][keep, :]

            if vis:
                boxes_this_image = [[]] + [all_boxes[j][idx+delta] for j in range(1, imdb.num_classes)]
                vis_all_detection(data_dict['data'].asnumpy(), boxes_this_image, imdb.classes, scales[delta], cfg)

        idx += test_data.batch_size
        t3 = time.time() - t
        t = time.time()
        data_time += t1
        net_time += t2
        post_time += t3
        print 'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(
            idx, imdb.num_images, data_time / idx * test_data.batch_size,
            net_time / idx * test_data.batch_size,
            post_time / idx * test_data.batch_size)
        if logger:
            logger.info('testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(idx, imdb.num_images, data_time / idx * test_data.batch_size, net_time / idx * test_data.batch_size, post_time / idx * test_data.batch_size))

    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, protocol=cPickle.HIGHEST_PROTOCOL)

    info_str = imdb.evaluate_detections(all_boxes)
    if logger:
        logger.info('evaluate detections: \n{}'.format(info_str))
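
For reference, py_nms_wrapper(thresh) in these repositories returns a closure implementing greedy, score-ordered IoU suppression. A self-contained NumPy sketch of that algorithm (an illustration, not the exact library code):

import numpy as np

def greedy_nms(dets, thresh):
    # dets: N x 5 array of (x1, y1, x2, y2, score); returns kept row indices.
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = dets[:, 4].argsort()[::-1]      # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # IoU of the top box against all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[np.where(iou <= thresh)[0] + 1]  # drop overlapping boxes
    return keep
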
Example #3

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("indir",
                        type=lambda s: unicode(s, 'utf8'),
                        help="Directory containing list of images")
    parser.add_argument("outfile",
                        type=lambda s: unicode(s, 'utf8'),
                        help="Path to write predictions")
    parser.add_argument("-d",
                        "--device",
                        type=int,
                        default=0,
                        help="Device ID to use")
    args = parser.parse_args()
    params = vars(args)

    # ---------------------------------------------------------- Read config
    ctx_id = [int(i) for i in config.gpus.split(',')]
    pprint.pprint(config)
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 81
    classes = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
        'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
        'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
        'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]
    config['gpus'] = str(params['device'])
    ctx_id = [int(i) for i in config.gpus.split(',')]  # re-read so the --device override takes effect

    # ---------------------------------------------------------- Load Images
    image_path_list = []
    data = []
    scale_factor = 1.0
    img_dir = osp.abspath(params['indir'])
    det_thresh = 0.7

    # Load abs paths of images
    for f in sorted(os.listdir(img_dir)):
        _, f_ext = osp.splitext(f)
        if f_ext in ['.jpg', '.png', '.jpeg']:
            f_path = osp.join(img_dir, f)
            image_path_list.append(f_path)

    print 'Loading {} images into memory...'.format(len(image_path_list))

    for image_path in image_path_list:
        im = cv2.imread(image_path,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        height, width = im.shape[:2]
        im = cv2.resize(
            im, (int(scale_factor * width), int(scale_factor * height)))
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    print 'Loaded {} images'.format(len(image_path_list))

    # ---------------------------------------------------------- Predict
    predictions = []

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(
        '/BS/orekondy2/work/opt/FCIS/model/fcis_coco', 0, process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(ctx_id[0])],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)

    # warm up
    for i in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[0])
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        _, _, _, _ = im_detect(predictor, data_batch, data_names, scales,
                               config)

    # test
    for idx, image_path in enumerate(image_path_list):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        scores, boxes, masks, data_dict = im_detect(predictor, data_batch,
                                                    data_names, scales, config)
        im_shapes = [
            data_batch.data[i][0].shape[2:4]
            for i in xrange(len(data_batch.data))
        ]

        if not config.TEST.USE_MASK_MERGE:
            all_boxes = [[] for _ in xrange(num_classes)]
            all_masks = [[] for _ in xrange(num_classes)]
            nms = py_nms_wrapper(config.TEST.NMS)
            for j in range(1, num_classes):
                indexes = np.where(scores[0][:, j] > 0.7)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                if config.CLASS_AGNOSTIC:
                    cls_boxes = boxes[0][indexes, :]
                else:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]

                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            dets = [all_boxes[j] for j in range(1, num_classes)]
            masks = [all_masks[j] for j in range(1, num_classes)]
        else:
            masks = masks[0][:, 1:, :, :]
            im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
            im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
            print(im_height, im_width)
            boxes = clip_boxes(boxes[0], (im_height, im_width))
            result_masks, result_dets = gpu_mask_voting(
                masks, boxes, scores[0], num_classes, 100, im_width, im_height,
                config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                config.BINARY_THRESH, ctx_id[0])

            dets = [result_dets[j] for j in range(1, num_classes)]
            masks = [
                result_masks[j][:, 0, :, :] for j in range(1, num_classes)
            ]
        print '{} testing {} {:.4f}s'.format(idx, image_path, toc())
        # visualize
        for i in xrange(len(dets)):
            keep = np.where(dets[i][:, -1] > det_thresh)
            dets[i] = dets[i][keep]
            masks[i] = masks[i][keep]
        im = cv2.imread(image_path_list[idx])
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        org_height, org_width = cv2.imread(image_path_list[idx]).shape[:2]
        # im = cv2.resize(im,(int(scale_factor*org_width), int(scale_factor*org_height)))
        """
        visualize all detections in one image
        :param im_array: [b=1 c h w] in rgb
        :param detections: [ numpy.ndarray([[x1 y1 x2 y2 score]]) for j in classes ]
        :param class_names: list of names in imdb
        :param scale: visualize the scaled image
        :return:
        """
        detections = dets
        class_names = classes
        cfg = config
        scale = 1.0

        person_idx = class_names.index('person')
        dets = detections[person_idx]
        msks = masks[person_idx]

        for mask_idx, (det, msk) in enumerate(zip(dets, msks)):
            inst_arr = np.zeros_like(im[:, :, 0])  # Create a 2D W x H array
            bbox = det[:4] * scale
            cod = bbox.astype(int)
            if im[cod[1]:cod[3], cod[0]:cod[2], 0].size > 0:
                msk = cv2.resize(
                    msk, im[cod[1]:cod[3] + 1, cod[0]:cod[2] + 1, 0].T.shape)
                bimsk = (msk >= cfg.BINARY_THRESH).astype('uint8')

                # ------- Create bit-mask for this instance
                inst_arr[cod[1]:cod[3] + 1, cod[0]:cod[2] +
                         1] = bimsk  # Add thresholded binary mask
                rs_inst_arr = scipy.misc.imresize(inst_arr,
                                                  (org_height, org_width))
                rle = mask.encode(np.asfortranarray(rs_inst_arr))

                predictions.append({
                    'image_path': image_path,
                    'label': 'person',
                    'segmentation': rle,
                    'bbox': bbox.tolist(),
                    'score': float(det[-1]),  # cast from numpy float so json.dump can serialize it
                })

                del msk
                del bimsk
                del rs_inst_arr

    print 'Created {} predictions'.format(len(predictions))

    # ---------------------------------------------------------- Write output
    with open(params['outfile'], 'wb') as wf:
        json.dump(predictions, wf, indent=2)
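
The mask.encode call above is the pycocotools COCO mask API: it takes a Fortran-ordered uint8 array and returns a run-length encoding. The 'counts' field is a plain str under Python 2 but bytes under Python 3, where it must be decoded before json.dump will accept it. A minimal usage sketch (the toy mask size is arbitrary):

import numpy as np
from pycocotools import mask as mask_util

binary_mask = np.zeros((480, 640), dtype=np.uint8)
binary_mask[100:200, 150:300] = 1              # toy 100 x 150 instance

rle = mask_util.encode(np.asfortranarray(binary_mask))
if isinstance(rle['counts'], bytes):           # Python 3 only
    rle['counts'] = rle['counts'].decode('ascii')
print(mask_util.area(rle))                     # -> 15000
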
Example #4
    def write_vid_results_multiprocess(self, detection, gpu_id):
        """
        write results files in pascal devkit path
        :param all_boxes: boxes to be processed [bbox, confidence]
        :return: None
        """

        print 'Writing {} ImageNetVID results file'.format('all')
        filename = self.get_result_file_template(gpu_id).format('all')
        frame_seg_len = self.frame_seg_len
        nms = py_nms_wrapper(0.3)
        data_time = 0
        all_boxes = detection[0]
        frame_ids = detection[1]
        start_idx = 0
        sum_frame_ids = np.cumsum(frame_seg_len)
        first_true_id = frame_ids[0]
        start_video = np.searchsorted(sum_frame_ids, first_true_id)

        for im_ind in range(1, len(frame_ids)):
            t = time.time()
            true_id = frame_ids[im_ind]
            video_index = np.searchsorted(sum_frame_ids, true_id)
            if (video_index != start_video):  # represents a new video
                t1 = time.time()
                video = [
                    all_boxes[j][start_idx:im_ind]
                    for j in range(1, self.num_classes)
                ]
                dets_all = seq_nms(video)
                for j in xrange(1, self.num_classes):
                    for frame_ind, dets in enumerate(dets_all[j - 1]):
                        keep = nms(dets)
                        all_boxes[j][frame_ind + start_idx] = dets[keep, :]
                start_idx = im_ind
                start_video = video_index
                t2 = time.time()
                print 'video_index=', video_index, '  time=', t2 - t1
            data_time += time.time() - t
            if (im_ind % 100 == 0):
                print '{} seq_nms testing {} data {:.4f}s'.format(
                    frame_ids[im_ind - 1], im_ind, data_time / im_ind)

        # the last video
        video = [
            all_boxes[j][start_idx:im_ind] for j in range(1, self.num_classes)
        ]
        dets_all = seq_nms(video)
        for j in xrange(1, self.num_classes):
            for frame_ind, dets in enumerate(dets_all[j - 1]):
                keep = nms(dets)
                all_boxes[j][frame_ind + start_idx] = dets[keep, :]

        with open(filename, 'wt') as f:
            for im_ind in range(len(frame_ids)):
                for cls_ind, cls in enumerate(self.classes):
                    if cls == '__background__':
                        continue
                    dets = all_boxes[cls_ind][im_ind]
                    if len(dets) == 0:
                        continue
                    # the ImageNet evaluation expects 0-based indices
                    for k in range(dets.shape[0]):
                        f.write(
                            '{:d} {:d} {:.4f} {:.2f} {:.2f} {:.2f} {:.2f}\n'.
                            format(frame_ids[im_ind], cls_ind, dets[k, -1],
                                   dets[k, 0], dets[k, 1], dets[k, 2],
                                   dets[k, 3]))
        return
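
The per-video grouping above hinges on two NumPy calls: np.cumsum turns per-video frame counts into cumulative end markers, and the left-bisecting np.searchsorted maps a global frame id back to its video index. A worked toy example, assuming 1-based frame ids (which the left bisection implies):

import numpy as np

frame_seg_len = [4, 3, 5]                   # frames per video
sum_frame_ids = np.cumsum(frame_seg_len)    # -> [4, 7, 12]

# Video 0 covers ids 1..4, video 1 ids 5..7, video 2 ids 8..12.
videos = [int(np.searchsorted(sum_frame_ids, fid)) for fid in [1, 4, 5, 7, 8, 12]]
print(videos)                               # -> [0, 0, 1, 1, 2, 2]
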
Example #5
def main():
    # get symbol
    pprint.pprint(cfg)
    cfg.symbol = 'resnet_v1_101_flownet_rfcn'
    model = '/../model/rfcn_fgfa_flownet_vid'
    all_frame_interval = cfg.TEST.KEY_FRAME_INTERVAL * 2 + 1
    max_per_image = cfg.TEST.max_per_image
    feat_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    aggr_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()

    feat_sym = feat_sym_instance.get_feat_symbol(cfg)
    aggr_sym = aggr_sym_instance.get_aggregation_symbol(cfg)

    # set up class names
    num_classes = 31
    classes = [
        '__background__', 'airplane', 'antelope', 'bear', 'bicycle', 'bird',
        'bus', 'car', 'cattle', 'dog', 'domestic_cat', 'elephant', 'fox',
        'giant_panda', 'hamster', 'horse', 'lion', 'lizard', 'monkey',
        'motorcycle', 'rabbit', 'red_panda', 'sheep', 'snake', 'squirrel',
        'tiger', 'train', 'turtle', 'watercraft', 'whale', 'zebra'
    ]

    # load demo data

    image_names = sorted(
        glob.glob(cur_path + '/../demo/ILSVRC2015_val_00007010/*.JPEG'))
    output_dir = cur_path + '/../demo/rfcn_fgfa/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), ('{} does not exist'.format(im_name))
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = cfg.SCALES[0][0]
        max_size = cfg.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=cfg.network.IMAGE_STRIDE)
        im_tensor = transform(im, cfg.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)

        feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
        data.append({
            'data': im_tensor,
            'im_info': im_info,
            'data_cache': im_tensor,
            'feat_cache': im_tensor
        })

    # get predictor

    print 'get-predictor'
    data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
    label_names = []

    t1 = time.time()
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[
        ('data', (1, 3, max([v[0] for v in cfg.SCALES]),
                  max([v[1] for v in cfg.SCALES]))),
        ('data_cache', (19, 3, max([v[0] for v in cfg.SCALES]),
                        max([v[1] for v in cfg.SCALES]))),
        ('feat_cache',
         ((19, cfg.network.FGFA_FEAT_DIM,
           np.ceil(max([v[0]
                        for v in cfg.SCALES]) / feat_stride).astype(np.int),
           np.ceil(max([v[1]
                        for v in cfg.SCALES]) / feat_stride).astype(np.int))))
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for _ in xrange(len(data))]

    arg_params, aux_params = load_param(cur_path + model, 0, process=True)

    feat_predictors = Predictor(feat_sym,
                                data_names,
                                label_names,
                                context=[mx.gpu(0)],
                                max_data_shapes=max_data_shape,
                                provide_data=provide_data,
                                provide_label=provide_label,
                                arg_params=arg_params,
                                aux_params=aux_params)
    aggr_predictors = Predictor(aggr_sym,
                                data_names,
                                label_names,
                                context=[mx.gpu(0)],
                                max_data_shapes=max_data_shape,
                                provide_data=provide_data,
                                provide_label=provide_label,
                                arg_params=arg_params,
                                aux_params=aux_params)
    nms = py_nms_wrapper(cfg.TEST.NMS)

    # First frame of the video
    idx = 0
    data_batch = mx.io.DataBatch(data=[data[idx]],
                                 label=[],
                                 pad=0,
                                 index=idx,
                                 provide_data=[[
                                     (k, v.shape)
                                     for k, v in zip(data_names, data[idx])
                                 ]],
                                 provide_label=[None])
    scales = [
        data_batch.data[i][1].asnumpy()[0, 2]
        for i in xrange(len(data_batch.data))
    ]
    all_boxes = [[[] for _ in range(len(data))] for _ in range(num_classes)]
    data_list = deque(maxlen=all_frame_interval)
    feat_list = deque(maxlen=all_frame_interval)
    image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
    # append cfg.TEST.KEY_FRAME_INTERVAL padding images in the front (first frame)
    while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
        data_list.append(image)
        feat_list.append(feat)

    vis = False
    file_idx = 0
    thresh = 1e-3
    for idx, element in enumerate(data):
        file_name = '{:06d}'.format(file_idx)
        data_batch = mx.io.DataBatch(data=[element],
                                     label=[],
                                     pad=0,
                                     index=idx,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, element)
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        if (idx != len(data) - 1):

            if len(data_list) < all_frame_interval - 1:
                image, feat = get_resnet_output(feat_predictors, data_batch,
                                                data_names)
                data_list.append(image)
                feat_list.append(feat)

            else:
                #################################################
                # main part of the loop
                #################################################
                image, feat = get_resnet_output(feat_predictors, data_batch,
                                                data_names)
                data_list.append(image)
                feat_list.append(feat)

                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch,
                                        data_names, scales, cfg)

                data_batch.data[0][-2] = None
                data_batch.provide_data[0][-2] = ('data_cache', None)
                data_batch.data[0][-1] = None
                data_batch.provide_data[0][-1] = ('feat_cache', None)

                out_im = process_pred_result(
                    classes, pred_result, num_classes, thresh, cfg, nms,
                    all_boxes, file_idx, max_per_image, vis,
                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                total_time = time.time() - t1
                if not cfg.TEST.SEQ_NMS:
                    save_image(output_dir, file_name, out_im)
                print 'testing {} {:.4f}s'.format(file_name + '.JPEG',
                                                  total_time / (file_idx + 1))
                file_idx += 1
        else:
            #################################################
            # end part of a video                           #
            #################################################

            end_counter = 0
            image, feat = get_resnet_output(feat_predictors, data_batch,
                                            data_names)
            while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch,
                                        data_names, scales, cfg)

                out_im = process_pred_result(
                    classes, pred_result, num_classes, thresh, cfg, nms,
                    all_boxes, file_idx, max_per_image, vis,
                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)

                total_time = time.time() - t1
                if not cfg.TEST.SEQ_NMS:
                    save_image(output_dir, file_name, out_im)
                print 'testing {} {:.4f}s'.format(file_name + '.JPEG',
                                                  total_time / (file_idx + 1))
                file_idx += 1
                end_counter += 1

    if cfg.TEST.SEQ_NMS:
        video = [all_boxes[j][:] for j in range(1, num_classes)]
        dets_all = seq_nms(video)
        for cls_ind, dets_cls in enumerate(dets_all):
            for frame_ind, dets in enumerate(dets_cls):
                keep = nms(dets)
                all_boxes[cls_ind + 1][frame_ind] = dets[keep, :]
        for idx in range(len(data)):
            file_name = '{:06d}'.format(idx)
            boxes_this_image = [[]] + [
                all_boxes[j][idx] for j in range(1, num_classes)
            ]
            out_im = draw_all_detection(data[idx][0].asnumpy(),
                                        boxes_this_image, classes, scales[0],
                                        cfg)
            save_image(output_dir, file_name, out_im)

    print 'done'
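
The sliding temporal window above is a collections.deque with maxlen=all_frame_interval: once the window is full, each append silently evicts the oldest frame, and the first and last frames are re-appended to pad the window at video boundaries. A toy sketch of that behavior:

from collections import deque

KEY_FRAME_INTERVAL = 2
window = deque(maxlen=2 * KEY_FRAME_INTERVAL + 1)   # 5-frame window

# Pad the front with copies of frame 0, as done for the first frame above.
while len(window) < KEY_FRAME_INTERVAL:
    window.append('f0')

for frame in ['f0', 'f1', 'f2', 'f3']:
    window.append(frame)

# -> ['f0', 'f0', 'f1', 'f2', 'f3']; index KEY_FRAME_INTERVAL holds the key frame
print(list(window))
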
Example #6
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn_dcn' if not args.rfcn_only else 'resnet_v1_101_rfcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 81
    classes = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
        'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
        'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
        'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]
    # load demo data
    image_names = [
        'COCO_test2015_000000000891.jpg', 'COCO_test2015_000000001669.jpg'
    ]
    data = []
    for im_name in image_names:
        assert os.path.exists(cur_path + '/../demo/' + im_name), (
            '{} does not exist'.format('../demo/' + im_name))
        im = cv2.imread(cur_path + '/../demo/' + im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(
        cur_path + '/../model/' +
        ('rfcn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'),
        0,
        process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.cpu()],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)
    nms = py_nms_wrapper(config.TEST.NMS)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[0])
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC \
                else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print 'testing {} {:.4f}s'.format(im_name, toc())
        # visualize
        im = cv2.imread(cur_path + '/../demo/' + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_boxes(im, dets_nms, classes, 1, "marked_{}".format(im_name))

    print 'done'
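
The boxes[:, 4:8] versus boxes[:, j * 4:(j + 1) * 4] slicing above mirrors the bbox-regressor output layout: a class-agnostic head predicts a single refined box per RoI (columns 4:8, with 0:4 reserved for background), while a per-class head packs one box per class side by side. A small illustration of the per-class layout:

import numpy as np

num_classes = 3                            # background + 2 foreground classes
boxes = np.arange(2 * 4 * num_classes).reshape(2, 4 * num_classes)

j = 2                                      # pick foreground class 2
print(boxes[:, j * 4:(j + 1) * 4])
# -> [[ 8  9 10 11]
#     [20 21 22 23]]
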
Example #7
def pred_eval_quadrangle(predictor,
                         test_data,
                         imdb,
                         cfg,
                         vis=False,
                         draw=False,
                         thresh=1e-3,
                         logger=None,
                         ignore_cache=True):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all threshold are set by hand
    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param vis: controls visualization
    :param thresh: valid detection threshold
    :return:
    """

    det_file = os.path.join(imdb.result_path, imdb.name + '_detections.pkl')
    if os.path.exists(det_file) and not ignore_cache:
        with open(det_file, 'rb') as fid:
            all_boxes = cPickle.load(fid)
        # imdb.count_ar()
        imdb.check_transform()
        imdb.draw_gt_and_detections(all_boxes, thresh=0.1)
        info_str = imdb.evaluate_detections(all_boxes)
        if logger:
            logger.info('evaluate detections: \n{}'.format(info_str))
        return

    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data[0]]

    if not isinstance(test_data, PrefetchingIter):
        test_data = PrefetchingIter(test_data)

    nms = py_nms_wrapper(cfg.TEST.NMS)

    # limit detections to max_per_image over all classes
    max_per_image = cfg.TEST.max_per_image

    num_images = imdb.num_images
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 9 array of detections in
    #    (x1, y1, x2, y2, x3, y3, x4, y4, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    idx = 0
    data_time, net_time, post_time = 0.0, 0.0, 0.0
    t = time.time()
    for im_info, data_batch in test_data:
        t1 = time.time() - t
        t = time.time()

        scales = [iim_info[0, 2] for iim_info in im_info]
        scores_all, boxes_all, data_dict_all = im_detect_quadrangle(
            predictor, data_batch, data_names, scales, cfg)

        t2 = time.time() - t
        t = time.time()
        for delta, (scores, boxes, data_dict) in enumerate(
                zip(scores_all, boxes_all, data_dict_all)):
            # idx = int(data_dict['im_index'])-1
            for j in range(1, imdb.num_classes):
                indexes = np.where(scores[:, j] > thresh)[0]
                cls_scores = scores[indexes, j, np.newaxis]
                cls_boxes = boxes[indexes, 8:16] if cfg.CLASS_AGNOSTIC \
                    else boxes[indexes, j * 8:(j + 1) * 8]
                # Shrink each quadrangle to its axis-aligned enclosing box so
                # the standard rectangle NMS below can be reused.
                temp_cls_boxes = np.zeros((cls_boxes.shape[0], 4),
                                          dtype=cls_boxes.dtype)
                temp_cls_boxes_x = np.vstack((cls_boxes[:, 0], cls_boxes[:, 2],
                                              cls_boxes[:, 4], cls_boxes[:, 6]))
                temp_cls_boxes_y = np.vstack((cls_boxes[:, 1], cls_boxes[:, 3],
                                              cls_boxes[:, 5], cls_boxes[:, 7]))
                temp_cls_boxes[:, 0] = np.amin(temp_cls_boxes_x, axis=0)
                temp_cls_boxes[:, 1] = np.amin(temp_cls_boxes_y, axis=0)
                temp_cls_boxes[:, 2] = np.amax(temp_cls_boxes_x, axis=0)
                temp_cls_boxes[:, 3] = np.amax(temp_cls_boxes_y, axis=0)
                cls_dets = np.hstack((temp_cls_boxes, cls_scores))
                cls_quadrangle_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j][idx + delta] = cls_quadrangle_dets[keep, :]

            if max_per_image > 0:
                image_scores = np.hstack([
                    all_boxes[j][idx + delta][:, -1]
                    for j in range(1, imdb.num_classes)
                ])
                if len(image_scores) > max_per_image:
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in range(1, imdb.num_classes):
                        keep = np.where(
                            all_boxes[j][idx + delta][:, -1] >= image_thresh)[0]
                        all_boxes[j][idx + delta] = \
                            all_boxes[j][idx + delta][keep, :]

            if vis:
                boxes_this_image = [[]] + [
                    all_boxes[j][idx + delta]
                    for j in range(1, imdb.num_classes)
                ]
                vis_all_detection(data_dict['data'].asnumpy(),
                                  boxes_this_image, imdb.classes,
                                  scales[delta], cfg)

            if draw:
                if not os.path.isdir(cfg.TEST.save_img_path):
                    os.mkdir(cfg.TEST.save_img_path)
                path = os.path.join(cfg.TEST.save_img_path, str(idx) + '.jpg')
                boxes_this_image = [[]] + [
                    all_boxes[j][idx + delta]
                    for j in range(1, imdb.num_classes)
                ]
                im = draw_all_quadrangle_detection(data_dict['data'].asnumpy(),
                                                   boxes_this_image,
                                                   imdb.classes,
                                                   scales[delta],
                                                   cfg,
                                                   threshold=0.2)
                print path
                cv2.imwrite(path, im)

        idx += test_data.batch_size
        t3 = time.time() - t
        t = time.time()
        data_time += t1
        net_time += t2
        post_time += t3
        print 'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(
            idx, imdb.num_images, data_time / idx * test_data.batch_size,
            net_time / idx * test_data.batch_size,
            post_time / idx * test_data.batch_size)
        if logger:
            logger.info(
                'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(
                    idx, imdb.num_images,
                    data_time / idx * test_data.batch_size,
                    net_time / idx * test_data.batch_size,
                    post_time / idx * test_data.batch_size))

    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, protocol=cPickle.HIGHEST_PROTOCOL)

    imdb.draw_gt_and_detections(all_boxes, thresh=0.1)
    info_str = imdb.evaluate_detections(all_boxes)
    if logger:
        logger.info('evaluate detections: \n{}'.format(info_str))
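
The min/max bookkeeping inside the loop above collapses each 8-coordinate quadrangle to its axis-aligned enclosing rectangle so the standard rectangle NMS can score overlaps, while the kept indices still select the full quadrangle detections. The same computation in a compact reshape form (a sketch, not the repository's code):

import numpy as np

def quads_to_aabbs(quads):
    # quads: N x 8 array of (x1, y1, ..., x4, y4); returns N x 4 enclosing boxes.
    pts = quads.reshape(-1, 4, 2)
    return np.hstack((pts.min(axis=1), pts.max(axis=1)))

quads = np.array([[10., 0., 30., 5., 25., 40., 5., 35.]])
print(quads_to_aabbs(quads))               # -> [[ 5.  0. 30. 40.]]
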
Example #8
def net_inference(model):
    """
    generate data_batch -> im_detect -> post process
    :param predictor: Predictor
    :param image_name: image name
    :return: None
    """

    # datas = json.loads(args)
    predictor = model['predictor']
    classes = model['classes']
    threshold = model['threshold']
    thresholds = model['thresholds']
    rets = []
    nms = py_nms_wrapper(config.TEST.NMS)
    box_voting = py_box_voting_wrapper(config.TEST.BOX_VOTING_IOU_THRESH,
                                       config.TEST.BOX_VOTING_SCORE_THRESH,
                                       with_nms=True)
    try:
        time_str = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
        for i in sorted(os.listdir("/tmp/eval/init/images")):
            imageFile = os.path.join("/tmp/eval/init/images", i)
            print('-*-' * 50)
            print(imageFile)
            _t1 = time.time()
            try:
                im = load_image(imageFile, 50.0)
            except ErrorBase as e:
                rets.append({"code": e.code, "message": e.message})
                continue

            data_batch, data_names, im_scale = generate_batch(im)
            scores, boxes, data_dict = im_detect(predictor, data_batch,
                                                 data_names, im_scale, config)
            det_ret = []
            for cls_index, cls in enumerate(classes[1:], start=1):
                if len(cls) > 1:
                    cls_ind = int(cls[0])
                    cls_name = cls[1]
                else:
                    cls_ind = cls_index
                    cls_name = cls[0]
                cls_boxes = boxes[0][:, 4:8] if config.CLASS_AGNOSTIC \
                    else boxes[0][:, 4 * cls_ind:4 * (cls_ind + 1)]
                cls_scores = scores[0][:, cls_ind, np.newaxis]
                if len(classes) <= len(thresholds):
                    threshold = thresholds[cls_ind]
                keep = np.where(cls_scores >= threshold)[0]
                dets = np.hstack(
                    (cls_boxes, cls_scores)).astype(np.float32)[keep, :]
                # if "20170930_guns_1083.jpg" in imageFile:
                #     np.savetxt(fileOp, dets, delimiter=",")
                #     pass
                keep = nms(dets)
                # if "20170930_guns_1083.jpg" in imageFile:
                #     # print("end"*10)
                #     # print(dets[keep, :])
                #     # print('*'*100)
                #     # for i in dets:
                #     #     fileOp_1_op.write(i)
                #     #     fileOp_1_op.write('\n')
                #     np.savetxt(fileOp_1, dets[keep, :], delimiter=",")
                det_ret.extend(
                    _build_result(det, cls_name, cls_ind)
                    for det in dets[keep, :])

            _t2 = time.time()
            print("inference image time : %f" % (_t2 - _t1))
            rets.append(
                dict(code=0,
                     message=imageFile,
                     result=json.dumps(dict(detections=det_ret))))

    except Exception:
        print(traceback.format_exc())
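
py_box_voting_wrapper refers to box voting (Gidaris & Komodakis, ICCV 2015): each NMS survivor is replaced by the score-weighted average of all raw detections that overlap it above an IoU threshold, which typically tightens localization. A minimal self-contained sketch of that refinement; the wrapper's exact semantics (e.g. its with_nms flag) may differ:

import numpy as np

def pairwise_iou(a, b):
    # a: M x 4, b: N x 4 boxes as (x1, y1, x2, y2) -> M x N IoU matrix.
    tl = np.maximum(a[:, None, :2], b[None, :, :2])
    br = np.minimum(a[:, None, 2:], b[None, :, 2:])
    wh = np.clip(br - tl, 0, None)
    inter = wh[..., 0] * wh[..., 1]
    area_a = (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1])
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    return inter / (area_a[:, None] + area_b[None, :] - inter)

def box_voting(kept_dets, all_dets, iou_thresh):
    # kept_dets: K x 5 NMS survivors drawn from all_dets (N x 5, last col = score).
    voted = kept_dets.copy()
    iou = pairwise_iou(kept_dets[:, :4], all_dets[:, :4])
    for i in range(len(kept_dets)):
        voters = all_dets[iou[i] >= iou_thresh]   # includes the survivor itself
        w = voters[:, 4:5]
        voted[i, :4] = (voters[:, :4] * w).sum(axis=0) / w.sum()
    return voted
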
Example #9
def pred_double_eval(predictor,
                     test_data,
                     imdb,
                     cfg,
                     vis=False,
                     thresh=1e-3,
                     logger=None,
                     ignore_cache=True,
                     show_gt=False):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all threshold are set by hand
    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param vis: controls visualization
    :param thresh: valid detection threshold
    :return:
    """
    det_file = os.path.join(imdb.result_path, imdb.name + '_detections.pkl')
    if os.path.exists(det_file) and not ignore_cache:
        with open(det_file, 'rb') as fid:
            all_boxes = cPickle.load(fid)
        info_str = imdb.evaluate_detections(all_boxes)
        if logger:
            logger.info('evaluate detections: \n{}'.format(info_str))
        return

    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data[0]]
    label_names = [k[0] for k in test_data.provide_label[0]]
    num_images = test_data.size

    if not isinstance(test_data, PrefetchingIter):
        test_data = PrefetchingIter(test_data)


    if cfg.TEST.SOFTNMS:
        nms = py_softnms_wrapper(cfg.TEST.NMS)
    else:
        nms = py_nms_wrapper(cfg.TEST.NMS)

    # limit detections to max_per_image over all classes
    max_per_image = cfg.TEST.max_per_image

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]
    ref_all_boxes = [[[] for _ in range(num_images)]
                     for _ in range(imdb.num_classes)]
    # class_lut = [[] for _ in range(imdb.num_classes)]
    valid_tally = 0
    valid_sum = 0

    idx = 0
    t = time.time()
    inference_count = 0
    all_inference_time = []
    post_processing_time = []
    nms_full_count = []
    nms_pos_count = []
    is_max_count = []
    all_count = []
    for im_info, ref_im_info, data_batch in test_data:
        t1 = time.time() - t
        t = time.time()

        scales = [iim_info[0, 2] for iim_info in im_info]
        scores_all, boxes_all, ref_scores_all, ref_boxes_all, data_dict_all, label_dict_all = im_double_detect(
            predictor, data_batch, data_names, label_names, scales, cfg)

        t2 = time.time() - t
        t = time.time()
        # for delta, (scores, boxes, data_dict) in enumerate(zip(scores_all, boxes_all, data_dict_all)):
        nms_full_count_per_batch = 0
        nms_pos_count_per_batch = 0
        global num_of_is_full_max
        is_max_count_per_batch = num_of_is_full_max[0]
        all_count_per_batch = 0
        for delta, (scores, boxes, ref_scores, ref_boxes, data_dict,
                    label_dict) in enumerate(
                        zip(scores_all, boxes_all, ref_scores_all,
                            ref_boxes_all, data_dict_all, label_dict_all)):
            if cfg.TEST.LEARN_NMS:
                for j in range(1, imdb.num_classes):
                    indexes = np.where(scores[:, j - 1, 0] > thresh)[0]
                    cls_scores = scores[indexes, j - 1, :]
                    cls_boxes = boxes[indexes, j - 1, :]
                    cls_dets = np.hstack((cls_boxes, cls_scores))
                    # count the valid ground truth
                    if len(cls_scores) > 0:
                        # class_lut[j].append(idx + delta)
                        valid_tally += len(cls_scores)
                        valid_sum += len(scores)

                    all_boxes[j][idx + delta] = cls_dets

                    if DEBUG:
                        keep = nms(cls_dets)
                        nms_cls_dets = cls_dets[keep, :]
                        target = label_dict['nms_multi_target']
                        target_indices = np.where(target[:, 4] == j - 1)
                        target = target[target_indices]
                        nms_full_count_per_batch += bbox_equal_count(
                            nms_cls_dets, target)

                        gt_boxes = label_dict['gt_boxes'][0].asnumpy()
                        gt_boxes = gt_boxes[np.where(gt_boxes[:, 4] == j)[0], :4]
                        gt_boxes /= scales[delta]

                        if len(cls_boxes) != 0 and len(gt_boxes) != 0:
                            overlap_mat = bbox_overlaps(
                                cls_boxes.astype(np.float),
                                gt_boxes.astype(np.float))
                            keep = nms(
                                cls_dets[np.where(overlap_mat > 0.5)[0]])
                            nms_cls_dets = cls_dets[np.where(
                                overlap_mat > 0.5)[0]][keep]
                            nms_pos_count_per_batch += bbox_equal_count(
                                nms_cls_dets, target)
                        all_count_per_batch += len(target)
            else:
                for j in range(1, imdb.num_classes):
                    indexes = np.where(scores[:, j] > thresh)[0]
                    if cfg.TEST.FIRST_N > 0:
                        # todo: check whether the order affects the result
                        sort_indices = np.argsort(
                            scores[:, j])[-cfg.TEST.FIRST_N:]
                        # sort_indices = np.argsort(-scores[:, j])[0:cfg.TEST.FIRST_N]
                        indexes = np.intersect1d(sort_indices, indexes)

                    cls_scores = scores[indexes, j, np.newaxis]
                    cls_boxes = boxes[indexes, 4:8] if cfg.CLASS_AGNOSTIC \
                        else boxes[indexes, j * 4:(j + 1) * 4]
                    # count the valid ground truth
                    if len(cls_scores) > 0:
                        # class_lut[j].append(idx+delta)
                        valid_tally += len(cls_scores)
                        valid_sum += len(scores)
                    cls_dets = np.hstack((cls_boxes, cls_scores))
                    if cfg.TEST.SOFTNMS:
                        all_boxes[j][idx + delta] = nms(cls_dets)
                    else:
                        keep = nms(cls_dets)
                        all_boxes[j][idx + delta] = cls_dets[keep, :]
                        # all_boxes[j][idx + delta] = cls_dets

            if max_per_image > 0:
                image_scores = np.hstack([
                    all_boxes[j][idx + delta][:, -1]
                    for j in range(1, imdb.num_classes)
                ])
                if len(image_scores) > max_per_image:
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in range(1, imdb.num_classes):
                        keep = np.where(
                            all_boxes[j][idx + delta][:, -1] >= image_thresh)[0]
                        all_boxes[j][idx + delta] = \
                            all_boxes[j][idx + delta][keep, :]

            if vis:
                boxes_this_image = [[]] + [
                    all_boxes[j][idx + delta]
                    for j in range(1, imdb.num_classes)
                ]
                if show_gt:
                    gt_boxes = label_dict['gt_boxes'][0]
                    for gt_box in gt_boxes:
                        gt_box = gt_box.asnumpy()
                        gt_cls = int(gt_box[4])
                        gt_box = gt_box / scales[delta]
                        gt_box[4] = 1
                        if cfg.TEST.LEARN_NMS:
                            gt_box = np.append(gt_box, 1)
                        boxes_this_image[gt_cls] = np.vstack(
                            (boxes_this_image[gt_cls], gt_box))

                    if cfg.TEST.LEARN_NMS:
                        target_boxes = label_dict['nms_multi_target']
                        for target_box in target_boxes:
                            print("cur", target_box * scales[delta])
                            target_cls = int(target_box[4]) + 1
                            target_box[4] = 2 + target_box[5]
                            target_box[5] = target_box[6]
                            target_box = target_box[:6]
                            boxes_this_image[target_cls] = np.vstack(
                                (boxes_this_image[target_cls], target_box))
                # vis_all_detection(data_dict['ref_data'].asnumpy(), boxes_this_image, imdb.classes, scales[delta], cfg)
                # vis_double_all_detection(data_dict['data'].asnumpy(), boxes_this_image, data_dict['ref_data'].asnumpy(), ref_boxes_this_image, imdb.classes, scales[delta], cfg)
            if cfg.TEST.LEARN_NMS:
                for j in range(1, imdb.num_classes):
                    indexes = np.where(ref_scores[:, j - 1, 0] > thresh)[0]
                    cls_scores = ref_scores[indexes, j - 1, :]
                    cls_boxes = ref_boxes[indexes, j - 1, :]
                    cls_dets = np.hstack((cls_boxes, cls_scores))
                    # count the valid ground truth
                    if len(cls_scores) > 0:
                        # class_lut[j].append(idx + delta)
                        valid_tally += len(cls_scores)
                        valid_sum += len(ref_scores)
                    ref_all_boxes[j][idx + delta] = cls_dets

                    if DEBUG:
                        keep = nms(cls_dets)
                        nms_cls_dets = cls_dets[keep, :]
                        target = label_dict['ref_nms_multi_target']
                        target_indices = np.where(target[:, 4] == j - 1)
                        target = target[target_indices]
                        nms_full_count_per_batch += bbox_equal_count(
                            nms_cls_dets, target)

                        gt_boxes = label_dict['ref_gt_boxes'][0].asnumpy()
                        gt_boxes = gt_boxes[np.where(gt_boxes[:, 4] == j)[0], :4]
                        gt_boxes /= scales[delta]

                        if len(cls_boxes) != 0 and len(gt_boxes) != 0:
                            overlap_mat = bbox_overlaps(
                                cls_boxes.astype(np.float),
                                gt_boxes.astype(np.float))
                            keep = nms(
                                cls_dets[np.where(overlap_mat > 0.5)[0]])
                            nms_cls_dets = cls_dets[np.where(
                                overlap_mat > 0.5)[0]][keep]
                            nms_pos_count_per_batch += bbox_equal_count(
                                nms_cls_dets, target)
                        all_count_per_batch += len(target)
            else:
                for j in range(1, imdb.num_classes):
                    indexes = np.where(ref_scores[:, j] > thresh)[0]
                    if cfg.TEST.FIRST_N > 0:
                        # todo: check whether the order affects the result
                        sort_indices = np.argsort(
                            ref_scores[:, j])[-cfg.TEST.FIRST_N:]
                        # sort_indices = np.argsort(-scores[:, j])[0:cfg.TEST.FIRST_N]
                        indexes = np.intersect1d(sort_indices, indexes)

                    cls_scores = ref_scores[indexes, j, np.newaxis]
                    cls_boxes = ref_boxes[indexes, 4:8] if cfg.CLASS_AGNOSTIC \
                        else ref_boxes[indexes, j * 4:(j + 1) * 4]
                    # count the valid ground truth
                    if len(cls_scores) > 0:
                        # class_lut[j].append(idx+delta)
                        valid_tally += len(cls_scores)
                        valid_sum += len(ref_scores)
                        # print np.min(cls_scores), valid_tally, valid_sum
                        # cls_scores = scores[:, j, np.newaxis]
                        # cls_scores[cls_scores <= thresh] = thresh
                        # cls_boxes = boxes[:, 4:8] if cfg.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
                    cls_dets = np.hstack((cls_boxes, cls_scores))
                    if cfg.TEST.SOFTNMS:
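                        # note: the soft-NMS wrapper returns the rescored
                        # detections directly, while hard NMS below returns
                        # indices into cls_dets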
                        ref_all_boxes[j][idx + delta] = nms(cls_dets)
                    else:
                        keep = nms(cls_dets)
                        ref_all_boxes[j][idx + delta] = cls_dets[keep, :]

            if max_per_image > 0:
                image_scores = np.hstack([
                    ref_all_boxes[j][idx + delta][:, -1]
                    for j in range(1, imdb.num_classes)
                ])
                if len(image_scores) > max_per_image:
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in range(1, imdb.num_classes):
                        keep = np.where(
                            ref_all_boxes[j][idx + delta][:, -1] >= image_thresh)[0]
                        ref_all_boxes[j][idx + delta] = \
                            ref_all_boxes[j][idx + delta][keep, :]

            if vis:
                ref_boxes_this_image = [[]] + [
                    ref_all_boxes[j][idx + delta]
                    for j in range(1, imdb.num_classes)
                ]
                if show_gt:
                    gt_boxes = label_dict['ref_gt_boxes'][0]
                    for gt_box in gt_boxes:
                        gt_box = gt_box.asnumpy()
                        gt_cls = int(gt_box[4])
                        gt_box = gt_box / scales[delta]
                        gt_box[4] = 1
                        if cfg.TEST.LEARN_NMS:
                            gt_box = np.append(gt_box, 1)
                        ref_boxes_this_image[gt_cls] = np.vstack(
                            (ref_boxes_this_image[gt_cls], gt_box))

                    if cfg.TEST.LEARN_NMS:
                        target_boxes = label_dict['ref_nms_multi_target']
                        for target_box in target_boxes:
                            print("ref", target_box * scales[delta])
                            target_cls = int(target_box[4]) + 1
                            target_box[4] = 2 + target_box[5]
                            target_box[5] = target_box[6]
                            target_box = target_box[:6]
                            ref_boxes_this_image[target_cls] = np.vstack(
                                (ref_boxes_this_image[target_cls], target_box))
                vis_double_all_detection(data_dict['data'].asnumpy(),
                                         boxes_this_image,
                                         data_dict['ref_data'].asnumpy(),
                                         ref_boxes_this_image, imdb.classes,
                                         scales[delta], cfg)
                # vis_all_detection(data_dict['ref_data'].asnumpy(), ref_boxes_this_image, imdb.classes, scales[delta], cfg)

        if DEBUG:
            nms_full_count.append(nms_full_count_per_batch)
            nms_pos_count.append(nms_pos_count_per_batch)
            is_max_count.append(is_max_count_per_batch)
            all_count.append(all_count_per_batch)
            print("full:{} pos:{} max:{}".format(
                1.0 * sum(nms_full_count) / sum(all_count),
                1.0 * sum(nms_pos_count) / sum(all_count),
                1.0 * sum(is_max_count) / sum(all_count)))
        idx += test_data.batch_size
        t3 = time.time() - t
        t = time.time()
        post_processing_time.append(t3)
        all_inference_time.append(t1 + t2 + t3)
        inference_count += 1
        if inference_count % 200 == 0:
            valid_count = 500 if inference_count > 500 else inference_count
            print("--->> running-average inference time per batch: {}".format(
                float(sum(all_inference_time[-valid_count:])) / valid_count))
            print("--->> running-average post processing time per batch: {}".
                  format(
                      float(sum(post_processing_time[-valid_count:])) /
                      valid_count))
        print 'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(
            idx, num_images, t1, t2, t3)
        if logger:
            logger.info(
                'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(
                    idx, num_images, t1, t2, t3))

    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, protocol=cPickle.HIGHEST_PROTOCOL)

    # np.save('class_lut.npy', class_lut)

    info_str = imdb.evaluate_detections(all_boxes)
    if logger:
        logger.info('evaluate detections: \n{}'.format(info_str))
        num_valid_classes = [len(x) for x in class_lut]
        logger.info('valid class ratio:{}'.format(
            np.sum(num_valid_classes) / float(num_images)))
        logger.info('valid score ratio:{}'.format(
            float(valid_tally) / float(valid_sum + 0.01)))
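
Every example in this listing follows the same post-processing contract: py_nms_wrapper(thresh) returns a callable that takes an N x 5 array of (x1, y1, x2, y2, score) detections and gives back the indices of the boxes to keep. The sketch below is an illustrative greedy-IoU stand-in for that contract, not the project's own implementation.

# Illustrative stand-in for the py_nms_wrapper contract used throughout;
# the project's real implementation may differ.
import numpy as np

def py_nms_wrapper_sketch(thresh):
    def _nms(dets):
        # dets: (N, 5) array of (x1, y1, x2, y2, score)
        if dets.shape[0] == 0:
            return []
        x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
        scores = dets[:, 4]
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = scores.argsort()[::-1]          # highest score first
        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            # IoU of the kept box with every remaining box
            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])
            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h
            iou = inter / (areas[i] + areas[order[1:]] - inter)
            # suppress boxes whose IoU with the kept box exceeds thresh
            order = order[np.where(iou <= thresh)[0] + 1]
        return keep
    return _nms

# usage mirrors the examples: nms = py_nms_wrapper_sketch(0.3)
# keep = nms(cls_dets); cls_dets[keep, :]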
Example #10
def predict_on_image_names(
        image_names,
        config,
        model_path_id="/home/data/output/resnet_v1_101_coco_fcis_end2end_ohem-nebraska/train-nebraska/e2e",
        epoch=8):
    import argparse
    import os
    import sys
    import logging
    import pprint
    import cv2
    from utils.image import resize, transform
    import numpy as np
    # get config
    os.environ['PYTHONUNBUFFERED'] = '1'
    os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0'
    os.environ['MXNET_ENABLE_GPU_P2P'] = '0'
    cur_path = os.path.abspath(".")
    sys.path.insert(
        0, os.path.join(cur_path, '../external/mxnet', config.MXNET_VERSION))
    import mxnet as mx
    print("use mxnet at", mx.__file__)
    from core.tester import im_detect, Predictor
    from symbols import *
    from utils.load_model import load_param
    from utils.show_masks import show_masks
    from utils.tictoc import tic, toc
    from nms.nms import py_nms_wrapper
    from mask.mask_transform import gpu_mask_voting, cpu_mask_voting
    # get symbol
    ctx_id = [int(i) for i in config.gpus.split(',')]
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 2
    classes = ['cp']

    # load demo data
    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), '{} does not exist'.format(im_name)
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]

    # loading the last epoch that was trained, 8
    arg_params, aux_params = load_param(model_path_id, epoch, process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(ctx_id[0])],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)

    all_classes = []
    all_configs = []
    all_masks = []
    all_dets = []
    all_ims = []

    # warm up
    for i in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[0])
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        _, _, _, _ = im_detect(predictor, data_batch, data_names, scales,
                               config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        scores, boxes, masks, data_dict = im_detect(predictor, data_batch,
                                                    data_names, scales, config)
        im_shapes = [
            data_batch.data[i][0].shape[2:4]
            for i in xrange(len(data_batch.data))
        ]

        if not config.TEST.USE_MASK_MERGE:
            # per-image lists; reusing the name all_masks here would clobber
            # the accumulator defined above the loop
            boxes_per_class = [[] for _ in xrange(num_classes)]
            masks_per_class = [[] for _ in xrange(num_classes)]
            nms = py_nms_wrapper(config.TEST.NMS)
            for j in range(1, num_classes):
                indexes = np.where(scores[0][:, j] > 0.7)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                # getattr keeps the old try/except behaviour: fall back to
                # per-class boxes when CLASS_AGNOSTIC is absent or False
                if getattr(config, 'CLASS_AGNOSTIC', False):
                    cls_boxes = boxes[0][indexes, :]
                else:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]

                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                boxes_per_class[j] = cls_dets[keep, :]
                masks_per_class[j] = cls_masks[keep, :]
            dets = [boxes_per_class[j] for j in range(1, num_classes)]
            masks = [masks_per_class[j] for j in range(1, num_classes)]
        else:
            masks = masks[0][:, 1:, :, :]
            im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
            im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
            print(im_height, im_width)
            boxes = clip_boxes(boxes[0], (im_height, im_width))
            result_masks, result_dets = gpu_mask_voting(
                masks, boxes, scores[0], num_classes, 100, im_width, im_height,
                config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                config.BINARY_THRESH, ctx_id[0])

            dets = [result_dets[j] for j in range(1, num_classes)]
            masks = [
                result_masks[j][:, 0, :, :] for j in range(1, num_classes)
            ]
        print('testing {} {:.4f}s'.format(im_name, toc()))
        # visualize
        for i in xrange(len(dets)):
            keep = np.where(dets[i][:, -1] > 0.7)
            dets[i] = dets[i][keep]
            masks[i] = masks[i][keep]

        all_classes.append(classes)
        all_configs.append(config)
        all_masks.append(masks)
        all_dets.append(dets)
        im = cv2.imread(im_name)
        all_ims.append(im)
    return all_ims, all_dets, all_masks, all_configs, all_classes
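
A hypothetical invocation of the function above; the config import path, yaml file, and image paths are assumptions for illustration, not taken from the project.

# Hypothetical usage sketch; the import path, yaml file and image names
# below are assumptions, not taken from the project.
# from config.config import config, update_config
# update_config('experiments/fcis/cfgs/my_experiment.yaml')
image_names = ['demo/img_0001.jpg', 'demo/img_0002.jpg']
ims, dets, masks, configs, classes = predict_on_image_names(
    image_names, config, epoch=8)
for im_name, det in zip(image_names, dets):
    # det holds one (K, 5) array of (x1, y1, x2, y2, score) per foreground class
    print('{}: {}'.format(im_name, [d.shape for d in det]))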
Example #11
File: eval.py Project: hyb1234hi/argus
def net_inference(model, reqs):

    CTX.logger.info("inference begin...")
    # datas = json.loads(args)
    predictor = model['predictor']
    classes = model['classes']
    # default threshold, used when a class has no entry in thresholds
    threshold = 0.7
    thresholds = model['thresholds']
    rets = []
    nms = py_nms_wrapper(config.TEST.NMS)
    box_voting = py_box_voting_wrapper(config.TEST.BOX_VOTING_IOU_THRESH,
                                       config.TEST.BOX_VOTING_SCORE_THRESH,
                                       with_nms=True)

    try:
        for data in reqs:
            try:
                im = load_image(data['data']['uri'], body=data['data']['body'])
            except ErrorBase as e:
                rets.append({"code":e.code, "message": e.message, "result": None})
                continue
                # return [], 400, 'load image error'

            if im.shape[0] > im.shape[1]:
                long_side, short_side = im.shape[0], im.shape[1]
            else:
                long_side, short_side = im.shape[1], im.shape[0]

            if short_side > 0 and float(long_side)/float(short_side) > 50.0:
                msg = "aspect ratio is too large; long_side:short_side should not be larger than 50.0"
                # raise ErrorBase.__init__(400, msg)
                rets.append({"code": 400, "message": msg, "result": None})
                continue

            data_batch, data_names, im_scale = generate_batch(im)
            scores, boxes, data_dict = im_detect(predictor,
                                                 data_batch,
                                                 data_names,
                                                 im_scale,
                                                 config)
            det_ret = []
            for cls_index, cls in enumerate(classes[1:], start=1):
                if len(cls) > 1:
                    cls_ind = int(cls[0])
                    cls_name = cls[1]
                else:
                    cls_ind = cls_index
                    cls_name = cls[0]
                cls_boxes = boxes[0][:, 4:8] if config.CLASS_AGNOSTIC else boxes[0][:, 4 * cls_ind:4 * (cls_ind + 1)]
                cls_scores = scores[0][:, cls_ind, np.newaxis]
                if len(classes) <= len(thresholds):
                    threshold = thresholds[cls_ind]
                else:
                    CTX.logger.info("no threshold set for class %s, using default" % cls_index)

                keep = np.where(cls_scores >= threshold)[0]
                dets = np.hstack((cls_boxes, cls_scores)).astype(np.float32)[keep, :]
                keep = nms(dets)
                det_ret.extend(_build_result(det, cls_name, cls_ind)
                    for det in dets[keep, :])

            rets.append(
                dict(
                    code=0,
                    message='',
                    result=dict(detections=det_ret)))

    except Exception as e:
        # print(traceback.format_exc())
        CTX.logger.info("inference error:%s"%(traceback.format_exc()))
        return [], 599, str(e)
    return rets, 0, ''
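
_build_result is not shown in this example. Inferred purely from its call site above (one detection row plus a class name and index), a hypothetical version might look like the sketch below; the real helper's keys and layout may differ.

# Hypothetical reconstruction of the undefined _build_result helper, inferred
# only from its call site above; keys and layout are assumptions.
def _build_result(det, cls_name, cls_ind):
    x1, y1, x2, y2, score = [float(v) for v in det[:5]]
    return {
        'index': int(cls_ind),
        'class': cls_name,
        'score': score,
        'pts': [[x1, y1], [x2, y1], [x2, y2], [x1, y2]],
    }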
Example #12
def main():
    # get symbol
    pprint.pprint(cfg)
    cfg.symbol = 'resnet_v1_101_flownet_rfcn'
    model = '/data2/output/fgfa_rfcn/jrdb/resnet_v1_101_flownet_jrdb/VID_train_15frames/fgfa_rfcn_vid'
    all_frame_interval = cfg.TEST.KEY_FRAME_INTERVAL * 2 + 1
    max_per_image = cfg.TEST.max_per_image
    feat_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    aggr_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()

    feat_sym = feat_sym_instance.get_feat_symbol(cfg)
    aggr_sym = aggr_sym_instance.get_aggregation_symbol(cfg)

    # set up class names

    classes = ['__background__', 'p']
    num_classes = len(classes)

    image_names = glob.glob(args.input + '/*')
    image_names.sort()
    print("num of images", len(image_names))
    #line = '%s %s %s %s\n' % (dire.replace(join(join(data_dir, ANNOTATIONS, VID)) + "/", ""), '1', str(i), str(num_of_images))
    output_dir_tmp = '%s_%s_epoc_%d' % (
        args.dataset, os.path.basename(args.input), args.epoc)
    output_dir = os.path.join("/data2", "output", output_dir_tmp)
    print(output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), '{} does not exist'.format(im_name)
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = cfg.SCALES[0][0]
        max_size = cfg.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=cfg.network.IMAGE_STRIDE)
        im_tensor = transform(im, cfg.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)

        feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
        data.append({
            'data': im_tensor,
            'im_info': im_info,
            'data_cache': im_tensor,
            'feat_cache': im_tensor
        })

    # get predictor

    print 'get-predictor'
    data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
    label_names = []

    t1 = time.time()
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[
        ('data', (1, 3, max([v[0] for v in cfg.SCALES]),
                  max([v[1] for v in cfg.SCALES]))),
        ('data_cache', (19, 3, max([v[0] for v in cfg.SCALES]),
                        max([v[1] for v in cfg.SCALES]))),
        ('feat_cache',
         ((19, cfg.network.FGFA_FEAT_DIM,
           np.ceil(max([v[0]
                        for v in cfg.SCALES]) / feat_stride).astype(np.int),
           np.ceil(max([v[1]
                        for v in cfg.SCALES]) / feat_stride).astype(np.int))))
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for _ in xrange(len(data))]

    arg_params, aux_params = load_param(model, args.epoc, process=True)
    feat_predictors = Predictor(feat_sym,
                                data_names,
                                label_names,
                                context=[mx.gpu(0)],
                                max_data_shapes=max_data_shape,
                                provide_data=provide_data,
                                provide_label=provide_label,
                                arg_params=arg_params,
                                aux_params=aux_params)
    aggr_predictors = Predictor(aggr_sym,
                                data_names,
                                label_names,
                                context=[mx.gpu(0)],
                                max_data_shapes=max_data_shape,
                                provide_data=provide_data,
                                provide_label=provide_label,
                                arg_params=arg_params,
                                aux_params=aux_params)
    nms = py_nms_wrapper(cfg.TEST.NMS)

    # First frame of the video
    idx = 0
    data_batch = mx.io.DataBatch(data=[data[idx]],
                                 label=[],
                                 pad=0,
                                 index=idx,
                                 provide_data=[[
                                     (k, v.shape)
                                     for k, v in zip(data_names, data[idx])
                                 ]],
                                 provide_label=[None])
    scales = [
        data_batch.data[i][1].asnumpy()[0, 2]
        for i in xrange(len(data_batch.data))
    ]
    all_boxes = [[[] for _ in range(len(data))] for _ in range(num_classes)]
    data_list = deque(maxlen=all_frame_interval)
    feat_list = deque(maxlen=all_frame_interval)
    image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
    # append cfg.TEST.KEY_FRAME_INTERVAL padding images in the front (first frame)
    while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
        data_list.append(image)
        feat_list.append(feat)

    vis = False
    file_idx = 0
    thresh = args.threshold
    for idx, element in enumerate(data):

        data_batch = mx.io.DataBatch(data=[element],
                                     label=[],
                                     pad=0,
                                     index=idx,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, element)
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        if idx != len(data) - 1:

            if len(data_list) < all_frame_interval - 1:
                image, feat = get_resnet_output(feat_predictors, data_batch,
                                                data_names)
                data_list.append(image)
                feat_list.append(feat)

            else:
                #################################################
                # main part of the loop
                #################################################
                image, feat = get_resnet_output(feat_predictors, data_batch,
                                                data_names)
                data_list.append(image)
                feat_list.append(feat)

                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch,
                                        data_names, scales, cfg)

                data_batch.data[0][-2] = None
                data_batch.provide_data[0][-2] = ('data_cache', None)
                data_batch.data[0][-1] = None
                data_batch.provide_data[0][-1] = ('feat_cache', None)
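                # the two cache entries are cleared here, presumably so the
                # batch stops holding the large cached tensors after detection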

                out_im = process_pred_result(
                    classes, pred_result, num_classes, thresh, cfg, nms,
                    all_boxes, file_idx, max_per_image, vis,
                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                total_time = time.time() - t1
                if not cfg.TEST.SEQ_NMS:
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(
                    str(file_idx) + '.JPEG', total_time / (file_idx + 1))
                file_idx += 1
        else:
            #################################################
            # end part of a video                           #
            #################################################

            end_counter = 0
            image, feat = get_resnet_output(feat_predictors, data_batch,
                                            data_names)
            while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch,
                                        data_names, scales, cfg)

                out_im = process_pred_result(
                    classes, pred_result, num_classes, thresh, cfg, nms,
                    all_boxes, file_idx, max_per_image, vis,
                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)

                total_time = time.time() - t1
                if not cfg.TEST.SEQ_NMS:
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(
                    str(file_idx) + '.JPEG', total_time / (file_idx + 1))
                file_idx += 1
                end_counter += 1

    if cfg.TEST.SEQ_NMS:
        video = [all_boxes[j][:] for j in range(1, num_classes)]
        dets_all = seq_nms(video)
        for cls_ind, dets_cls in enumerate(dets_all):
            for frame_ind, dets in enumerate(dets_cls):
                keep = nms(dets)
                all_boxes[cls_ind + 1][frame_ind] = dets[keep, :]
        for idx in range(len(data)):
            boxes_this_image = [[]] + [
                all_boxes[j][idx] for j in range(1, num_classes)
            ]
            out_im = draw_all_detection(data[idx][0].asnumpy(),
                                        boxes_this_image, classes, scales[0],
                                        cfg)
            save_image(output_dir, idx, out_im)

    print 'done'
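
The first-frame handling in main() above relies on a fixed-length deque: the window is pre-filled with copies of the first frame so the key frame always sits at the centre, and the last frame is re-appended at the end of the video. A stripped-down sketch of that buffering logic (the interval and the toy frames are made-up values):

# Stripped-down sketch of the sliding-window buffering used in main() above;
# KEY_FRAME_INTERVAL and the toy "frames" are made-up values.
from collections import deque

KEY_FRAME_INTERVAL = 2
all_frame_interval = KEY_FRAME_INTERVAL * 2 + 1   # window size, key frame centred

frames = list(range(6))                           # a toy six-frame "video"
window = deque(maxlen=all_frame_interval)

# pad the front with copies of the first frame
while len(window) < KEY_FRAME_INTERVAL:
    window.append(frames[0])

for f in frames:
    window.append(f)
    if len(window) == all_frame_interval:
        # the centre element is the frame actually being detected
        print('detect on frame {} window {}'.format(
            window[KEY_FRAME_INTERVAL], list(window)))

# at the end of the video, keep re-appending the last frame so the
# remaining frames each get a turn as the key frame
for _ in range(KEY_FRAME_INTERVAL + 1):
    window.append(frames[-1])
    print('detect on frame {} window {}'.format(
        window[KEY_FRAME_INTERVAL], list(window)))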
Example #13
def pred_eval(predictor,
              test_data,
              imdb,
              cfg,
              vis=False,
              thresh=1e-3,
              logger=None,
              ignore_cache=True):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all thresholds are set by hand
    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param vis: controls visualization
    :param thresh: valid detection threshold
    :return:
    """

    det_file = os.path.join(imdb.result_path, imdb.name + '_detections.pkl')
    if os.path.exists(det_file) and not ignore_cache:
        with open(det_file, 'rb') as fid:
            all_boxes = cPickle.load(fid)
        info_str = imdb.evaluate_detections(all_boxes)
        if logger:
            logger.info('evaluate detections: \n{}'.format(info_str))
        return

    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data[0]]

    if not isinstance(test_data, PrefetchingIter):
        test_data = PrefetchingIter(test_data)

    if cfg.TEST.SOFTNMS:
        nms = py_softnms_wrapper(cfg.TEST.NMS)
    else:
        nms = py_nms_wrapper(cfg.TEST.NMS)

    # limit detections to max_per_image over all classes
    max_per_image = cfg.TEST.max_per_image

    num_images = imdb.num_images
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]
    class_lut = [[] for _ in range(imdb.num_classes)]
    valid_tally = 0
    valid_sum = 0

    idx = 0
    t = time.time()
    inference_count = 0
    all_inference_time = []
    post_processing_time = []
    for im_info, data_batch in test_data:
        t1 = time.time() - t
        t = time.time()

        scales = [iim_info[0, 2] for iim_info in im_info]
        scores_all, boxes_all, data_dict_all = im_detect(
            predictor, data_batch, data_names, scales, cfg)

        t2 = time.time() - t
        t = time.time()
        for delta, (scores, boxes, data_dict) in enumerate(
                zip(scores_all, boxes_all, data_dict_all)):
            if cfg.TEST.LEARN_NMS:
                for j in range(1, imdb.num_classes):
                    indexes = np.where(scores[:, j - 1] > thresh)[0]
                    cls_scores = scores[indexes, j - 1:j]
                    cls_boxes = boxes[indexes, j - 1, :]
                    cls_dets = np.hstack((cls_boxes, cls_scores))
                    # count the valid ground truth
                    if len(cls_scores) > 0:
                        class_lut[j].append(idx + delta)
                        valid_tally += len(cls_scores)
                        valid_sum += len(scores)
                    all_boxes[j][idx + delta] = cls_dets
            else:
                for j in range(1, imdb.num_classes):
                    indexes = np.where(scores[:, j] > thresh)[0]
                    if cfg.TEST.FIRST_N > 0:
                        # todo: check whether the order affects the result
                        sort_indices = np.argsort(
                            scores[:, j])[-cfg.TEST.FIRST_N:]
                        # sort_indices = np.argsort(-scores[:, j])[0:cfg.TEST.FIRST_N]
                        indexes = np.intersect1d(sort_indices, indexes)

                    cls_scores = scores[indexes, j, np.newaxis]
                    cls_boxes = boxes[indexes, 4:8] if cfg.CLASS_AGNOSTIC \
                        else boxes[indexes, j * 4:(j + 1) * 4]
                    # count the valid ground truth
                    if len(cls_scores) > 0:
                        class_lut[j].append(idx + delta)
                        valid_tally += len(cls_scores)
                        valid_sum += len(scores)
                        # print np.min(cls_scores), valid_tally, valid_sum
                        # cls_scores = scores[:, j, np.newaxis]
                        # cls_scores[cls_scores <= thresh] = thresh
                        # cls_boxes = boxes[:, 4:8] if cfg.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
                    cls_dets = np.hstack((cls_boxes, cls_scores))
                    if cfg.TEST.SOFTNMS:
                        all_boxes[j][idx + delta] = nms(cls_dets)
                    else:
                        keep = nms(cls_dets)
                        all_boxes[j][idx + delta] = cls_dets[keep, :]

            if max_per_image > 0:
                image_scores = np.hstack([
                    all_boxes[j][idx + delta][:, -1]
                    for j in range(1, imdb.num_classes)
                ])
                if len(image_scores) > max_per_image:
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in range(1, imdb.num_classes):
                        keep = np.where(
                            all_boxes[j][idx + delta][:, -1] >= image_thresh)[0]
                        all_boxes[j][idx + delta] = \
                            all_boxes[j][idx + delta][keep, :]

            if vis:
                boxes_this_image = [[]] + [
                    all_boxes[j][idx + delta]
                    for j in range(1, imdb.num_classes)
                ]
                vis_all_detection(data_dict['data'].asnumpy(),
                                  boxes_this_image, imdb.classes,
                                  scales[delta], cfg)

        idx += test_data.batch_size
        t3 = time.time() - t
        t = time.time()
        post_processing_time.append(t3)
        all_inference_time.append(t1 + t2 + t3)
        inference_count += 1
        if inference_count % 200 == 0:
            valid_count = 500 if inference_count > 500 else inference_count
            print("--->> running-average inference time per batch: {}".format(
                float(sum(all_inference_time[-valid_count:])) / valid_count))
            print("--->> running-average post processing time per batch: {}".
                  format(
                      float(sum(post_processing_time[-valid_count:])) /
                      valid_count))
        print 'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(
            idx, imdb.num_images, t1, t2, t3)
        if logger:
            logger.info(
                'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(
                    idx, imdb.num_images, t1, t2, t3))

    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, protocol=cPickle.HIGHEST_PROTOCOL)

    # np.save('class_lut.npy', class_lut)

    info_str = imdb.evaluate_detections(all_boxes)
    if logger:
        logger.info('evaluate detections: \n{}'.format(info_str))
        num_valid_classes = [len(x) for x in class_lut]
        logger.info('valid class ratio:{}'.format(
            np.sum(num_valid_classes) / float(num_images)))
        logger.info('valid score ratio:{}'.format(
            float(valid_tally) / float(valid_sum + 0.01)))
Example #14
    def forward(self, is_train, req, in_data, out_data, aux):
        nms = py_nms_wrapper(self._threshold)

        batch_size = in_data[0].shape[0]
        if batch_size > 1:
            raise ValueError("Sorry, multiple images each device is not implemented")

        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        cls_prob_dict = {
            'stride64': in_data[4],
            'stride32': in_data[3],
            'stride16': in_data[2],
            'stride8': in_data[1],
            'stride4': in_data[0],
        }
        bbox_pred_dict = {
            'stride64': in_data[9],
            'stride32': in_data[8],
            'stride16': in_data[7],
            'stride8': in_data[6],
            'stride4': in_data[5],
        }

        pre_nms_topN = self._rpn_pre_nms_top_n
        post_nms_topN = self._rpn_post_nms_top_n
        min_size = self._rpn_min_size

        proposal_list = []
        score_list = []
        for s in self._feat_stride:
            stride = int(s)
            sub_anchors = generate_anchors(base_size=stride, scales=self._scales, ratios=self._ratios)
            scores = cls_prob_dict['stride' + str(s)].asnumpy()[:, self._num_anchors:, :, :]
            bbox_deltas = bbox_pred_dict['stride' + str(s)].asnumpy()
            im_info = in_data[-1].asnumpy()[0, :]
            # 1. Generate proposals from bbox_deltas and shifted anchors
            # use real image size instead of padded feature map sizes
            height, width = int(im_info[0] / stride), int(im_info[1] / stride)

            # Enumerate all shifts
            shift_x = np.arange(0, width) * stride
            shift_y = np.arange(0, height) * stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()

            # Enumerate all shifted anchors:
            #
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = self._num_anchors
            K = shifts.shape[0]
            anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
            anchors = anchors.reshape((K * A, 4))

            # Transpose and reshape predicted bbox transformations to get them
            # into the same order as the anchors:
            #
            # bbox deltas will be (1, 4 * A, H, W) format
            # transpose to (1, H, W, 4 * A)
            # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
            # in slowest to fastest order
            bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

            # Same story for the scores:
            #
            # scores are (1, A, H, W) format
            # transpose to (1, H, W, A)
            # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
            scores = self._clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

            # Convert anchors into proposals via bbox transformations
            proposals = bbox_pred(anchors, bbox_deltas)

            # 2. clip predicted boxes to image
            proposals = clip_boxes(proposals, im_info[:2])

            # 3. remove predicted boxes with either height or width < threshold
            # (NOTE: convert min_size to input image scale stored in im_info[2])
            keep = self._filter_boxes(proposals, min_size * im_info[2])
            proposals = proposals[keep, :]
            scores = scores[keep]

            proposal_list.append(proposals)
            score_list.append(scores)

        proposals = np.vstack(proposal_list)
        scores = np.vstack(score_list)

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        det = np.hstack((proposals, scores)).astype(np.float32)
        keep = nms(det)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        # pad to ensure output size remains unchanged
        if len(keep) < post_nms_topN:
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
            keep = np.hstack((keep, pad))
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois array
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        # if is_train:
        self.assign(out_data[0], req[0], blob)
        if self._output_score:
            self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
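
The anchor enumeration in forward() hinges on one broadcast: A base anchors shaped (1, A, 4) plus K shifts shaped (K, 1, 4) give all (K, A, 4) shifted anchors. A tiny self-contained demonstration with made-up stride, anchors, and feature-map size:

# Tiny demonstration of the (K, A, 4) anchor broadcast used in forward();
# the stride, base anchors and feature-map size are made-up values.
import numpy as np

stride = 16
height, width = 2, 3                                  # toy feature-map size
sub_anchors = np.array([[-8., -8., 8., 8.],
                        [-16., -16., 16., 16.]])      # A = 2 base anchors

shift_x = np.arange(0, width) * stride
shift_y = np.arange(0, height) * stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()   # (K, 4)

A = sub_anchors.shape[0]
K = shifts.shape[0]
# (1, A, 4) + (K, 1, 4) broadcasts to (K, A, 4): every cell gets every anchor
anchors = sub_anchors.reshape((1, A, 4)) + \
    shifts.reshape((1, K, 4)).transpose((1, 0, 2))
anchors = anchors.reshape((K * A, 4))
print(anchors.shape)  # (12, 4)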
Example #15
def det(mod, fn):
    raw_img = cv2.imdecode(np.fromfile(fn, dtype=np.uint8), -1)
    h, w = raw_img.shape[:2]

    if h > DST_SIZE or w > DST_SIZE:
        if h > w:
            raw_img = cv2.copyMakeBorder(raw_img, 0, 0, 0, h-w, cv2.BORDER_CONSTANT, value=(128,128,128))
            s = DST_SIZE / float(h)
        else:
            raw_img = cv2.copyMakeBorder(raw_img, 0, w-h, 0, 0, cv2.BORDER_CONSTANT, value=(128,128,128))
            s = DST_SIZE / float(w)
        raw_img = cv2.resize(raw_img, (DST_SIZE, DST_SIZE))

    else:
        # here h <= DST_SIZE and w <= DST_SIZE, so both pads are non-negative
        bottom = DST_SIZE - h
        right = DST_SIZE - w
        raw_img = cv2.copyMakeBorder(raw_img, 0, bottom, 0, right, cv2.BORDER_CONSTANT, value=(128,128,128))

    im_shape = [IMG_H,IMG_W] # reverse order
    img = cv2.resize(raw_img, (IMG_H,IMG_W))
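    # caution: cv2.resize expects (width, height); passing (IMG_H, IMG_W) is
    # only correct when IMG_H == IMG_W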
    raw_h = img.shape[0]
    raw_w = img.shape[1]

    im_tensor = image.transform(img, [103.06,115.90,123.15])

    im_info = np.array([[IMG_H, IMG_W, 4.18300658e-01]])

    batch = mx.io.DataBatch([mx.nd.array(im_tensor), mx.nd.array(im_info)])

    start = time.time()
    mod.forward(batch)

    output_names = mod.output_names
    output_tensor = mod.get_outputs()
    print ("time", time.time()-start, "secs.")

    output = dict(zip(output_names, output_tensor))

    rois = output['rois_output'].asnumpy()[:, 1:]
    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

    pred_boxes = bbox_pred(rois, bbox_deltas)
    pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

    num_classes = 2

    all_cls_dets = [[] for _ in range(num_classes)]

    for j in range(1, num_classes):
        indexes = np.where(scores[:, j] > 0.1)[0]
        cls_scores = scores[indexes, j, np.newaxis]
        cls_boxes = pred_boxes[indexes, j * 4:(j + 1) * 4]
        cls_dets = np.hstack((cls_boxes, cls_scores)).copy()
        all_cls_dets[j] = cls_dets

    for idx_class in range(1, num_classes):
        nms = py_nms_wrapper(0.3)
        keep = nms(all_cls_dets[idx_class])
        all_cls_dets[idx_class] = all_cls_dets[idx_class][keep, :]

    for i in range(all_cls_dets[1].shape[0]):
        cv2.rectangle(img,
                      (int(all_cls_dets[1][i][0]), int(all_cls_dets[1][i][1])),
                      (int(all_cls_dets[1][i][2]), int(all_cls_dets[1][i][3])),
                      (0, 0, 255), 4)

    if img.shape[0] > 1024 or img.shape[1] > 1024:
        img = cv2.resize(img, (0,0), fx=0.3, fy=0.3)
    cv2.imshow("w", img)
    cv2.waitKey()
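
det() leans on bbox_pred to decode RoIs plus regression deltas into boxes. Below is a sketch of the conventional Faster R-CNN decoding that such a helper typically implements; this project's exact version may differ.

# Sketch of the conventional Faster R-CNN box decoding that a bbox_pred
# helper typically implements; this project's version may differ in details.
import numpy as np

def bbox_pred_sketch(boxes, deltas):
    # boxes: (N, 4) as (x1, y1, x2, y2); deltas: (N, 4k) as (dx, dy, dw, dh)
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0)
    ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0)

    dx = deltas[:, 0::4]
    dy = deltas[:, 1::4]
    dw = deltas[:, 2::4]
    dh = deltas[:, 3::4]

    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]

    pred = np.zeros(deltas.shape)
    pred[:, 0::4] = pred_ctr_x - 0.5 * (pred_w - 1.0)  # x1
    pred[:, 1::4] = pred_ctr_y - 0.5 * (pred_h - 1.0)  # y1
    pred[:, 2::4] = pred_ctr_x + 0.5 * (pred_w - 1.0)  # x2
    pred[:, 3::4] = pred_ctr_y + 0.5 * (pred_h - 1.0)  # y2
    return pred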
Example #16
def pred_eval(gpu_id,
              feat_predictors,
              aggr_predictors,
              test_data,
              imdb,
              cfg,
              orig_pred,
              vis=False,
              thresh=1e-3,
              logger=None,
              ignore_cache=True):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all thresholds are set by hand
    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param vis: controls visualization
    :param thresh: valid detection threshold
    :return:
    """

    det_file = os.path.join(imdb.result_path, imdb.name + '_' + str(gpu_id))
    if cfg.TEST.SEQ_NMS:
        det_file += '_raw'

    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data[0]]
    num_images = test_data.size
    # init frame id for each video (all videos level unique id)
    roidb_frame_ids = []
    for x in test_data.roidb:
        # for EPIC-Kitchens, frame ids are not contiguous
        if 'unique_ids' in x:
            roidb_frame_ids.append(x['unique_ids'])
        else:
            roidb_frame_ids.append(x['frame_id'])

    # roidb_frame_ids = [x['frame_id'] for x in test_data.roidb]
    roidb_frame_seg_lens = [x['frame_seg_len'] if 'frame_seg_len' in x else x['video_len'] \
                            for x in test_data.roidb]
    gt_classes = [x['gt_classes'] for x in test_data.roidb]

    if not isinstance(test_data, PrefetchingIter):
        test_data = PrefetchingIter(test_data)

    nms = py_nms_wrapper(cfg.TEST.NMS)
    # limit detections to max_per_image over all classes
    max_per_image = cfg.TEST.max_per_image

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]
    frame_ids = np.zeros(num_images, dtype=np.int)

    roidb_idx = -1
    roidb_offset = -1
    idx = 0
    all_frame_interval = cfg.TEST.KEY_FRAME_INTERVAL * cfg.TEST.sample_stride * 2 + 1
    strided_idx = [
        _i for _i in range(all_frame_interval)
        if _i % cfg.TEST.sample_stride == 0
    ]
    print(strided_idx, len(strided_idx))
    print('all_frame_interval', all_frame_interval, 'sample stride',
          cfg.TEST.sample_stride)

    data_time, net_time, post_time, seq_time = 0.0, 0.0, 0.0, 0.0
    t = time.time()

    # loop through all the test data
    for frame_offset, img_path, im_info, key_frame_flag, data_batch in test_data:

        ########
        ### for drawing
        img_path = img_path.split('/')[-3:]
        if not os.path.exists(os.path.join(imdb.result_path, *img_path[:2])):
            os.makedirs(os.path.join(imdb.result_path, *img_path[:2]))
        ########

        t1 = time.time() - t
        t = time.time()

        #################################################
        # new video                                     #
        #################################################
        # empty lists and append padding images
        # do not do prediction yet
        if key_frame_flag == 0:
            roidb_idx += 1
            roidb_offset = -1
            roidb_frame_seg_len = roidb_frame_seg_lens[roidb_idx]
            # init data_list and feat_list for a new video
            data_list = deque(maxlen=all_frame_interval)
            feat_list = deque(maxlen=all_frame_interval)
            img_paths_list = deque(maxlen=all_frame_interval)
            frame_offset_list = deque(maxlen=all_frame_interval)
            image, feat = get_resnet_output(feat_predictors, data_batch,
                                            data_names)
            # append cfg.TEST.KEY_FRAME_INTERVAL * cfg.TEST.sample_stride + 1 padding copies of the first frame
            while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL * cfg.TEST.sample_stride + 1:
                data_list.append(image)
                feat_list.append(feat)
                img_paths_list.append(img_path)
                frame_offset_list.append(frame_offset)

            if cfg.TEST.KEY_FRAME_INTERVAL == 0:
                scales = [iim_info[0, 2] for iim_info in im_info]
                image, feat = get_resnet_output(feat_predictors, data_batch,
                                                data_names)
                data_list.append(image)
                feat_list.append(feat)
                img_paths_list.append(img_path)
                frame_offset_list.append(frame_offset)

                strided_data_list = [data_list[_idx] for _idx in strided_idx]
                strided_feat_list = [feat_list[_idx] for _idx in strided_idx]

                prepare_data(strided_data_list, strided_feat_list, data_batch)
                pred_result, aggr_feat, nonlocal_weights = im_detect(
                    aggr_predictors, data_batch, data_names, scales, cfg)

                roidb_offset += 1

                if cfg.TEST.video_shuffle:
                    offset = frame_offset_list[cfg.TEST.KEY_FRAME_INTERVAL *
                                               cfg.TEST.sample_stride]
                else:
                    offset = roidb_offset
                if isinstance(roidb_frame_ids[roidb_idx], int):
                    frame_ids[idx] = roidb_frame_ids[roidb_idx] + offset
                else:
                    frame_ids[idx] = roidb_frame_ids[roidb_idx][offset]

                t2 = time.time() - t
                t = time.time()
                if orig_pred:
                    out_im = process_pred_result(
                        pred_result, imdb, thresh, cfg, nms, all_boxes, idx,
                        max_per_image, vis,
                        data_list[cfg.TEST.KEY_FRAME_INTERVAL *
                                  cfg.TEST.sample_stride].asnumpy(), scales)
                else:
                    out_im = process_pred_result_v2(
                        pred_result, imdb, thresh, cfg, nms, all_boxes, idx,
                        max_per_image, vis,
                        data_list[cfg.TEST.KEY_FRAME_INTERVAL *
                                  cfg.TEST.sample_stride].asnumpy(), scales)

                if vis:
                    output_dir = os.path.join(
                        imdb.result_path,
                        *img_paths_list[cfg.TEST.KEY_FRAME_INTERVAL *
                                        cfg.TEST.sample_stride])
                    cv2.imwrite(os.path.join(root_path, output_dir), out_im)
                idx += test_data.batch_size

                t3 = time.time() - t
                t = time.time()
                data_time += t1
                net_time += t2
                post_time += t3
                print('testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.
                      format(idx, num_images,
                             data_time / idx * test_data.batch_size,
                             net_time / idx * test_data.batch_size,
                             post_time / idx * test_data.batch_size))
                if logger:
                    logger.info(
                        'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.
                        format(idx, num_images,
                               data_time / idx * test_data.batch_size,
                               net_time / idx * test_data.batch_size,
                               post_time / idx * test_data.batch_size))
        #################################################
        # main part of the loop                         #
        #################################################
        elif key_frame_flag == 2:
            # keep appending data to the lists without doing prediction until they contain all_frame_interval - 1 entries
            if len(data_list) < all_frame_interval - 1:
                image, feat = get_resnet_output(feat_predictors, data_batch,
                                                data_names)
                data_list.append(image)
                feat_list.append(feat)
                img_paths_list.append(img_path)
                frame_offset_list.append(frame_offset)

            else:
                scales = [iim_info[0, 2] for iim_info in im_info]

                image, feat = get_resnet_output(feat_predictors, data_batch,
                                                data_names)
                data_list.append(image)
                feat_list.append(feat)
                img_paths_list.append(img_path)
                frame_offset_list.append(frame_offset)

                strided_data_list = [data_list[_idx] for _idx in strided_idx]
                strided_feat_list = [feat_list[_idx] for _idx in strided_idx]

                prepare_data(strided_data_list, strided_feat_list, data_batch)
                pred_result, aggr_feat, nonlocal_weights = im_detect(
                    aggr_predictors, data_batch, data_names, scales, cfg)

                roidb_offset += 1
                if cfg.TEST.video_shuffle:
                    offset = frame_offset_list[cfg.TEST.KEY_FRAME_INTERVAL *
                                               cfg.TEST.sample_stride]
                else:
                    offset = roidb_offset
                if isinstance(roidb_frame_ids[roidb_idx], int):
                    frame_ids[idx] = roidb_frame_ids[roidb_idx] + offset
                else:
                    frame_ids[idx] = roidb_frame_ids[roidb_idx][offset]

                t2 = time.time() - t
                t = time.time()
                if orig_pred:
                    out_im = process_pred_result(
                        pred_result, imdb, thresh, cfg, nms, all_boxes, idx,
                        max_per_image, vis,
                        data_list[cfg.TEST.KEY_FRAME_INTERVAL *
                                  cfg.TEST.sample_stride].asnumpy(), scales)
                else:
                    out_im = process_pred_result_v2(
                        pred_result, imdb, thresh, cfg, nms, all_boxes, idx,
                        max_per_image, vis,
                        data_list[cfg.TEST.KEY_FRAME_INTERVAL *
                                  cfg.TEST.sample_stride].asnumpy(), scales)

                if vis:
                    output_dir = os.path.join(
                        imdb.result_path,
                        *img_paths_list[cfg.TEST.KEY_FRAME_INTERVAL *
                                        cfg.TEST.sample_stride])
                    cv2.imwrite(os.path.join(root_path, output_dir), out_im)
                idx += test_data.batch_size

                t3 = time.time() - t
                t = time.time()
                data_time += t1
                net_time += t2
                post_time += t3
                print('testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.
                      format(idx, num_images,
                             data_time / idx * test_data.batch_size,
                             net_time / idx * test_data.batch_size,
                             post_time / idx * test_data.batch_size))
                if logger:
                    logger.info(
                        'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.
                        format(idx, num_images,
                               data_time / idx * test_data.batch_size,
                               net_time / idx * test_data.batch_size,
                               post_time / idx * test_data.batch_size))
        #################################################
        # end part of a video                           #
        #################################################
        elif key_frame_flag == 1:  # last frame of a video
            end_counter = 0
            image, feat = get_resnet_output(feat_predictors, data_batch,
                                            data_names)

            ##############
            while len(data_list) < all_frame_interval - 1:
                data_list.append(image)
                feat_list.append(feat)
                img_paths_list.append(img_path)
                frame_offset_list.append(frame_offset)
            ##############

            scales = [iim_info[0, 2] for iim_info in im_info]
            while end_counter < min(
                    roidb_frame_seg_len,
                    cfg.TEST.KEY_FRAME_INTERVAL * cfg.TEST.sample_stride + 1):
                data_list.append(image)
                feat_list.append(feat)
                img_paths_list.append(img_path)
                frame_offset_list.append(frame_offset)

                strided_data_list = [data_list[_idx] for _idx in strided_idx]
                strided_feat_list = [feat_list[_idx] for _idx in strided_idx]
                prepare_data(strided_data_list, strided_feat_list, data_batch)
                pred_result, aggr_feat, _ = im_detect(aggr_predictors,
                                                      data_batch, data_names,
                                                      scales, cfg)

                roidb_offset += 1
                if cfg.TEST.video_shuffle:
                    offset = frame_offset_list[cfg.TEST.KEY_FRAME_INTERVAL *
                                               cfg.TEST.sample_stride]
                else:
                    offset = roidb_offset
                if isinstance(roidb_frame_ids[roidb_idx], int):
                    frame_ids[idx] = roidb_frame_ids[roidb_idx] + offset
                else:
                    frame_ids[idx] = roidb_frame_ids[roidb_idx][offset]

                t2 = time.time() - t
                t = time.time()
                if orig_pred:
                    out_im = process_pred_result(
                        pred_result, imdb, thresh, cfg, nms, all_boxes, idx,
                        max_per_image, vis,
                        data_list[cfg.TEST.KEY_FRAME_INTERVAL *
                                  cfg.TEST.sample_stride].asnumpy(), scales)
                else:
                    out_im = process_pred_result_v2(
                        pred_result, imdb, thresh, cfg, nms, all_boxes, idx,
                        max_per_image, vis,
                        data_list[cfg.TEST.KEY_FRAME_INTERVAL *
                                  cfg.TEST.sample_stride].asnumpy(), scales)

                if vis:
                    output_dir = os.path.join(
                        imdb.result_path,
                        *img_paths_list[cfg.TEST.KEY_FRAME_INTERVAL *
                                        cfg.TEST.sample_stride])
                    cv2.imwrite(os.path.join(root_path, output_dir), out_im)
                idx += test_data.batch_size
                t3 = time.time() - t
                t = time.time()
                data_time += t1
                net_time += t2
                post_time += t3

                print('testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.
                      format(idx, num_images,
                             data_time / idx * test_data.batch_size,
                             net_time / idx * test_data.batch_size,
                             post_time / idx * test_data.batch_size))
                if logger:
                    logger.info(
                        'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.
                        format(idx, num_images,
                               data_time / idx * test_data.batch_size,
                               net_time / idx * test_data.batch_size,
                               post_time / idx * test_data.batch_size))
                end_counter += 1

    with open(det_file, 'wb') as f:
        cPickle.dump((all_boxes, frame_ids),
                     f,
                     protocol=cPickle.HIGHEST_PROTOCOL)

    return all_boxes, frame_ids
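The end-of-video branch above flushes the tail of a video by re-appending the final frame until every real frame has occupied the centre slot of the aggregation window. A minimal sketch of that front-pad / tail-flush pattern, assuming a symmetric window and using plain integers as stand-in frames:

from collections import deque

def sliding_windows(frames, window_size):
    # Yield one fixed-size window per input frame; the centre slot holds
    # the frame being detected. Front padding repeats the first frame,
    # and the tail flush re-appends the last one, mirroring the loops above.
    half = window_size // 2
    window = deque(maxlen=window_size)
    for _ in range(half + 1):          # pad the front with frame 0
        window.append(frames[0])
    for f in frames[1:]:
        window.append(f)
        if len(window) == window_size:
            yield list(window)
    for _ in range(half):              # flush the tail with the last frame
        window.append(frames[-1])
        yield list(window)

for w in sliding_windows(list(range(5)), window_size=5):
    print(w)

Running this prints five windows whose centre element walks through 0..4, which is exactly what the padding loops guarantee: each real frame is detected once, at the centre of a full window.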
Example #17
def pred_eval(gpu_id,
              key_predictor,
              cur_predictor,
              test_data,
              imdb,
              cfg,
              vis=False,
              thresh=1e-4,
              logger=None,
              ignore_cache=True):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all thresholds are set by hand
    :param key_predictor: Predictor for key frames
    :param cur_predictor: Predictor for non-key frames
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param vis: controls visualization
    :param thresh: valid detection threshold
    :return:
    """

    det_file = os.path.join(imdb.result_path,
                            imdb.name + '_' + str(gpu_id) + '_detections.pkl')
    if os.path.exists(det_file) and not ignore_cache:
        with open(det_file, 'rb') as fid:
            all_boxes, frame_ids = cPickle.load(fid)
        return all_boxes, frame_ids

    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data[0]]
    num_images = test_data.size
    roidb_frame_ids = [x['frame_id'] for x in test_data.roidb]

    if not isinstance(test_data, PrefetchingIter):
        test_data = PrefetchingIter(test_data)

    nms = py_nms_wrapper(cfg.TEST.NMS)

    # limit detections to max_per_image over all classes
    max_per_image = cfg.TEST.max_per_image

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]
    frame_ids = np.zeros(num_images, dtype=np.int)

    roidb_idx = -1
    roidb_offset = -1
    idx = 0
    data_time, net_time, post_time = 0.0, 0.0, 0.0
    t = time.time()
    for im_info, key_frame_flag, data_batch in test_data:
        t1 = time.time() - t
        t = time.time()

        scales = [iim_info[0, 2] for iim_info in im_info]
        if key_frame_flag != 2:
            scores_all, boxes_all, data_dict_all, feat = im_detect(
                key_predictor, data_batch, data_names, scales, cfg)
        else:
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            scores_all, boxes_all, data_dict_all, _ = im_detect(
                cur_predictor, data_batch, data_names, scales, cfg)

        if key_frame_flag == 0:
            roidb_idx += 1
            roidb_offset = 0
        else:
            roidb_offset += 1

        frame_ids[idx] = roidb_frame_ids[roidb_idx] + roidb_offset

        t2 = time.time() - t
        t = time.time()
        for delta, (scores, boxes, data_dict) in enumerate(
                zip(scores_all, boxes_all, data_dict_all)):
            for j in range(1, imdb.num_classes):
                indexes = np.where(scores[:, j] > thresh)[0]
                cls_scores = scores[indexes, j, np.newaxis]
                cls_boxes = boxes[indexes, 4:8] if cfg.CLASS_AGNOSTIC \
                    else boxes[indexes, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j][idx + delta] = cls_dets[keep, :]

            if max_per_image > 0:
                image_scores = np.hstack([
                    all_boxes[j][idx + delta][:, -1]
                    for j in range(1, imdb.num_classes)
                ])
                if len(image_scores) > max_per_image:
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in range(1, imdb.num_classes):
                        keep = np.where(
                            all_boxes[j][idx + delta][:, -1] >= image_thresh)[0]
                        all_boxes[j][idx + delta] = \
                            all_boxes[j][idx + delta][keep, :]

            if vis:
                boxes_this_image = [[]] + [
                    all_boxes[j][idx + delta]
                    for j in range(1, imdb.num_classes)
                ]
                vis_all_detection(data_dict['data'].asnumpy(),
                                  boxes_this_image, imdb.classes,
                                  scales[delta], cfg)

        idx += test_data.batch_size
        t3 = time.time() - t
        t = time.time()
        data_time += t1
        net_time += t2
        post_time += t3
        print 'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(
            idx, num_images, data_time / idx * test_data.batch_size,
            net_time / idx * test_data.batch_size,
            post_time / idx * test_data.batch_size)
        if logger:
            logger.info(
                'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(
                    idx, num_images, data_time / idx * test_data.batch_size,
                    net_time / idx * test_data.batch_size,
                    post_time / idx * test_data.batch_size))

    with open(det_file, 'wb') as f:
        cPickle.dump((all_boxes, frame_ids),
                     f,
                     protocol=cPickle.HIGHEST_PROTOCOL)

    return all_boxes, frame_ids
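This variant runs the heavyweight key_predictor only on key frames, caches the resulting feature map, and hands it to the lighter cur_predictor for the frames in between (the key_frame_flag != 2 test above). A schematic of that dispatch, with detect_key and detect_cur as hypothetical callables standing in for the two im_detect calls:

def run_video(batches, detect_key, detect_cur):
    # batches: iterable of (key_frame_flag, data) pairs; flag semantics
    # follow the loop above (0/1 = key frame, 2 = non-key frame).
    feat = None
    results = []
    for key_frame_flag, data in batches:
        if key_frame_flag != 2:
            # key frame: full backbone pass, cache the feature map
            dets, feat = detect_key(data)
        else:
            # non-key frame: reuse the cached key-frame feature
            dets = detect_cur(data, feat)
        results.append(dets)
    return results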
Example #18
def pred_eval(predictor,
              test_data,
              imdb,
              cfg,
              vis=False,
              thresh=1e-3,
              logger=None,
              ignore_cache=False):

    det_file = os.path.join(imdb.result_path, imdb.name + '_detections.pkl')
    seg_file = os.path.join(imdb.result_path, imdb.name + '_masks.pkl')

    if os.path.exists(det_file) and os.path.exists(
            seg_file) and not ignore_cache:
        with open(det_file, 'rb') as f:
            all_boxes = cPickle.load(f)
        with open(seg_file, 'rb') as f:
            all_masks = cPickle.load(f)
    else:
        assert vis or not test_data.shuffle
        data_names = [k[0] for k in test_data.provide_data[0]]

        if not isinstance(test_data, PrefetchingIter):
            test_data = PrefetchingIter(test_data)

        # function pointers
        nms = py_nms_wrapper(cfg.TEST.NMS)
        mask_voting = gpu_mask_voting if cfg.TEST.USE_GPU_MASK_MERGE else cpu_mask_voting

        max_per_image = 100 if cfg.TEST.USE_MASK_MERGE else -1
        num_images = imdb.num_images
        all_boxes = [[[] for _ in xrange(num_images)]
                     for _ in xrange(imdb.num_classes)]
        all_masks = [[[] for _ in xrange(num_images)]
                     for _ in xrange(imdb.num_classes)]

        idx = 0
        t = time.time()
        for data_batch in test_data:
            t1 = time.time() - t
            t = time.time()

            scales = [
                data_batch.data[i][1].asnumpy()[0, 2]
                for i in xrange(len(data_batch.data))
            ]
            scores_all, boxes_all, masks_all, data_dict_all = im_detect(
                predictor, data_batch, data_names, scales, cfg)
            im_shapes = [
                data_batch.data[i][0].shape[2:4]
                for i in xrange(len(data_batch.data))
            ]

            t2 = time.time() - t
            t = time.time()

            # post processing
            for delta, (scores, boxes, masks, data_dict) in enumerate(
                    zip(scores_all, boxes_all, masks_all, data_dict_all)):

                if not cfg.TEST.USE_MASK_MERGE:
                    for j in range(1, imdb.num_classes):
                        indexes = np.where(scores[:, j] > thresh)[0]
                        cls_scores = scores[indexes, j, np.newaxis]
                        cls_masks = masks[indexes, 1, :, :]
                        if cfg.CLASS_AGNOSTIC:
                            cls_boxes = boxes[indexes, :]
                        else:
                            cls_boxes = boxes[indexes, j * 4:(j + 1) * 4]

                        cls_dets = np.hstack((cls_boxes, cls_scores))
                        keep = nms(cls_dets)
                        all_boxes[j][idx + delta] = cls_dets[keep, :]
                        all_masks[j][idx + delta] = cls_masks[keep, :]
                else:
                    masks = masks[:, 1:, :, :]
                    im_height = np.round(im_shapes[delta][0] /
                                         scales[delta]).astype('int')
                    im_width = np.round(im_shapes[delta][1] /
                                        scales[delta]).astype('int')
                    boxes = clip_boxes(boxes, (im_height, im_width))
                    result_mask, result_box = mask_voting(
                        masks, boxes, scores, imdb.num_classes, max_per_image,
                        im_width, im_height, cfg.TEST.NMS,
                        cfg.TEST.MASK_MERGE_THRESH, cfg.BINARY_THRESH)
                    for j in xrange(1, imdb.num_classes):
                        all_boxes[j][idx + delta] = result_box[j]
                        all_masks[j][idx + delta] = result_mask[j][:, 0, :, :]

                if vis:
                    boxes_this_image = [[]] + [
                        all_boxes[j][idx + delta]
                        for j in range(1, imdb.num_classes)
                    ]
                    masks_this_image = [[]] + [
                        all_masks[j][idx + delta]
                        for j in range(1, imdb.num_classes)
                    ]
                    vis_all_mask(data_dict['data'].asnumpy(), boxes_this_image,
                                 masks_this_image, imdb.classes, scales[delta],
                                 cfg)

            idx += test_data.batch_size
            t3 = time.time() - t
            t = time.time()

            print 'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(
                idx, imdb.num_images, t1, t2, t3)
            if logger:
                logger.info(
                    'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.
                    format(idx, imdb.num_images, t1, t2, t3))

        with open(det_file, 'wb') as f:
            cPickle.dump(all_boxes, f, protocol=cPickle.HIGHEST_PROTOCOL)
        with open(seg_file, 'wb') as f:
            cPickle.dump(all_masks, f, protocol=cPickle.HIGHEST_PROTOCOL)

    info_str = imdb.evaluate_sds(all_boxes, all_masks)
    if logger:
        logger.info('evaluate detections: \n{}'.format(info_str))
Example #19
def pred_eval(gpu_id, feat_predictors, aggr_predictors, test_data, imdb, cfg, vis=False, thresh=1e-3, logger=None, ignore_cache=True):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all thresholds are set by hand
    :param feat_predictors: Predictor extracting per-frame features
    :param aggr_predictors: Predictor aggregating features for detection
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param vis: controls visualization
    :param thresh: valid detection threshold
    :return:
    """

    det_file = os.path.join(imdb.result_path, imdb.name + '_'+ str(gpu_id))
    if cfg.TEST.SEQ_NMS:
        det_file += '_raw'
    print('det_file=', det_file)
    if os.path.exists(det_file) and not ignore_cache:
        with open(det_file, 'rb') as fid:
            all_boxes, frame_ids = pickle.load(fid)
        return all_boxes, frame_ids


    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data[0]]
    num_images = test_data.size
    roidb_frame_ids = [x['frame_id'] for x in test_data.roidb]

    if not isinstance(test_data, PrefetchingIter):
        test_data = PrefetchingIter(test_data)

    nms = py_nms_wrapper(cfg.TEST.NMS)
    # limit detections to max_per_image over all classes
    max_per_image = cfg.TEST.max_per_image

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]
    frame_ids = np.zeros(num_images, dtype=np.int)

    roidb_idx = -1
    roidb_offset = -1
    idx = 0
    all_frame_interval = cfg.TEST.KEY_FRAME_INTERVAL * 2 + 1

    data_time, net_time, post_time, seq_time = 0.0, 0.0, 0.0, 0.0
    t = time.time()

    # loop through all the test data
    for im_info, key_frame_flag, data_batch in test_data:
        t1 = time.time() - t
        t = time.time()

        #################################################
        # new video                                     #
        #################################################
        # empty lists and append padding images
        # do not do prediction yet
        if key_frame_flag == 0:
            roidb_idx += 1
            roidb_offset = -1
            # init data_list and feat_list for a new video
            data_list = deque(maxlen=all_frame_interval)
            feat_list = deque(maxlen=all_frame_interval)
            image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
            # append cfg.TEST.KEY_FRAME_INTERVAL+1 padding images in the front (first frame)
            while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL+1:
                data_list.append(image)
                feat_list.append(feat)

        #################################################
        # main part of the loop                         #
        #################################################
        elif key_frame_flag == 2:
            # keep appending data to the lists without doing prediction until the lists contain 2 * cfg.TEST.KEY_FRAME_INTERVAL objects
            if len(data_list) < all_frame_interval - 1:
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)

            else:
                scales = [iim_info[0, 2] for iim_info in im_info]

                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch, data_names, scales, cfg)

                roidb_offset += 1
                frame_ids[idx] = roidb_frame_ids[roidb_idx] + roidb_offset

                t2 = time.time() - t
                t = time.time()
                process_pred_result(pred_result, imdb, thresh, cfg, nms, all_boxes, idx, max_per_image, vis,
                                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                idx += test_data.batch_size

                t3 = time.time() - t
                t = time.time()
                data_time += t1
                net_time += t2
                post_time += t3
                print('testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(idx, num_images,
                                                                                   data_time / idx * test_data.batch_size,
                                                                                   net_time / idx * test_data.batch_size,
                                                                                   post_time / idx * test_data.batch_size))
                if logger:
                    logger.info('testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(idx, num_images,
                                                                                             data_time / idx * test_data.batch_size,
                                                                                             net_time / idx * test_data.batch_size,
                                                                                             post_time / idx * test_data.batch_size))
        #################################################
        # end part of a video                           #
        #################################################
        elif key_frame_flag == 1:       # last frame of a video
            end_counter = 0
            image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
            while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch, data_names, scales, cfg)

                roidb_offset += 1
                frame_ids[idx] = roidb_frame_ids[roidb_idx] + roidb_offset

                t2 = time.time() - t
                t = time.time()
                process_pred_result(pred_result, imdb, thresh, cfg, nms, all_boxes, idx, max_per_image, vis, data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                idx += test_data.batch_size
                t3 = time.time() - t
                t = time.time()
                data_time += t1
                net_time += t2
                post_time += t3

                print('testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(idx, num_images,
                                                                                   data_time / idx * test_data.batch_size,
                                                                                   net_time / idx * test_data.batch_size,
                                                                                   post_time / idx * test_data.batch_size))
                if logger:
                    logger.info('testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(idx, num_images,
                                                                                             data_time / idx * test_data.batch_size,
                                                                                             net_time / idx * test_data.batch_size,
                                                                                             post_time / idx * test_data.batch_size))
                end_counter += 1

    with open(det_file, 'wb') as f:
        pickle.dump((all_boxes, frame_ids), f, protocol=pickle.HIGHEST_PROTOCOL)

    return all_boxes, frame_ids
Example #20
def cpu_mask_voting(masks,
                    boxes,
                    scores,
                    num_classes,
                    max_per_image,
                    im_width,
                    im_height,
                    nms_thresh,
                    merge_thresh,
                    binary_thresh=0.4):
    """
    Wrapper function for mask voting, note we already know the class of boxes and masks
    """
    masks = masks.astype(np.float32)
    mask_size = masks.shape[-1]
    nms = py_nms_wrapper(nms_thresh)
    # apply NMS, then sort so only the top-scoring detections are kept

    # Intermediate results
    t_boxes = [[] for _ in xrange(num_classes)]
    t_scores = [[] for _ in xrange(num_classes)]
    t_all_scores = []
    for i in xrange(1, num_classes):
        dets = np.hstack((boxes.astype(np.float32), scores[:, i:i + 1]))
        inds = nms(dets)
        num_keep = min(len(inds), max_per_image)
        inds = inds[:num_keep]
        t_boxes[i] = boxes[inds]
        t_scores[i] = scores[inds, i]
        t_all_scores.extend(scores[inds, i])

    sorted_scores = np.sort(t_all_scores)[::-1]
    num_keep = min(len(sorted_scores), max_per_image)
    thresh = max(sorted_scores[num_keep - 1], 1e-3)

    for i in xrange(1, num_classes):
        keep = np.where(t_scores[i] >= thresh)
        t_boxes[i] = t_boxes[i][keep]
        t_scores[i] = t_scores[i][keep]

    num_detect = boxes.shape[0]
    res_mask = [[] for _ in xrange(num_detect)]
    for i in xrange(num_detect):
        box = np.round(boxes[i]).astype(int)
        mask = cv2.resize(masks[i, 0].astype(np.float32),
                          (box[2] - box[0] + 1, box[3] - box[1] + 1))
        res_mask[i] = mask

    list_result_box = [[] for _ in xrange(num_classes)]
    list_result_mask = [[] for _ in xrange(num_classes)]
    for c in xrange(1, num_classes):
        num_boxes = len(t_boxes[c])
        masks_ar = np.zeros((num_boxes, 1, mask_size, mask_size))
        boxes_ar = np.zeros((num_boxes, 4))
        for i in xrange(num_boxes):
            # Get weights according to their segmentation scores
            cur_ov = bbox_overlaps(boxes.astype(np.float),
                                   t_boxes[c][i, np.newaxis].astype(np.float))
            cur_inds = np.where(cur_ov >= merge_thresh)[0]
            cur_weights = scores[cur_inds, c]
            cur_weights = cur_weights / sum(cur_weights)
            # Re-format mask when passing it to mask_aggregation
            p_mask = [res_mask[j] for j in list(cur_inds)]
            # do mask aggregation
            orig_mask, boxes_ar[i] = mask_aggregation(boxes[cur_inds], p_mask,
                                                      cur_weights, im_width,
                                                      im_height, binary_thresh)
            masks_ar[i, 0] = cv2.resize(orig_mask.astype(np.float32),
                                        (mask_size, mask_size))
        boxes_scored_ar = np.hstack((boxes_ar, t_scores[c][:, np.newaxis]))
        list_result_box[c] = boxes_scored_ar
        list_result_mask[c] = masks_ar
    return list_result_mask, list_result_box
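mask_aggregation itself is defined elsewhere in the repository; as a rough standalone sketch of the voting idea, the snippet below weights the masks of every box that sufficiently overlaps a reference box by detection score and averages them. It merges masks in their shared (H, W) frame, whereas the real helper first pastes each mask into image coordinates:

import numpy as np

def bbox_iou(boxes, ref):
    # IoU of an (N, 4) array of boxes against a single (4,) box
    x1 = np.maximum(boxes[:, 0], ref[0])
    y1 = np.maximum(boxes[:, 1], ref[1])
    x2 = np.minimum(boxes[:, 2], ref[2])
    y2 = np.minimum(boxes[:, 3], ref[3])
    inter = np.clip(x2 - x1 + 1, 0, None) * np.clip(y2 - y1 + 1, 0, None)
    areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
    ref_area = (ref[2] - ref[0] + 1) * (ref[3] - ref[1] + 1)
    return inter / (areas + ref_area - inter)

def vote_mask(masks, boxes, scores, ref_box, merge_thresh, binary_thresh):
    # Score-weighted average of the masks whose boxes overlap ref_box by
    # at least merge_thresh, binarised at binary_thresh. When ref_box is
    # itself among boxes, the kept set is never empty (IoU with itself is 1).
    ov = bbox_iou(boxes, ref_box)
    keep = np.where(ov >= merge_thresh)[0]
    w = scores[keep] / scores[keep].sum()
    merged = np.tensordot(w, masks[keep], axes=1)   # (H, W)
    return merged >= binary_thresh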
Example #21
def pred_eval(predictor,
              test_data,
              imdb,
              cfg,
              vis=False,
              thresh=1e-3,
              logger=None,
              ignore_cache=True):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all thresholds are set by hand
    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param vis: controls visualization
    :param thresh: valid detection threshold
    :return:
    """
    print "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$"
    det_file = os.path.join(imdb.result_path, imdb.name + '_detections.pkl')
    if os.path.exists(det_file) and not ignore_cache:
        with open(det_file, 'rb') as fid:
            all_boxes = cPickle.load(fid)
        info_str = imdb.evaluate_detections(all_boxes)
        if logger:
            logger.info('evaluate detections: \n{}'.format(info_str))
        return

    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data[0]]

    if not isinstance(test_data, PrefetchingIter):
        test_data = PrefetchingIter(test_data)

    nms = py_nms_wrapper(cfg.TEST.NMS)

    # limit detections to max_per_image over all classes
    max_per_image = 300

    num_images = imdb.num_images
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    idx = 0
    data_time, net_time, post_time = 0.0, 0.0, 0.0
    t = time.time()
    for im_info, data_batch in test_data:
        t1 = time.time() - t
        t = time.time()

        scales = [iim_info[0, 2] for iim_info in im_info]

        scores_all, boxes_all, data_dict_all = im_detect(
            predictor, data_batch, data_names, scales, cfg)

        t2 = time.time() - t
        t = time.time()
        for delta, (scores, boxes, data_dict) in enumerate(
                zip(scores_all, boxes_all, data_dict_all)):
            for j in range(1, imdb.num_classes):
                indexes = np.where(scores[:, j] > thresh)[0]
                cls_scores = scores[indexes, j, np.newaxis]
                cls_boxes = boxes[indexes, 4:8] if cfg.CLASS_AGNOSTIC \
                    else boxes[indexes, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j][idx + delta] = cls_dets[keep, :]

            if max_per_image > 0:
                image_scores = np.hstack([
                    all_boxes[j][idx + delta][:, -1]
                    for j in range(1, imdb.num_classes)
                ])
                if len(image_scores) > max_per_image:
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in range(1, imdb.num_classes):
                        keep = np.where(
                            all_boxes[j][idx + delta][:, -1] >= image_thresh)[0]
                        all_boxes[j][idx + delta] = \
                            all_boxes[j][idx + delta][keep, :]

            if vis:
                boxes_this_image = [[]] + [
                    all_boxes[j][idx + delta]
                    for j in range(1, imdb.num_classes)
                ]
                vis_all_detection(data_dict['data'].asnumpy(),
                                  boxes_this_image, imdb.classes,
                                  scales[delta], cfg)

        idx += test_data.batch_size
        t3 = time.time() - t
        t = time.time()
        data_time += t1
        net_time += t2
        post_time += t3
        print 'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(
            idx, imdb.num_images, data_time / idx * test_data.batch_size,
            net_time / idx * test_data.batch_size,
            post_time / idx * test_data.batch_size)
        if logger:
            logger.info(
                'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(
                    idx, imdb.num_images,
                    data_time / idx * test_data.batch_size,
                    net_time / idx * test_data.batch_size,
                    post_time / idx * test_data.batch_size))

    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, protocol=cPickle.HIGHEST_PROTOCOL)

    info_str = imdb.evaluate_detections(all_boxes)
    if logger:
        logger.info('evaluate detections: \n{}'.format(info_str))
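py_nms_wrapper is a repository helper; a plausible pure-NumPy equivalent of the greedy NMS it presumably wraps, kept here for reference:

import numpy as np

def greedy_nms(dets, thresh):
    # dets: (N, 5) array of (x1, y1, x2, y2, score); returns kept indices
    x1, y1, x2, y2, scores = (dets[:, 0], dets[:, 1], dets[:, 2],
                              dets[:, 3], dets[:, 4])
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]      # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # IoU of the current best box against all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = (np.clip(xx2 - xx1 + 1, 0, None) *
                 np.clip(yy2 - yy1 + 1, 0, None))
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= thresh]  # drop boxes that overlap too much
    return keep

With this stand-in, keep = greedy_nms(cls_dets, cfg.TEST.NMS) would mirror the keep = nms(cls_dets) calls above.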
Example #22
def main():
    # get symbol
    ctx_id = [int(i) for i in config.gpus.split(',')]
    pprint.pprint(config)
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 2
    classes = [
        '__background__',  # always index 0
        '1'
    ]

    # load demo data
    image_names = []
    names_dirs = os.listdir(cur_path + '/../' + test_dir)
    for im_name in names_dirs:
        if im_name[-4:] == '.jpg' or im_name[-4:] == '.png':
            image_names.append(im_name)

    data = []
    for im_name in image_names:
        assert os.path.exists(cur_path + '/../' + test_dir + im_name), (
            '{} does not exist'.format('../demo/' + im_name))
        im = cv2.imread(cur_path + '/../' + test_dir + im_name,
                        cv2.IMREAD_COLOR | long(128))
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        #print "before scale: "
        #print im.shape
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        #print "after scale: "
        #print im.shape
        #im_scale = 1.0
        #print "scale ratio: "
        #print im_scale
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        #print im_tensor.shape
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../' + model_dir,
                                        0,
                                        process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(ctx_id[0])],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)

    # warm up
    for i in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[0])
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        _, _, _, _ = im_detect(predictor, data_batch, data_names, scales,
                               config)
    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        tic()
        scores, boxes, masks, data_dict = im_detect(predictor, data_batch,
                                                    data_names, [1.0], config)
        im_shapes = [
            data_batch.data[i][0].shape[2:4]
            for i in xrange(len(data_batch.data))
        ]
        #print im_shapes

        if not config.TEST.USE_MASK_MERGE:
            all_boxes = [[] for _ in xrange(num_classes)]
            all_masks = [[] for _ in xrange(num_classes)]
            nms = py_nms_wrapper(config.TEST.NMS)
            for j in range(1, num_classes):
                indexes = np.where(scores[0][:, j] > 0.7)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                if config.CLASS_AGNOSTIC:
                    cls_boxes = boxes[0][indexes, :]
                else:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]

                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            dets = [all_boxes[j] for j in range(1, num_classes)]
            masks = [all_masks[j] for j in range(1, num_classes)]
        else:
            masks = masks[0][:, 1:, :, :]
            result_masks, result_dets = gpu_mask_voting(
                masks, boxes[0], scores[0], num_classes, 100, im_shapes[0][1],
                im_shapes[0][0], config.TEST.NMS,
                config.TEST.MASK_MERGE_THRESH, config.BINARY_THRESH, ctx_id[0])

            dets = [result_dets[j] for j in range(1, num_classes)]
            masks = [
                result_masks[j][:, 0, :, :] for j in range(1, num_classes)
            ]
        print 'testing {} {:.4f}s'.format(im_name, toc())
        # visualize
        for i in xrange(len(dets)):
            keep = np.where(dets[i][:, -1] > 0.7)
            dets[i] = dets[i][keep]
            masks[i] = masks[i][keep]
        im = cv2.imread(cur_path + '/../' + test_dir + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_masks(im, dets, masks, classes, config, 1.0 / scales[0], False)

        # Save img
        cv2.imwrite(cur_path + '/../' + result_dir + im_name,
                    cv2.cvtColor(im, cv2.COLOR_BGR2RGB))

    print 'done'
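resize and transform are repository image utilities; a sketch of their presumed behaviour (short side scaled to target_size, long side capped at max_size, per-channel means subtracted, layout converted to NCHW). The real transform also pads height and width to a multiple of config.network.IMAGE_STRIDE, which is omitted here:

import cv2
import numpy as np

def resize_keep_ratio(im, target_size, max_size):
    # Scale so the short side reaches target_size, capped so the long
    # side never exceeds max_size (presumed behaviour of resize()).
    short_side = min(im.shape[:2])
    long_side = max(im.shape[:2])
    scale = min(float(target_size) / short_side, float(max_size) / long_side)
    im = cv2.resize(im, None, fx=scale, fy=scale,
                    interpolation=cv2.INTER_LINEAR)
    return im, scale

def to_tensor(im, pixel_means):
    # HWC uint8 image -> (1, 3, H, W) float32 tensor with means
    # subtracted (presumed behaviour of transform()).
    im = im.astype(np.float32) - np.asarray(pixel_means, dtype=np.float32)
    return im.transpose(2, 0, 1)[np.newaxis]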
Example #23
    def aggregateSingle(self,
                        scale_cls_dets,
                        scale_cls_masks,
                        vis=False,
                        cache_name='cache',
                        vis_path=None,
                        vis_name=None,
                        vis_ext='.png'):

        n_scales = len(scale_cls_dets)
        assert n_scales == len(
            self.cfg.TEST.VALID_RANGES
        ), 'A valid range should be specified for each test scale'

        all_boxes = [[[] for _ in range(self.num_images)]
                     for _ in range(self.num_classes)]
        all_masks = [[[] for _ in range(self.num_images)]
                     for _ in range(self.num_classes)]
        if len(scale_cls_dets) > 1:
            self.show_info(
                'Aggregating detections from multiple scales and applying NMS...'
            )
        else:
            self.show_info('Performing NMS on detections...')

        # TODO: change the hard code here, change it to soft_nms or mask_nms
        nms = py_nms_wrapper(0.3)
        # nms = gpu_nms_wrapper(0.3, 0)
        # Apply ranges and store detections per category
        for i in range(self.num_images):
            for j in range(1, self.num_classes):
                agg_dets = np.empty((0, 5), dtype=np.float32)
                agg_masks = np.empty((0, 28, 28), dtype=np.float32)
                for all_cls_dets, all_cls_masks, valid_range in zip(
                        scale_cls_dets, scale_cls_masks,
                        self.cfg.TEST.VALID_RANGES):
                    cls_dets = all_cls_dets[j][i]
                    cls_masks = all_cls_masks[j][i]
                    heights = cls_dets[:, 2] - cls_dets[:, 0]
                    widths = cls_dets[:, 3] - cls_dets[:, 1]
                    areas = widths * heights
                    lvalid_ids = np.where(areas > valid_range[0]*valid_range[0])[0] if valid_range[0] > 0 else \
                        np.arange(len(areas))
                    uvalid_ids = np.where(areas <= valid_range[1]*valid_range[1])[0] if valid_range[1] > 0 else \
                        np.arange(len(areas))
                    valid_ids = np.intersect1d(lvalid_ids, uvalid_ids)
                    cls_dets = cls_dets[
                        valid_ids, :] if len(valid_ids) > 0 else cls_dets
                    cls_masks = cls_masks[
                        valid_ids, :, :] if len(valid_ids) > 0 else cls_masks
                    # pdb.set_trace()
                    agg_dets = np.vstack(
                        (agg_dets, cls_dets.astype(np.float32)))
                    # pdb.set_trace()
                    agg_masks = np.concatenate((agg_masks, cls_masks), axis=0)
                # start = timeit.default_timer()
                keep = nms(agg_dets)
                # stop = timeit.default_timer()
                # print 'nms time: ', stop - start
                all_boxes[j][i] = agg_dets[keep, :]
                all_masks[j][i] = agg_masks[keep, :]
                # parallel_nms_args[int(i/n_roi_per_pool)].append(agg_dets)

        # Divide roidb and perform NMS in parallel to reduce the memory usage
        # TODO: change to multi process later

        # Limit number of detections to MAX_PER_IMAGE if requested and visualize if vis is True
        for i in range(self.num_images):
            if self.cfg.TEST.MAX_PER_IMAGE > 0:
                image_scores = np.hstack([
                    all_boxes[j][i][:, -1] for j in range(1, self.num_classes)
                ])
                if len(image_scores) > self.cfg.TEST.MAX_PER_IMAGE:
                    image_thresh = np.sort(
                        image_scores)[-self.cfg.TEST.MAX_PER_IMAGE]
                    for j in range(1, self.num_classes):
                        keep = np.where(
                            all_boxes[j][i][:, -1] >= image_thresh)[0]
                        all_boxes[j][i] = all_boxes[j][i][keep, :]
                        all_masks[j][i] = all_masks[j][i][keep, :]

            if vis:

                visualization_path = vis_path if vis_path else os.path.join(
                    self.cfg.TEST.VISUALIZATION_PATH, cache_name)
                if not os.path.isdir(visualization_path):
                    os.makedirs(visualization_path)
                import cv2
                im = cv2.cvtColor(cv2.imread(self.roidb[i]['image']),
                                  cv2.COLOR_BGR2RGB)

                visualize_masks(
                    im, [[]] +
                    [all_boxes[j][i]
                     for j in range(1, self.num_classes)], [[]] +
                    [all_masks[j][i] for j in range(1, self.num_classes)],
                    1.0,
                    self.cfg.network.PIXEL_MEANS,
                    self.class_names,
                    threshold=0.5,
                    save_path=os.path.join(
                        visualization_path,
                        '{}{}'.format(vis_name if vis_name else i, vis_ext)),
                    transform=False)
        if cache_name:
            cache_path = os.path.join(self.result_path, cache_name)
            if not os.path.isdir(cache_path):
                os.makedirs(cache_path)
            cache_path = os.path.join(cache_path, 'detections.pkl')
            self.show_info(
                'Done! Saving detections into: {}'.format(cache_path))
            with open(cache_path, 'wb') as detfile:
                cPickle.dump(all_boxes, detfile)
        return all_boxes, all_masks
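The per-scale filtering above keeps a detection only when its box area falls inside that test scale's valid range. The same test in isolation, mirroring the coordinate order used above (a non-positive bound leaves that side of the range open):

import numpy as np

def filter_by_valid_range(dets, valid_range):
    # Keep detections whose box area lies in (lo*lo, hi*hi]; a bound of
    # 0 or less disables that side of the range, as in aggregateSingle.
    lo, hi = valid_range
    heights = dets[:, 2] - dets[:, 0]   # same coordinate order as above
    widths = dets[:, 3] - dets[:, 1]
    areas = widths * heights
    keep = np.ones(len(dets), dtype=bool)
    if lo > 0:
        keep &= areas > lo * lo
    if hi > 0:
        keep &= areas <= hi * hi
    return dets[keep]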
Example #24
def pred_eval(predictor, test_data, imdb, cfg, vis=False, thresh=1e-3, logger=None, ignore_cache=True):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all thresholds are set by hand
    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param vis: controls visualization
    :param thresh: valid detection threshold
    :return:
    """

    det_file = os.path.join(imdb.result_path, imdb.name + '_detections.pkl')
    if os.path.exists(det_file) and not ignore_cache:
        with open(det_file, 'rb') as fid:
            all_boxes = cPickle.load(fid)
        info_str = imdb.evaluate_detections(all_boxes)
        if logger:
            logger.info('evaluate detections: \n{}'.format(info_str))
        return

    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data[0]]

    if not isinstance(test_data, PrefetchingIter):
        test_data = PrefetchingIter(test_data)

    # limit detections to max_per_image over all classes
    max_per_image = cfg.TEST.max_per_image
    num_images = imdb.num_images

    for test_scale_index, test_scale in enumerate(cfg.TEST_SCALES):
        det_file_single_scale = os.path.join(imdb.result_path, imdb.name + '_detections_' + str(test_scale_index) + '.pkl')
        # if os.path.exists(det_file_single_scale):
        #    continue
        cfg.SCALES = [test_scale]
        test_data.reset()

        # all detections are collected into:
        #    all_boxes[cls][image] = N x 5 array of detections in
        #    (x1, y1, x2, y2, score)
        all_boxes_single_scale = [[[] for _ in range(num_images)]
                                  for _ in range(imdb.num_classes)]

        detect_at_single_scale(predictor, data_names, imdb, test_data, cfg, thresh, vis, all_boxes_single_scale, logger)

        with open(det_file_single_scale, 'wb') as f:
            cPickle.dump(all_boxes_single_scale, f, protocol=cPickle.HIGHEST_PROTOCOL)

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)] for _ in range(imdb.num_classes)]

    for test_scale_index, test_scale in enumerate(cfg.TEST_SCALES):
        det_file_single_scale = os.path.join(imdb.result_path, imdb.name + '_detections_' + str(test_scale_index) + '.pkl')
        if os.path.exists(det_file_single_scale):
            with open(det_file_single_scale, 'rb') as fid:
                all_boxes_single_scale = cPickle.load(fid)
            for idx_class in range(1, imdb.num_classes):
                for idx_im in range(0, num_images):
                    if len(all_boxes[idx_class][idx_im]) == 0:
                        all_boxes[idx_class][idx_im] = all_boxes_single_scale[idx_class][idx_im]
                    else:
                        all_boxes[idx_class][idx_im] = np.vstack((all_boxes[idx_class][idx_im], all_boxes_single_scale[idx_class][idx_im]))

    for idx_class in range(1, imdb.num_classes):
        for idx_im in range(0, num_images):
            if cfg.TEST.USE_SOFTNMS:
                soft_nms = py_softnms_wrapper(cfg.TEST.SOFTNMS_THRESH, max_dets=max_per_image)
                all_boxes[idx_class][idx_im] = soft_nms(all_boxes[idx_class][idx_im])
            else:
                nms = py_nms_wrapper(cfg.TEST.NMS)
                keep = nms(all_boxes[idx_class][idx_im])
                all_boxes[idx_class][idx_im] = all_boxes[idx_class][idx_im][keep, :]

    if max_per_image > 0:
        for idx_im in range(0, num_images):
            image_scores = np.hstack([all_boxes[j][idx_im][:, -1]
                                      for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][idx_im][:, -1] >= image_thresh)[0]
                    all_boxes[j][idx_im] = all_boxes[j][idx_im][keep, :]

    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, protocol=cPickle.HIGHEST_PROTOCOL)

    info_str = imdb.evaluate_detections(all_boxes)
    if logger:
        logger.info('evaluate detections: \n{}'.format(info_str))
Example #25
def pred_eval(predictor,
              test_data,
              imdb,
              cfg,
              vis=False,
              thresh=1e-3,
              logger=None,
              ignore_cache=True):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all thresholds are set by hand
    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param vis: controls visualization
    :param thresh: valid detection threshold
    :return:
    """

    det_file = os.path.join(imdb.result_path, imdb.name + '_detections.pkl')
    if os.path.exists(det_file) and not ignore_cache:
        with open(det_file, 'rb') as fid:
            all_boxes = cPickle.load(fid)
        info_str = imdb.evaluate_detections(all_boxes)
        if logger:
            logger.info('evaluate detections: \n{}'.format(info_str))
        return

    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data[0]]

    if not isinstance(test_data, PrefetchingIter):
        test_data = PrefetchingIter(test_data)

    # limit detections to max_per_image over all classes
    max_per_image = cfg.TEST.max_per_image
    num_images = imdb.num_images

    for test_scale_index, test_scale in enumerate(cfg.TEST_SCALES):
        det_file_single_scale = os.path.join(
            imdb.result_path,
            imdb.name + '_detections_' + str(test_scale_index) + '.pkl')
        # if os.path.exists(det_file_single_scale):
        #    continue
        cfg.SCALES = [test_scale]
        test_data.reset()

        # all detections are collected into:
        #    all_boxes[cls][image] = N x 5 array of detections in
        #    (x1, y1, x2, y2, score)
        all_boxes_single_scale = [[[] for _ in range(num_images)]
                                  for _ in range(imdb.num_classes)]

        detect_at_single_scale(predictor, data_names, imdb, test_data, cfg,
                               thresh, vis, all_boxes_single_scale, logger)

        with open(det_file_single_scale, 'wb') as f:
            cPickle.dump(all_boxes_single_scale,
                         f,
                         protocol=cPickle.HIGHEST_PROTOCOL)

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    for test_scale_index, test_scale in enumerate(cfg.TEST_SCALES):
        det_file_single_scale = os.path.join(
            imdb.result_path,
            imdb.name + '_detections_' + str(test_scale_index) + '.pkl')
        if os.path.exists(det_file_single_scale):
            with open(det_file_single_scale, 'rb') as fid:
                all_boxes_single_scale = cPickle.load(fid)
            for idx_class in range(1, imdb.num_classes):
                for idx_im in range(0, num_images):
                    if len(all_boxes[idx_class][idx_im]) == 0:
                        all_boxes[idx_class][idx_im] = all_boxes_single_scale[
                            idx_class][idx_im]
                    else:
                        all_boxes[idx_class][idx_im] = np.vstack(
                            (all_boxes[idx_class][idx_im],
                             all_boxes_single_scale[idx_class][idx_im]))

    for idx_class in range(1, imdb.num_classes):
        for idx_im in range(0, num_images):
            if cfg.TEST.USE_SOFTNMS:
                soft_nms = py_softnms_wrapper(cfg.TEST.SOFTNMS_THRESH,
                                              max_dets=max_per_image)
                all_boxes[idx_class][idx_im] = soft_nms(
                    all_boxes[idx_class][idx_im])
            else:
                nms = py_nms_wrapper(cfg.TEST.NMS)
                keep = nms(all_boxes[idx_class][idx_im])
                all_boxes[idx_class][idx_im] = all_boxes[idx_class][idx_im][
                    keep, :]

    if max_per_image > 0:
        for idx_im in range(0, num_images):
            image_scores = np.hstack([
                all_boxes[j][idx_im][:, -1]
                for j in range(1, imdb.num_classes)
            ])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(
                        all_boxes[j][idx_im][:, -1] >= image_thresh)[0]
                    all_boxes[j][idx_im] = all_boxes[j][idx_im][keep, :]

    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, protocol=cPickle.HIGHEST_PROTOCOL)

    info_str = imdb.evaluate_detections(all_boxes)
    if logger:
        logger.info('evaluate detections: \n{}'.format(info_str))
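Both multi-scale variants above follow the same merge pattern: stack each per-class, per-image detection list across test scales, then suppress jointly. A condensed sketch, with nms any callable such as py_nms_wrapper(cfg.TEST.NMS) that returns kept indices:

import numpy as np

def merge_scales(per_scale_boxes, num_classes, num_images, nms):
    # per_scale_boxes: one all_boxes[cls][image] structure per test scale;
    # each (class, image) cell is stacked across scales, then NMS is
    # applied once to the combined detections.
    merged = [[np.empty((0, 5), dtype=np.float32) for _ in range(num_images)]
              for _ in range(num_classes)]
    for scale_boxes in per_scale_boxes:
        for c in range(1, num_classes):
            for i in range(num_images):
                cell = scale_boxes[c][i]
                if len(cell):
                    merged[c][i] = np.vstack((merged[c][i], cell))
    for c in range(1, num_classes):
        for i in range(num_images):
            keep = nms(merged[c][i])
            merged[c][i] = merged[c][i][keep, :]
    return merged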
Example #26
def main():
    # get symbol

    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_fpn_dcn_rcnn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)
    max_per_image = config.TEST.max_per_image

    # Print the test scales
    print("Train scales: %s" % str(config.SCALES))
    print("Test scales: %s" % str(config.TEST_SCALES))

    # load demo data
    #dataBaseDir = '/b_test/pkhan/datasets/Receipts/data/'
    dataBaseDir = '/netscratch/queling/data/'
    outputBaseDir = '/netscratch/queling/Deformable/output/fpn/deep_receipt/results/' + EXPERIMENT_NAME
    #outputBaseDir = '/b_test/pkhan/Code/Deformable/output/' + EXPERIMENT_NAME

    if os.path.exists(outputBaseDir):
        shutil.rmtree(outputBaseDir)
    os.mkdir(outputBaseDir)

    outputFile = open(os.path.join(outputBaseDir, 'output.txt'), 'w')
    outputFile.write('<?xml version="1.0" encoding="UTF-8"?>\n')
    errorStatsFile = open(
        os.path.join(outputBaseDir, 'incorrect-detections.txt'), 'w')

    incorrectDetectionResultsPath = os.path.join(outputBaseDir,
                                                 'IncorrectDetections')
    if not os.path.exists(incorrectDetectionResultsPath):
        os.mkdir(incorrectDetectionResultsPath)

    detectionResultsPath = os.path.join(outputBaseDir, 'Detections')
    if not os.path.exists(detectionResultsPath):
        os.mkdir(detectionResultsPath)

    annotationResultsPath = os.path.join(outputBaseDir, 'Annotations')
    if not os.path.exists(annotationResultsPath):
        os.mkdir(annotationResultsPath)

    statistics = {}
    for cls_ind, cls in enumerate(CLASSES):
        statistics[cls] = {}
        for thresh in IoU_THRESHOLDS:
            statistics[cls][thresh] = {}
            statistics[cls][thresh]["truePositives"] = 0
            statistics[cls][thresh]["falsePositives"] = 0
            statistics[cls][thresh]["falseNegatives"] = 0
            statistics[cls][thresh]["precision"] = 0
            statistics[cls][thresh]["recall"] = 0
            statistics[cls][thresh]["fMeasure"] = 0

    im_names_file = open(os.path.join(dataBaseDir, 'ImageSets/image.txt'),
                         'r')  #test.txt for whole dataset, image.txt for one

    for im_name in im_names_file:
        im_name = im_name.strip()
        # print ("Processing file: %s" % (im_name))

        found = False
        for ext in IMAGE_EXTENSIONS:

            im_name_with_ext = im_name + ext
            im_path = os.path.join(
                dataBaseDir, 'Test',
                im_name_with_ext)  #Images for whole dataset, Test for one

            if os.path.exists(im_path):
                found = True
                break
        if not found:
            print("Error: Unable to locate file %s" % (im_name))
            exit(-1)

        # Load GT annotations

        xml_path = os.path.join(dataBaseDir, 'Annotations', im_name + '.xml')

        #gtBBoxes = loadGTAnnotationsFromXML(xml_path)

        tic()

        dets_nms = [[] for j in range(len(TOTAL_CLASSES) - 1)]

        for testScale in config.SCALES:
            data = []
            im = cv2.imread(im_path,
                            cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
            target_size = testScale[0]
            max_size = testScale[1]
            im, im_scale = resize(im,
                                  target_size,
                                  max_size,
                                  stride=config.network.IMAGE_STRIDE)
            im_tensor = transform(im, config.network.PIXEL_MEANS)
            im_info = np.array(
                [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                dtype=np.float32)
            data.append({'data': im_tensor, 'im_info': im_info})

            # get predictor
            data_names = ['data', 'im_info']
            label_names = []
            data = [[mx.nd.array(data[i][name]) for name in data_names]
                    for i in xrange(len(data))]
            max_data_shape = [[('data', (1, 3, testScale[0], testScale[1]))]]
            provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                            for i in xrange(len(data))]
            provide_label = [None for i in xrange(len(data))]
            # arg_params, aux_params = load_param(cur_path + '/../model/' + ('rfcn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'), 0, process=True)
            arg_params, aux_params = load_param(MODEL_PATH,
                                                MODEL_EPOCH,
                                                process=True)
            predictor = Predictor(sym,
                                  data_names,
                                  label_names,
                                  context=[mx.gpu(0)],
                                  max_data_shapes=max_data_shape,
                                  provide_data=provide_data,
                                  provide_label=provide_label,
                                  arg_params=arg_params,
                                  aux_params=aux_params)

            # # warm up
            for j in xrange(2):
                data_batch = mx.io.DataBatch(
                    data=[data[0]],
                    label=[],
                    pad=0,
                    index=0,
                    provide_data=[[(k, v.shape)
                                   for k, v in zip(data_names, data[0])]],
                    provide_label=[None])
                scales = [
                    data_batch.data[i][1].asnumpy()[0, 2]
                    for i in xrange(len(data_batch.data))
                ]
                scores, boxes, data_dict = im_detect(predictor, data_batch,
                                                     data_names, scales,
                                                     config)

            # test
            image_names = [im_name]  # Way around
            for idx, im_name in enumerate(image_names):
                data_batch = mx.io.DataBatch(
                    data=[data[idx]],
                    label=[],
                    pad=0,
                    index=idx,
                    provide_data=[[(k, v.shape)
                                   for k, v in zip(data_names, data[idx])]],
                    provide_label=[None])
                scales = [
                    data_batch.data[i][1].asnumpy()[0, 2]
                    for i in xrange(len(data_batch.data))
                ]

                scores, boxes, data_dict = im_detect(predictor, data_batch,
                                                     data_names, scales,
                                                     config)
                boxes = boxes[0].astype('f')
                scores = scores[0].astype('f')

                # TODO: Multi-scale testing
                for j in range(1, scores.shape[1]):
                    cls_scores = scores[:, j, np.newaxis]
                    cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC \
                        else boxes[:, j * 4:(j + 1) * 4]
                    cls_dets = np.hstack((cls_boxes, cls_scores))
                    if len(dets_nms[j - 1]) == 0:
                        dets_nms[j - 1] = cls_dets
                    else:
                        # stack the new detections; += on ndarrays would sum them elementwise
                        dets_nms[j - 1] = np.vstack((dets_nms[j - 1], cls_dets))

        finalDetections = []
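        # NMS was deferred while detections were pooled across runs above;
        # de-duplicate each class once here, then apply the confidence threshold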
        for clsIter in range(len(dets_nms)):
            # print ("Performing NMS on cls %d with %d boxes" % (clsIter, len(dets_nms[clsIter])))
            if config.TEST.USE_SOFTNMS:
                soft_nms = py_softnms_wrapper(config.TEST.SOFTNMS_THRESH,
                                              max_dets=max_per_image)
                dets_nms[clsIter] = soft_nms(dets_nms[clsIter])
            else:
                nms = py_nms_wrapper(config.TEST.NMS)
                keep = nms(dets_nms[clsIter])
                dets_nms[clsIter] = dets_nms[clsIter][keep, :]
            # keep only detections above the confidence threshold
            dets_nms[clsIter] = dets_nms[clsIter][
                dets_nms[clsIter][:, -1] > CONFIDENCE_THRESHOLD, :]


        print 'Processing image: {} {:.4f}s'.format(im_name, toc())

        # Add detections on the image
        im = cv2.imread(
            im_path)  # Reload the image since the previous one was scaled

        # per-class counters used to index the cropped output images
        item = 0
        price = 0
        asd = 0  # index of the most recent crop for the current class
        row = 0

        for cls_idx, cls_name in enumerate(CONCERNED_ERRORS):
            cls_dets = dets_nms[cls_idx]
            for det in cls_dets:
                predictedBBox = det[:4]
                cv2.rectangle(im,
                              (int(predictedBBox[0]), int(predictedBBox[1])),
                              (int(predictedBBox[2]), int(predictedBBox[3])),
                              (0, 0, 255), 1)
                w = predictedBBox[2] - predictedBBox[0]
                cv2.putText(im, cls_name,
                            (int(predictedBBox[0] +
                                 (w / 2.0) - 100), int(predictedBBox[1] - 5)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 255, 0), 1)

                crop_im = im[int(predictedBBox[1]):int(predictedBBox[3]),
                             int(predictedBBox[0]):int(predictedBBox[2])]
                gray = cv2.cvtColor(crop_im, cv2.COLOR_BGR2GRAY)

                if cls_name == "price":
                    price = price + 1
                    asd = price
                    new_path = outputBaseDir + "/price/"
                    if not os.path.exists(new_path):
                        os.makedirs(new_path)
                    outputImagePath = os.path.join(
                        new_path, cls_name + str(asd) + ".jpg")
                    # print ("Writing image: %s" % (outputImagePath))
                    cv2.imwrite(outputImagePath, crop_im)

                elif cls_name == "item_name":
                    item = item + 1
                    asd = item
                    new_path = outputBaseDir + "/item/"
                    if not os.path.exists(new_path):
                        os.makedirs(new_path)

                    outputImagePath = os.path.join(
                        new_path, cls_name + str(asd) + ".jpg")
                    # print ("Writing image: %s" % (outputImagePath))
                    gray = cv2.medianBlur(gray, 3)

                    cv2.imwrite(outputImagePath, gray)

                elif cls_name == "row":
                    row = row + 1
                    asd = row
                    new_path = outputBaseDir + "/row/"
                    if not os.path.exists(new_path):
                        os.makedirs(new_path)

                    outputImagePath = os.path.join(
                        new_path, cls_name + str(asd) + ".jpg")
                    # print ("Writing image: %s" % (outputImagePath))
                    gray = cv2.medianBlur(gray, 3)
                    cv2.imwrite(outputImagePath, gray)

                elif cls_name == 'total_price':
                    print("Found Total")
                    new_path = outputBaseDir + "/total/"
                    if not os.path.exists(new_path):
                        os.makedirs(new_path)

                    outputImagePath = os.path.join(new_path, cls_name + ".jpg")
                    # print ("Writing image: %s" % (outputImagePath))
                    gray = cv2.medianBlur(gray, 3)
                    cv2.imwrite(outputImagePath, gray)

                elif cls_name == 'header':
                    new_path = outputBaseDir + "/header/"
                    if not os.path.exists(new_path):
                        os.makedirs(new_path)

                    outputImagePath = os.path.join(new_path, cls_name + ".jpg")
                    # print ("Writing image: %s" % (outputImagePath))
                    gray = cv2.medianBlur(gray, 3)
                    cv2.imwrite(outputImagePath, gray)

                outputImagePath = os.path.join(outputBaseDir,
                                               cls_name + str(asd) + ".jpg")
                # print ("Writing image: %s" % (outputImagePath))
                cv2.imwrite(outputImagePath, crop_im)
                # OCR the crop once here; the per-class loops below re-read the saved files
                text = pytesseract.image_to_string(Image.open(outputImagePath))
        items = []
        for k in range(1, item + 1):  # crops are named 1..item, so include the last one
            path_item = outputBaseDir + "/item/item_name" + str(k) + ".jpg"
            text_item = pytesseract.image_to_string(Image.open(path_item))
            #text_item = spellCheck.main(text_item, "product")
            print(str(k) + ": " + text_item)

            if text_item == "":
                print("empty and not relevant")
            else:
                items.append(text_item)

        print("-------------------------------------------------------------")
        prices = []
        for k in range(1, price + 1):
            path_item = outputBaseDir + "/price/price" + str(k) + ".jpg"
            text_item = pytesseract.image_to_string(Image.open(path_item),
                                                    config="--psm 13")
            print(str(k) + ": " + text_item)
            if text_item == "":
                print("empty and not relevant")
            else:
                prices.append(text_item)

        print("-------------------------------------------------------------")

        rows = []
        for k in range(1, row + 1):
            path_item = outputBaseDir + "/row/row" + str(k) + ".jpg"
            text_item = pytesseract.image_to_string(Image.open(path_item))
            print(str(k) + ": " + text_item)
            if text_item == "":
                print("empty and not relevant")
            else:
                rows.append(text_item)

        # write the detected total to results.txt
        path_item = outputBaseDir + "/total/total_price.jpg"
        text_item = pytesseract.image_to_string(Image.open(path_item))

        with open("/netscratch/queling/Deformable/fpn/results.txt", "a") as f:
            f.write(text_item + "\n")

        #path_item = outputBaseDir+"/header/header.jpg"
        #text_item = pytesseract.image_to_string(Image.open(path_item))
        #print("Header: "+text_item)

        found = False

        # match each OCR'd item name to a row, then look for a price in the same row
        for k in range(0, len(items)):
            for l in range(0, len(rows)):
                if items[k].encode('ascii', 'ignore') in rows[l].encode(
                        'ascii', 'ignore'):
                    for m in range(0, len(prices)):
                        # compare the candidate price prices[m], not prices[k]
                        if prices[m].encode('ascii', 'ignore') in rows[l].encode(
                                'ascii', 'ignore'):
                            #items[k] = spellCheck.main(items[k], "product")
                            with open(
                                    "/netscratch/queling/Deformable/fpn/results.txt",
                                    "a") as f:
                                f.write(items[k] + "\n")
                                f.write(prices[m] + "\n")
                            found = True

            # product not found in any row: write the name with an empty price
            if not found:
                #items[k] = spellCheck.main(items[k], "product")
                with open("/netscratch/queling/Deformable/fpn/results.txt",
                          "a") as f:
                    f.write(items[k] + "\n")
                    f.write(" " + "\n")

            found = False

        # Add gt annotations
        #for bbox in gtBBoxes:
        #    if bbox[5] in CONCERNED_ERRORS:
        #        cv2.rectangle(im, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 1)

        # Compute the statistics for the current image
        #statistics, classificationErrorMessage = computeStatistics(dets_nms, gtBBoxes, statistics, IoU_THRESHOLDS)
        #if classificationErrorMessage is not None:
        #    print ("Writing incorrect image: %s" % (im_name))
        #    errorStatsFile.write("%s: %s\n" % (im_name, classificationErrorMessage))
        #    cv2.imwrite(os.path.join(incorrectDetectionResultsPath, im_name + '.jpg'), im)

        # Write the output in ICDAR Format
        outputFile.write(convertToXML(im_name_with_ext, dets_nms))

        if WRITE_DETECTION_RESULTS:
            # visualize
            # im = cv2.imread(im_path)
            # im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

            # # Get also the plot for saving on server
            # _, plt = show_boxes(im, dets_nms, CLASSES, 1, returnPlt=True)
            # plt.savefig(os.path.join(outputBaseDir, 'Detections', im_name[:im_name.rfind('.')] + ".png"))

            outputImagePath = os.path.join(detectionResultsPath,
                                           im_name + ".jpg")
            print("Writing image: %s" % (outputImagePath))
            cv2.imwrite(outputImagePath, im)

        if WRITE_ANNOTATION_RESULTS:
            exportToPascalVOCFormat(im_name, im_path, dets_nms,
                                    annotationResultsPath)

    outputFile.close()
    errorStatsFile.close()

    total_classes = 0
    total_F_Measure = 0
    average_F_Measure = 0
    # Compute final precision and recall
    outputFile = open(
        os.path.join(outputBaseDir,
                     'output-stats-' + EXPERIMENT_NAME + '.txt'), 'w')
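For reference: the example above calls py_nms_wrapper(thresh) without showing its body. Below is a minimal NumPy sketch of the greedy IoU-based NMS such a wrapper typically performs; the function name greedy_nms and the +1 pixel area convention are illustrative assumptions, not the library's actual code.

import numpy as np

def greedy_nms(dets, thresh):
    """dets: (N, 5) rows of (x1, y1, x2, y2, score); returns indices to keep."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)      # +1 pixel convention, as in Fast R-CNN
    order = scores.argsort()[::-1]             # highest-scoring box first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # IoU of the current top box against every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= thresh]       # drop boxes that overlap too much
    return keep

A wrapper like py_nms_wrapper(0.7) presumably just binds the threshold and returns a callable, so keep = nms(cls_dets) corresponds to greedy_nms(cls_dets, 0.7) in this sketch.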
Example #27
0
File: demo_voc.py Project: mrlooi/FCIS
def main():
    # get symbol
    ctx_id = [int(i) for i in config.gpus.split(',')]
    pprint.pprint(config)
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 81
    classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
               'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
               'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
               'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
               'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
               'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
               'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
               'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book',
               'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

    # load demo data
    image_names = ['COCO_test2015_000000000275.jpg', 'COCO_test2015_000000001412.jpg', 'COCO_test2015_000000073428.jpg',
                    'COCO_test2015_000000393281.jpg']
    data = []
    for im_name in image_names:
        assert os.path.exists(cur_path + '/../demo/' + im_name), '{} does not exist'.format('../demo/' + im_name)
        im = cv2.imread(cur_path + '/../demo/' + im_name, cv2.IMREAD_COLOR)  # | cv2.IMREAD_IGNORE_ORIENTATION
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
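        # im_info is laid out as (height, width, scale) for the resized tensor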
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../model/fcis_coco', 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(ctx_id[0])], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # warm up
    for i in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        _, _, _, _ = im_detect(predictor, data_batch, data_names, scales, config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        tic()
        scores, boxes, masks, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
        im_shapes = [data_batch.data[i][0].shape[2:4] for i in xrange(len(data_batch.data))]

        if not config.TEST.USE_MASK_MERGE:
            all_boxes = [[] for _ in xrange(num_classes)]
            all_masks = [[] for _ in xrange(num_classes)]
            nms = py_nms_wrapper(config.TEST.NMS)
            for j in range(1, num_classes):
                indexes = np.where(scores[0][:, j] > 0.7)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                if config.CLASS_AGNOSTIC:
                    cls_boxes = boxes[0][indexes, :]
                else:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]

                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            dets = [all_boxes[j] for j in range(1, num_classes)]
            masks = [all_masks[j] for j in range(1, num_classes)]
        else:
            masks = masks[0][:, 1:, :, :]
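            # drop mask channel 0 (background); gpu_mask_voting below combines
            # overlapping candidates' masks rather than simply discarding the
            # suppressed ones (an explanatory comment, not in the source)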
            im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
            im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
            print (im_height, im_width)
            boxes = clip_boxes(boxes[0], (im_height, im_width))
            result_masks, result_dets = gpu_mask_voting(masks, boxes, scores[0], num_classes,
                                                        100, im_width, im_height,
                                                        config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                                                        config.BINARY_THRESH, ctx_id[0])

            dets = [result_dets[j] for j in range(1, num_classes)]
            masks = [result_masks[j][:, 0, :, :] for j in range(1, num_classes)]
        print 'testing {} {:.4f}s'.format(im_name, toc())
        # visualize
        for i in xrange(len(dets)):
            keep = np.where(dets[i][:,-1]>0.7)
            dets[i] = dets[i][keep]
            masks[i] = masks[i][keep]
        im = cv2.imread(cur_path + '/../demo/' + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_masks(im, dets, masks, classes, config)

    print 'done'
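A toy illustration of the box layout assumed in the loop above: with per-class regression the head emits four coordinates per class, so class j occupies columns j*4:(j+1)*4, while a CLASS_AGNOSTIC head shares a single set of coordinates across classes. The shapes below are illustrative assumptions, not the network's actual output sizes.

import numpy as np

num_classes = 3                 # toy value; the FCIS demo above uses 81
boxes = np.zeros((5, num_classes * 4), dtype=np.float32)   # 5 proposals
scores = np.random.rand(5, num_classes).astype(np.float32)

j = 1                                           # some foreground class
cls_scores = scores[:, j, np.newaxis]           # (5, 1) column of class-j scores
cls_boxes = boxes[:, j * 4:(j + 1) * 4]         # the 4 columns regressed for class j
cls_dets = np.hstack((cls_boxes, cls_scores))   # (5, 5) rows of (x1, y1, x2, y2, score)
print(cls_dets.shape)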
Example #28
0
def main():
    # get symbol
    pprint.pprint(cfg)
    cfg.symbol = 'resnet_v1_101_flownet_rfcn'
    model = '/../model/fgfa_rfcn_vid_s'
    # the full frame window spans key-frame interval * 2 + 1 frames; the paper sets KEY_FRAME_INTERVAL to 10
    all_frame_interval = cfg.TEST.KEY_FRAME_INTERVAL + 1
    # all_frame_interval = 7
    max_per_image = cfg.TEST.max_per_image
    feat_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    aggr_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()

    feat_sym = feat_sym_instance.get_feat_symbol(cfg)
    aggr_sym = aggr_sym_instance.get_plot_symbol(cfg)

    # set up class names
    num_classes = 2
    classes = ['__background__', 'smoke']

    # load demo data

    image_names = sorted(
        glob.glob(cur_path + '/../data/IR_smoke/Data/VID/val/8/*.png'))
    output_dir = cur_path + '/../demo/rfcn_fgfa_8_agg_1/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), '{} does not exist'.format(im_name)
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = cfg.SCALES[0][0]
        max_size = cfg.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=cfg.network.IMAGE_STRIDE)
        im_tensor = transform(im, cfg.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)

        feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
        data.append({
            'data': im_tensor,
            'im_info': im_info,
            'data_cache': im_tensor,
            'feat_cache': im_tensor
        })

    # get predictor

    print 'get-predictor'
    data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
    label_names = []

    t1 = time.time()
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[
        ('data', (1, 3, max([v[0] for v in cfg.SCALES]),
                  max([v[1] for v in cfg.SCALES]))),
        ('data_cache', (6, 3, max([v[0] for v in cfg.SCALES]),
                        max([v[1] for v in cfg.SCALES]))),
        ('feat_cache',
         ((6, cfg.network.FGFA_FEAT_DIM,
           np.ceil(max([v[0]
                        for v in cfg.SCALES]) / feat_stride).astype(np.int),
           np.ceil(max([v[1]
                        for v in cfg.SCALES]) / feat_stride).astype(np.int))))
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for _ in xrange(len(data))]

    arg_params, aux_params = load_param(cur_path + model, 1, process=True)

    feat_predictors = Predictor(feat_sym,
                                data_names,
                                label_names,
                                context=[mx.gpu(0)],
                                max_data_shapes=max_data_shape,
                                provide_data=provide_data,
                                provide_label=provide_label,
                                arg_params=arg_params,
                                aux_params=aux_params)
    aggr_predictors = Predictor(aggr_sym,
                                data_names,
                                label_names,
                                context=[mx.gpu(0)],
                                max_data_shapes=max_data_shape,
                                provide_data=provide_data,
                                provide_label=provide_label,
                                arg_params=arg_params,
                                aux_params=aux_params)
    nms = py_nms_wrapper(cfg.TEST.NMS)

    # First frame of the video
    idx = 0
    data_batch = mx.io.DataBatch(data=[data[idx]],
                                 label=[],
                                 pad=0,
                                 index=idx,
                                 provide_data=[[
                                     (k, v.shape)
                                     for k, v in zip(data_names, data[idx])
                                 ]],
                                 provide_label=[None])
    scales = [
        data_batch.data[i][1].asnumpy()[0, 2]
        for i in xrange(len(data_batch.data))
    ]
    all_boxes = [[[] for _ in range(len(data))] for _ in range(num_classes)]
    data_list = deque(maxlen=all_frame_interval)
    feat_list = deque(maxlen=all_frame_interval)
    image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
    # append cfg.TEST.KEY_FRAME_INTERVAL padding images in the front (first frame)
    while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
        data_list.append(image)
        feat_list.append(feat)

    vis = False
    file_idx = 0
    thresh = 1e-3
    for idx, element in enumerate(data):

        data_batch = mx.io.DataBatch(data=[element],
                                     label=[],
                                     pad=0,
                                     index=idx,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, element)
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        if (idx != len(data) - 1):

            if len(data_list) < all_frame_interval - 1:
                image, feat = get_resnet_output(feat_predictors, data_batch,
                                                data_names)
                data_list.append(image)
                feat_list.append(feat)

            else:
                #################################################
                # main part of the loop
                #################################################
                image, feat = get_resnet_output(feat_predictors, data_batch,
                                                data_names)
                data_list.append(image)
                feat_list.append(feat)

                prepare_data(data_list, feat_list, data_batch)
                flow = plot_feature(aggr_predictors, data_batch)

                # demo shortcut: visualize the flow field at frame 20, then stop
                if file_idx == 20:
                    for i in range(len(flow)):
                        print flow[i].shape
                        # reshape the flattened flow map to (feat_h, feat_w, 2);
                        # 19 x 24 is hard-coded for this input resolution
                        flow[i] = flow[i].reshape(19, 24, -1)
                        step = 2
                        plt.quiver(np.arange(0, flow[i].shape[1], step),
                                   np.arange(flow[i].shape[0], -1, -step),
                                   flow[i][::step, ::step, 0],
                                   flow[i][::step, ::step, 1])

                        plt.savefig(output_dir + '/' + str(i) + '.png')
                        plt.cla()

                    break

                print 'testing {} '.format(str(file_idx) + '.png')
                file_idx += 1
        else:
            #################################################
            # end part of a video                           #
            #################################################

            end_counter = 0
            image, feat = get_resnet_output(feat_predictors, data_batch,
                                            data_names)
            while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                flow = plot_feature(aggr_predictors, data_batch)

                # print flow
                # if (cfg.TEST.SEQ_NMS == False):
                #     save_image(output_dir, file_idx, out_im)
                # print 'testing {} {:.4f}s'.format(str(file_idx) + '.png', total_time / (file_idx + 1))
                file_idx += 1
                end_counter += 1
        # break

    print 'done'
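The loop above maintains its temporal window with deque(maxlen=...): copies of the first frame pad the left edge, and the last frame is replicated at the end of the video so every frame is aggregated over a full window. Below is a minimal sketch of that padding scheme, assuming a non-empty frame list, with frames as plain integers and feature extraction stubbed out; sliding_windows is an illustrative name, not part of the repo.

from collections import deque

def sliding_windows(frames, key_interval):
    """Yield a full window of 2*key_interval+1 frames centered on each frame."""
    window = deque(maxlen=2 * key_interval + 1)
    # left padding: replicate the first frame
    while len(window) < key_interval:
        window.append(frames[0])
    for f in frames:
        window.append(f)
        if len(window) == window.maxlen:
            yield list(window)
    # right padding: replicate the last frame until every frame was centered
    for _ in range(key_interval):
        window.append(frames[-1])
        yield list(window)

# e.g. 5 frames with key_interval=2 yield one 5-frame window per frame
for w in sliding_windows(list(range(5)), key_interval=2):
    print(w)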