Example #1
    def __init__(self):
        sym_instance = eval(config.symbol + '.' + config.symbol)()
        self.symbol = sym_instance.get_symbol(config, is_train=False)
        self.classes = ['box', 'robot']
        logging.debug("Classes: {}".format(self.classes))
        self.scales = config.SCALES[0]
        logging.debug("Scales: {}".format(self.scales))
        self.data_shape_conf = [[('data', (1, 3,
                                           self.scales[0], self.scales[1])),
                                 ('im_info', (1, 3))]]
        self.arg_params, self.aux_params = load_param(
            os.path.join(cur_path, '..', 'models', "rfcn_voc"),
            0, process=True)

        self.data_names = ['data', 'im_info']
        self.predictor = Predictor(self.symbol, ['data', 'im_info'], [],
                                   context=[mx.gpu(0)],
                                   max_data_shapes=self.data_shape_conf,
                                   provide_data=self.data_shape_conf,
                                   provide_label=[None],
                                   arg_params=self.arg_params,
                                   aux_params=self.aux_params)
        self.nms = gpu_nms_wrapper(config.TEST.NMS, 0)
        logging.info("Deformable detector initialized")
Example #2
def main():
    #ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]
    ctx = [mx.gpu(0), mx.gpu(1), mx.gpu(2), mx.gpu(3)]
    print args
    #gpu_nums = [int(i) for i in config.gpus.split(',')]
    gpu_nums = [0, 1, 2, 3]
    nms_dets = gpu_nms_wrapper(config.TEST.NMS, gpu_nums[0])
    logger, final_output_path = create_logger(config.output_path, args.cfg,
                                              config.dataset.test_image_set)
    output_path = os.path.join(final_output_path, '..',
                               config.dataset.image_set,
                               config.TRAIN.model_prefix)
    test_rcnn(config,
              config.dataset.dataset,
              config.dataset.test_image_set,
              config.dataset.root_path,
              config.dataset.dataset_path,
              ctx,
              output_path,
              config.TEST.test_epoch,
              args.vis,
              args.ignore_cache,
              args.shuffle,
              config.TEST.HAS_RPN,
              config.dataset.proposal,
              args.thresh,
              logger=logger,
              output_path=final_output_path,
              nms_dets=nms_dets,
              is_docker=args.is_docker)
Example #3
def show_boxes_with_nms(im, scores, boxes, classes, scale, config):
    import matplotlib.pyplot as plt
    dets_nms = []
    scores = scores.astype('f')
    boxes = boxes.astype('f')
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)
    for j in range(1, scores.shape[1]):
        cls_scores = scores[:, j, np.newaxis]
        cls_boxes = boxes[:, 4:8] * scale
        cls_dets = np.hstack((cls_boxes, cls_scores))
        keep = nms(cls_dets)
        cls_dets = cls_dets[keep, :]
        cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
        dets_nms.append(cls_dets)
    # visualize

    # show_boxes(im, dets_nms, classes, 1)
    import cv2
    im = image.transform_inverse(im, config.network.PIXEL_MEANS)
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    print 'image', im
    print 'det_nms', len(dets_nms), dets_nms
    print 'classes', len(classes)
    out_im = draw_boxes(im, dets_nms, classes, 1)

    #out_im = np.transpose(np.squeeze(out_im), (1,2,0))
    #print out_im.shape
    plt.imshow(out_im)
    plt.show()
Example #4
def proposal_test(rpn_cls, rpn_reg, feature_shape, image_shape, ctx):
    # Detach from the computation graph so no gradients are recorded
    rpn_cls = mx.nd.stop_gradient(rpn_cls)
    rpn_reg = mx.nd.stop_gradient(rpn_reg)

    # Get basic information of the feature and the image
    _n, _c, f_height, f_width = feature_shape
    _in, _ic, img_height, img_width = image_shape
    rpn_cls = rpn_cls.reshape((1, -1, 2, f_height, f_width))
    anchors_count = rpn_cls.shape[1]
    
    # Recover RPN prediction with anchors
    ref_anchors = generate_anchors(base_size=16, ratios=cfg.anchor_ratios, scales=cfg.anchor_scales)
    anchors = map_anchors(ref_anchors, rpn_reg.shape, img_height, img_width, ctx)
    anchors = anchors.reshape((1, -1, 4, f_height, f_width))
    anchors = mx.nd.transpose(anchors, (0, 3, 4, 1, 2))
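    # 2-way softmax over the cls output; channel 1 is the foreground probability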
    rpn_anchor_scores = mx.nd.softmax(mx.nd.transpose(rpn_cls, (0, 3, 4, 1, 2)), axis=4)[:,:,:,:,1]
    rpn_reg = mx.nd.transpose(rpn_reg.reshape((1, -1, 4, f_height, f_width)), (0, 3, 4, 1, 2))

    rpn_bbox_pred = bbox_inverse_transform(anchors.reshape((-1, 4)), rpn_reg.reshape((-1, 4)))
    rpn_bbox_pred = bbox_clip(rpn_bbox_pred, img_height, img_width)
    rpn_bbox_pred = rpn_bbox_pred.reshape((1, f_height, f_width, anchors_count, 4))

    # Use NMS to filter out too many boxes
    rpn_bbox_pred = rpn_bbox_pred.asnumpy().reshape((-1, 4))
    rpn_anchor_scores = rpn_anchor_scores.asnumpy().reshape((-1, ))
    rpn_bbox_proposal = np.hstack((rpn_bbox_pred, rpn_anchor_scores.reshape((rpn_anchor_scores.shape[0], 1))))
    # rpn_anchor_scores, rpn_bbox_pred = nms(rpn_anchor_scores, rpn_bbox_pred, cfg.rpn_nms_thresh, use_top_n=cfg.bbox_count_before_nms)
    gpu_nms = gpu_nms_wrapper(cfg.rpn_nms_thresh, ctx.device_id, use_top_n=cfg.bbox_count_before_nms)
    keep = gpu_nms(rpn_bbox_proposal)
    rpn_bbox_proposal = rpn_bbox_proposal[keep][:, :4]
    # rpn_bbox_pred = mx.nd.array(rpn_bbox_pred, ctx)
    rpn_bbox_pred = mx.nd.array(rpn_bbox_proposal, ctx)
    del rpn_anchor_scores

    # Keep first cfg.rcnn_test_sample_size boxes
    if rpn_bbox_pred.shape[0] > cfg.rcnn_test_sample_size:
        rpn_bbox_pred = rpn_bbox_pred[:cfg.rcnn_test_sample_size, :]
    
    return rpn_bbox_pred
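The bbox_clip step above clamps predicted corners to the image extent before NMS. A plausible numpy equivalent, assuming [x1, y1, x2, y2] boxes (bbox_clip_np is a hypothetical stand-in, not the library function):

import numpy as np

def bbox_clip_np(boxes, img_height, img_width):
    # clamp x1/x2 to [0, width-1] and y1/y2 to [0, height-1]
    boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, img_width - 1)
    boxes[:, 1::2] = np.clip(boxes[:, 1::2], 0, img_height - 1)
    return boxes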
Example #5
    def load_data_and_get_predictor(self, image_names):
        # load demo data

        #image_names = ['COCO_test2015_000000000891.jpg',
        #            'COCO_test2015_000000001669.jpg']
        data = []
        for im_name in image_names:
            #assert os.path.exists(
            #    cur_path + '/../demo/' + im_name), ('%s does not exist'.format('../demo/' + im_name))
            #im = cv2.imread(cur_path + '/../demo/' + im_name,
            #                cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
            im = cv2.imread(im_name, cv2.IMREAD_COLOR |
                            cv2.IMREAD_IGNORE_ORIENTATION)
            target_size = config.SCALES[0][0]
            max_size = config.SCALES[0][1]
            im, im_scale = resize(im, target_size, max_size,
                                  stride=config.network.IMAGE_STRIDE)
            im_tensor = transform(im, config.network.PIXEL_MEANS)
            im_info = np.array(
                [[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
            data.append({'data': im_tensor, 'im_info': im_info})

        # get predictor
        self.data_names = ['data', 'im_info']
        label_names = []
        data = [[mx.nd.array(data[i][name]) for name in self.data_names]
                for i in xrange(len(data))]
        max_data_shape = [[('data', (1, 3, max(
            [v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
        provide_data = [[(k, v.shape) for k, v in zip(self.data_names, data[i])]
                        for i in xrange(len(data))]
        provide_label = [None for i in xrange(len(data))]
        self.predictor = Predictor(self.sym, self.data_names, label_names,
                                   context=[mx.gpu(1)], max_data_shapes=max_data_shape,
                                   provide_data=provide_data, provide_label=provide_label,
                                   arg_params=self.arg_params, aux_params=self.aux_params)
        self.nms = gpu_nms_wrapper(config.TEST.NMS, 0)

        return data
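The returned data list is consumed one image at a time by wrapping it in an mx.io.DataBatch, exactly as the full demo examples further down this page do. A sketch, where detector is assumed to be an instance of the class this method belongs to and 'demo.jpg' is a placeholder path:

data = detector.load_data_and_get_predictor(['demo.jpg'])
data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                             provide_data=[[(k, v.shape) for k, v in zip(detector.data_names, data[0])]],
                             provide_label=[None])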
Example #6
    def forward(self, is_train, req, in_data, out_data, aux):
        nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)

        batch_size = in_data[0].shape[0]
        if batch_size > 1:
            raise ValueError("Sorry, multiple images each device is not implemented")

        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        cls_prob_dict = {
            'stride64': in_data[4],
            'stride32': in_data[3],
            'stride16': in_data[2],
            'stride8': in_data[1],
            'stride4': in_data[0],
        }
        bbox_pred_dict = {
            'stride64': in_data[9],
            'stride32': in_data[8],
            'stride16': in_data[7],
            'stride8': in_data[6],
            'stride4': in_data[5],
        }

        pre_nms_topN = self._rpn_pre_nms_top_n
        post_nms_topN = self._rpn_post_nms_top_n
        min_size = self._rpn_min_size

        proposal_list = []
        score_list = []
        for s in self._feat_stride:
            stride = int(s)
            sub_anchors = generate_anchors(base_size=stride, scales=self._scales, ratios=self._ratios)
            scores = cls_prob_dict['stride' + str(s)].asnumpy()[:, self._num_anchors:, :, :]
            bbox_deltas = bbox_pred_dict['stride' + str(s)].asnumpy()
            im_info = in_data[-1].asnumpy()[0, :]
            # 1. Generate proposals from bbox_deltas and shifted anchors
            # use real image size instead of padded feature map sizes
            height, width = int(im_info[0] / stride), int(im_info[1] / stride)

            # Enumerate all shifts
            shift_x = np.arange(0, width) * stride
            shift_y = np.arange(0, height) * stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()

            # Enumerate all shifted anchors:
            #
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = self._num_anchors
            K = shifts.shape[0]
            anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
            anchors = anchors.reshape((K * A, 4))

            # Transpose and reshape predicted bbox transformations to get them
            # into the same order as the anchors:
            #
            # bbox deltas will be (1, 4 * A, H, W) format
            # transpose to (1, H, W, 4 * A)
            # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
            # in slowest to fastest order
            bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

            # Same story for the scores:
            #
            # scores are (1, A, H, W) format
            # transpose to (1, H, W, A)
            # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
            scores = self._clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

            # Convert anchors into proposals via bbox transformations
            proposals = bbox_pred(anchors, bbox_deltas)

            # 2. clip predicted boxes to image
            proposals = clip_boxes(proposals, im_info[:2])

            # 3. remove predicted boxes with either height or width < threshold
            # (NOTE: convert min_size to input image scale stored in im_info[2])
            keep = self._filter_boxes(proposals, min_size * im_info[2])
            proposals = proposals[keep, :]
            scores = scores[keep]

            proposal_list.append(proposals)
            score_list.append(scores)

        proposals = np.vstack(proposal_list)
        scores = np.vstack(score_list)

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        det = np.hstack((proposals, scores)).astype(np.float32)
        keep = nms(det)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        # pad to ensure output size remains unchanged
        if len(keep) < post_nms_topN:
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
            keep = np.hstack((keep, pad))
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois array
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        # if is_train:
        self.assign(out_data[0], req[0], blob)
        if self._output_score:
            self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
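The proposal layer above enumerates shifted anchors with a broadcasted add: a (1, A, 4) block of reference anchors plus a (K, 1, 4) block of per-cell shifts yields all K * A anchors. A tiny self-contained illustration with one made-up reference anchor, stride 16 and a 2x2 feature map:

import numpy as np

ref = np.array([[-8, -8, 8, 8]])                  # A = 1 reference anchor
shift_x, shift_y = np.meshgrid([0, 16], [0, 16])  # 2x2 grid of cell origins
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()  # K = 4
anchors = ref.reshape((1, 1, 4)) + shifts.reshape((1, 4, 4)).transpose((1, 0, 2))
anchors = anchors.reshape((4, 4))                 # (K * A, 4) shifted anchors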
Example #7
def main():
    # get symbol
    pprint.pprint(config)
    # config.symbol = 'resnet_v1_101_rfcn_dcn' if not args.rfcn_only else 'resnet_v1_101_rfcn'
    config.symbol = 'resnet_v1_101_fpn_rcnn_rotbox_light_head_RoITransformer'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 15
    classes = ['__background__',  # always index 0
               'plane', 'baseball-diamond',
               'bridge', 'ground-track-field',
               'small-vehicle', 'large-vehicle',
               'ship', 'tennis-court',
               'basketball-court', 'storage-tank',
               'soccer-ball-field', 'roundabout',
               'harbor', 'swimming-pool',
               'helicopter']
    # load demo data
    image_names = ['P0004__1__0___0.png', 'P0053__1__0___0.png', 'P0060__1__1648___824.png']
    data = []
    for im_name in image_names:
        # pdb.set_trace()
        assert os.path.exists(cur_path + '/../demo/' + im_name), ('{} does not exist'.format('../demo/' + im_name))
        im = cv2.imread(cur_path + '/../demo/' + im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})


    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    # arg_params, aux_params = load_param(cur_path + '/../model/' + ('rfcn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'), 0, process=True)
    # TODO: change this path
    arg_params, aux_params = load_param(r'/home/dj/code/Deformable_FPN_DOTA/output/fpn/DOTA/resnet_v1_101_dota_rotbox_light_head_Rroi_v6_trainval_fpn_end2end/train/fpn_DOTA_oriented',
                                            config.TEST.test_epoch, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        scores, boxes, data_dict = im_detect_rotbox_Rroi(predictor, data_batch, data_names, scales, config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        tic()
        scores, boxes, data_dict = im_detect_rotbox_Rroi(predictor, data_batch, data_names, scales, config)
        # boxes = boxes[0].astype('f')
        # scores = scores[0].astype('f')
        boxes = boxes[0].astype('float64')
        scores = scores[0].astype('float64')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            # cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
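            # rotated boxes carry 8 values per class (quadrangle corners) instead of 4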
            cls_boxes = boxes[:, 8:16] if config.CLASS_AGNOSTIC else boxes[:, j * 8:(j + 1) * 8]
            cls_quadrangle_dets = np.hstack((cls_boxes, cls_scores))
            # keep = nms(cls_dets)
            keep = py_cpu_nms_poly(cls_quadrangle_dets, 0.3)
            cls_quadrangle_dets = cls_quadrangle_dets[keep, :]
            cls_quadrangle_dets = cls_quadrangle_dets[cls_quadrangle_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_quadrangle_dets)
        print 'testing {} {:.4f}s'.format(im_name, toc())
        # visualize
        # im = cv2.imread(cur_path + '/../demo/' + im_name)
        # im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        # pdb.set_trace()
        im = draw_all_poly_detection(data_dict[0]['data'].asnumpy(), dets_nms, classes[1:], data[idx][1].asnumpy()[0][2], config,
                                     threshold=0.2)
        cv2.imwrite(cur_path + '/../demo/' + 'results' + im_name, im)
        # show_boxes(im, dets_nms, classes, 1)

    print 'done'
Example #8
    def forward(self, is_train, req, in_data, out_data, aux):
        nms = gpu_nms_wrapper(self._threshold, 0)
        batch_size = in_data[0].shape[0]
        if batch_size > 1:
            raise ValueError(
                "multiple images per device is not implemented")

        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        pre_nms_topN = self._rpn_pre_nms_top_n
        post_nms_topN = self._rpn_post_nms_top_n
        min_size = self._rpn_min_size

        # the first set of anchors are background probabilities
        # keep the second part
        scores_list = in_data[0].asnumpy()  #[1,n]
        #print 'score_list shape:',scores_list.shape
        bbox_deltas_list = in_data[1].asnumpy()  #[1,n*2]
        im_info = in_data[2].asnumpy()[0, :]
        p2_shape = in_data[3].asnumpy().shape
        p3_shape = in_data[4].asnumpy().shape
        p4_shape = in_data[5].asnumpy().shape
        p5_shape = in_data[6].asnumpy().shape
        p6_shape = in_data[7].asnumpy().shape
        feat_shape = []
        feat_shape.append(p2_shape)
        feat_shape.append(p3_shape)
        feat_shape.append(p4_shape)
        feat_shape.append(p5_shape)
        feat_shape.append(p6_shape)
        #t = time.time()
        #print 'feat_shape:', feat_shape
        num_feat = len(feat_shape)  #[1,5,4]
        score_index_start = 0
        bbox_index_start = 0
        keep_proposal = []
        keep_scores = []

        #t_1 = time.time()
        for i in range(num_feat):
            feat_stride = int(self._feat_stride[i])  #4,8,16,32,64
            #print 'feat_stride:', feat_stride
            anchor = generate_anchors(feat_stride,
                                      scales=self._scales,
                                      ratios=self._ratios)
            num_anchors = anchor.shape[0]  #3

            height = feat_shape[i][2]
            width = feat_shape[i][3]

            shift_x = np.arange(0, width) * feat_stride
            shift_y = np.arange(0, height) * feat_stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()
            A = num_anchors  #3
            K = shifts.shape[0]  #height*width
            anchors = anchor.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
            anchors = anchors.reshape((K * A, 4))  # (K*A, 4) shifted anchors
            scores = scores_list[0, int(score_index_start):int(score_index_start + K * A * 2)]
            scores = scores.reshape((1, int(2 * num_anchors), -1, int(width)))  # (1, 2A, H, W)
            scores = scores[:, num_anchors:, :, :]  # keep the foreground scores: (1, A, H, W)
            bbox_deltas = bbox_deltas_list[0, int(bbox_index_start):int(bbox_index_start + K * A * 4)]
            bbox_deltas = bbox_deltas.reshape((1, int(4 * num_anchors), -1, int(width)))  # (1, 4A, H, W)
            score_index_start += K * A * 2
            bbox_index_start += K * A * 4
            bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))  # (1, H, W, 4A) -> (H*W*A, 4)
            scores = self._clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))  # (1, H, W, A) -> (H*W*A, 1)
            proposals = bbox_pred(anchors, bbox_deltas)
            proposals = clip_boxes(proposals, im_info[:2])
            keep = self._filter_boxes(proposals, min_size[i] * im_info[2])
            keep_proposal.append(proposals[keep, :])
            keep_scores.append(scores[keep])

        proposals = np.vstack(keep_proposal)
        scores = np.vstack(keep_scores)
        #print 'roi concate t_1 spends :{:.4f}s'.format(time.time()-t_1)
        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        #t_2 = time.time()
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]
        #print 'roi concate t_2_1_1 spends :{:.4f}s'.format(time.time()-t_2)
        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        #t_nms = time.time()
        det = np.hstack((proposals, scores)).astype(np.float32)
        keep = nms(det)
        #print 'roi concate nms spends :{:.4f}s'.format(time.time()-t_nms)

        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        # pad to ensure output size remains unchanged
        if len(keep) < post_nms_topN:
            try:
                pad = npr.choice(keep, size=post_nms_topN - len(keep))
            except ValueError:  # keep is empty: fall back to dummy 16x16 proposals
                proposals = np.zeros((post_nms_topN, 4), dtype=np.float32)
                proposals[:, 2] = 16
                proposals[:, 3] = 16
                batch_inds = np.zeros((proposals.shape[0], 1),
                                      dtype=np.float32)
                blob = np.hstack(
                    (batch_inds, proposals.astype(np.float32, copy=False)))
                self.assign(out_data[0], req[0], blob)

                if self._output_score:
                    self.assign(out_data[1], req[1],
                                scores.astype(np.float32, copy=False))
                return
            keep = np.hstack((keep, pad))
        proposals = proposals[keep, :]
        scores = scores[keep]
        #print 'roi concate t_2 spends :{:.4f}s'.format(time.time()-t_2)
        # Output rois array
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))
        self.assign(out_data[0], req[0], blob)

        if self._output_score:
            self.assign(out_data[1], req[1],
                        scores.astype(np.float32, copy=False))
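Note: both proposal layers above pad the surviving indices back up to post_nms_topN by re-sampling from keep with replacement, so downstream layers always receive a fixed-size RoI blob. A toy run of that padding step (the values are made up):

import numpy as np
import numpy.random as npr

keep = np.array([3, 7, 1])     # indices that survived NMS
post_nms_topN = 5
pad = npr.choice(keep, size=post_nms_topN - len(keep))  # sample 2 extra indices
keep = np.hstack((keep, pad))  # exactly post_nms_topN indices now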
Example #9
def main():
    # get symbol
    pprint.pprint(config)
    #config.symbol = "resnet_v1_101_fpn_dcn_rcnn"  if not args.rfcn_only else "resnet_v1_101_fpn_rcnn"
    config.symbol = "resnet_v1_101_fpn_dcn_rcnn"
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 5  # 4 foreground classes + background
    classes = ["car", "bus", "van", "others"]

    # load demo videos
    im_path = '../../aic2018/track1/images/'
    image_names = [
        x for x in os.listdir(im_path)
        if x.endswith(".jpg") and x.startswith("9_1")
        and not x.endswith("_bbox.jpg")
    ]
    data = []
    for idx, im_name in enumerate(image_names[:1]):
        if idx == 0:
            assert os.path.exists(im_path + im_name), (
                '{} does not exist'.format(im_path + im_name))
            im = cv2.imread(im_path + im_name,
                            cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
            target_size = config.SCALES[0][0]
            max_size = config.SCALES[0][1]
            im, im_scale = resize(im,
                                  target_size,
                                  max_size,
                                  stride=config.network.IMAGE_STRIDE)
            im_tensor = transform(im, config.network.PIXEL_MEANS)
            im_info = np.array(
                [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                dtype=np.float32)
            data.append({'data': im_tensor, 'im_info': im_info})
        else:
            data.append({'data': None, 'im_info': None})

    print(data)

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[0][name]) for name in data_names]  # only frame 0 was actually loaded
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    # what are provide_data and provide_label used for?
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[0])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    ## load parameters
    arg_params, aux_params = load_param(cur_path + '/../model/' + 'fpn_detrac',
                                        1,
                                        process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    print("successfully load model")

    # find all videos
    video_path = "../../tmp"
    video_files = [x for x in os.listdir(video_path) if x.endswith(".mp4")]
    save_path = "../../tmp/output"
    if not os.path.isdir(save_path):
        os.makedirs(save_path)

    print("processing {} videos...".format(len(video_files)))
    pbar = tqdm(total=len(video_files))
    for vf in video_files:
        vid = imageio.get_reader(os.path.join(video_path, vf), 'ffmpeg')
        vout = []
        for frame_idx, im in enumerate(vid):
            #im = cv2.imread(im_path + im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
            target_size = config.SCALES[0][0]
            max_size = config.SCALES[0][1]
            im, im_scale = resize(im,
                                  target_size,
                                  max_size,
                                  stride=config.network.IMAGE_STRIDE)
            im_tensor = transform(im, config.network.PIXEL_MEANS)
            im_info = np.array(
                [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                dtype=np.float32)

            data_idx = [{"data": im_tensor, "im_info": im_info}]
            data_idx = [[
                mx.nd.array(data_idx[i][name]) for name in data_names
            ] for i in xrange(len(data_idx))]
            data_batch = mx.io.DataBatch(
                data=[data_idx[0]],
                label=[],
                pad=0,
                index=frame_idx,
                provide_data=[[(k, v.shape)
                               for k, v in zip(data_names, data_idx[0])]],
                provide_label=[None])

            scales = [
                data_batch.data[i][1].asnumpy()[0, 2]
                for i in xrange(len(data_batch.data))
            ]

            tic()
            scores, boxes, data_dict = im_detect(predictor, data_batch,
                                                 data_names, scales, config)
            boxes = boxes[0].astype('f')
            scores = scores[0].astype('f')
            dets_nms = []
            for j in range(1, scores.shape[1]):
                cls_scores = scores[:, j, np.newaxis]
                cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                cls_dets = cls_dets[keep, :]
                cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
                dets_nms.append(cls_dets)

            print 'testing {} the {} th frame at {:.4f}s, detections {}'.format(
                vf, frame_idx, toc(), len(dets_nms))
            # save results
            #im = cv2.imread(im_path + im_name)
            #im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
            #im_bbox = show_boxes(im, dets_nms, classes, 1)
            #cv2.imwrite(im_path + im_name.replace(".jpg", "_bbox.jpg"), im_bbox)
            save_im, outputs = show_boxes(im, dets_nms, classes, 1)
            #cv2.imwrite(os.path.join(save_path, "{}_{}.jpg".format(vf.replace(".mp4", ""), str(frame_idx).zfill(5))), save_im)

            for out in outputs:
                vout.append([frame_idx] + out)

        # save the whole video detection into pickle file
        with open(os.path.join(save_path, vf.replace(".mp4", ".pkl")),
                  "wb") as f:
            pickle.dump(vout, f, protocol=2)
        pbar.update(1)

    pbar.close()
    print 'done'
Example #10
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = "resnet_v1_101_fpn_dcn_rcnn"
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 81
    classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
               'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
               'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
               'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
               'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
               'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
               'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
               'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book',
               'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

    # load demo data
    image_names = ['test_city_inter.jpg']
    data = []
    for im_name in image_names:
        assert os.path.exists(cur_path + '/../demo/' + im_name), ('{} does not exist'.format('../demo/' + im_name))
        im = cv2.imread(cur_path + '/../demo/' + im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})


    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../model/demo_model/' + ('fpn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'), 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.5, :]
            dets_nms.append(cls_dets)
        print 'testing {} {:.4f}s'.format(im_name, toc())
        # visualize
        im = cv2.imread(cur_path + '/../demo/' + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        save_im, _ = show_boxes(im, dets_nms, classes, 1)
        cv2.imwrite(cur_path + '/../demo/' + im_name.replace(".jpg", "_out.jpg"), save_im)

    print 'done'
Example #11
    ctx = [mx.gpu(gpu_id)]
    update_config(
        "experiments/fpn/cfgs/resnet_v1_101_coco_trainval_fpn_dcn_end2end_ohem.yaml"
    )
    sym = get_symbol(config)
    net = SymbolBlock(sym=sym,
                      input_names=["data", "im_info"],
                      pretrained="fpn_coco-3-0.0.params")
    net.collect_params().reset_ctx(ctx)
    im_names = list(
        lsdir("/data1/zyx/yks/dataset/guangdong_round2_test_a_20181011",
              suffix=".jpg"))

    # shuffle(im_names)

    nms_wrapper = gpu_nms_wrapper(config.TEST.NMS, gpu_id)
    # score = validate(net, nms_wrapper, ctx_list = ctx)
    # print(score)
    results = {}
    results["results"] = []
    for im_name in tqdm.tqdm(im_names):
        TEST_SCALES = [[960, 1280]]
        one_img = {}
        one_img["filename"] = os.path.basename(im_name)
        one_img["rects"] = []
        bboxes, scores, labels = im_detect_bbox_aug(net,
                                                    nms_wrapper,
                                                    im_name,
                                                    TEST_SCALES,
                                                    config.network.PIXEL_MEANS,
                                                    config.TRAIN.BBOX_STDS,
Example #12
def main():
    global classes

    assert os.path.exists(args.input), ('{} does not exist'.format(args.input))
    im = cv2.imread(args.input,
                    cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    arr = np.array(im)
    # note: numpy shape is (rows, cols, channels); the width/height names below
    # are swapped relative to that, but they are used consistently in this function
    origin_width, origin_height, _ = arr.shape

    portion = smart_chipping(origin_width, origin_height)

    # manually update the configuration
    # print(config.SCALES[0][0])
    # TODO: note this is hard coded and assume there are three values for the SCALE configuration
    config.SCALES[0] = (portion, portion, portion)
    # config.max_per_image =
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_fpn_dcn_rcnn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # load demo data
    data = []

    # portion = args.chip_size

    cwn, chn = (portion, portion)
    wn, hn = (int(origin_width / cwn), int(origin_height / chn))
    padding_y = int(
        math.ceil(float(origin_height) / chn) * chn - origin_height)
    padding_x = int(math.ceil(float(origin_width) / cwn) * cwn - origin_width)
    print("padding_y,padding_x, origin_height, origin_width", padding_y,
          padding_x, origin_height, origin_width)
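    # worked example with hypothetical sizes: origin_height = 1050, chn = 400
    #   padding_y = ceil(1050 / 400) * 400 - 1050 = 1200 - 1050 = 150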
    # top, bottom, left, right - border width in number of pixels in corresponding directions
    im = cv2.copyMakeBorder(im,
                            0,
                            padding_x,
                            0,
                            padding_y,
                            cv2.BORDER_CONSTANT,
                            value=[0, 0, 0])
    # the section below could be optimized. but basically the idea is to re-calculate all the values
    arr = np.array(im)
    width, height, _ = arr.shape
    cwn, chn = (portion, portion)
    wn, hn = (int(width / cwn), int(height / chn))

    image_list = chip_image(im, (portion, portion))
    for im in image_list:
        target_size = portion
        max_size = portion
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        # print("im.shape,im_scale",im.shape,im_scale)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../model/' +
                                        ('fpn_dcn_xview_480_640_800_alltrain'),
                                        11,
                                        process=True)

    # arg_params, aux_params = load_param(cur_path + '/../model/' + ('fpn_dcn_coco' if not args.fpn_only else 'fpn_coco'), 0, process=True)
    print("loading parameter done")

    if args.cpu_only:
        predictor = Predictor(sym,
                              data_names,
                              label_names,
                              context=[mx.cpu()],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
        nms = py_nms_wrapper(config.TEST.NMS)
    else:
        predictor = Predictor(sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(args.gpu_index)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
        nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    num_preds = int(5000 * math.ceil(float(portion) / 400))
    # test
    boxes, scores, classes = generate_detections(data, data_names, predictor,
                                                 config, nms, image_list,
                                                 num_preds)
    #Process boxes to be full-sized

    print("boxes shape is", boxes.shape, "wn, hn", wn, hn, "width, height",
          width, height)
    bfull = boxes.reshape((wn, hn, num_preds, 4))

    for i in range(wn):
        for j in range(hn):
            bfull[i, j, :, 0] += j * cwn
            bfull[i, j, :, 2] += j * cwn

            bfull[i, j, :, 1] += i * chn
            bfull[i, j, :, 3] += i * chn

            # clip values
            bfull[i, j, :, 0] = np.clip(bfull[i, j, :, 0], 0, origin_height)
            bfull[i, j, :, 2] = np.clip(bfull[i, j, :, 2], 0, origin_height)

            bfull[i, j, :, 1] = np.clip(bfull[i, j, :, 1], 0, origin_width)
            bfull[i, j, :, 3] = np.clip(bfull[i, j, :, 3], 0, origin_width)

    bfull = bfull.reshape((hn * wn, num_preds, 4))
    scores = scores.reshape((hn * wn, num_preds))
    classes = classes.reshape((hn * wn, num_preds))

    #only display boxes with confidence > .5

    # print(bfull, scores, classes)
    #bs = bfull[scores > 0.08]
    #cs = classes[scores>0.08]
    #print("bfull.shape,scores.shape, bs.shape",bfull.shape,scores.shape, bs.shape)
    # s = im_name
    # draw_bboxes(arr,bs,cs).save("/tmp/"+s[0].split(".")[0] + ".png")

    #scoring_line_threshold = 11000

    #if bs.shape[0] > scoring_line_threshold:
    # too many predictions, we should trim the low confidence ones
    with open(args.output, 'w') as f:
        for i in range(bfull.shape[0]):
            for j in range(bfull[i].shape[0]):
                #box should be xmin ymin xmax ymax
                box = bfull[i, j]
                class_prediction = classes[i, j]
                score_prediction = scores[i, j]
                if int(class_prediction) != 0:
                    f.write('%d %d %d %d %d %f \n' % \
                        (box[0], box[1], box[2], box[3], int(class_prediction), score_prediction))

    print('done')
Example #13
def proposal_train(rpn_cls, rpn_reg, gt, feature_shape, image_shape, ctx):
    # Detach from the computation graph so no gradients are recorded
    rpn_cls = mx.nd.stop_gradient(rpn_cls)
    rpn_reg = mx.nd.stop_gradient(rpn_reg)

    # Get basic information of the feature and the image
    _n, _c, f_height, f_width = feature_shape
    _in, _ic, img_height, img_width = image_shape
    rpn_cls = rpn_cls.reshape((1, -1, 2, f_height, f_width))
    anchors_count = rpn_cls.shape[1]
    
    # Recover RPN prediction with anchors
    ref_anchors = generate_anchors(base_size=16, ratios=cfg.anchor_ratios, scales=cfg.anchor_scales)
    anchors = map_anchors(ref_anchors, rpn_reg.shape, img_height, img_width, ctx)
    anchors = anchors.reshape((1, -1, 4, f_height, f_width))
    anchors = mx.nd.transpose(anchors, (0, 3, 4, 1, 2))
    rpn_anchor_scores = mx.nd.softmax(mx.nd.transpose(rpn_cls, (0, 3, 4, 1, 2)), axis=4)[:,:,:,:,1]
    rpn_reg = mx.nd.transpose(rpn_reg.reshape((1, -1, 4, f_height, f_width)), (0, 3, 4, 1, 2))
    with mx.autograd.pause():
        rpn_bbox_pred = bbox_inverse_transform(anchors.reshape((-1, 4)), rpn_reg.reshape((-1, 4)))
        rpn_bbox_pred = bbox_clip(rpn_bbox_pred, img_height, img_width)
        rpn_bbox_pred = rpn_bbox_pred.reshape((1, f_height, f_width, anchors_count, 4))

    # Use NMS to filter out too many boxes
    rpn_bbox_pred = rpn_bbox_pred.asnumpy().reshape((-1, 4))
    rpn_anchor_scores = rpn_anchor_scores.asnumpy().reshape((-1, ))
    rpn_bbox_proposal = np.hstack((rpn_bbox_pred, rpn_anchor_scores.reshape((rpn_anchor_scores.shape[0], 1))))
    # rpn_anchor_scores, rpn_bbox_pred = nms(rpn_anchor_scores, rpn_bbox_pred, cfg.rpn_nms_thresh, use_top_n=cfg.bbox_count_before_nms)
    gpu_nms = gpu_nms_wrapper(cfg.rpn_nms_thresh, ctx.device_id, use_top_n=cfg.bbox_count_before_nms)
    keep = gpu_nms(rpn_bbox_proposal)
    rpn_bbox_proposal = rpn_bbox_proposal[keep][:, :4]

    # rpn_bbox_pred = mx.nd.array(rpn_bbox_pred, ctx)
    rpn_bbox_pred = mx.nd.array(rpn_bbox_proposal, ctx)
    del rpn_anchor_scores

    # append ground truth
    rpn_bbox_pred = mx.nd.concatenate([rpn_bbox_pred, gt[0][:,:4]])

    # assign label for rpn_bbox_pred
    overlaps = bbox_overlaps(rpn_bbox_pred, gt[0][:, :4].reshape((-1, 4)))
    gt_assignment = mx.nd.argmax(overlaps, axis=1).asnumpy().astype(np.int32)
    max_overlaps = mx.nd.max(overlaps, axis=1).asnumpy()
    gt_labels = gt[0][:, 4].reshape((-1,)).asnumpy()
    gt_bboxes = gt[0][:, :4].reshape((-1, 4)).asnumpy()
    cls_labels = gt_labels[gt_assignment]
    rpn_bbox_pred_np = rpn_bbox_pred.asnumpy()
    reg_target = gt_bboxes[gt_assignment, :]
    cls_labels = cls_labels * (max_overlaps >= cfg.rcnn_fg_thresh)

    # sample positive and negative ROIs
    fg_inds = np.where(max_overlaps >= cfg.rcnn_fg_thresh)[0]
    bg_inds = np.where((max_overlaps >= cfg.rcnn_bg_lo_thresh) * (max_overlaps < cfg.rcnn_fg_thresh))[0]
    fg_nums = int(cfg.rcnn_train_sample_size * cfg.rcnn_train_fg_fraction)
    bg_nums = cfg.rcnn_train_sample_size - fg_nums
    if (len(fg_inds) > fg_nums):
        fg_inds = np.random.choice(fg_inds, size=fg_nums, replace=False)
    if (len(bg_inds) > bg_nums):
        bg_inds = np.random.choice(bg_inds, size=bg_nums, replace=False)
    cls_labels = np.concatenate([cls_labels[fg_inds], cls_labels[bg_inds]])
    reg_target = np.concatenate([reg_target[fg_inds], reg_target[bg_inds]])
    rpn_bbox_pred_np = np.concatenate([rpn_bbox_pred_np[fg_inds], rpn_bbox_pred_np[bg_inds]])
    cls_labels = mx.nd.array(cls_labels, ctx)
    reg_target = mx.nd.array(reg_target, ctx)
    rpn_bbox_pred = mx.nd.array(rpn_bbox_pred_np, ctx)
    reg_target = bbox_transform(rpn_bbox_pred, reg_target)
    
    # Shape reg_target into 4 * num_classes
    reg_large_target = mx.nd.zeros((reg_target.shape[0], 4 * cfg.num_classes), ctx)
    for i in range(cls_labels.shape[0]):
        cur_label = int(cls_labels[i].asscalar())
        if (cur_label != 0):
            reg_large_target[i, cur_label*4: (cur_label+1)*4] = reg_target[i, :]
    
    return rpn_bbox_pred, reg_large_target, cls_labels
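The loop above scatters each 4-value regression target into the slot of its class inside a 4 * num_classes row, the layout a class-specific bbox head expects. A small numpy sketch of the same expansion (the 3 classes and 2 RoIs are made-up values):

import numpy as np

num_classes = 3
cls_labels = np.array([2, 0])                 # RoI 0 is class 2, RoI 1 is background
reg_target = np.array([[0.1, 0.2, 0.3, 0.4],
                       [0.0, 0.0, 0.0, 0.0]])
reg_large = np.zeros((2, 4 * num_classes))
for i, c in enumerate(cls_labels):
    if c != 0:                                # background rows stay all-zero
        reg_large[i, c * 4:(c + 1) * 4] = reg_target[i]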
Example #14
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn_dcn' if not args.rfcn_only else 'resnet_v1_101_rfcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 81
    classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
               'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
               'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
               'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
               'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
               'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
               'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
               'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book',
               'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

    # load demo data
    image_names = ['COCO_test2015_000000000891.jpg', 'COCO_test2015_000000001669.jpg']
    data = []
    for im_name in image_names:
        assert os.path.exists(cur_path + '/../demo/' + im_name), ('{} does not exist'.format('../demo/' + im_name))
        im = cv2.imread(cur_path + '/../demo/' + im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})


    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../model/' + ('rfcn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'), 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print 'testing {} {:.4f}s'.format(im_name, toc())
        # visualize
        im = cv2.imread(cur_path + '/../demo/' + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_boxes(im, dets_nms, classes, 1)

    print 'done'
Example #15
def process_video_frame(raw_frame_queue, bbox_frame_queue):
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)
    arg_params, aux_params = load_param(
        './output/rfcn/road_obj/road_train_all/all/' + 'rfcn_road',
        19,
        process=True)

    # set up class names; the background is not counted here, even though it is treated as label 0
    num_classes = 4
    classes = ['vehicle', 'pedestrian', 'cyclist', 'traffic lights']

    target_size = config.SCALES[0][1]
    max_size = config.SCALES[0][1]

    while True:
        tic()
        i = 0
        data = []
        frame_list = []
        while len(data) < 15:
            frame = raw_frame_queue.get()
            if frame is None:
                continue
            if i < 2:
                i += 1
                frame, im_scale = resize(frame,
                                         target_size,
                                         max_size,
                                         stride=config.network.IMAGE_STRIDE)
                bbox_frame_queue.put(frame)
                continue
            frame, im_scale = resize(frame,
                                     target_size,
                                     max_size,
                                     stride=config.network.IMAGE_STRIDE)
            im_tensor = transform(frame, config.network.PIXEL_MEANS)
            im_info = np.array(
                [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                dtype=np.float32)
            data.append({'data': im_tensor, 'im_info': im_info})
            frame_list.append(frame)

        # get predictor
        data_names = ['data', 'im_info']
        label_names = []
        data = [[mx.nd.array(data[i][name]) for name in data_names]
                for i in xrange(len(data))]
        # print('Debug: [data] shape: {}, cont: {}'.format(type(data), data))
        max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                     max([v[1] for v in config.SCALES])))]]
        # print('Debug: [max_data_shape] shape: {}, cont: {}'.format(type(max_data_shape), max_data_shape))
        provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                        for i in xrange(len(data))]
        # print('Debug: [provide_data] shape: {}, cont: {}'.format(type(provide_data), provide_data))
        provide_label = [None for i in xrange(len(data))]
        # print('Debug: [provide_label] shape: {}, cont: {}'.format(type(provide_label), provide_label))
        predictor = Predictor(sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
        nms = gpu_nms_wrapper(config.TEST.NMS, 0)

        # Process video frame
        # image_names = ['frame']
        # for idx, frame in enumerate(frame_list):
        data_batch = mx.io.DataBatch(data=data,
                                     label=[],
                                     pad=0,
                                     provide_data=provide_data,
                                     provide_label=provide_label)
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
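        # each 'im_info' row is [height, width, im_scale]; index 2 extracts the
        # resize scale used later to map detections back onto the original frame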
        # print("length: {}".format(len(data_batch.data)))
        # print('Debug: [scales] cont: {}'.format(scales))
        scores_all, boxes_all, data_dict_all = im_detect(
            predictor, data_batch, data_names, scales, config)
        # print('scores_all: Type: {}, Values: {}, Length: {}'.format(type(scores_all), scores_all, len(scores_all)))
        # print('boxes_all: Type: {}, Values: {}, Length: {}'.format(type(boxes_all), boxes_all, len(boxes_all)))
        # print('data_dict_all: Type: {}, Values: {}, length: {}'.format(type(data_dict_all), data_dict_all, len(data_dict_all)))
        # print('frame_list: Type: {}, Values: {}, Length: {}'.format(type(frame_list), frame_list, len(frame_list)))

        # print('scores_all: Type: {}, Length: {}, Values: {}'.format(type(scores_all[0]), len(scores_all[0]), scores_all[0]))
        # print(scores_all[0].shape)
        # print('boxes_all: Type: {}, Length: {}'.format(type(boxes_all), len(boxes_all)))
        # print(boxes_all[0].shape)
        # print('data_dict_all: Type: {}, length: {}'.format(type(data_dict_all), len(data_dict_all)))
        # print('frame_list: Type: {}, Length: {}'.format(type(frame_list), len(frame_list)))

        for idx, frame in enumerate(frame_list):
            boxes = boxes_all[idx].astype('f')
            scores = scores_all[idx].astype('f')
            dets_nms = []
            for j in range(1, scores.shape[1]):
                cls_scores = scores[:, j, np.newaxis]
                cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                cls_dets = cls_dets[keep, :]
                cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
                dets_nms.append(cls_dets)

            bbox_frame_queue.put(
                draw_bbox_on_frame(frame, dets_nms, classes,
                                   scale=scales[idx]))
        print(toc())
Example #16
def process_image_fun(imagesPath=None,
                      fileOp=None,
                      vis=None,
                      model_params_list=None,
                      count=0):
    # init rfcn dcn detect model (mxnet)
    # model_params_list = init_detect_model()

    # num_classes = RFCN_DCN_CONFIG['num_classes']  # 0 is background,
    classes = RFCN_DCN_CONFIG['num_classes_name_list']
    min_threshold = min(list(
        RFCN_DCN_CONFIG['need_label_thresholds'].values()))

    im_name = imagesPath
    all_can_read_image = []
    data = []
    all_can_read_image.append(im_name)
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    im, im_scale = resize(im_name,
                          target_size,
                          max_size,
                          stride=config.network.IMAGE_STRIDE)
    im_tensor = transform(im, config.network.PIXEL_MEANS)
    im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                       dtype=np.float32)
    data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]

    predictor = Predictor(model_params_list[0],
                          data_names,
                          label_names,
                          context=[mx.gpu(1)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=model_params_list[1],
                          aux_params=model_params_list[2])
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)
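    # NOTE: the Predictor above is bound to mx.gpu(1) while this NMS kernel
    # runs on device 0; whether the split across GPUs is intentional is not
    # clear from the original code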

    for idx, im_name in enumerate(all_can_read_image):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > min_threshold, :]
            dets_nms.append(cls_dets)
        #print('testing {} {:.4f}s'.format(im_name, toc()))
        im = show_boxes_write_rg(im=im_name,
                                 dets=dets_nms,
                                 classes=classes,
                                 scale=1,
                                 vis=vis,
                                 fileOp=fileOp,
                                 count=count)
    return im
Example #17
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_flownet_rfcn'
    model = '/../model/rfcn_dff_flownet_vid'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_batch_test_symbol(config)
    sym.save('dff_rfcn.json')
    # set up class names
    num_classes = 31
    classes = [
        'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car',
        'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda',
        'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle', 'rabbit',
        'red_panda', 'sheep', 'snake', 'squirrel', 'tiger', 'train', 'turtle',
        'watercraft', 'whale', 'zebra'
    ]

    # load demo data
    image_names = glob.glob(cur_path + '/../demo/sample/*.JPEG')
    output_dir = cur_path + '/../demo/rfcn_dff_batch/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    key_frame_interval = 10

    #

    data = []
    key_im_tensor = None
    cur_im_tensor = []
    im_info_tensor = []
    image_names_list = []
    image_names_batch = []
    for idx, im_name in enumerate(image_names):
        assert os.path.exists(im_name), '{} does not exist'.format(im_name)
        im = cv2.imread(im_name)  # optionally cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
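        # group frames into runs of key_frame_interval: the first frame of each
        # run becomes 'data_key', the rest are stacked into 'data_other', and
        # one batch entry is emitted per run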
        if idx % key_frame_interval == 0:
            key_im_tensor = im_tensor
        else:
            cur_im_tensor.append(im_tensor)
        im_info_tensor.append(im_info)
        image_names_batch.append(im_name)
        if (idx + 1) % key_frame_interval == 0 or idx == len(image_names) - 1:
            data.append({
                'data_other': np.concatenate(cur_im_tensor),
                'im_info': np.concatenate(im_info_tensor),
                'data_key': key_im_tensor
            })
            key_im_tensor = None
            cur_im_tensor = []
            im_info_tensor = []
            image_names_list.append(image_names_batch)
            image_names_batch = []

    # get predictor
    data_names = ['data_other', 'im_info', 'data_key']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[
        ('data_other', (key_frame_interval - 1, 3,
                        max([v[0] for v in config.SCALES]),
                        max([v[1] for v in config.SCALES]))),
        ('data_key', (1, 3, max([v[0] for v in config.SCALES]),
                      max([v[1] for v in config.SCALES]))),
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + model, 0, process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)
    #print predictor
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(1):
        data_batch = mx.io.DataBatch(data=[data[j]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[j])
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[:, 2]
            for i in xrange(len(data_batch.data))
        ]
        print scales[0].shape
        scores_all, boxes_all, data_dict = im_batch_detect(
            predictor, data_batch, data_names, scales, config)

    print "warmup done"
    # test
    time = 0
    count = 0
    for idx, im_names in enumerate(image_names_list):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[:, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        scores_all, boxes_all, data_dict = im_batch_detect(
            predictor, data_batch, data_names, scales, config)
        time += toc()
        count += len(scores_all)
        print 'testing {} {:.4f}s x {:d}'.format(im_names[0], time / count,
                                                 len(scores_all))
        '''
        for batch_idx in xrange(len(scores_all)):
            boxes = boxes_all[batch_idx].astype('f')
            scores = scores_all[batch_idx].astype('f')
            dets_nms = []
            for j in range(1, scores.shape[1]):
                cls_scores = scores[:, j, np.newaxis]
                cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                cls_dets = cls_dets[keep, :]
                cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
                dets_nms.append(cls_dets)
            # visualize
            im = cv2.imread(im_names[batch_idx])
            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
            # show_boxes(im, dets_nms, classes, 1)
            out_im = draw_boxes(im, dets_nms, classes, 1)
            _, filename = os.path.split(im_names[batch_idx])
            cv2.imwrite(output_dir + filename,out_im)
        '''

    print 'done'
Example #18
def gpu_mask_voting(masks, boxes, scores, num_classes, max_per_image, im_width, im_height,
                    nms_thresh, merge_thresh, binary_thresh=0.4, device_id=0):
    """
    A wrapper function, note we already know the class of boxes and masks
    """
    nms = gpu_nms_wrapper(nms_thresh, device_id)
    # Intermediate results
    t_boxes = [[] for _ in xrange(num_classes)]
    t_scores = [[] for _ in xrange(num_classes)]
    t_all_scores = []
    for i in xrange(1, num_classes):
        dets = np.hstack((boxes.astype(np.float32), scores[:, i:i+1]))
        inds = nms(dets)
        num_keep = min(len(inds), max_per_image)
        inds = inds[:num_keep]
        t_boxes[i] = boxes[inds]
        t_scores[i] = scores[inds, i]
        t_all_scores.extend(scores[inds, i])

    sorted_scores = np.sort(t_all_scores)[::-1]
    num_keep = min(len(sorted_scores), max_per_image)
    thresh = max(sorted_scores[num_keep - 1], 1e-3)

    # inds array to record which mask should be aggregated together
    candidate_inds = []
    # weight for each element in the candidate inds
    candidate_weights = []
    # start position for candidate array
    candidate_start = []
    candidate_scores = []
    class_bar = [[] for _ in xrange(num_classes)]

    for i in xrange(1, num_classes):
        keep = np.where(t_scores[i] >= thresh)
        t_boxes[i] = t_boxes[i][keep]
        t_scores[i] = t_scores[i][keep]

    # organize helper variable for gpu mask voting
    for c in xrange(1, num_classes):
        num_boxes = len(t_boxes[c])
        for i in xrange(num_boxes):
            cur_ov = bbox_overlaps(boxes.astype(np.float), t_boxes[c][i, np.newaxis].astype(np.float))
            cur_inds = np.where(cur_ov >= merge_thresh)[0]
            candidate_inds.extend(cur_inds)
            cur_weights = scores[cur_inds, c]
            cur_weights = cur_weights / sum(cur_weights)
            candidate_weights.extend(cur_weights)
            candidate_start.append(len(candidate_inds))
        candidate_scores.extend(t_scores[c])
        class_bar[c] = len(candidate_scores)

    candidate_inds = np.array(candidate_inds, dtype=np.int32)
    candidate_weights = np.array(candidate_weights, dtype=np.float32)
    candidate_start = np.array(candidate_start, dtype=np.int32)
    candidate_scores = np.array(candidate_scores, dtype=np.float32)

    # the input masks/boxes are relatively large
    # select only a subset of them are useful for mask merge
    unique_inds = np.unique(candidate_inds)
    unique_inds_order = unique_inds.argsort()
    unique_map = {}
    for i in xrange(len(unique_inds)):
        unique_map[unique_inds[i]] = unique_inds_order[i]
    for i in xrange(len(candidate_inds)):
        candidate_inds[i] = unique_map[candidate_inds[i]]
    boxes = boxes[unique_inds, ...]
    masks = masks[unique_inds, ...]

    boxes = np.round(boxes)
    result_mask, result_box = mask_voting_kernel(boxes, masks, candidate_inds, candidate_start, candidate_weights,
                                                 binary_thresh, im_height, im_width, device_id)
    result_box = np.hstack((result_box, candidate_scores[:, np.newaxis]))

    list_result_box = [[] for _ in xrange(num_classes)]
    list_result_mask = [[] for _ in xrange(num_classes)]
    cls_start = 0
    for i in xrange(1, num_classes):
        cls_end = class_bar[i]
        cls_box = result_box[cls_start:cls_end, :]
        cls_mask = result_mask[cls_start:cls_end, :]
        valid_ind = np.where((cls_box[:, 2] > cls_box[:, 0]) &
                             (cls_box[:, 3] > cls_box[:, 1]))[0]
        list_result_box[i] = cls_box[valid_ind, :]
        list_result_mask[i] = cls_mask[valid_ind, :]
        cls_start = cls_end

    return list_result_mask, list_result_box
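A hedged usage sketch for gpu_mask_voting follows; the array sizes and threshold values are illustrative assumptions inferred from how the function indexes its arguments, not values taken from the original code.

import numpy as np

N, C, H, W = 300, 31, 600, 1000  # illustrative sizes only
masks = np.random.rand(N, 21, 21).astype(np.float32)   # per-box mask probabilities
boxes = np.random.rand(N, 4).astype(np.float32) * 500  # [x1, y1, x2, y2]
scores = np.random.rand(N, C).astype(np.float32)       # class 0 = background

list_masks, list_boxes = gpu_mask_voting(
    masks, boxes, scores, num_classes=C, max_per_image=100,
    im_width=W, im_height=H, nms_thresh=0.3, merge_thresh=0.5,
    binary_thresh=0.4, device_id=0)
# list_boxes[c] is an (M_c, 5) array of [x1, y1, x2, y2, score] for each class
# c >= 1; list_masks[c] holds the corresponding merged masks.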
Example #19
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = "resnet_v1_101_fpn_dcn_rcnn"  if not args.rfcn_only else "resnet_v1_101_fpn_rcnn"
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 81
    classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
               'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
               'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
               'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
               'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
               'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
               'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
               'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book',
               'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
    # test
    # find all videos
    video_path = "../../tmp"#"../../aic2018/track1/track1_videos"
    video_files = sorted([ x for x in os.listdir(video_path) if x.endswith(".mp4")])
    save_path = "../../tmp/output"#"../../aic2018/track1/output"
    if not os.path.isdir(save_path):
        os.makedirs(save_path)
    
    print("processing {} videos...".format(len(video_files)))
    pbar = tqdm(total=len(video_files))
    for vf in video_files:
        vid = imageio.get_reader(os.path.join(video_path, vf),'ffmpeg')
        data = []
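        # only the first frame is decoded here; it is used solely to derive the
        # input shapes needed to bind the predictor below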
        for idx, im in enumerate(vid):
            if idx == 0:
                #assert os.path.exists(im_path + im_name), ('%s does not exist'.format(im_path + im_name))
                #im = cv2.imread(im_path + im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
                target_size = config.SCALES[0][0]
                max_size = config.SCALES[0][1]
                im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
                im_tensor = transform(im, config.network.PIXEL_MEANS)
                im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
                data.append({'data': im_tensor, 'im_info': im_info})
            else:
                break
                #data.append({'data': None, 'im_info': None})
        
        # get predictor
        data_names = ['data', 'im_info']
        label_names = []
        data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
        max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
        provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
        provide_label = [None for i in xrange(len(data))]

        print("hhhhh")
        print(provide_data, provide_label)
        print("hhhhh")  

        arg_params, aux_params = load_param(cur_path + '/../model/demo_model/' + ('fpn_dcn_coco' if not args.rfcn_only else 'fpn_coco'), 0, process=True)

        predictor = Predictor(sym, data_names, label_names,
                              context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                              provide_data=provide_data, provide_label=provide_label,
                              arg_params=arg_params, aux_params=aux_params)
        nms = gpu_nms_wrapper(config.TEST.NMS, 0)

        print("successfully load model")
        
        vout = []
        # write to video
        writer = skvideo.io.FFmpegWriter(os.path.join(save_path, vf.replace(".mp4","_out.mp4")), outputdict={'-vcodec': 'libx264', '-b': '300000000'})
        for frame_idx, im in enumerate(vid):
            #im = cv2.imread(im_path + im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
            im_original = im.copy()
            
            target_size = config.SCALES[0][0]
            max_size = config.SCALES[0][1]
            im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
            im_tensor = transform(im, config.network.PIXEL_MEANS)
            im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)

            data_idx = [{"data": im_tensor, "im_info": im_info}]
            data_idx = [[mx.nd.array(data_idx[i][name]) for name in data_names] for i in xrange(len(data_idx))]
            data_batch = mx.io.DataBatch(data=[data_idx[0]], label=[], pad=0, index=frame_idx,
                                         provide_data=[[(k, v.shape) for k, v in zip(data_names, data_idx[0])]],
                                         provide_label=[None])

            scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

            tic()
            scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
            boxes = boxes[0].astype('f')
            scores = scores[0].astype('f')
            dets_nms = []
            num_dets = 0
            for j in range(1, scores.shape[1]):
                cls_scores = scores[:, j, np.newaxis]
                cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                cls_dets = cls_dets[keep, :]
                cls_dets = cls_dets[cls_dets[:, -1] > 0.65, :]
                dets_nms.append(cls_dets)
                num_dets += cls_dets.shape[0]
            
            print 'testing {}, frame {}, {:.4f}s, {} detections'.format(vf, frame_idx, toc(), num_dets)
            # save results
            #im = cv2.imread(im_path + im_name)
            #im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
            #im_bbox = show_boxes(im, dets_nms, classes, 1)
            #cv2.imwrite(im_path + im_name.replace(".jpg", "_bbox.jpg"), im_bbox)
            save_im, outputs = show_boxes(im_original, dets_nms, classes, 1, False)
            #cv2.imwrite(os.path.join(save_path, "{}_{}.jpg".format(vf.replace(".mp4", ""), str(frame_idx).zfill(5))), save_im)
            writer.writeFrame(save_im)
            
            for out in outputs:
                vout.append([frame_idx] + out)
        
        # save the whole video detection into pickle file
        writer.close()
        with open(os.path.join(save_path, vf.replace(".mp4", "_detect.pkl")), "wb") as f:
            pickle.dump(vout, f, protocol=2)
        pbar.update(1)
        
    pbar.close()    
    print 'done'
Example #20
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn'
    model = '/../model/rfcn_vid'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_test_symbol(config)

    # set up class names
    num_classes = 31
    classes = ['airplane', 'antelope', 'bear', 'bicycle',
               'bird', 'bus', 'car', 'cattle',
               'dog', 'domestic_cat', 'elephant', 'fox',
               'giant_panda', 'hamster', 'horse', 'lion',
               'lizard', 'monkey', 'motorcycle', 'rabbit',
               'red_panda', 'sheep', 'snake', 'squirrel',
               'tiger', 'train', 'turtle', 'watercraft',
               'whale', 'zebra']

    # load demo data
    image_names = glob.glob(cur_path + '/../demo/ILSVRC2015_val_00007010/*.JPEG')
    output_dir = cur_path + '/../demo/rfcn/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    #

    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), '{} does not exist'.format(im_name)
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})


    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + model, 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)

    # test
    time = 0
    count = 0
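    # 'time / count' below is the running mean per-image latency; its
    # reciprocal gives throughput in frames per second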
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
        time += toc()
        count += 1
        print 'testing {} {:.4f}s'.format(im_name, time/count)

        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)

        # visualize
        im = cv2.imread(im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        # show_boxes(im, dets_nms, classes, 1)
        out_im = draw_boxes(im, dets_nms, classes, 1)
        _, filename = os.path.split(im_name)
        cv2.imwrite(output_dir + filename,out_im)

    print 'done'
Example #21
def gpu_mask_voting(masks,
                    boxes,
                    scores,
                    num_classes,
                    max_per_image,
                    im_width,
                    im_height,
                    nms_thresh,
                    merge_thresh,
                    binary_thresh=0.4,
                    device_id=0):
    """
    A wrapper around GPU mask voting; the class of each box and mask is already known.
    """
    nms = gpu_nms_wrapper(nms_thresh, device_id)
    # Intermediate results
    t_boxes = [[] for _ in xrange(num_classes)]
    t_scores = [[] for _ in xrange(num_classes)]
    t_all_scores = []
    for i in xrange(1, num_classes):
        dets = np.hstack((boxes.astype(np.float32), scores[:, i:i + 1]))
        inds = nms(dets)
        num_keep = min(len(inds), max_per_image)
        inds = inds[:num_keep]
        t_boxes[i] = boxes[inds]
        t_scores[i] = scores[inds, i]
        t_all_scores.extend(scores[inds, i])

    sorted_scores = np.sort(t_all_scores)[::-1]
    num_keep = min(len(sorted_scores), max_per_image)
    thresh = max(sorted_scores[num_keep - 1], 1e-3)

    # inds array to record which mask should be aggregated together
    candidate_inds = []
    # weight for each element in the candidate inds
    candidate_weights = []
    # start position for candidate array
    candidate_start = []
    candidate_scores = []
    class_bar = [[] for _ in xrange(num_classes)]

    for i in xrange(1, num_classes):
        keep = np.where(t_scores[i] >= thresh)
        t_boxes[i] = t_boxes[i][keep]
        t_scores[i] = t_scores[i][keep]

    # organize helper variable for gpu mask voting
    for c in xrange(1, num_classes):
        num_boxes = len(t_boxes[c])
        for i in xrange(num_boxes):
            cur_ov = bbox_overlaps(boxes.astype(np.float),
                                   t_boxes[c][i, np.newaxis].astype(np.float))
            cur_inds = np.where(cur_ov >= merge_thresh)[0]
            candidate_inds.extend(cur_inds)
            cur_weights = scores[cur_inds, c]
            cur_weights = cur_weights / sum(cur_weights)
            candidate_weights.extend(cur_weights)
            candidate_start.append(len(candidate_inds))
        candidate_scores.extend(t_scores[c])
        class_bar[c] = len(candidate_scores)

    candidate_inds = np.array(candidate_inds, dtype=np.int32)
    candidate_weights = np.array(candidate_weights, dtype=np.float32)
    candidate_start = np.array(candidate_start, dtype=np.int32)
    candidate_scores = np.array(candidate_scores, dtype=np.float32)

    # the input masks/boxes are relatively large
    # select only a subset of them are useful for mask merge
    unique_inds = np.unique(candidate_inds)
    unique_inds_order = unique_inds.argsort()
    unique_map = {}
    for i in xrange(len(unique_inds)):
        unique_map[unique_inds[i]] = unique_inds_order[i]
    for i in xrange(len(candidate_inds)):
        candidate_inds[i] = unique_map[candidate_inds[i]]
    boxes = boxes[unique_inds, ...]
    masks = masks[unique_inds, ...]

    boxes = np.round(boxes)
    result_mask, result_box = mask_voting_kernel(boxes, masks, candidate_inds,
                                                 candidate_start,
                                                 candidate_weights,
                                                 binary_thresh, im_height,
                                                 im_width, device_id)
    result_box = np.hstack((result_box, candidate_scores[:, np.newaxis]))

    list_result_box = [[] for _ in xrange(num_classes)]
    list_result_mask = [[] for _ in xrange(num_classes)]
    cls_start = 0
    for i in xrange(1, num_classes):
        cls_end = class_bar[i]
        cls_box = result_box[cls_start:cls_end, :]
        cls_mask = result_mask[cls_start:cls_end, :]
        valid_ind = np.where((cls_box[:, 2] > cls_box[:, 0])
                             & (cls_box[:, 3] > cls_box[:, 1]))[0]
        ########################
        # cls_box = cls_box[valid_ind, :]
        # cls_mask = cls_mask[valid_ind, :]

        # #print 'cls_box', cls_box
        # def nms(dets, thresh):
        #     """
        #     greedily select boxes with high confidence and overlap with current maximum <= thresh
        #     rule out overlap >= thresh
        #     :param dets: [[x1, y1, x2, y2 score]]
        #     :param thresh: retain overlap < thresh
        #     :return: indexes to keep
        #     """
        #     if dets.shape[0] == 0:
        #         return []

        #     x1 = dets[:, 0]
        #     y1 = dets[:, 1]
        #     x2 = dets[:, 2]
        #     y2 = dets[:, 3]
        #     scores = dets[:, 4]

        #     areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        #     order = scores.argsort()[::-1]

        #     keep = []
        #     while order.size > 0:
        #         i = order[0]
        #         keep.append(i)
        #         xx1 = np.maximum(x1[i], x1[order[1:]])
        #         yy1 = np.maximum(y1[i], y1[order[1:]])
        #         xx2 = np.minimum(x2[i], x2[order[1:]])
        #         yy2 = np.minimum(y2[i], y2[order[1:]])

        #         w = np.maximum(0.0, xx2 - xx1 + 1)
        #         h = np.maximum(0.0, yy2 - yy1 + 1)
        #         inter = w * h
        #         ovr = inter / (areas[i] + areas[order[1:]] - inter)

        #         inds = np.where(ovr <= thresh)[0]
        #         order = order[inds + 1]

        #     return keep

        # #print 'aaaaaaaa'
        # keep = nms(cls_box, 0.3)  # a low threshold discards more boxes here
        # #print 'aa', len(keep), len(boxes_scored_ar)
        # #print 'keep', keep

        # #print 'a', len(boxes_scored_ar)
        # #print 'b', len(boxes_scored_ar[keep, :])
        # cls_box = cls_box[keep, :]
        # cls_mask = cls_mask[keep, :]

        # # print 'cls_box', cls_box
        # # print 'cls_mask', cls_mask

        # list_result_box[i] = cls_box
        # list_result_mask[i] = cls_mask

        #################

        list_result_box[i] = cls_box[valid_ind, :]  # comment out when the optional NMS above is enabled
        list_result_mask[i] = cls_mask[valid_ind, :]  # comment out when the optional NMS above is enabled
        cls_start = cls_end

    return list_result_mask, list_result_box
Example #22
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_flownet_rfcn'
    model = '/../model/rfcn_dff_flownet_vid'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    key_sym = sym_instance.get_key_test_symbol(config)
    cur_sym = sym_instance.get_cur_test_symbol(config)

    # set up class names
    num_classes = 31
    classes = [
        'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car',
        'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda',
        'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle', 'rabbit',
        'red_panda', 'sheep', 'snake', 'squirrel', 'tiger', 'train', 'turtle',
        'watercraft', 'whale', 'zebra'
    ]

    # load demo data
    image_names = glob.glob(cur_path +
                            '/../demo/ILSVRC2015_val_00007010/*.JPEG')
    output_dir = cur_path + '/../demo/rfcn_dff/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    key_frame_interval = 10

    #

    data = []
    key_im_tensor = None
    for idx, im_name in enumerate(image_names):
        assert os.path.exists(im_name), '{} does not exist'.format(im_name)
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        if idx % key_frame_interval == 0:
            key_im_tensor = im_tensor
        data.append({
            'data': im_tensor,
            'im_info': im_info,
            'data_key': key_im_tensor,
            'feat_key': np.zeros((1, config.network.DFF_FEAT_DIM, 1, 1))
        })

    # get predictor
    data_names = ['data', 'im_info', 'data_key', 'feat_key']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in range(len(data))]
    max_data_shape = [[
        ('data', (1, 3, max([v[0] for v in config.SCALES]),
                  max([v[1] for v in config.SCALES]))),
        ('data_key', (1, 3, max([v[0] for v in config.SCALES]),
                      max([v[1] for v in config.SCALES]))),
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in range(len(data))]
    provide_label = [None for i in range(len(data))]
    arg_params, aux_params = load_param(cur_path + model, 0, process=True)
    key_predictor = Predictor(key_sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
    cur_predictor = Predictor(cur_sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in range(2):
        data_batch = mx.io.DataBatch(data=[data[j]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[j])
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in range(len(data_batch.data))
        ]
        if j % key_frame_interval == 0:
            scores, boxes, data_dict, feat = im_detect(key_predictor,
                                                       data_batch, data_names,
                                                       scales, config)
        else:
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            scores, boxes, data_dict, _ = im_detect(cur_predictor, data_batch,
                                                    data_names, scales, config)

    print("warmup done")
    # test
    time = 0
    count = 0
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in range(len(data_batch.data))
        ]

        tic()
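        # DFF inference: key frames run the full network (key_predictor) and
        # cache their feature map in 'feat'; the frames in between feed that
        # cached key feature to cur_predictor, which only computes optical flow
        # and warps the key features, which is where the speed-up comes from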
        if idx % key_frame_interval == 0:
            scores, boxes, data_dict, feat = im_detect(key_predictor,
                                                       data_batch, data_names,
                                                       scales, config)
        else:
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            scores, boxes, data_dict, _ = im_detect(cur_predictor, data_batch,
                                                    data_names, scales, config)
        time += toc()
        count += 1
        print('testing {} {:.4f}s'.format(im_name, time / count))

        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        # visualize
        im = cv2.imread(im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        # show_boxes(im, dets_nms, classes, 1)
        out_im = draw_boxes(im, dets_nms, classes, 1)
        _, filename = os.path.split(im_name)
        cv2.imwrite(output_dir + filename, out_im)

    print('done')
Example #23
def main(tempFileList, fileOp):
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn_dcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    out_dir = os.path.join(
        cur_path,
        'demo/output/terror-det-rg-data-output/terror-det-v0.9-test/JPEGImages'
    )
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    # set up class names (num_classes counts the background class as well)
    num_classes = 7
    classes = [
        'tibetan flag', 'guns', 'knives', 'not terror', 'islamic flag',
        'isis flag'
    ]

    # load demo data
    image_names = tempFileList
    data = []
    for im_name in image_names:
        im_file = im_name
        print(im_file)
        im = cv2.imread(im_file, cv2.IMREAD_COLOR)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/demo/models/rfcn_voc',
                                        10,
                                        process=True)
    # modified by zxt
    #mx.model.save_checkpoint('f1/final', 10, sym, arg_params, aux_params)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[0])
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)

    # test
    # fileOp = open(os.path.join(cur_path, 'terror-det-rg-test-result.txt'), 'w')
    fileOp = fileOp
    for idx, im_name in enumerate(image_names):
        print("begining process %s" % (im_name))
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print 'testing {} {:.4f}s'.format(im_name, toc())
        # visualize
        im = cv2.imread(im_name)
        #im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        im_result = show_boxes(fileOp, im_name, im, dets_nms, classes, 1)
        cv2.imwrite(os.path.join(out_dir, im_name.split('/')[-1]), im_result)
    print 'done'
Example #24
    def forward(self, is_train, req, in_data, out_data, aux):
        before_pyramid_proposal = datetime.now()
        nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)

        batch_size = in_data[0].shape[0]
        if batch_size > 1:
            raise ValueError(
                "multiple images per device is not implemented")

        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)
        LAYER_NUM = len(in_data) / 2
        LAYER_NUM = 11  # hard-coded override: only the single-level 'stride64' branch below is used
        if LAYER_NUM == 7:
            cls_prob_dict = {
                'stride64': in_data[6],
                'stride32': in_data[5],
                'stride16': in_data[4],
                'stride8': in_data[3],
                'stride4': in_data[2],
                'stride2': in_data[1],
                'stride1': in_data[0],
            }
            bbox_pred_dict = {
                'stride64': in_data[13],
                'stride32': in_data[12],
                'stride16': in_data[11],
                'stride8': in_data[10],
                'stride4': in_data[9],
                'stride2': in_data[8],
                'stride1': in_data[7],
            }

        elif LAYER_NUM == 6:
            cls_prob_dict = {
                'stride64': in_data[5],
                'stride32': in_data[4],
                'stride16': in_data[3],
                'stride8': in_data[2],
                'stride4': in_data[1],
                'stride2': in_data[0],
            }
            bbox_pred_dict = {
                'stride64': in_data[11],
                'stride32': in_data[10],
                'stride16': in_data[9],
                'stride8': in_data[8],
                'stride4': in_data[7],
                'stride2': in_data[6],
            }

        elif LAYER_NUM == 5:
            cls_prob_dict = {
                'stride64': in_data[4],
                'stride32': in_data[3],
                'stride16': in_data[2],
                'stride8': in_data[1],
                'stride4': in_data[0],
            }
            bbox_pred_dict = {
                'stride64': in_data[9],
                'stride32': in_data[8],
                'stride16': in_data[7],
                'stride8': in_data[6],
                'stride4': in_data[5],
            }
        elif LAYER_NUM == 2:
            cls_prob_dict = {
                'stride64': in_data[4],
                'stride32': in_data[3],
            }
            bbox_pred_dict = {
                'stride64': in_data[9],
                'stride32': in_data[8],
            }
        elif LAYER_NUM == 11:
            cls_prob_dict = {
                'stride64': in_data[0],
            }
            bbox_pred_dict = {
                'stride64': in_data[1],
            }
        elif LAYER_NUM == 1:
            cls_prob_dict = {
                'stride1': in_data[0],
            }
            bbox_pred_dict = {
                'stride1': in_data[1],
            }
        elif LAYER_NUM == 3:
            cls_prob_dict = {
                'stride64': in_data[2],
                'stride32': in_data[1],
                'stride1': in_data[0],
            }
            bbox_pred_dict = {
                'stride64': in_data[5],
                'stride32': in_data[4],
                'stride1': in_data[3],
            }
        '''
        cls_prob_dict = {
            'stride8': in_data[3],
            'stride4': in_data[2],
            'stride2': in_data[1],
            'stride1': in_data[0],
        }
        bbox_pred_dict = {
            'stride8': in_data[7],
            'stride4': in_data[6],
            'stride2': in_data[5],
            'stride1': in_data[4],
        }
        '''
        '''
        cls_prob_dict = {
            'stride2': in_data[1],
            'stride1': in_data[0],
        }
        bbox_pred_dict = {
            'stride2': in_data[3],
            'stride1': in_data[2],
        }        
        '''
        pre_nms_topN = self._rpn_pre_nms_top_n
        post_nms_topN = self._rpn_post_nms_top_n
        min_size = self._rpn_min_size

        proposal_list = []
        score_list = []

        channel_list = []

        before_feat = datetime.now()

        for s in self._feat_stride:
            stride = int(s)
            sub_anchors = generate_anchors(base_size=stride,
                                           scales=self._scales,
                                           ratios=self._ratios)
            #print "cls_prob_dict['stride' + str(s)].shape:"+str(cls_prob_dict['stride' + str(s)].shape)
            scores = cls_prob_dict['stride' + str(s)].asnumpy()[:, self._num_anchors:, :, :]

            if DEBUG:
                scores1 = cls_prob_dict['stride' + str(s)].asnumpy()
                print "scores.shape:" + str(scores.shape)
                print "scores1.shape:" + str(scores1.shape)

            #print "scores.shape:"+str(scores.shape)
            bbox_deltas = bbox_pred_dict['stride' + str(s)].asnumpy()
            #print "bbox_deltas.shape:"+str(bbox_deltas.shape)
            im_info = in_data[-1].asnumpy()[0, :]
            # 1. Generate proposals from bbox_deltas and shifted anchors
            # use real image size instead of padded feature map sizes
            height, width = int(im_info[0] / stride), int(im_info[1] / stride)

            # Enumerate all shifts

            shift_x = np.arange(0, width) * stride
            shift_y = np.arange(0, height) * stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()

            # Enumerate all shifted anchors:
            #
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            before_enume = datetime.now()
            A = self._num_anchors
            K = shifts.shape[0]
            anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape(
                (1, K, 4)).transpose((1, 0, 2))
            anchors = anchors.reshape((K * A, 4))
            after_enume = datetime.now()
            #print "enume time:"+str((after_enume-before_enume).seconds)
            # Transpose and reshape predicted bbox transformations to get them
            # into the same order as the anchors:
            #
            # bbox deltas will be (1, 4 * A, H, W) format
            # transpose to (1, H, W, 4 * A)
            # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
            # in slowest to fastest order

            bbox_deltas = self._clip_pad(bbox_deltas, (height, width))

            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

            # Same story for the scores:
            #
            # scores are (1, A, H, W) format
            # transpose to (1, H, W, A)
            # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
            scores = self._clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
            if DEBUG:
                print "scores[:100]:" + str(scores[:50])
            channels = np.ones(scores.shape) * stride

            # Convert anchors into proposals via bbox transformations
            before_pred = datetime.now()
            proposals = bbox_pred(anchors, bbox_deltas)
            after_pred = datetime.now()
            #print "pred_time:"
            #print (after_pred-before_pred).seconds
            # 2. clip predicted boxes to image
            proposals = clip_boxes(proposals, im_info[:2])
            # 3. remove predicted boxes with either height or width < threshold
            # (NOTE: convert min_size to input image scale stored in im_info[2])
            if DEBUG:
                print str(min_size)
                print str(im_info[2])
            keep = self._filter_boxes(proposals, min_size * im_info[2])
            proposals = proposals[keep, :]
            if DEBUG:
                print "proposals3:" + str(proposals[0:10])
            scores = scores[keep]

            channels = channels[keep]

            proposal_list.append(proposals)
            score_list.append(scores)
            channel_list.append(channels)
        after_feat = datetime.now()
        #print "feat time:"
        #print (after_feat-before_feat).seconds

        proposals = np.vstack(proposal_list)
        scores = np.vstack(score_list)
        channels = np.vstack(channel_list)
        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        before_sort = datetime.now()
        order = scores.ravel().argsort()[::-1]
        after_sort = datetime.now()
        #print "sort time:"
        #print (after_sort-before_sort).seconds
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]
        channels = channels[order]
        if DEBUG:
            print '-------1-------'
            print channels.shape
            for s in self._feat_stride:
                print "stride:" + str(s)
                print len(np.where(channels == float(s))[0])
            print "proposals:" + str(proposals[0:20])
        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        det = np.hstack((proposals, scores)).astype(np.float32)

        keep = nms(det)

        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        # pad to ensure output size remains unchanged
        if len(keep) < post_nms_topN:
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
            keep = np.hstack((keep, pad))
        proposals = proposals[keep, :]
        scores = scores[keep]
        channels = channels[keep]
        if DEBUG:
            print '-------2-------'
            print channels.shape
            for s in self._feat_stride:
                print "stride:" + str(s)
                print len(np.where(channels == float(s))[0])
            print "proposals:" + str(proposals[0:20])
            print "scores:" + str(scores[0:20])
        f_chan = open('channels.txt', 'w')
        for ii in range(channels.shape[0]):
            f_chan.write(str(channels[ii][0]) + ' ')
        f_chan.close()

        # Output rois array
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))
        # if is_train:
        self.assign(out_data[0], req[0], blob)
        #print "out_data[0].shape"+str(out_data[0].shape)
        if self._output_score:
            self.assign(out_data[1], req[1],
                        scores.astype(np.float32, copy=False))
        after_pyramid_proposal = datetime.now()
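
The anchor enumeration above leans on numpy broadcasting: adding the A base anchors, shaped (1, A, 4), to the K per-cell shifts, shaped (K, 1, 4), produces all K*A shifted anchors in one vectorized step. A minimal self-contained sketch of that step, with toy values standing in for the generate_anchors() output:

import numpy as np

# Toy stand-ins: 2 base anchors (x1, y1, x2, y2) and a 3x2 feature map
# at stride 16; in the operator these come from generate_anchors().
base_anchors = np.array([[-8., -8., 8., 8.],
                         [-16., -16., 16., 16.]])  # (A, 4), A = 2
stride = 16
height, width = 3, 2

shift_x = np.arange(0, width) * stride
shift_y = np.arange(0, height) * stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
# One (x1, y1, x2, y2) shift per feature-map cell: (K, 4), K = H * W
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()

A = base_anchors.shape[0]
K = shifts.shape[0]
# (1, A, 4) + (K, 1, 4) broadcasts to (K, A, 4): every anchor at every
# cell, rows ordered by (h, w, a) to match the reshaped scores and deltas
anchors = base_anchors.reshape((1, A, 4)) + \
    shifts.reshape((1, K, 4)).transpose((1, 0, 2))
anchors = anchors.reshape((K * A, 4))
print(anchors.shape)  # (12, 4), i.e. (K * A, 4)
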
Example #25
    def forward(self, is_train, req, in_data, out_data, aux):
        nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)

        batch_size = in_data[0].shape[0]
        if batch_size > 1:
            raise ValueError(
                "Sorry, multiple images each device is not implemented")

        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        if LAYER_NUM == 7:
            cls_prob_dict = {
                'stride64': in_data[6],
                'stride32': in_data[5],
                'stride16': in_data[4],
                'stride8': in_data[3],
                'stride4': in_data[2],
                'stride2': in_data[1],
                'stride1': in_data[0],
            }
            bbox_pred_dict = {
                'stride64': in_data[13],
                'stride32': in_data[12],
                'stride16': in_data[11],
                'stride8': in_data[10],
                'stride4': in_data[9],
                'stride2': in_data[8],
                'stride1': in_data[7],
            }

        elif LAYER_NUM == 6:
            cls_prob_dict = {
                'stride64': in_data[5],
                'stride32': in_data[4],
                'stride16': in_data[3],
                'stride8': in_data[2],
                'stride4': in_data[1],
                'stride2': in_data[0],
            }
            bbox_pred_dict = {
                'stride64': in_data[11],
                'stride32': in_data[10],
                'stride16': in_data[9],
                'stride8': in_data[8],
                'stride4': in_data[7],
                'stride2': in_data[6],
            }

        elif LAYER_NUM == 5:
            cls_prob_dict = {
                'stride64': in_data[4],
                'stride32': in_data[3],
                'stride16': in_data[2],
                'stride8': in_data[1],
                'stride4': in_data[0],
            }
            bbox_pred_dict = {
                'stride64': in_data[9],
                'stride32': in_data[8],
                'stride16': in_data[7],
                'stride8': in_data[6],
                'stride4': in_data[5],
            }
        '''
        cls_prob_dict = {
            'stride8': in_data[3],
            'stride4': in_data[2],
            'stride2': in_data[1],
            'stride1': in_data[0],
        }
        bbox_pred_dict = {
            'stride8': in_data[7],
            'stride4': in_data[6],
            'stride2': in_data[5],
            'stride1': in_data[4],
        }
        '''
        '''
        cls_prob_dict = {
            'stride2': in_data[1],
            'stride1': in_data[0],
        }
        bbox_pred_dict = {
            'stride2': in_data[3],
            'stride1': in_data[2],
        }        
        '''
        pre_nms_topN = self._rpn_pre_nms_top_n
        post_nms_topN = self._rpn_post_nms_top_n
        min_size = self._rpn_min_size

        proposal_list = []
        score_list = []
        channel_record_list = []
        crop_nums = 9

        for s in self._feat_stride:
            stride = int(s)
            sub_anchors = generate_anchors(base_size=stride,
                                           scales=self._scales,
                                           ratios=self._ratios)
            #print "cls_prob_dict['stride' + str(s)].shape:"+str(cls_prob_dict['stride' + str(s)].shape)
            #print cls_prob_dict['stride' + str(s)].asnumpy().shape
            scores = cls_prob_dict['stride' +
                                   str(s)].asnumpy()[:,
                                                     self._num_anchors:, :, :]
            #print "scores.shape:"+str(scores.shape)
            bbox_deltas = bbox_pred_dict['stride' + str(s)].asnumpy()
            #print "bbox_deltas.shape:"+str(bbox_deltas.shape)
            im_info = in_data[-1].asnumpy()[0, :]
            # 1. Generate proposals from bbox_deltas and shifted anchors
            # use real image size instead of padded feature map sizes
            height, width = int(im_info[0] / stride), int(im_info[1] / stride)

            # Enumerate all shifts
            shift_x = np.arange(0, width) * stride
            shift_y = np.arange(0, height) * stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()

            # Enumerate all shifted anchors:
            #
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = self._num_anchors
            K = shifts.shape[0]
            temp_anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape(
                (1, K, 4)).transpose((1, 0, 2))
            temp_anchors = temp_anchors.reshape((K * A, 4))
            anchors = np.zeros((0, 4))
            channel_records = np.zeros((0, 1))
            for channel in range(crop_nums):
                anchors = np.vstack((anchors, temp_anchors))
                channels = np.ones(K * A) * channel
                channels = channels.reshape((-1, 1))
                channel_records = np.vstack((channel_records, channels))
            # Transpose and reshape predicted bbox transformations to get them
            # into the same order as the anchors:
            #
            # bbox deltas will be (1, 4 * A, H, W) format
            # transpose to (1, H, W, 4 * A)
            # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
            # in slowest to fastest order
            bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

            # Same story for the scores:
            #
            # scores are (1, A, H, W) format
            # transpose to (1, H, W, A)
            # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
            scores = self._clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

            # Convert anchors into proposals via bbox transformations
            proposals = bbox_pred(anchors, bbox_deltas)

            # 2. clip predicted boxes to image
            proposals = clip_boxes(proposals, im_info[:2])

            # 3. remove predicted boxes with either height or width < threshold
            # (NOTE: convert min_size to input image scale stored in im_info[2])

            #print "proposals.shape"
            #print proposals.shape
            keep = self._filter_boxes(proposals, min_size * im_info[2])
            proposals = proposals[keep, :]
            #print "scores.shape"
            #print scores.shape
            scores = scores[keep]

            channel_records = channel_records[keep]

            proposal_list.append(proposals)
            score_list.append(scores)
            channel_record_list.append(channel_records)

        channel_records = np.vstack(channel_record_list)
        proposals = np.vstack(proposal_list)
        scores = np.vstack(score_list)

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]
        channel_records = channel_records[order]
        #print "channel_records:"
        #print channel_records
        #print channel_records.shape
        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        # 9. nms on different channel
        keeps = np.zeros(0)
        avg_post_nms_topN = int(post_nms_topN / crop_nums)
        for i in range(crop_nums):
            channel_index = np.where(channel_records == i)[0]
            temp_ch_proposals = proposals[channel_index, :]
            #print proposals.shape
            #print temp_ch_proposals.shape
            temp_scores = scores[channel_index]
            #print temp_scores.shape
            det = np.hstack(
                (temp_ch_proposals, temp_scores)).astype(np.float32)
            #print det.shape
            #keep = np.zeros(1)
            if det.shape[0] > 0:
                keep = nms(det)
                if avg_post_nms_topN > 0:
                    keep = keep[:avg_post_nms_topN]
                # pad to ensure output size remains unchanged
                if len(keep) < avg_post_nms_topN:
                    pad = npr.choice(keep, size=avg_post_nms_topN - len(keep))
                    keep = np.hstack((keep, pad))
                keeps = np.hstack((keeps, channel_index[keep])).astype(int)

        proposals = proposals[keeps, :]
        scores = scores[keeps]
        channel_records = channel_records[keeps]
        #proposals.hstack((proposals,channel_records))
        #print channel_records.shape
        # Output rois array
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))
        # if is_train:
        self.assign(out_data[0], req[0], blob)
        #print "out_data[0].shape"+str(out_data[0].shape)
        if self._output_score:
            self.assign(out_data[1], req[1],
                        scores.astype(np.float32, copy=False))
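
The npr.choice padding above keeps the output blob at exactly avg_post_nms_topN rows per channel, so the operator's output shape stays constant across images; duplicated proposals are harmless to downstream RoI pooling. A minimal sketch of the trick (it assumes keep is non-empty, which holds whenever NMS returned anything):

import numpy as np
import numpy.random as npr

def pad_keep(keep, top_n):
    # Truncate to top_n, or duplicate randomly chosen survivors
    # until exactly top_n indices remain; assumes len(keep) >= 1.
    keep = np.asarray(keep)
    if len(keep) < top_n:
        pad = npr.choice(keep, size=top_n - len(keep))
        keep = np.hstack((keep, pad))
    return keep[:top_n]

print(pad_keep([3, 7], 5))  # e.g. [3 7 7 3 7]
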
Example #26
    def forward(self, is_train, req, in_data, out_data, aux):
        nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)

        batch_size = in_data[0].shape[0]
        if batch_size > 1:
            raise ValueError(
                "Sorry, multiple images each device is not implemented")

        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        pre_nms_topN = self._rpn_pre_nms_top_n
        post_nms_topN = self._rpn_post_nms_top_n
        min_size = self._rpn_min_size

        # the first set of anchors are background probabilities
        # keep the second part
        scores = in_data[0].asnumpy()[:, self._num_anchors:, :, :]
        bbox_deltas = in_data[1].asnumpy()
        im_info = in_data[2].asnumpy()[0, :]

        if DEBUG:
            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print('scale: {}'.format(im_info[2]))

        # 1. Generate proposals from bbox_deltas and shifted anchors
        # use real image size instead of padded feature map sizes
        height, width = int(im_info[0] / self._feat_stride), int(
            im_info[1] / self._feat_stride)

        if DEBUG:
            print('score map size: {}'.format(scores.shape))
            print("resudial: {}".format(
                (scores.shape[2] - height, scores.shape[3] - width)))

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = self._clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_pred(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = self._filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        det = np.hstack((proposals, scores)).astype(np.float32)
        keep = nms(det)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        # pad to ensure output size remains unchanged
        if len(keep) < post_nms_topN:
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
            keep = np.hstack((keep, pad))
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois array
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))
        self.assign(out_data[0], req[0], blob)

        if self._output_score:
            self.assign(out_data[1], req[1],
                        scores.astype(np.float32, copy=False))
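
gpu_nms_wrapper is used throughout these examples without being shown; it presumably returns a closure running standard greedy IoU-based NMS on the given GPU. For reference, the classic pure-numpy version of the same algorithm (the well-known Fast R-CNN py_cpu_nms), usable when the CUDA extension is unavailable:

import numpy as np

def cpu_nms(dets, thresh):
    # dets: (N, 5) array of (x1, y1, x2, y2, score); returns kept indices.
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the current top box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        ovr = w * h / (areas[i] + areas[order[1:]] - w * h)
        # Keep only boxes that overlap the kept box by at most thresh
        order = order[np.where(ovr <= thresh)[0] + 1]
    return keep
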
Example #27
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'impression_network_dynamic_offset_sparse'
    model = '/../local_run_output/impression_dynamic_offset-lr-10000-times-neighbor-4-dense-4'
    first_sym_instance = eval(config.symbol + '.' + config.symbol)()
    key_sym_instance = eval(config.symbol + '.' + config.symbol)()
    cur_sym_instance = eval(config.symbol + '.' + config.symbol)()

    first_sym = first_sym_instance.get_first_test_symbol_impression(config)
    key_sym = key_sym_instance.get_key_test_symbol_impression(config)
    cur_sym = cur_sym_instance.get_cur_test_symbol_impression(config)

    # set up class names
    num_classes = 31
    classes = [
        'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car',
        'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda',
        'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle', 'rabbit',
        'red_panda', 'sheep', 'snake', 'squirrel', 'tiger', 'train', 'turtle',
        'watercraft', 'whale', 'zebra'
    ]

    # load demo data
    image_names = glob.glob(cur_path +
                            '/../demo/ILSVRC2015_val_00011005/*.JPEG')
    output_dir = cur_path + '/../demo/motion-prior-output-00011005/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    key_frame_interval = 10
    image_names.sort()
    data = []
    for idx, im_name in enumerate(image_names):
        assert os.path.exists(im_name), ('%s does not exist' % im_name)
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        if idx % key_frame_interval == 0:
            if idx == 0:
                data_oldkey = im_tensor.copy()
                data_newkey = im_tensor.copy()
                data_cur = im_tensor.copy()
            else:
                data_oldkey = data_newkey.copy()
                data_newkey = im_tensor
        else:
            data_cur = im_tensor
        shape = im_tensor.shape
        infer_height = int(np.ceil(shape[2] / 16.0))
        infer_width = int(np.ceil(shape[3] / 16.0))
        data.append({
            'data_oldkey':
            data_oldkey,
            'data_newkey':
            data_newkey,
            'data_cur':
            data_cur,
            'im_info':
            im_info,
            'impression':
            np.zeros(
                (1, config.network.DFF_FEAT_DIM, infer_height, infer_width)),
            'key_feat_task':
            np.zeros(
                (1, config.network.DFF_FEAT_DIM, infer_height, infer_width))
        })

    # get predictor
    data_names = [
        'data_oldkey', 'data_cur', 'data_newkey', 'im_info', 'impression',
        'key_feat_task'
    ]
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[
        ('data_oldkey', (1, 3, max([v[0] for v in config.SCALES]),
                         max([v[1] for v in config.SCALES]))),
        ('data_newkey', (1, 3, max([v[0] for v in config.SCALES]),
                         max([v[1] for v in config.SCALES]))),
        ('data_cur', (1, 3, max([v[0] for v in config.SCALES]),
                      max([v[1] for v in config.SCALES]))),
        ('impression', (1, 1024, 38, 63)), ('key_feat_task', (1, 1024, 38, 63))
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + model, 4, process=True)
    first_predictor = Predictor(first_sym,
                                data_names,
                                label_names,
                                context=[mx.gpu(0)],
                                max_data_shapes=max_data_shape,
                                provide_data=provide_data,
                                provide_label=provide_label,
                                arg_params=arg_params,
                                aux_params=aux_params)
    key_predictor = Predictor(key_sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
    cur_predictor = Predictor(cur_sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)
    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[j]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[j])
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][3].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        if j % key_frame_interval == 0:  # keyframe
            if j == 0:  # first frame
                scores, boxes, data_dict, conv_feat, _, _, _ = im_detect_impression_online(
                    first_predictor, data_batch, data_names, scales, config)
                feat_task = conv_feat
                impression = conv_feat
            else:  # keyframe
                data_batch.data[0][-2] = impression
                data_batch.provide_data[0][-2] = ('impression',
                                                  impression.shape)
                scores, boxes, data_dict, conv_feat, impression, feat_task = im_detect_impression_online(
                    key_predictor, data_batch, data_names, scales, config)
        else:  # current frame
            data_batch.data[0][-1] = feat_task
            data_batch.provide_data[0][-1] = ('key_feat_task', feat_task.shape)
            scores, boxes, data_dict, _, _, _, _ = im_detect_impression_online(
                cur_predictor, data_batch, data_names, scales, config)
    print "warmup done"
    # test
    time = 0
    count = 0
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][3].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        tic()
        print(idx)
        if idx % key_frame_interval == 0:  # keyframe
            if idx == 0:  # first frame
                scores, boxes, data_dict, conv_feat, _, _, _ = im_detect_impression_online(
                    first_predictor, data_batch, data_names, scales, config)
                feat_task = conv_feat
                impression = conv_feat
                feat_task_numpy = feat_task.asnumpy()
                np.save("features/impression_%s.npy" % (idx), feat_task_numpy)
            else:  # keyframe
                data_batch.data[0][-2] = impression
                data_batch.provide_data[0][-2] = ('impression',
                                                  impression.shape)

                scores, boxes, data_dict, conv_feat, impression, feat_task, _ = im_detect_impression_online(
                    key_predictor, data_batch, data_names, scales, config)
                feat_task_key_numpy = feat_task.asnumpy()
                np.save("features/impression_%s.npy" % (idx),
                        feat_task_key_numpy)
        else:  # current frame
            data_batch.data[0][-1] = feat_task
            data_batch.provide_data[0][-1] = ('key_feat_task', feat_task.shape)
            scores, boxes, data_dict, _, _, _, feat_task_cur = im_detect_impression_online(
                cur_predictor, data_batch, data_names, scales, config)
            if idx >= 1:
                feat_task_cur_numpy = feat_task_cur.asnumpy()
                np.save("features/impression_%s.npy" % (idx),
                        feat_task_cur_numpy)
                #import pdb;pdb.set_trace()
        time += toc()
        count += 1
        print 'testing {} {:.4f}s'.format(im_name, time / count)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC \
                else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        # visualize
        im = cv2.imread(im_name)
        #im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        # show_boxes(im, dets_nms, classes, 1)
        out_im = draw_boxes(im, dets_nms, classes, 1)
        _, filename = os.path.split(im_name)
        cv2.imwrite(output_dir + filename, out_im)
    print 'done'
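
The per-class loop at the end of this demo (slice out one class's scores, stack with its boxes, run NMS, drop low-confidence rows) recurs almost verbatim in the examples below. A hedged helper capturing the shared pattern, with the 0.7 cut-off exposed as a parameter:

import numpy as np

def postprocess(scores, boxes, nms, class_agnostic=True, conf_thresh=0.7):
    # scores: (N, num_classes); boxes: (N, 4) if class-agnostic,
    # else (N, 4 * num_classes); nms: callable mapping an (M, 5)
    # dets array to a list of kept row indices.
    dets_nms = []
    for j in range(1, scores.shape[1]):  # skip background class 0
        cls_scores = scores[:, j, np.newaxis]
        cls_boxes = boxes[:, 4:8] if class_agnostic \
            else boxes[:, j * 4:(j + 1) * 4]
        cls_dets = np.hstack((cls_boxes, cls_scores))
        keep = nms(cls_dets)
        cls_dets = cls_dets[keep, :]
        dets_nms.append(cls_dets[cls_dets[:, -1] > conf_thresh, :])
    return dets_nms
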
Example #28
# provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
# provide_label = [None for i in xrange(len(data))]
# arg_params, aux_params = load_param(cur_path + model, 0, process=True)
# key_predictor = Predictor(key_sym, data_names, label_names,
#                         context=[ctx], max_data_shapes=max_data_shape,
#                         provide_data=provide_data, provide_label=provide_label,
#                         arg_params=arg_params, aux_params=aux_params)
# cur_predictor = Predictor(cur_sym, data_names, label_names,
#                         context=[ctx], max_data_shapes=max_data_shape,
#                         provide_data=provide_data, provide_label=provide_label,
#                         arg_params=arg_params, aux_params=aux_params)

if device_name == 'cpu':
    nms = cpu_nms_wrapper(config.TEST.NMS)
else:
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

# print data[0]['data'].shape

#%%
from collections import namedtuple
BatchKeyFeat = namedtuple('BatchKeyFeat', ['data'])
BatchKeyRpn = namedtuple('BatchKeyRpn', ['conv_feat', 'im_info'])
BatchKey = namedtuple('BatchKey', ['data', 'im_info', 'data_key', 'feat_key'])

# lists to store running time.
time_list_key_feat = []
time_list_key_rpn = []
time_list_key = []

time_list_cur_flow = []
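
The time_list_* lists above presumably accumulate per-frame latencies for each stage of the key/current pipeline. A small sketch of how such lists might be summarized afterwards (mean and 90th percentile are illustrative choices, not from the original):

import numpy as np

def summarize(name, times):
    # times: list of per-frame latencies in seconds
    if not times:
        return
    t = np.asarray(times)
    print('%s: mean %.4fs, p90 %.4fs, n=%d'
          % (name, t.mean(), np.percentile(t, 90), t.size))

summarize('key_feat', [0.031, 0.029, 0.035])
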
Example #29
def inference_rcnn_UADETRAC(cfg,
                            dataset,
                            image_set,
                            root_path,
                            dataset_path,
                            ctx,
                            prefix,
                            epoch,
                            vis,
                            ignore_cache,
                            shuffle,
                            has_rpn,
                            proposal,
                            thresh,
                            logger=None,
                            output_path=None):
    if not logger:
        assert False, 'a logger is required'

    # print cfg
    pprint.pprint(cfg)
    logger.info('testing cfg:{}\n'.format(pprint.pformat(cfg)))

    # load symbol and testing data
    if has_rpn:
        sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
        sym = sym_instance.get_symbol(cfg, is_train=False)
        imdb = eval(dataset)(image_set,
                             root_path,
                             dataset_path,
                             result_path=output_path)
        #roidb = imdb.gt_roidb_Shuo()
        roidb = imdb.gt_roidb()
    else:
        sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
        sym = sym_instance.get_symbol_rfcn(cfg, is_train=False)
        imdb = eval(dataset)(image_set,
                             root_path,
                             dataset_path,
                             result_path=output_path)
        gt_roidb = imdb.gt_roidb_Shuo()
        roidb = eval('imdb.' + proposal + '_roidb')(gt_roidb)

    print 'len(roidb):', len(roidb)
    # get test data iter
    test_data = TestLoader(roidb,
                           cfg,
                           batch_size=len(ctx),
                           shuffle=shuffle,
                           has_rpn=has_rpn)

    # load model
    arg_params, aux_params = load_param(prefix, epoch, process=True)
    print 'inferring: ', prefix, ' epoch: ', epoch
    """# write parameters to file
    print 'type(arg_params):',type(arg_params)
    print 'type(aux_params):',type(aux_params)
    thefile1 = open('/raid10/home_ext/Deformable-ConvNets/data/data_Shuo/UADETRAC/arg_params.txt','w')
    thefile2 = open('/raid10/home_ext/Deformable-ConvNets/data/data_Shuo/UADETRAC/aux_params.txt','w')
    for item_arg in arg_params.items():
	thefile1.write(item_arg[0] + str(type(item_arg[1])) + str(item_arg[1].shape)+'\n')
    for item_aux in aux_params.items():
	thefile2.write(item_aux[0] + str(type(item_aux[1])) + str(item_aux[1].shape)+'\n')
    """

    # infer shape
    data_shape_dict = dict(test_data.provide_data_single)
    sym_instance.infer_shape(data_shape_dict)

    sym_instance.check_parameter_shapes(arg_params,
                                        aux_params,
                                        data_shape_dict,
                                        is_train=False)

    # decide maximum shape
    data_names = [k[0] for k in test_data.provide_data_single]
    label_names = None
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]),
                                 max([v[1] for v in cfg.SCALES])))]]
    if not has_rpn:
        max_data_shape.append(
            ('rois', (cfg.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5)))

    # create predictor
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=ctx,
                          max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data,
                          provide_label=test_data.provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)

    nms = gpu_nms_wrapper(cfg.TEST.NMS, 0)
    # start detection
    # pred_eval(predictor, test_data, imdb, cfg, vis=vis, ignore_cache=ignore_cache, thresh=thresh, logger=logger)
    print 'test_data.size', test_data.size
    print 'test_data:', test_data
    print 'data_names:', data_names
    print 'test_data.provide_data:', test_data.provide_data
    print 'test_data.provide_label:', test_data.provide_label
    nnn = 0
    classes = ['__background', 'vehicle']
    #num_classes = 10
    #classes = ['__DontCare__','Car','Suv','SmallTruck','MediumTruck','LargeTruck','Pedestrian','Bus','Van','GroupofPeople']
    for im_info, data_batch in test_data:
        print nnn
        #print 'roidb[nnn]:',roidb[nnn]['image']
        image_name = roidb[nnn]['image']
        tic()
        scales = [iim_info[0, 2] for iim_info in im_info]
        scores_all, boxes_all, data_dict_all = im_detect(
            predictor, data_batch, data_names, scales, cfg)
        boxes = boxes_all[0].astype('f')
        scores = scores_all[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if cfg.CLASS_AGNOSTIC \
                else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            #cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print 'testing {} {:.4f}s'.format(image_name, toc())
        # visualize
        im = cv2.imread(image_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

        #print 'cls_dets:',cls_dets
        #show_boxes(im, dets_nms, classes, 1)
        nnn = nnn + 1
        image_name_length = len(image_name.split('/'))
        sequence_name = image_name.split('/')[image_name_length - 2]
        output_file = os.path.join(
            '/raid10/home_ext/Deformable-ConvNets/data/data_Shuo/UADETRAC',
            'Outputs', sequence_name + '_Det_DFCN.txt')
        frame_id = int(image_name.split('/')[image_name_length - 1][3:8])

        thefile = open(output_file, 'a')

        det_id = 0
        for x_small, y_small, x_large, y_large, prob in dets_nms[0]:
            det_id += 1
            thefile.write(
                str(frame_id) + ',' + str(det_id) + ',' + str(x_small) + ',' +
                str(y_small) + ',' + str(max(x_large - x_small, 0.001)) + ',' +
                str(max(y_large - y_small, 0.001)) + ',' + str(prob) + '\n')
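
Each row written above is a comma-separated (frame_id, det_id, x, y, w, h, score) record, with width and height clamped to at least 0.001 so degenerate boxes survive downstream parsing. A hedged formatter for a single detection row, mirroring the write loop:

def format_det_row(frame_id, det_id, x1, y1, x2, y2, prob):
    # Convert corner coordinates to (x, y, w, h) and clamp w, h > 0,
    # as in the string concatenation above.
    w = max(x2 - x1, 0.001)
    h = max(y2 - y1, 0.001)
    return '%d,%d,%s,%s,%s,%s,%s\n' % (frame_id, det_id, x1, y1, w, h, prob)

print(format_det_row(1, 1, 10.0, 20.0, 50.0, 80.0, 0.97))
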
Example #30
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_flownet_deeplab'
    model1 = '/../model/rfcn_dff_flownet_vid'
    model2 = '/../model/deeplab_dcn_cityscapes'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    key_sym = sym_instance.get_key_test_symbol(config)
    cur_sym = sym_instance.get_cur_test_symbol(config)

    # settings
    num_classes = 19
    interv = args.interval
    num_ex = args.num_ex

    # load demo data
    image_names = sorted(
        glob.glob(cur_path +
                  '/../demo/cityscapes_data/cityscapes_frankfurt_all_i' +
                  str(interv) + '/*.png'))
    image_names = image_names[:interv * num_ex]
    label_files = sorted(
        glob.glob(
            cur_path +
            '/../demo/cityscapes_data/cityscapes_frankfurt_labels_all/*.png'))

    output_dir = cur_path + '/../demo/deeplab_dff/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    key_frame_interval = interv

    #

    data = []
    key_im_tensor = None
    for idx, im_name in enumerate(image_names):
        assert os.path.exists(im_name), ('%s does not exist' % im_name)
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        if idx % key_frame_interval == 0:
            key_im_tensor = im_tensor
        data.append({
            'data':
            im_tensor,
            'im_info':
            im_info,
            'data_key':
            key_im_tensor,
            'feat_key':
            np.zeros((1, config.network.DFF_FEAT_DIM, 1, 1))
        })

    # get predictor
    data_names = ['data', 'data_key', 'feat_key']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[
        ('data', (1, 3, max([v[0] for v in config.SCALES]),
                  max([v[1] for v in config.SCALES]))),
        ('data_key', (1, 3, max([v[0] for v in config.SCALES]),
                      max([v[1] for v in config.SCALES]))),
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    # models: rfcn_dff_flownet_vid, deeplab_cityscapes
    arg_params, aux_params = load_param_multi(cur_path + model1,
                                              cur_path + model2,
                                              0,
                                              process=True)
    key_predictor = Predictor(key_sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
    cur_predictor = Predictor(cur_sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[j]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[j])
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        if j % key_frame_interval == 0:
            # scores, boxes, data_dict, feat = im_detect(key_predictor, data_batch, data_names, scales, config)
            output_all, feat = im_segment(key_predictor, data_batch)
            output_all = [
                mx.ndarray.argmax(output['croped_score_output'],
                                  axis=1).asnumpy() for output in output_all
            ]
        else:
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            # scores, boxes, data_dict, _ = im_detect(cur_predictor, data_batch, data_names, scales, config)
            output_all, _ = im_segment(cur_predictor, data_batch)
            output_all = [
                mx.ndarray.argmax(output['croped_score_output'],
                                  axis=1).asnumpy() for output in output_all
            ]

    print "warmup done"
    # test
    time = 0
    count = 0
    hist = np.zeros((num_classes, num_classes))
    lb_idx = 0
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        if idx % key_frame_interval == 0:
            print '\nframe {} (key)'.format(idx)
            # scores, boxes, data_dict, feat = im_detect(key_predictor, data_batch, data_names, scales, config)
            output_all, feat = im_segment(key_predictor, data_batch)
            output_all = [
                mx.ndarray.argmax(output['croped_score_output'],
                                  axis=1).asnumpy() for output in output_all
            ]
        else:
            print '\nframe {} (intermediate)'.format(idx)
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            # scores, boxes, data_dict, _ = im_detect(cur_predictor, data_batch, data_names, scales, config)
            output_all, _ = im_segment(cur_predictor, data_batch)
            output_all = [
                mx.ndarray.argmax(output['croped_score_output'],
                                  axis=1).asnumpy() for output in output_all
            ]

        elapsed = toc()
        time += elapsed
        count += 1
        print 'testing {} {:.4f}s [{:.4f}s]'.format(im_name, elapsed,
                                                    time / count)

        pred = np.uint8(np.squeeze(output_all))
        segmentation_result = Image.fromarray(pred)
        pallete = getpallete(256)
        segmentation_result.putpalette(pallete)
        _, im_filename = os.path.split(im_name)
        segmentation_result.save(output_dir + '/seg_' + im_filename)

        label = None

        _, lb_filename = os.path.split(label_files[lb_idx])
        im_comps = im_filename.split('_')
        lb_comps = lb_filename.split('_')
        # if annotation available for frame
        if im_comps[1] == lb_comps[1] and im_comps[2] == lb_comps[2]:
            print 'label {}'.format(lb_filename)
            label = np.asarray(Image.open(label_files[lb_idx]))
            if lb_idx < len(label_files) - 1:
                lb_idx += 1

        if label is not None:
            curr_hist = fast_hist(pred.flatten(), label.flatten(), num_classes)
            hist += curr_hist
            print 'mIoU {mIoU:.3f}'.format(
                mIoU=round(np.nanmean(per_class_iu(curr_hist)) * 100, 2))
            print '(cum) mIoU {mIoU:.3f}'.format(
                mIoU=round(np.nanmean(per_class_iu(hist)) * 100, 2))

    ious = per_class_iu(hist) * 100
    print ' '.join('{:.03f}'.format(i) for i in ious)
    print '===> final mIoU {mIoU:.3f}'.format(mIoU=round(np.nanmean(ious), 2))

    print 'done'
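
fast_hist and per_class_iu are not defined in this example; assuming the standard Cityscapes evaluation helpers, they amount to a bincount-based confusion matrix plus per-class intersection-over-union:

import numpy as np

def fast_hist(pred, label, n):
    # n x n confusion matrix over valid labels (entries >= n, such as
    # the 255 ignore index, are masked out)
    k = (label >= 0) & (label < n)
    return np.bincount(n * label[k].astype(int) + pred[k],
                       minlength=n ** 2).reshape(n, n)

def per_class_iu(hist):
    # IoU per class: diagonal / (row sum + column sum - diagonal)
    hist = hist.astype(np.float64)
    return np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))
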
Example #31
def process_one_batch_images_fun(isUrlFlag=False,
                                 one_batch_images_list=None,
                                 init_model_param=None,
                                 fileOp=None,
                                 vis=False):
    # init_model_param list : [sym, arg_params, aux_params]

    num_classes = 11  # 0 is background,
    # classes = ['tibetan flag', 'guns', 'knives',
    #            'not terror', 'islamic flag', 'isis flag']
    classes = [
        'islamic flag', 'isis flag', 'tibetan flag', 'knives_true',
        'guns_true', 'knives_false', 'knives_kitchen', 'guns_anime',
        'guns_tools', 'not terror'
    ]
    image_names = one_batch_images_list
    if len(image_names) <= 0:
        return
    all_can_read_image = []
    data = []
    for im_name in image_names:
        #print("process : %s"%(im_name))
        im = readImage_fun(isUrlFlag=isUrlFlag, imagePath=im_name)
        # check whether this image is readable
        if np.shape(im) == ():
            print("ReadImageError : %s" % (im_name))
            continue
        all_can_read_image.append(im_name)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]

    predictor = Predictor(init_model_param[0],
                          data_names,
                          label_names,
                          context=[mx.gpu(int(args.gpuId))],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=init_model_param[1],
                          aux_params=init_model_param[2])
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    for idx, im_name in enumerate(all_can_read_image):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC \
                else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > args.threshold, :]
            dets_nms.append(cls_dets)
        print('testing {} {:.4f}s'.format(im_name, toc()))
        show_boxes(isUrlFlag=isUrlFlag,
                   im_name=im_name,
                   dets=dets_nms,
                   classes=classes,
                   scale=1,
                   vis=vis,
                   fileOp=fileOp,
                   flag=args.outputFileFlag)
    print('process one batch images done')
    pass
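
process_one_batch_images_fun suggests an outer driver that splits a long image list into fixed-size batches before calling it. A hedged sketch of such a driver (the batch size and the model_param / out_file names are assumptions for illustration):

def iter_batches(image_list, batch_size=64):
    # Yield consecutive slices of at most batch_size images.
    for start in range(0, len(image_list), batch_size):
        yield image_list[start:start + batch_size]

# for batch in iter_batches(all_images, 64):
#     process_one_batch_images_fun(isUrlFlag=False,
#                                  one_batch_images_list=batch,
#                                  init_model_param=model_param,
#                                  fileOp=out_file)
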
Example #32
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names; do not count the background, even though we treat the background as label '0'
    num_classes = 4
    classes = ['vehicle', 'pedestrian', 'cyclist', 'traffic lights']

    # load demo data
    image_path = './data/RoadImages/test/'
    image_names = glob.glob(image_path + '*.jpg')

    print("Image amount {}".format(len(image_names)))
    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), ('%s does not exist' % im_name)
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][1]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(
        './output/rfcn/road_obj/road_train_all/all/' + 'rfcn_road',
        19,
        process=True)
    predictor = Predictor(sym,
                          data_names,
                          label_names,
                          context=[mx.gpu(0)],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params,
                          aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # test
    notation_dict = {}
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                             scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC \
                else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print 'testing {} {:.4f}s'.format(im_name, toc())
        # notation_list.append(get_notation(im_name, dets_nms, classes, scale=1.0, gen_bbox_pic=True))
        notation_dict.update(
            get_notation(im_name,
                         dets_nms,
                         classes,
                         scale=1.0,
                         gen_bbox_pic=True))
    save_notation_file(notation_dict)
    print 'done'
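
get_notation and save_notation_file are not shown; assuming the notation dict maps image names to per-class detection arrays, a JSON dump is one plausible shape for save_notation_file (the file name and value types are assumptions):

import json
import numpy as np

def save_notation_file(notation_dict, path='notations.json'):
    # Hypothetical implementation: assumes each value is an iterable of
    # numpy detection arrays, converted to nested lists for JSON.
    safe = {k: [np.asarray(d).tolist() for d in v]
            for k, v in notation_dict.items()}
    with open(path, 'w') as f:
        json.dump(safe, f, indent=2)
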
Example #33
def main():

    # settings
    num_classes = 19
    snip_len = 30
    version = str(args.version)
    interv = args.interval
    num_ex = args.num_ex
    avg_acc = args.avg_acc

    # validate params
    if version not in ['18', '34', '50', '101']:
        raise ValueError(
            "Invalid Accel version '%s' - must be one of Accel-{18,34,50,101}"
            % version)
    if interv < 1:
        raise ValueError("Invalid interval %d - must be >=1" % interv)
    if num_ex < 1:
        raise ValueError("Invalid num_ex %d - must be >=1" % num_ex)

    # get symbol
    pprint.pprint(config)
    config.symbol = 'accel_' + version
    model1 = '/../model/rfcn_dff_flownet_vid'
    model2 = '/../model/accel-' + version
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    key_sym = sym_instance.get_key_test_symbol(config)
    cur_sym = sym_instance.get_cur_test_symbol(config)

    path_demo_data = '/ebs/Accel/data/cityscapes/'
    path_demo_labels = '/ebs/Accel/data/cityscapes/'
    if path_demo_data == '' or path_demo_labels == '':
        raise ValueError("Must set path to demo data + labels")

    # load demo data
    image_names = sorted(
        glob.glob(path_demo_data + 'leftImg8bit_sequence/val/frankfurt/*.png'))
    image_names += sorted(
        glob.glob(path_demo_data + 'leftImg8bit_sequence/val/lindau/*.png'))
    image_names += sorted(
        glob.glob(path_demo_data + 'leftImg8bit_sequence/val/munster/*.png'))
    image_names = image_names[:snip_len * num_ex]
    label_files = sorted(
        glob.glob(path_demo_labels + 'gtFine/val/frankfurt/*trainIds.png'))
    label_files += sorted(
        glob.glob(path_demo_labels + 'gtFine/val/lindau/*trainIds.png'))
    label_files += sorted(
        glob.glob(path_demo_labels + 'gtFine/val/munster/*trainIds.png'))
    output_dir = cur_path + '/../demo/deeplab_dff/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    key_frame_interval = interv

    #
    lb_pos = 19
    image_names_trunc = []
    for i in range(num_ex):
        snip_pos = i * snip_len
        if avg_acc:
            offset = i % interv
        else:
            offset = interv - 1
        start_pos = lb_pos - offset
        image_names_trunc.extend(
            image_names[snip_pos + start_pos:snip_pos + start_pos + interv])
    image_names = image_names_trunc
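    # e.g. with interv=5 and avg_acc=False: offset=4, start_pos=15, so frames
    # 15..19 of each snippet are kept, ending exactly at the annotated frame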

    data = []
    key_im_tensor = None
    prev_im_tensor = None
    for idx, im_name in enumerate(image_names):
        assert os.path.exists(im_name), '{} does not exist'.format(im_name)
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
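        # 'data_key' always carries the previous frame so flow can be chained
        # frame-to-frame; every key_frame_interval-th frame is marked as key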
        if idx % key_frame_interval == 0:
            key_im_tensor = im_tensor
        if prev_im_tensor is None:
            prev_im_tensor = im_tensor
        data.append({
            'data': im_tensor,
            'im_info': im_info,
            'data_key': prev_im_tensor,
            'feat_key': np.zeros((1, config.network.DFF_FEAT_DIM, 1, 1))
        })
        prev_im_tensor = im_tensor

    # get predictor
    data_names = ['data', 'data_key', 'feat_key']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names]
            for i in xrange(len(data))]
    max_data_shape = [[
        ('data', (1, 3, max([v[0] for v in config.SCALES]),
                  max([v[1] for v in config.SCALES]))),
        ('data_key', (1, 3, max([v[0] for v in config.SCALES]),
                      max([v[1] for v in config.SCALES]))),
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
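    # provide_data/provide_label declare per-batch shapes; max_data_shape lets
    # the predictor pre-allocate buffers for the largest configured scale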

    arg_params, aux_params = load_param(cur_path + model1, 0, process=True)
    arg_params_dcn, aux_params_dcn = load_param(cur_path + model2,
                                                0,
                                                process=True)
    arg_params.update(arg_params_dcn)
    aux_params.update(aux_params_dcn)
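    # on duplicate parameter names the Accel weights (second checkpoint) win;
    # key_sym runs the full network on key frames, cur_sym the cheaper update
    # pass on intermediate frames via the cached 'feat_key'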
    key_predictor = Predictor(key_sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
    cur_predictor = Predictor(cur_sym,
                              data_names,
                              label_names,
                              context=[mx.gpu(0)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data,
                              provide_label=provide_label,
                              arg_params=arg_params,
                              aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up: run two batches (a key frame and, for interval > 1, an
    # intermediate frame) so both predictors bind and allocate before timing
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[j]],
                                     label=[],
                                     pad=0,
                                     index=0,
                                     provide_data=[[
                                         (k, v.shape)
                                         for k, v in zip(data_names, data[j])
                                     ]],
                                     provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]
        if j % key_frame_interval == 0:
            output_all, feat = im_segment(key_predictor, data_batch)
            output_all = [
                mx.ndarray.argmax(output['croped_score_output'],
                                  axis=1).asnumpy() for output in output_all
            ]
        else:
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            output_all, feat = im_segment(cur_predictor, data_batch)
            output_key = 'croped_score_output' if version == '101' else 'correction_output'
            output_all = [
                mx.ndarray.argmax(output[output_key], axis=1).asnumpy()
                for output in output_all
            ]

    print "warmup done"
    # test
    time = 0
    count = 0
    hist = np.zeros((num_classes, num_classes))
    lb_idx = 0
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [
            data_batch.data[i][1].asnumpy()[0, 2]
            for i in xrange(len(data_batch.data))
        ]

        tic()
        if idx % key_frame_interval == 0:
            print '\n\nframe {} (key)'.format(idx)
            output_all, feat = im_segment(key_predictor, data_batch)
            output_all = [
                mx.ndarray.argmax(output['croped_score_output'],
                                  axis=1).asnumpy() for output in output_all
            ]
        else:
            print '\nframe {} (intermediate)'.format(idx)
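            # inject the feature map cached at the last key frame as 'feat_key'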
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            output_all, feat = im_segment(cur_predictor, data_batch)
            output_key = 'croped_score_output' if version == '101' else 'correction_output'
            output_all = [
                mx.ndarray.argmax(output[output_key], axis=1).asnumpy()
                for output in output_all
            ]

        elapsed = toc()
        time += elapsed
        count += 1
        print 'testing {} {:.4f}s [{:.4f}s]'.format(im_name, elapsed,
                                                    time / count)

        pred = np.uint8(np.squeeze(output_all))
        segmentation_result = Image.fromarray(pred)
        pallete = getpallete(256)
        segmentation_result.putpalette(pallete)
        _, im_filename = os.path.split(im_name)
        segmentation_result.save(output_dir + '/seg_' + im_filename)

        # compute accuracy
        label = None

        _, lb_filename = os.path.split(label_files[lb_idx])
        im_comps = im_filename.split('_')
        lb_comps = lb_filename.split('_')
        # check if annotation available for frame
        if im_comps[1] == lb_comps[1] and im_comps[2] == lb_comps[2]:
            print 'label {}'.format(lb_filename)
            label = np.asarray(Image.open(label_files[lb_idx]))
            if lb_idx < len(label_files) - 1:
                lb_idx += 1

        if label is not None:
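            # accumulate the 19x19 confusion matrix; mIoU is the mean of the
            # per-class intersection-over-union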
            curr_hist = fast_hist(pred.flatten(), label.flatten(), num_classes)
            hist += curr_hist
            print 'mIoU {mIoU:.3f}'.format(
                mIoU=round(np.nanmean(per_class_iu(curr_hist)) * 100, 2))
            print '(cum) mIoU {mIoU:.3f}'.format(
                mIoU=round(np.nanmean(per_class_iu(hist)) * 100, 2))

    ious = per_class_iu(hist) * 100
    print ' '.join('{:.03f}'.format(i) for i in ious)
    print '===> final mIoU {mIoU:.3f}'.format(mIoU=round(np.nanmean(ious), 2))

    print 'done'
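
The helpers fast_hist and per_class_iu are assumed above but not shown in this snippet. A minimal sketch in the spirit of the standard Cityscapes evaluation utilities (the repo's own versions may differ slightly):

import numpy as np

def fast_hist(pred, label, n):
    # confusion matrix via a single bincount over the joint (label, pred) index
    k = (label >= 0) & (label < n)  # drop void / out-of-range labels
    return np.bincount(n * label[k].astype(int) + pred[k],
                       minlength=n ** 2).reshape(n, n).astype(np.float64)

def per_class_iu(hist):
    # per-class IoU: diagonal / (row sum + column sum - diagonal)
    return np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))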