def __init__(self):
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    self.symbol = sym_instance.get_symbol(config, is_train=False)
    self.classes = ['box', 'robot']
    logging.debug("Classes: {}".format(self.classes))
    self.scales = config.SCALES[0]
    logging.debug("Scales: {}".format(self.scales))
    self.data_shape_conf = [[('data', (1, 3, self.scales[0], self.scales[1])),
                             ('im_info', (1, 3))]]
    self.arg_params, self.aux_params = load_param(
        os.path.join(cur_path, '..', 'models', "rfcn_voc"), 0, process=True)
    self.data_names = ['data', 'im_info']
    self.predictor = Predictor(self.symbol, ['data', 'im_info'], [],
                               context=[mx.gpu(0)],
                               max_data_shapes=self.data_shape_conf,
                               provide_data=self.data_shape_conf,
                               provide_label=[None],
                               arg_params=self.arg_params,
                               aux_params=self.aux_params)
    self.nms = gpu_nms_wrapper(config.TEST.NMS, 0)
    logging.info("Deformable detector initialized")
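All of the snippets in this section share the same contract for the wrapper: `gpu_nms_wrapper(thresh, device_id)` returns a callable that takes an (N, 5) float32 array of `[x1, y1, x2, y2, score]` rows and returns the indices of the boxes that survive greedy IoU suppression. A minimal sketch of that call pattern with made-up boxes; the import path is an assumption based on how these snippets set up their `lib` directory:

import numpy as np
from nms.nms import gpu_nms_wrapper  # assumed path; adjust to your checkout

# Overlap threshold and GPU id, mirroring gpu_nms_wrapper(config.TEST.NMS, 0) above
nms = gpu_nms_wrapper(0.3, 0)

# Rows of [x1, y1, x2, y2, score]; the wrapper expects float32
dets = np.array([[10, 10, 60, 60, 0.9],
                 [12, 12, 62, 62, 0.8],      # heavy overlap with the first box -> suppressed
                 [100, 100, 150, 150, 0.7]], dtype=np.float32)

keep = nms(dets)        # indices of surviving boxes, highest scores first
print(dets[keep, :])    # at a 0.3 IoU threshold only rows 0 and 2 remain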
def main():
    # ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]
    ctx = [mx.gpu(0), mx.gpu(1), mx.gpu(2), mx.gpu(3)]
    print args
    # gpu_nums = [int(i) for i in config.gpus.split(',')]
    gpu_nums = [0, 1, 2, 3]
    nms_dets = gpu_nms_wrapper(config.TEST.NMS, gpu_nums[0])
    logger, final_output_path = create_logger(config.output_path, args.cfg,
                                              config.dataset.test_image_set)
    output_path = os.path.join(
        final_output_path, '..',
        '+'.join([iset for iset in config.dataset.image_set.split('+')]),
        config.TRAIN.model_prefix)
    test_rcnn(config, config.dataset.dataset, config.dataset.test_image_set,
              config.dataset.root_path, config.dataset.dataset_path, ctx,
              output_path, config.TEST.test_epoch, args.vis, args.ignore_cache,
              args.shuffle, config.TEST.HAS_RPN, config.dataset.proposal,
              args.thresh, logger=logger, output_path=final_output_path,
              nms_dets=nms_dets, is_docker=args.is_docker)
def show_boxes_with_nms(im, scores, boxes, classes, scale, config):
    import matplotlib.pyplot as plt
    dets_nms = []
    scores = scores.astype('f')
    boxes = boxes.astype('f')
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)
    for j in range(1, scores.shape[1]):
        cls_scores = scores[:, j, np.newaxis]
        cls_boxes = boxes[:, 4:8] * scale
        cls_dets = np.hstack((cls_boxes, cls_scores))
        keep = nms(cls_dets)
        cls_dets = cls_dets[keep, :]
        cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
        dets_nms.append(cls_dets)

    # visualize
    # show_boxes(im, dets_nms, classes, 1)
    import cv2
    im = image.transform_inverse(im, config.network.PIXEL_MEANS)
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    print 'image', im
    print 'det_nms', len(dets_nms), dets_nms
    print 'classes', len(classes)
    out_im = draw_boxes(im, dets_nms, classes, 1)
    # out_im = np.transpose(np.squeeze(out_im), (1, 2, 0))
    # print out_im.shape
    plt.imshow(out_im)
    plt.show()
def proposal_test(rpn_cls, rpn_reg, feature_shape, image_shape, ctx):
    # Stop gradient to stop gradient recording
    rpn_cls = mx.nd.stop_gradient(rpn_cls)
    rpn_reg = mx.nd.stop_gradient(rpn_reg)

    # Get basic information of the feature and the image
    _n, _c, f_height, f_width = feature_shape
    _in, _ic, img_height, img_width = image_shape
    rpn_cls = rpn_cls.reshape((1, -1, 2, f_height, f_width))
    anchors_count = rpn_cls.shape[1]

    # Recover RPN prediction with anchors
    ref_anchors = generate_anchors(base_size=16, ratios=cfg.anchor_ratios,
                                   scales=cfg.anchor_scales)
    anchors = map_anchors(ref_anchors, rpn_reg.shape, img_height, img_width, ctx)
    anchors = anchors.reshape((1, -1, 4, f_height, f_width))
    anchors = mx.nd.transpose(anchors, (0, 3, 4, 1, 2))
    rpn_anchor_scores = mx.nd.softmax(
        mx.nd.transpose(rpn_cls, (0, 3, 4, 1, 2)), axis=4)[:, :, :, :, 1]
    rpn_reg = mx.nd.transpose(rpn_reg.reshape((1, -1, 4, f_height, f_width)),
                              (0, 3, 4, 1, 2))
    rpn_bbox_pred = bbox_inverse_transform(anchors.reshape((-1, 4)),
                                           rpn_reg.reshape((-1, 4)))
    rpn_bbox_pred = bbox_clip(rpn_bbox_pred, img_height, img_width)
    rpn_bbox_pred = rpn_bbox_pred.reshape((1, f_height, f_width, anchors_count, 4))

    # Use NMS to filter out too many boxes
    rpn_bbox_pred = rpn_bbox_pred.asnumpy().reshape((-1, 4))
    rpn_anchor_scores = rpn_anchor_scores.asnumpy().reshape((-1, ))
    rpn_bbox_proposal = np.hstack(
        (rpn_bbox_pred, rpn_anchor_scores.reshape((rpn_anchor_scores.shape[0], 1))))
    # rpn_anchor_scores, rpn_bbox_pred = nms(rpn_anchor_scores, rpn_bbox_pred,
    #     cfg.rpn_nms_thresh, use_top_n=cfg.bbox_count_before_nms)
    gpu_nms = gpu_nms_wrapper(cfg.rpn_nms_thresh, ctx.device_id,
                              use_top_n=cfg.bbox_count_before_nms)
    keep = gpu_nms(rpn_bbox_proposal)
    rpn_bbox_proposal = rpn_bbox_proposal[keep][:, :4]
    # rpn_bbox_pred = mx.nd.array(rpn_bbox_pred, ctx)
    rpn_bbox_pred = mx.nd.array(rpn_bbox_proposal, ctx)
    del rpn_anchor_scores

    # Keep first cfg.rcnn_test_sample_size boxes
    if rpn_bbox_pred.shape[0] > cfg.rcnn_test_sample_size:
        rpn_bbox_pred = rpn_bbox_pred[:cfg.rcnn_test_sample_size, :]
    return rpn_bbox_pred
def load_data_and_get_predictor(self, image_names):
    # load demo data
    # image_names = ['COCO_test2015_000000000891.jpg',
    #                'COCO_test2015_000000001669.jpg']
    data = []
    for im_name in image_names:
        # assert os.path.exists(cur_path + '/../demo/' + im_name), \
        #     ('%s does not exist'.format('../demo/' + im_name))
        # im = cv2.imread(cur_path + '/../demo/' + im_name,
        #                 cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                           dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    self.data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in self.data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(self.data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    self.predictor = Predictor(self.sym, self.data_names, label_names,
                               context=[mx.gpu(1)],
                               max_data_shapes=max_data_shape,
                               provide_data=provide_data,
                               provide_label=provide_label,
                               arg_params=self.arg_params,
                               aux_params=self.aux_params)
    self.nms = gpu_nms_wrapper(config.TEST.NMS, 0)
    return data
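The `max_data_shapes` / `provide_data` arguments passed to `Predictor` here (and in the other snippets) are nested lists of `(name, shape)` tuples: one outer entry per bound image, one inner tuple per input blob. A small illustration with assumed example shapes, only to make the nesting explicit:

# Assumed example values; the real shapes come from config.SCALES and the resized image.
data_names = ['data', 'im_info']

# Largest 'data' blob the executor must be able to hold
max_data_shape = [[('data', (1, 3, 600, 1000))]]

# Concrete shapes bound for each image at test time
provide_data = [
    [('data', (1, 3, 600, 935)), ('im_info', (1, 3))],  # image 0 after resize()
]
provide_label = [None]  # no labels during inference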
def forward(self, is_train, req, in_data, out_data, aux):
    nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)
    batch_size = in_data[0].shape[0]
    if batch_size > 1:
        raise ValueError("Sorry, multiple images each device is not implemented")

    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    cls_prob_dict = {
        'stride64': in_data[4],
        'stride32': in_data[3],
        'stride16': in_data[2],
        'stride8': in_data[1],
        'stride4': in_data[0],
    }
    bbox_pred_dict = {
        'stride64': in_data[9],
        'stride32': in_data[8],
        'stride16': in_data[7],
        'stride8': in_data[6],
        'stride4': in_data[5],
    }

    pre_nms_topN = self._rpn_pre_nms_top_n
    post_nms_topN = self._rpn_post_nms_top_n
    min_size = self._rpn_min_size

    proposal_list = []
    score_list = []
    for s in self._feat_stride:
        stride = int(s)
        sub_anchors = generate_anchors(base_size=stride, scales=self._scales,
                                       ratios=self._ratios)
        scores = cls_prob_dict['stride' + str(s)].asnumpy()[:, self._num_anchors:, :, :]
        bbox_deltas = bbox_pred_dict['stride' + str(s)].asnumpy()
        im_info = in_data[-1].asnumpy()[0, :]

        # 1. Generate proposals from bbox_deltas and shifted anchors
        # use real image size instead of padded feature map sizes
        height, width = int(im_info[0] / stride), int(im_info[1] / stride)

        # Enumerate all shifts
        shift_x = np.arange(0, width) * stride
        shift_y = np.arange(0, height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = self._clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_pred(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = self._filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        proposal_list.append(proposals)
        score_list.append(scores)

    proposals = np.vstack(proposal_list)
    scores = np.vstack(score_list)

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    det = np.hstack((proposals, scores)).astype(np.float32)
    keep = nms(det)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]

    # pad to ensure output size remains unchanged
    if len(keep) < post_nms_topN:
        pad = npr.choice(keep, size=post_nms_topN - len(keep))
        keep = np.hstack((keep, pad))
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois array
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    # if is_train:
    self.assign(out_data[0], req[0], blob)

    if self._output_score:
        self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
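The `nms(det)` call above (and in the other proposal layers in this section) runs greedy IoU suppression on the GPU. As a readability aid, here is a plain-NumPy sketch of the same greedy algorithm; it is a reference reimplementation, not the compiled kernel these snippets actually invoke:

import numpy as np

def greedy_nms(dets, thresh):
    """Greedy IoU NMS over rows of [x1, y1, x2, y2, score]; returns kept indices."""
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]          # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # IoU of the kept box with every box still in play
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # drop everything overlapping the kept box by more than thresh
        order = order[1:][iou <= thresh]
    return keep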
def main():
    # get symbol
    pprint.pprint(config)
    # config.symbol = 'resnet_v1_101_rfcn_dcn' if not args.rfcn_only else 'resnet_v1_101_rfcn'
    config.symbol = 'resnet_v1_101_fpn_rcnn_rotbox_light_head_RoITransformer'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 15
    classes = ['__background__',  # always index 0
               'plane', 'baseball-diamond', 'bridge', 'ground-track-field',
               'small-vehicle', 'large-vehicle', 'ship', 'tennis-court',
               'basketball-court', 'storage-tank', 'soccer-ball-field',
               'roundabout', 'harbor', 'swimming-pool', 'helicopter']

    # load demo data
    image_names = ['P0004__1__0___0.png', 'P0053__1__0___0.png', 'P0060__1__1648___824.png']
    data = []
    for im_name in image_names:
        # pdb.set_trace()
        assert os.path.exists(cur_path + '/../demo/' + im_name), \
            ('%s does not exist'.format('../demo/' + im_name))
        im = cv2.imread(cur_path + '/../demo/' + im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    # arg_params, aux_params = load_param(cur_path + '/../model/' + ('rfcn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'), 0, process=True)
    # TODO: change this path
    arg_params, aux_params = load_param(
        r'/home/dj/code/Deformable_FPN_DOTA/output/fpn/DOTA/resnet_v1_101_dota_rotbox_light_head_Rroi_v6_trainval_fpn_end2end/train/fpn_DOTA_oriented',
        config.TEST.test_epoch, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        scores, boxes, data_dict = im_detect_rotbox_Rroi(predictor, data_batch, data_names, scales, config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        tic()
        scores, boxes, data_dict = im_detect_rotbox_Rroi(predictor, data_batch, data_names, scales, config)
        # boxes = boxes[0].astype('f')
        # scores = scores[0].astype('f')
        boxes = boxes[0].astype('float64')
        scores = scores[0].astype('float64')

        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            # cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_boxes = boxes[:, 8:16] if config.CLASS_AGNOSTIC else boxes[:, j * 8:(j + 1) * 8]
            cls_quadrangle_dets = np.hstack((cls_boxes, cls_scores))
            # keep = nms(cls_dets)
            keep = py_cpu_nms_poly(cls_quadrangle_dets, 0.3)
            cls_quadrangle_dets = cls_quadrangle_dets[keep, :]
            cls_quadrangle_dets = cls_quadrangle_dets[cls_quadrangle_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_quadrangle_dets)
        print 'testing {} {:.4f}s'.format(im_name, toc())

        # visualize
        # im = cv2.imread(cur_path + '/../demo/' + im_name)
        # im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        # pdb.set_trace()
        im = draw_all_poly_detection(data_dict[0]['data'].asnumpy(), dets_nms, classes[1:],
                                     data[idx][1].asnumpy()[0][2], config, threshold=0.2)
        cv2.imwrite(cur_path + '/../demo/' + 'results' + im_name, im)
        # show_boxes(im, dets_nms, classes, 1)
    print 'done'
def forward(self, is_train, req, in_data, out_data, aux):
    nms = gpu_nms_wrapper(self._threshold, 0)
    batch_size = in_data[0].shape[0]
    if batch_size > 1:
        raise ValueError("Sorry, multiple images each device is not implemented")

    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    pre_nms_topN = self._rpn_pre_nms_top_n
    post_nms_topN = self._rpn_post_nms_top_n
    min_size = self._rpn_min_size

    # the first set of anchors are background probabilities
    # keep the second part
    scores_list = in_data[0].asnumpy()       # [1, n]
    # print 'score_list shape:', scores_list.shape
    bbox_deltas_list = in_data[1].asnumpy()  # [1, n*2]
    im_info = in_data[2].asnumpy()[0, :]
    p2_shape = in_data[3].asnumpy().shape
    p3_shape = in_data[4].asnumpy().shape
    p4_shape = in_data[5].asnumpy().shape
    p5_shape = in_data[6].asnumpy().shape
    p6_shape = in_data[7].asnumpy().shape
    feat_shape = []
    feat_shape.append(p2_shape)
    feat_shape.append(p3_shape)
    feat_shape.append(p4_shape)
    feat_shape.append(p5_shape)
    feat_shape.append(p6_shape)
    # t = time.time()
    # print 'feat_shape:', feat_shape
    num_feat = len(feat_shape)  # [1, 5, 4]
    score_index_start = 0
    bbox_index_start = 0
    keep_proposal = []
    keep_scores = []
    # t_1 = time.time()
    for i in range(num_feat):
        feat_stride = int(self._feat_stride[i])  # 4, 8, 16, 32, 64
        # print 'feat_stride:', feat_stride
        anchor = generate_anchors(feat_stride, scales=self._scales, ratios=self._ratios)
        num_anchors = anchor.shape[0]  # 3
        height = feat_shape[i][2]
        width = feat_shape[i][3]
        shift_x = np.arange(0, width) * feat_stride
        shift_y = np.arange(0, height) * feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        A = num_anchors      # 3
        K = shifts.shape[0]  # height*width
        anchors = anchor.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))  # 3*height*width, 4
        scores = (scores_list[0, int(score_index_start):int(score_index_start + K * A * 2)]).reshape(
            (1, int(2 * num_anchors), -1, int(width)))  # 1, 2*3, h, w
        scores = scores[:, num_anchors:, :, :]          # 1, 3, h, w
        bbox_deltas = (bbox_deltas_list[0, int(bbox_index_start):int(bbox_index_start + K * A * 4)]).reshape(
            (1, int(4 * num_anchors), -1, int(width)))  # 1, 4*3, h, w
        score_index_start += K * A * 2
        bbox_index_start += K * A * 4
        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))  # [1,h,w,12] ---> [1*h*w*3,4]
        scores = self._clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))            # [1,h,w,3] ---> [1*h*w*3,1]
        proposals = bbox_pred(anchors, bbox_deltas)  # debug here, corresponding?
        proposals = clip_boxes(proposals, im_info[:2])
        keep = self._filter_boxes(proposals, min_size[i] * im_info[2])
        keep_proposal.append(proposals[keep, :])
        keep_scores.append(scores[keep])

    proposals = keep_proposal[0]
    scores = keep_scores[0]
    for i in range(1, num_feat):
        proposals = np.vstack((proposals, keep_proposal[i]))
        scores = np.vstack((scores, keep_scores[i]))
    # print 'roi concate t_1 spends :{:.4f}s'.format(time.time()-t_1)

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    # t_2 = time.time()
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    # print 'roi concate t_2_1_1 spends :{:.4f}s'.format(time.time()-t_2)

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    # t_nms = time.time()
    det = np.hstack((proposals, scores)).astype(np.float32)
    keep = nms(det)
    # print 'roi concate nms spends :{:.4f}s'.format(time.time()-t_nms)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]

    # pad to ensure output size remains unchanged
    if len(keep) < post_nms_topN:
        try:
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
        except:
            proposals = np.zeros((post_nms_topN, 4), dtype=np.float32)
            proposals[:, 2] = 16
            proposals[:, 3] = 16
            batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
            blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
            self.assign(out_data[0], req[0], blob)
            if self._output_score:
                self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
            return
        keep = np.hstack((keep, pad))
    proposals = proposals[keep, :]
    scores = scores[keep]
    # print 'roi concate t_2 spends :{:.4f}s'.format(time.time()-t_2)

    # Output rois array
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    self.assign(out_data[0], req[0], blob)

    if self._output_score:
        self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
def main():
    # get symbol
    pprint.pprint(config)
    # config.symbol = "resnet_v1_101_fpn_dcn_rcnn" if not args.rfcn_only else "resnet_v1_101_fpn_rcnn"
    config.symbol = "resnet_v1_101_fpn_dcn_rcnn"
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 5
    classes = ["car", "bus", "van", "others"]

    # load demo videos
    im_path = '../../aic2018/track1/images/'
    image_names = [
        x for x in os.listdir('../../aic2018/track1/images/')
        if (x.endswith(".jpg") and (x.startswith("9_1") or x.startswith("9_1")))
        and not x.endswith("_bbox.jpg")
    ]
    data = []
    for idx, im_name in enumerate(image_names[:1]):
        if idx == 0:
            assert os.path.exists(im_path + im_name), \
                ('%s does not exist'.format(im_path + im_name))
            im = cv2.imread(im_path + im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
            target_size = config.SCALES[0][0]
            max_size = config.SCALES[0][1]
            im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
            im_tensor = transform(im, config.network.PIXEL_MEANS)
            im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
            data.append({'data': im_tensor, 'im_info': im_info})
        else:
            data.append({'data': None, 'im_info': None})
    print(data)

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[0][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    # what does provide_data and provide_label work for?
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[0])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]

    # load parameters
    arg_params, aux_params = load_param(cur_path + '/../model/' + 'fpn_detrac', 1, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)
    print("successfully load model")

    # find all videos
    video_path = "../../tmp"
    video_files = [x for x in os.listdir(video_path) if x.endswith(".mp4")]
    save_path = "../../tmp/output"
    if not os.path.isdir(save_path):
        os.makedirs(save_path)
    print("processing {} videos...".format(len(video_files)))
    pbar = tqdm(total=len(video_files))
    for vf in video_files:
        vid = imageio.get_reader(os.path.join(video_path, vf), 'ffmpeg')
        vout = []
        for frame_idx, im in enumerate(vid):
            # im = cv2.imread(im_path + im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
            target_size = config.SCALES[0][0]
            max_size = config.SCALES[0][1]
            im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
            im_tensor = transform(im, config.network.PIXEL_MEANS)
            im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
            data_idx = [{"data": im_tensor, "im_info": im_info}]
            data_idx = [[mx.nd.array(data_idx[i][name]) for name in data_names]
                        for i in xrange(len(data_idx))]
            data_batch = mx.io.DataBatch(data=[data_idx[0]], label=[], pad=0, index=idx,
                                         provide_data=[[(k, v.shape) for k, v in zip(data_names, data_idx[0])]],
                                         provide_label=[None])
            scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

            tic()
            scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
            boxes = boxes[0].astype('f')
            scores = scores[0].astype('f')
            dets_nms = []
            for j in range(1, scores.shape[1]):
                cls_scores = scores[:, j, np.newaxis]
                cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                cls_dets = cls_dets[keep, :]
                cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
                dets_nms.append(cls_dets)
            print 'testing {} the {} th frame at {:.4f}s, detections {}'.format(vf, frame_idx, toc(), len(dets_nms))

            # save results
            # im = cv2.imread(im_path + im_name)
            # im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
            # im_bbox = show_boxes(im, dets_nms, classes, 1)
            # cv2.imwrite(im_path + im_name.replace(".jpg", "_bbox.jpg"), im_bbox)
            save_im, outputs = show_boxes(im, dets_nms, classes, 1)
            # cv2.imwrite(os.path.join(save_path, "{}_{}.jpg".format(vf.replace(".mp4", ""), str(frame_idx).zfill(5))), save_im)
            for out in outputs:
                vout.append([frame_idx] + out)
        # save the whole video detection into pickle file
        with open(os.path.join(save_path, vf.replace(".mp4", ".pkl")), "wb") as f:
            pickle.dump(vout, f, protocol=2)
        pbar.update(1)
    pbar.close()
    print 'done'
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = "resnet_v1_101_fpn_dcn_rcnn"
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 81
    classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck',
               'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench',
               'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
               'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
               'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
               'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
               'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
               'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
               'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
               'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
               'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
               'toothbrush']

    # load demo data
    image_names = ['test_city_inter.jpg']
    data = []
    for im_name in image_names:
        assert os.path.exists(cur_path + '/../demo/' + im_name), \
            ('%s does not exist'.format('../demo/' + im_name))
        im = cv2.imread(cur_path + '/../demo/' + im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../model/demo_model/' +
                                        ('fpn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'),
                                        0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.5, :]
            dets_nms.append(cls_dets)
        print 'testing {} {:.4f}s'.format(im_name, toc())

        # visualize
        im = cv2.imread(cur_path + '/../demo/' + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        save_im, _ = show_boxes(im, dets_nms, classes, 1)
        cv2.imwrite(cur_path + '/../demo/' + im_name.replace(".jpg", "_out.jpg"), save_im)
    print 'done'
ctx = [mx.gpu(gpu_id)]
update_config("experiments/fpn/cfgs/resnet_v1_101_coco_trainval_fpn_dcn_end2end_ohem.yaml")
sym = get_symbol(config)
net = SymbolBlock(sym=sym, input_names=["data", "im_info"], pretrained="fpn_coco-3-0.0.params")
net.collect_params().reset_ctx(ctx)
im_names = list(lsdir("/data1/zyx/yks/dataset/guangdong_round2_test_a_20181011", suffix=".jpg"))
# shuffle(im_names)
nms_wrapper = nms = gpu_nms_wrapper(config.TEST.NMS, gpu_id)
# score = validate(net, nms_wrapper, ctx_list=ctx)
# print(score)
results = {}
results["results"] = []
for im_name in tqdm.tqdm(im_names):
    TEST_SCALES = [[960, 1280]]
    one_img = {}
    one_img["filename"] = os.path.basename(im_name)
    one_img["rects"] = []
    bboxes, scores, labels = im_detect_bbox_aug(net, nms_wrapper, im_name, TEST_SCALES,
                                                config.network.PIXEL_MEANS,
                                                config.TRAIN.BBOX_STDS,
def main():
    global classes

    assert os.path.exists(args.input), ('%s does not exist'.format(args.input))
    im = cv2.imread(args.input, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    arr = np.array(im)
    origin_width, origin_height, _ = arr.shape
    portion = smart_chipping(origin_width, origin_height)

    # manually update the configuration
    # print(config.SCALES[0][0])
    # TODO: note this is hard coded and assume there are three values for the SCALE configuration
    config.SCALES[0] = (portion, portion, portion)
    # config.max_per_image =

    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_fpn_dcn_rcnn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # load demo data
    data = []
    # portion = args.chip_size
    cwn, chn = (portion, portion)
    wn, hn = (int(origin_width / cwn), int(origin_height / chn))
    padding_y = int(math.ceil(float(origin_height) / chn) * chn - origin_height)
    padding_x = int(math.ceil(float(origin_width) / cwn) * cwn - origin_width)
    print("padding_y,padding_x, origin_height, origin_width", padding_y, padding_x,
          origin_height, origin_width)
    # top, bottom, left, right - border width in number of pixels in corresponding directions
    im = cv2.copyMakeBorder(im, 0, padding_x, 0, padding_y, cv2.BORDER_CONSTANT, value=[0, 0, 0])
    # the section below could be optimized. but basically the idea is to re-calculate all the values
    arr = np.array(im)
    width, height, _ = arr.shape
    cwn, chn = (portion, portion)
    wn, hn = (int(width / cwn), int(height / chn))

    image_list = chip_image(im, (portion, portion))
    for im in image_list:
        target_size = portion
        max_size = portion
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        # print("im.shape,im_scale", im.shape, im_scale)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../model/' + ('fpn_dcn_xview_480_640_800_alltrain'),
                                        11, process=True)
    # arg_params, aux_params = load_param(cur_path + '/../model/' + ('fpn_dcn_coco' if not args.fpn_only else 'fpn_coco'), 0, process=True)
    print("loading parameter done")
    if args.cpu_only:
        predictor = Predictor(sym, data_names, label_names,
                              context=[mx.cpu()], max_data_shapes=max_data_shape,
                              provide_data=provide_data, provide_label=provide_label,
                              arg_params=arg_params, aux_params=aux_params)
        nms = py_nms_wrapper(config.TEST.NMS)
    else:
        predictor = Predictor(sym, data_names, label_names,
                              context=[mx.gpu(args.gpu_index)], max_data_shapes=max_data_shape,
                              provide_data=provide_data, provide_label=provide_label,
                              arg_params=arg_params, aux_params=aux_params)
        nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    num_preds = int(5000 * math.ceil(float(portion) / 400))
    # test
    boxes, scores, classes = generate_detections(data, data_names, predictor, config, nms,
                                                 image_list, num_preds)

    # Process boxes to be full-sized
    print("boxes shape is", boxes.shape, "wn, hn", wn, hn, "width, height", width, height)
    bfull = boxes.reshape((wn, hn, num_preds, 4))
    for i in range(wn):
        for j in range(hn):
            bfull[i, j, :, 0] += j * cwn
            bfull[i, j, :, 2] += j * cwn
            bfull[i, j, :, 1] += i * chn
            bfull[i, j, :, 3] += i * chn
            # clip values
            bfull[i, j, :, 0] = np.clip(bfull[i, j, :, 0], 0, origin_height)
            bfull[i, j, :, 2] = np.clip(bfull[i, j, :, 2], 0, origin_height)
            bfull[i, j, :, 1] = np.clip(bfull[i, j, :, 1], 0, origin_width)
            bfull[i, j, :, 3] = np.clip(bfull[i, j, :, 3], 0, origin_width)
    bfull = bfull.reshape((hn * wn, num_preds, 4))
    scores = scores.reshape((hn * wn, num_preds))
    classes = classes.reshape((hn * wn, num_preds))

    # only display boxes with confidence > .5
    # print(bfull, scores, classes)
    # bs = bfull[scores > 0.08]
    # cs = classes[scores > 0.08]
    # print("bfull.shape,scores.shape, bs.shape", bfull.shape, scores.shape, bs.shape)
    # s = im_name
    # draw_bboxes(arr, bs, cs).save("/tmp/" + s[0].split(".")[0] + ".png")
    # scoring_line_threshold = 11000
    # if bs.shape[0] > scoring_line_threshold:
    #     # too many predictions, we should trim the low confidence ones
    with open(args.output, 'w') as f:
        for i in range(bfull.shape[0]):
            for j in range(bfull[i].shape[0]):
                # box should be xmin ymin xmax ymax
                box = bfull[i, j]
                class_prediction = classes[i, j]
                score_prediction = scores[i, j]
                if int(class_prediction) != 0:
                    f.write('%d %d %d %d %d %f \n' %
                            (box[0], box[1], box[2], box[3], int(class_prediction), score_prediction))
    print('done')
def proposal_train(rpn_cls, rpn_reg, gt, feature_shape, image_shape, ctx):
    # Stop gradient to stop gradient recording
    rpn_cls = mx.nd.stop_gradient(rpn_cls)
    rpn_reg = mx.nd.stop_gradient(rpn_reg)

    # Get basic information of the feature and the image
    _n, _c, f_height, f_width = feature_shape
    _in, _ic, img_height, img_width = image_shape
    rpn_cls = rpn_cls.reshape((1, -1, 2, f_height, f_width))
    anchors_count = rpn_cls.shape[1]

    # Recover RPN prediction with anchors
    ref_anchors = generate_anchors(base_size=16, ratios=cfg.anchor_ratios, scales=cfg.anchor_scales)
    anchors = map_anchors(ref_anchors, rpn_reg.shape, img_height, img_width, ctx)
    anchors = anchors.reshape((1, -1, 4, f_height, f_width))
    anchors = mx.nd.transpose(anchors, (0, 3, 4, 1, 2))
    rpn_anchor_scores = mx.nd.softmax(mx.nd.transpose(rpn_cls, (0, 3, 4, 1, 2)), axis=4)[:, :, :, :, 1]
    rpn_reg = mx.nd.transpose(rpn_reg.reshape((1, -1, 4, f_height, f_width)), (0, 3, 4, 1, 2))
    with mx.autograd.pause():
        rpn_bbox_pred = bbox_inverse_transform(anchors.reshape((-1, 4)), rpn_reg.reshape((-1, 4)))
        rpn_bbox_pred = bbox_clip(rpn_bbox_pred, img_height, img_width)
        rpn_bbox_pred = rpn_bbox_pred.reshape((1, f_height, f_width, anchors_count, 4))

        # Use NMS to filter out too many boxes
        rpn_bbox_pred = rpn_bbox_pred.asnumpy().reshape((-1, 4))
        rpn_anchor_scores = rpn_anchor_scores.asnumpy().reshape((-1, ))
        rpn_bbox_proposal = np.hstack(
            (rpn_bbox_pred, rpn_anchor_scores.reshape((rpn_anchor_scores.shape[0], 1))))
        # rpn_anchor_scores, rpn_bbox_pred = nms(rpn_anchor_scores, rpn_bbox_pred,
        #     cfg.rpn_nms_thresh, use_top_n=cfg.bbox_count_before_nms)
        gpu_nms = gpu_nms_wrapper(cfg.rpn_nms_thresh, ctx.device_id,
                                  use_top_n=cfg.bbox_count_before_nms)
        keep = gpu_nms(rpn_bbox_proposal)
        rpn_bbox_proposal = rpn_bbox_proposal[keep][:, :4]
        # rpn_bbox_pred = mx.nd.array(rpn_bbox_pred, ctx)
        rpn_bbox_pred = mx.nd.array(rpn_bbox_proposal, ctx)
        del rpn_anchor_scores

        # append ground truth
        rpn_bbox_pred = mx.nd.concatenate([rpn_bbox_pred, gt[0][:, :4]])

        # assign label for rpn_bbox_pred
        overlaps = bbox_overlaps(rpn_bbox_pred, gt[0][:, :4].reshape((-1, 4)))
        gt_assignment = mx.nd.argmax(overlaps, axis=1).asnumpy().astype(np.int32)
        max_overlaps = mx.nd.max(overlaps, axis=1).asnumpy()
        gt_labels = gt[0][:, 4].reshape((-1,)).asnumpy()
        gt_bboxes = gt[0][:, :4].reshape((-1, 4)).asnumpy()
        cls_labels = gt_labels[gt_assignment]
        rpn_bbox_pred_np = rpn_bbox_pred.asnumpy()
        reg_target = gt_bboxes[gt_assignment, :]
        cls_labels = cls_labels * (max_overlaps >= cfg.rcnn_fg_thresh)

        # sample positive and negative ROIs
        fg_inds = np.where(max_overlaps >= cfg.rcnn_fg_thresh)[0]
        bg_inds = np.where((max_overlaps >= cfg.rcnn_bg_lo_thresh) *
                           (max_overlaps < cfg.rcnn_fg_thresh))[0]
        fg_nums = int(cfg.rcnn_train_sample_size * cfg.rcnn_train_fg_fraction)
        bg_nums = cfg.rcnn_train_sample_size - fg_nums
        if (len(fg_inds) > fg_nums):
            fg_inds = np.random.choice(fg_inds, size=fg_nums, replace=False)
        if (len(bg_inds) > bg_nums):
            bg_inds = np.random.choice(bg_inds, size=bg_nums, replace=False)
        cls_labels = np.concatenate([cls_labels[fg_inds], cls_labels[bg_inds]])
        reg_target = np.concatenate([reg_target[fg_inds], reg_target[bg_inds]])
        rpn_bbox_pred_np = np.concatenate([rpn_bbox_pred_np[fg_inds], rpn_bbox_pred_np[bg_inds]])
        cls_labels = mx.nd.array(cls_labels, ctx)
        reg_target = mx.nd.array(reg_target, ctx)
        rpn_bbox_pred = mx.nd.array(rpn_bbox_pred_np, ctx)
        reg_target = bbox_transform(rpn_bbox_pred, reg_target)

        # Shape reg_target into 4 * num_classes
        reg_large_target = mx.nd.zeros((reg_target.shape[0], 4 * cfg.num_classes), ctx)
        for i in range(cls_labels.shape[0]):
            cur_label = int(cls_labels[i].asscalar())
            if (cur_label != 0):
                reg_large_target[i, cur_label * 4:(cur_label + 1) * 4] = reg_target[i, :]
    return rpn_bbox_pred, reg_large_target, cls_labels
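The final loop in `proposal_train` scatters each RoI's regression target into a `4 * num_classes`-wide row, leaving background rows all zero. A tiny NumPy sketch of that layout with made-up toy values, just to show which columns get filled:

import numpy as np

num_classes = 3                      # toy value for illustration
labels = np.array([0, 2, 1])         # per-RoI class labels (0 = background)
reg_target = np.array([[0.1, 0.2, 0.3, 0.4],
                       [0.5, 0.6, 0.7, 0.8],
                       [0.9, 1.0, 1.1, 1.2]])

# Only the 4-column block belonging to the RoI's class is written;
# background rows stay zero - the same layout reg_large_target uses above.
reg_large = np.zeros((reg_target.shape[0], 4 * num_classes))
for i, c in enumerate(labels):
    if c != 0:
        reg_large[i, c * 4:(c + 1) * 4] = reg_target[i]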
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn_dcn' if not args.rfcn_only else 'resnet_v1_101_rfcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 81
    classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck',
               'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench',
               'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
               'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
               'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
               'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
               'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
               'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
               'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
               'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
               'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
               'toothbrush']

    # load demo data
    image_names = ['COCO_test2015_000000000891.jpg', 'COCO_test2015_000000001669.jpg']
    data = []
    for im_name in image_names:
        assert os.path.exists(cur_path + '/../demo/' + im_name), \
            ('%s does not exist'.format('../demo/' + im_name))
        im = cv2.imread(cur_path + '/../demo/' + im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../model/' +
                                        ('rfcn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'),
                                        0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print 'testing {} {:.4f}s'.format(im_name, toc())

        # visualize
        im = cv2.imread(cur_path + '/../demo/' + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_boxes(im, dets_nms, classes, 1)
    print 'done'
def process_video_frame(raw_frame_queue, bbox_frame_queue):
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)
    arg_params, aux_params = load_param(
        './output/rfcn/road_obj/road_train_all/all/' + 'rfcn_road', 19, process=True)

    # set up class names; Don't count the background in, even we are treat the background as label '0'
    num_classes = 4
    classes = ['vehicle', 'pedestrian', 'cyclist', 'traffic lights']

    target_size = config.SCALES[0][1]
    max_size = config.SCALES[0][1]

    while True:
        tic()
        i = 0
        data = []
        frame_list = []
        while len(data) < 15:
            frame = raw_frame_queue.get()
            if frame is None:
                continue
            if i < 2:
                i += 1
                frame, im_scale = resize(frame, target_size, max_size,
                                         stride=config.network.IMAGE_STRIDE)
                bbox_frame_queue.put(frame)
                continue
            frame, im_scale = resize(frame, target_size, max_size,
                                     stride=config.network.IMAGE_STRIDE)
            im_tensor = transform(frame, config.network.PIXEL_MEANS)
            im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
            data.append({'data': im_tensor, 'im_info': im_info})
            frame_list.append(frame)

        # get predictor
        data_names = ['data', 'im_info']
        label_names = []
        data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
        # print('Debug: [data] shape: {}, cont: {}'.format(type(data), data))
        max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                     max([v[1] for v in config.SCALES])))]]
        # print('Debug: [max_data_shape] shape: {}, cont: {}'.format(type(max_data_shape), max_data_shape))
        provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
        # print('Debug: [provide_data] shape: {}, cont: {}'.format(type(provide_data), provide_data))
        provide_label = [None for i in xrange(len(data))]
        # print('Debug: [provide_label] shape: {}, cont: {}'.format(type(provide_label), provide_label))
        predictor = Predictor(sym, data_names, label_names,
                              context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                              provide_data=provide_data, provide_label=provide_label,
                              arg_params=arg_params, aux_params=aux_params)
        nms = gpu_nms_wrapper(config.TEST.NMS, 0)

        # Process video frame
        # image_names = ['frame']
        # for idx, frame in enumerate(frame_list):
        data_batch = mx.io.DataBatch(data=data, label=[], pad=0,
                                     provide_data=provide_data, provide_label=provide_label)
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        # print("length: {}".format(len(data_batch.data)))
        # print('Debug: [scales] cont: {}'.format(scales))
        scores_all, boxes_all, data_dict_all = im_detect(predictor, data_batch, data_names, scales, config)
        # print('scores_all: Type: {}, Values: {}, Length: {}'.format(type(scores_all), scores_all, len(scores_all)))
        # print('boxes_all: Type: {}, Values: {}, Length: {}'.format(type(boxes_all), boxes_all, len(boxes_all)))
        # print('data_dict_all: Type: {}, Values: {}, length: {}'.format(type(data_dict_all), data_dict_all, len(data_dict_all)))
        # print('frame_list: Type: {}, Values: {}, Length: {}'.format(type(frame_list), frame_list, len(frame_list)))
        # print('scores_all: Type: {}, Length: {}, Values: {}'.format(type(scores_all[0]), len(scores_all[0]), scores_all[0]))
        # print(scores_all[0].shape)
        # print('boxes_all: Type: {}, Length: {}'.format(type(boxes_all), len(boxes_all)))
        # print(boxes_all[0].shape)
        # print('data_dict_all: Type: {}, length: {}'.format(type(data_dict_all), len(data_dict_all)))
        # print('frame_list: Type: {}, Length: {}'.format(type(frame_list), len(frame_list)))

        for idx, frame in enumerate(frame_list):
            # print('index: {}'.format(str(idx)))
            boxes = boxes_all[0].astype('f')
            scores = scores_all[0].astype('f')
            dets_nms = []
            # print(scores.shape)
            for j in range(1, scores.shape[1]):
                cls_scores = scores[:, j, np.newaxis]
                cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                cls_dets = cls_dets[keep, :]
                cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
                dets_nms.append(cls_dets)
            bbox_frame_queue.put(draw_bbox_on_frame(frame, dets_nms, classes, scale=scales[idx]))
        print(toc())
def process_image_fun(imagesPath=None, fileOp=None, vis=None, model_params_list=None, count=0):
    # init rfcn dcn detect model (mxnet)
    # model_params_list = init_detect_model()
    # num_classes = RFCN_DCN_CONFIG['num_classes']  # 0 is background,
    classes = RFCN_DCN_CONFIG['num_classes_name_list']
    min_threshold = min(list(RFCN_DCN_CONFIG['need_label_thresholds'].values()))
    im_name = imagesPath
    all_can_read_image = []
    data = []
    all_can_read_image.append(im_name)
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    im, im_scale = resize(im_name, target_size, max_size, stride=config.network.IMAGE_STRIDE)
    im_tensor = transform(im, config.network.PIXEL_MEANS)
    im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
    data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    predictor = Predictor(model_params_list[0], data_names, label_names,
                          context=[mx.gpu(1)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=model_params_list[1], aux_params=model_params_list[2])
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    for idx, im_name in enumerate(all_can_read_image):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > min_threshold, :]
            dets_nms.append(cls_dets)
        # print('testing {} {:.4f}s'.format(im_name, toc()))
        im = show_boxes_write_rg(im=im_name, dets=dets_nms, classes=classes, scale=1,
                                 vis=vis, fileOp=fileOp, count=count)
    return im
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_flownet_rfcn'
    model = '/../model/rfcn_dff_flownet_vid'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_batch_test_symbol(config)
    sym.save('dff_rfcn.json')
    # print config.network.get_internals()
    # mx.visualization.plot_network(sym).view()
    # print sym.get_intervals()
    # x = input()

    # set up class names
    num_classes = 31
    classes = ['airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car', 'cattle',
               'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda', 'hamster', 'horse',
               'lion', 'lizard', 'monkey', 'motorcycle', 'rabbit', 'red_panda', 'sheep',
               'snake', 'squirrel', 'tiger', 'train', 'turtle', 'watercraft', 'whale', 'zebra']

    # load demo data
    image_names = glob.glob(cur_path + '/../demo/sample/*.JPEG')
    output_dir = cur_path + '/../demo/rfcn_dff_batch/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    key_frame_interval = 10

    #
    data = []
    key_im_tensor = None
    cur_im_tensor = []
    im_info_tensor = []
    image_names_list = []
    image_names_batch = []
    for idx, im_name in enumerate(image_names):
        assert os.path.exists(im_name), ('%s does not exist'.format(im_name))
        im = cv2.imread(im_name)  # , cv2.IMREAD_COLOR)# | cv2.IMREAD_IGNORE_ORIENTATION)
        # im = cv2.resize(im, (176,176,3))
        # height, width, channel = img.shape
        # gray = im = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        # im = np.zeros(height * width * channel).reshape((height, width, channel))
        # im[:,:,0] = gray
        # im[:,:,1] = gray
        # im[:,:,2] = gray
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        # print im.shape
        # print im_scale.shape
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        if idx % key_frame_interval == 0:
            key_im_tensor = im_tensor
        else:
            cur_im_tensor.append(im_tensor)
            im_info_tensor.append(im_info)
            image_names_batch.append(im_name)
        if (idx + 1) % key_frame_interval == 0 or idx == len(image_names) - 1:
            data.append({'data_other': np.concatenate(cur_im_tensor),
                         'im_info': np.concatenate(im_info_tensor),
                         'data_key': key_im_tensor})
            key_im_tensor = None
            cur_im_tensor = []
            im_info_tensor = []
            image_names_list.append(image_names_batch)
            image_names_batch = []

    # get predictor
    data_names = ['data_other', 'im_info', 'data_key']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[
        ('data_other', (key_frame_interval - 1, 3, max([v[0] for v in config.SCALES]),
                        max([v[1] for v in config.SCALES]))),
        ('data_key', (1, 3, max([v[0] for v in config.SCALES]),
                      max([v[1] for v in config.SCALES]))),
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + model, 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    # print predictor
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(1):
        data_batch = mx.io.DataBatch(data=[data[j]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[j])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[:, 2] for i in xrange(len(data_batch.data))]
        print scales[0].shape
        scores_all, boxes_all, data_dict = im_batch_detect(predictor, data_batch, data_names, scales, config)
    print "warmup done"

    # test
    time = 0
    count = 0
    for idx, im_names in enumerate(image_names_list):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[:, 2] for i in xrange(len(data_batch.data))]

        tic()
        scores_all, boxes_all, data_dict = im_batch_detect(predictor, data_batch, data_names, scales, config)
        time += toc()
        count += len(scores_all)
        print 'testing {} {:.4f}s x {:d}'.format(im_names[0], time / count, len(scores_all))
        '''
        for batch_idx in xrange(len(scores_all)):
            boxes = boxes_all[batch_idx].astype('f')
            scores = scores_all[batch_idx].astype('f')
            dets_nms = []
            for j in range(1, scores.shape[1]):
                cls_scores = scores[:, j, np.newaxis]
                cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                cls_dets = cls_dets[keep, :]
                cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
                dets_nms.append(cls_dets)
            # visualize
            im = cv2.imread(im_names[batch_idx])
            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
            # show_boxes(im, dets_nms, classes, 1)
            out_im = draw_boxes(im, dets_nms, classes, 1)
            _, filename = os.path.split(im_names[batch_idx])
            cv2.imwrite(output_dir + filename, out_im)
        '''
    print 'done'
def gpu_mask_voting(masks, boxes, scores, num_classes, max_per_image, im_width, im_height,
                    nms_thresh, merge_thresh, binary_thresh=0.4, device_id=0):
    """
    A wrapper function, note we already know the class of boxes and masks
    """
    nms = gpu_nms_wrapper(nms_thresh, device_id)
    # Intermediate results
    t_boxes = [[] for _ in xrange(num_classes)]
    t_scores = [[] for _ in xrange(num_classes)]
    t_all_scores = []
    for i in xrange(1, num_classes):
        dets = np.hstack((boxes.astype(np.float32), scores[:, i:i + 1]))
        inds = nms(dets)
        num_keep = min(len(inds), max_per_image)
        inds = inds[:num_keep]
        t_boxes[i] = boxes[inds]
        t_scores[i] = scores[inds, i]
        t_all_scores.extend(scores[inds, i])

    sorted_scores = np.sort(t_all_scores)[::-1]
    num_keep = min(len(sorted_scores), max_per_image)
    thresh = max(sorted_scores[num_keep - 1], 1e-3)

    # inds array to record which mask should be aggregated together
    candidate_inds = []
    # weight for each element in the candidate inds
    candidate_weights = []
    # start position for candidate array
    candidate_start = []
    candidate_scores = []
    class_bar = [[] for _ in xrange(num_classes)]
    for i in xrange(1, num_classes):
        keep = np.where(t_scores[i] >= thresh)
        t_boxes[i] = t_boxes[i][keep]
        t_scores[i] = t_scores[i][keep]

    # organize helper variable for gpu mask voting
    for c in xrange(1, num_classes):
        num_boxes = len(t_boxes[c])
        for i in xrange(num_boxes):
            cur_ov = bbox_overlaps(boxes.astype(np.float), t_boxes[c][i, np.newaxis].astype(np.float))
            cur_inds = np.where(cur_ov >= merge_thresh)[0]
            candidate_inds.extend(cur_inds)
            cur_weights = scores[cur_inds, c]
            cur_weights = cur_weights / sum(cur_weights)
            candidate_weights.extend(cur_weights)
            candidate_start.append(len(candidate_inds))
        candidate_scores.extend(t_scores[c])
        class_bar[c] = len(candidate_scores)

    candidate_inds = np.array(candidate_inds, dtype=np.int32)
    candidate_weights = np.array(candidate_weights, dtype=np.float32)
    candidate_start = np.array(candidate_start, dtype=np.int32)
    candidate_scores = np.array(candidate_scores, dtype=np.float32)

    # the input masks/boxes are relatively large
    # select only a subset of them are useful for mask merge
    unique_inds = np.unique(candidate_inds)
    unique_inds_order = unique_inds.argsort()
    unique_map = {}
    for i in xrange(len(unique_inds)):
        unique_map[unique_inds[i]] = unique_inds_order[i]
    for i in xrange(len(candidate_inds)):
        candidate_inds[i] = unique_map[candidate_inds[i]]
    boxes = boxes[unique_inds, ...]
    masks = masks[unique_inds, ...]

    boxes = np.round(boxes)
    result_mask, result_box = mask_voting_kernel(boxes, masks, candidate_inds, candidate_start,
                                                 candidate_weights, binary_thresh,
                                                 im_height, im_width, device_id)
    result_box = np.hstack((result_box, candidate_scores[:, np.newaxis]))

    list_result_box = [[] for _ in xrange(num_classes)]
    list_result_mask = [[] for _ in xrange(num_classes)]
    cls_start = 0
    for i in xrange(1, num_classes):
        cls_end = class_bar[i]
        cls_box = result_box[cls_start:cls_end, :]
        cls_mask = result_mask[cls_start:cls_end, :]
        valid_ind = np.where((cls_box[:, 2] > cls_box[:, 0]) & (cls_box[:, 3] > cls_box[:, 1]))[0]
        list_result_box[i] = cls_box[valid_ind, :]
        list_result_mask[i] = cls_mask[valid_ind, :]
        cls_start = cls_end
    return list_result_mask, list_result_box
def main(): # get symbol pprint.pprint(config) config.symbol = "resnet_v1_101_fpn_dcn_rcnn" if not args.rfcn_only else "resnet_v1_101_fpn_rcnn" sym_instance = eval(config.symbol + '.' + config.symbol)() sym = sym_instance.get_symbol(config, is_train=False) # set up class names num_classes = 81 classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] # test # find all videos video_path = "../../tmp"#"../../aic2018/track1/track1_videos" video_files = sorted([ x for x in os.listdir(video_path) if x.endswith(".mp4")]) save_path = "../../tmp/output"#"../../aic2018/track1/output" if not os.path.isdir(save_path): os.makedirs(save_path) print("processing {} videos...".format(len(video_files))) pbar = tqdm(total=len(video_files)) for vf in video_files: vid = imageio.get_reader(os.path.join(video_path, vf),'ffmpeg') data = [] for idx, im in enumerate(vid): if idx == 0: #assert os.path.exists(im_path + im_name), ('%s does not exist'.format(im_path + im_name)) #im = cv2.imread(im_path + im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) target_size = config.SCALES[0][0] max_size = config.SCALES[0][1] im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE) im_tensor = transform(im, config.network.PIXEL_MEANS) im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32) data.append({'data': im_tensor, 'im_info': im_info}) else: break #data.append({'data': None, 'im_info': None}) # get predictor data_names = ['data', 'im_info'] label_names = [] data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))] max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]] provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))] provide_label = [None for i in xrange(len(data))] print("hhhhh") print(provide_data, provide_label) print("hhhhh") arg_params, aux_params = load_param(cur_path + '/../model/demo_model/' + ('fpn_dcn_coco' if not args.rfcn_only else 'fpn_coco'), 0, process=True) #print(type(arg_params), type(aux_params)) predictor = Predictor(sym, data_names, label_names, context=[mx.gpu(0)], max_data_shapes=max_data_shape, provide_data=provide_data, provide_label=provide_label, arg_params=arg_params, aux_params=aux_params) nms = gpu_nms_wrapper(config.TEST.NMS, 0) print("successfully load model") vout = [] # write to video writer = skvideo.io.FFmpegWriter(os.path.join(save_path, vf.replace(".mp4","_out.mp4")), outputdict={'-vcodec': 'libx264', '-b': '300000000'}) for frame_idx, im in enumerate(vid): #im = cv2.imread(im_path + im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) im_original = im.copy() target_size = 
config.SCALES[0][0] max_size = config.SCALES[0][1] im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE) im_tensor = transform(im, config.network.PIXEL_MEANS) im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32) data_idx = [{"data": im_tensor, "im_info": im_info}] data_idx = [[mx.nd.array(data_idx[i][name]) for name in data_names] for i in xrange(len(data_idx))] data_batch = mx.io.DataBatch(data=[data_idx[0]], label=[], pad=0, index=idx, provide_data=[[(k, v.shape) for k, v in zip(data_names, data_idx[0])]], provide_label=[None]) scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))] tic() scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config) boxes = boxes[0].astype('f') scores = scores[0].astype('f') dets_nms = [] num_dets = 0 for j in range(1, scores.shape[1]): cls_scores = scores[:, j, np.newaxis] cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4] cls_dets = np.hstack((cls_boxes, cls_scores)) keep = nms(cls_dets) cls_dets = cls_dets[keep, :] cls_dets = cls_dets[cls_dets[:, -1] > 0.65, :] dets_nms.append(cls_dets) num_dets += cls_dets.shape[0] print 'testing {} the {} th frame at {:.4f}s, detections {}'.format(vf, frame_idx, toc(), num_dets) # save results #im = cv2.imread(im_path + im_name) #im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) #im_bbox = show_boxes(im, dets_nms, classes, 1) #cv2.imwrite(im_path + im_name.replace(".jpg", "_bbox.jpg"), im_bbox) save_im, outputs = show_boxes(im_original, dets_nms, classes, 1, False) #cv2.imwrite(os.path.join(save_path, "{}_{}.jpg".format(vf.replace(".mp4", ""), str(frame_idx).zfill(5))), save_im) writer.writeFrame(save_im) for out in outputs: vout.append([frame_idx] + out) # save the whole video detection into pickle file writer.close() with open(os.path.join(save_path, vf.replace(".mp4", "_detect.pkl")), "wb") as f: pickle.dump(vout, f, protocol=2) pbar.update(1) pbar.close() print 'done'
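# Each frame above is resized so the short side matches config.SCALES[0][0] (capped by the
# long-side limit), converted to a 1x3xHxW float tensor with pixel means subtracted, and
# paired with im_info = [height, width, scale]. A minimal sketch of that preprocessing using
# only cv2 and NumPy (the mean triple and the omission of stride padding are assumptions,
# not this repo's transform/resize helpers):
import cv2
import numpy as np

def preprocess_frame(im, target_size=600, max_size=1000, pixel_means=(103.9, 116.8, 123.7)):
    h, w = im.shape[:2]
    scale = float(target_size) / min(h, w)
    if round(scale * max(h, w)) > max_size:             # cap the long side
        scale = float(max_size) / max(h, w)
    im = cv2.resize(im, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
    tensor = im.astype(np.float32) - np.asarray(pixel_means)   # per-channel mean subtraction
    tensor = tensor.transpose(2, 0, 1)[np.newaxis]              # HWC -> 1x3xHxW
    im_info = np.array([[tensor.shape[2], tensor.shape[3], scale]], dtype=np.float32)
    return tensor, im_info

# tensor, im_info = preprocess_frame(np.zeros((480, 640, 3), dtype=np.uint8))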
def main(): # get symbol pprint.pprint(config) config.symbol = 'resnet_v1_101_rfcn' model = '/../model/rfcn_vid' sym_instance = eval(config.symbol + '.' + config.symbol)() sym = sym_instance.get_test_symbol(config) # set up class names num_classes = 31 classes = ['airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car', 'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda', 'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle', 'rabbit', 'red_panda', 'sheep', 'snake', 'squirrel', 'tiger', 'train', 'turtle', 'watercraft', 'whale', 'zebra'] # load demo data image_names = glob.glob(cur_path + '/../demo/ILSVRC2015_val_00007010/*.JPEG') output_dir = cur_path + '/../demo/rfcn/' if not os.path.exists(output_dir): os.makedirs(output_dir) # data = [] for im_name in image_names: assert os.path.exists(im_name), ('%s does not exist'.format(im_name)) im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) target_size = config.SCALES[0][0] max_size = config.SCALES[0][1] im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE) im_tensor = transform(im, config.network.PIXEL_MEANS) im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32) data.append({'data': im_tensor, 'im_info': im_info}) # get predictor data_names = ['data', 'im_info'] label_names = [] data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))] max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]] provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))] provide_label = [None for i in xrange(len(data))] arg_params, aux_params = load_param(cur_path + model, 0, process=True) predictor = Predictor(sym, data_names, label_names, context=[mx.gpu(0)], max_data_shapes=max_data_shape, provide_data=provide_data, provide_label=provide_label, arg_params=arg_params, aux_params=aux_params) nms = gpu_nms_wrapper(config.TEST.NMS, 0) # warm up for j in xrange(2): data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0, provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]], provide_label=[None]) scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))] scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config) # test time = 0 count = 0 for idx, im_name in enumerate(image_names): data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx, provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]], provide_label=[None]) scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))] tic() scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config) time += toc() count += 1 print 'testing {} {:.4f}s'.format(im_name, time/count) boxes = boxes[0].astype('f') scores = scores[0].astype('f') dets_nms = [] for j in range(1, scores.shape[1]): cls_scores = scores[:, j, np.newaxis] cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4] cls_dets = np.hstack((cls_boxes, cls_scores)) keep = nms(cls_dets) cls_dets = cls_dets[keep, :] cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :] dets_nms.append(cls_dets) # visualize im = cv2.imread(im_name) im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) # show_boxes(im, dets_nms, classes, 1) out_im = draw_boxes(im, dets_nms, classes, 1) _, filename = os.path.split(im_name) cv2.imwrite(output_dir + filename,out_im) print 'done'
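# In the per-class loops above, boxes[:, 4:8] is used when CLASS_AGNOSTIC is set (one shared
# regressed box per RoI, with columns 0:4 typically belonging to the background class) and
# boxes[:, j*4:(j+1)*4] otherwise (one regressed box per class). A tiny NumPy sketch of the
# two layouts, with toy shapes:
import numpy as np

num_rois, num_classes = 2, 3
per_class_boxes = np.arange(num_rois * 4 * num_classes, dtype=np.float32).reshape(num_rois, 4 * num_classes)
agnostic_boxes = np.arange(num_rois * 8, dtype=np.float32).reshape(num_rois, 8)

j = 2  # some foreground class index
cls_boxes_per_class = per_class_boxes[:, j * 4:(j + 1) * 4]   # columns 8..11 for class 2
cls_boxes_agnostic = agnostic_boxes[:, 4:8]                   # same slice for every class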
def gpu_mask_voting(masks, boxes, scores, num_classes, max_per_image, im_width, im_height, nms_thresh, merge_thresh, binary_thresh=0.4, device_id=0): #0.4 """ A wrapper function, note we already know the class of boxes and masks """ nms = gpu_nms_wrapper(nms_thresh, device_id) # Intermediate results t_boxes = [[] for _ in xrange(num_classes)] t_scores = [[] for _ in xrange(num_classes)] t_all_scores = [] for i in xrange(1, num_classes): dets = np.hstack((boxes.astype(np.float32), scores[:, i:i + 1])) inds = nms(dets) num_keep = min(len(inds), max_per_image) inds = inds[:num_keep] t_boxes[i] = boxes[inds] t_scores[i] = scores[inds, i] t_all_scores.extend(scores[inds, i]) sorted_scores = np.sort(t_all_scores)[::-1] num_keep = min(len(sorted_scores), max_per_image) thresh = max(sorted_scores[num_keep - 1], 1e-3) # inds array to record which mask should be aggregated together candidate_inds = [] # weight for each element in the candidate inds candidate_weights = [] # start position for candidate array candidate_start = [] candidate_scores = [] class_bar = [[] for _ in xrange(num_classes)] for i in xrange(1, num_classes): keep = np.where(t_scores[i] >= thresh) t_boxes[i] = t_boxes[i][keep] t_scores[i] = t_scores[i][keep] # organize helper variable for gpu mask voting for c in xrange(1, num_classes): num_boxes = len(t_boxes[c]) for i in xrange(num_boxes): cur_ov = bbox_overlaps(boxes.astype(np.float), t_boxes[c][i, np.newaxis].astype(np.float)) cur_inds = np.where(cur_ov >= merge_thresh)[0] candidate_inds.extend(cur_inds) cur_weights = scores[cur_inds, c] cur_weights = cur_weights / sum(cur_weights) candidate_weights.extend(cur_weights) candidate_start.append(len(candidate_inds)) candidate_scores.extend(t_scores[c]) class_bar[c] = len(candidate_scores) candidate_inds = np.array(candidate_inds, dtype=np.int32) candidate_weights = np.array(candidate_weights, dtype=np.float32) candidate_start = np.array(candidate_start, dtype=np.int32) candidate_scores = np.array(candidate_scores, dtype=np.float32) # the input masks/boxes are relatively large # select only a subset of them are useful for mask merge unique_inds = np.unique(candidate_inds) unique_inds_order = unique_inds.argsort() unique_map = {} for i in xrange(len(unique_inds)): unique_map[unique_inds[i]] = unique_inds_order[i] for i in xrange(len(candidate_inds)): candidate_inds[i] = unique_map[candidate_inds[i]] boxes = boxes[unique_inds, ...] masks = masks[unique_inds, ...] 
boxes = np.round(boxes) result_mask, result_box = mask_voting_kernel(boxes, masks, candidate_inds, candidate_start, candidate_weights, binary_thresh, im_height, im_width, device_id) result_box = np.hstack((result_box, candidate_scores[:, np.newaxis])) list_result_box = [[] for _ in xrange(num_classes)] list_result_mask = [[] for _ in xrange(num_classes)] cls_start = 0 for i in xrange(1, num_classes): cls_end = class_bar[i] cls_box = result_box[cls_start:cls_end, :] cls_mask = result_mask[cls_start:cls_end, :] valid_ind = np.where((cls_box[:, 2] > cls_box[:, 0]) & (cls_box[:, 3] > cls_box[:, 1]))[0] ######################## # cls_box = cls_box[valid_ind, :] # cls_mask = cls_mask[valid_ind, :] # #print 'cls_box', cls_box # def nms(dets, thresh): # """ # greedily select boxes with high confidence and overlap with current maximum <= thresh # rule out overlap >= thresh # :param dets: [[x1, y1, x2, y2 score]] # :param thresh: retain overlap < thresh # :return: indexes to keep # """ # if dets.shape[0] == 0: # return [] # x1 = dets[:, 0] # y1 = dets[:, 1] # x2 = dets[:, 2] # y2 = dets[:, 3] # scores = dets[:, 4] # areas = (x2 - x1 + 1) * (y2 - y1 + 1) # order = scores.argsort()[::-1] # keep = [] # while order.size > 0: # i = order[0] # keep.append(i) # xx1 = np.maximum(x1[i], x1[order[1:]]) # yy1 = np.maximum(y1[i], y1[order[1:]]) # xx2 = np.minimum(x2[i], x2[order[1:]]) # yy2 = np.minimum(y2[i], y2[order[1:]]) # w = np.maximum(0.0, xx2 - xx1 + 1) # h = np.maximum(0.0, yy2 - yy1 + 1) # inter = w * h # ovr = inter / (areas[i] + areas[order[1:]] - inter) # inds = np.where(ovr <= thresh)[0] # order = order[inds + 1] # return keep # #print 'aaaaaaaa' # keep = nms(cls_box, 0.3) #bei niedrigen treshhold wirfts welche raus # #print 'aa', len(keep), len(boxes_scored_ar) # #print 'keep', keep # #print 'a', len(boxes_scored_ar) # #print 'b', len(boxes_scored_ar[keep, :]) # cls_box = cls_box[keep, :] # cls_mask = cls_mask[keep, :] # # print 'cls_box', cls_box # # print 'cls_mask', cls_mask # list_result_box[i] = cls_box # list_result_mask[i] = cls_mask ################# list_result_box[i] = cls_box[valid_ind, :] #auscommenten wenn nms an. list_result_mask[i] = cls_mask[valid_ind, :] #auscommenten wehn nms an cls_start = cls_end return list_result_mask, list_result_box
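# The commented-out block above contains a plain-NumPy greedy NMS that was being tried as a
# second suppression pass on the voted boxes. A cleaned, standalone version of that helper
# (same greedy-by-score logic; dets rows are [x1, y1, x2, y2, score]):
import numpy as np

def greedy_nms(dets, thresh):
    if dets.shape[0] == 0:
        return []
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]            # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # overlap of the current best box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # drop boxes whose IoU with the kept box exceeds the threshold
        order = order[np.where(ovr <= thresh)[0] + 1]
    return keep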
def main(): # get symbol pprint.pprint(config) config.symbol = 'resnet_v1_101_flownet_rfcn' model = '/../model/rfcn_dff_flownet_vid' sym_instance = eval(config.symbol + '.' + config.symbol)() key_sym = sym_instance.get_key_test_symbol(config) cur_sym = sym_instance.get_cur_test_symbol(config) # set up class names num_classes = 31 classes = [ 'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car', 'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda', 'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle', 'rabbit', 'red_panda', 'sheep', 'snake', 'squirrel', 'tiger', 'train', 'turtle', 'watercraft', 'whale', 'zebra' ] # load demo data image_names = glob.glob(cur_path + '/../demo/ILSVRC2015_val_00007010/*.JPEG') output_dir = cur_path + '/../demo/rfcn_dff/' if not os.path.exists(output_dir): os.makedirs(output_dir) key_frame_interval = 10 # data = [] key_im_tensor = None for idx, im_name in enumerate(image_names): assert os.path.exists(im_name), ('%s does not exist'.format(im_name)) im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) target_size = config.SCALES[0][0] max_size = config.SCALES[0][1] im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE) im_tensor = transform(im, config.network.PIXEL_MEANS) im_info = np.array( [[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32) if idx % key_frame_interval == 0: key_im_tensor = im_tensor data.append({ 'data': im_tensor, 'im_info': im_info, 'data_key': key_im_tensor, 'feat_key': np.zeros((1, config.network.DFF_FEAT_DIM, 1, 1)) }) # get predictor data_names = ['data', 'im_info', 'data_key', 'feat_key'] label_names = [] data = [[mx.nd.array(data[i][name]) for name in data_names] for i in range(len(data))] max_data_shape = [[ ('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES]))), ('data_key', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES]))), ]] provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in range(len(data))] provide_label = [None for i in range(len(data))] arg_params, aux_params = load_param(cur_path + model, 0, process=True) key_predictor = Predictor(key_sym, data_names, label_names, context=[mx.gpu(0)], max_data_shapes=max_data_shape, provide_data=provide_data, provide_label=provide_label, arg_params=arg_params, aux_params=aux_params) cur_predictor = Predictor(cur_sym, data_names, label_names, context=[mx.gpu(0)], max_data_shapes=max_data_shape, provide_data=provide_data, provide_label=provide_label, arg_params=arg_params, aux_params=aux_params) nms = gpu_nms_wrapper(config.TEST.NMS, 0) # warm up for j in range(2): data_batch = mx.io.DataBatch(data=[data[j]], label=[], pad=0, index=0, provide_data=[[ (k, v.shape) for k, v in zip(data_names, data[j]) ]], provide_label=[None]) scales = [ data_batch.data[i][1].asnumpy()[0, 2] for i in range(len(data_batch.data)) ] if j % key_frame_interval == 0: scores, boxes, data_dict, feat = im_detect(key_predictor, data_batch, data_names, scales, config) else: data_batch.data[0][-1] = feat data_batch.provide_data[0][-1] = ('feat_key', feat.shape) scores, boxes, data_dict, _ = im_detect(cur_predictor, data_batch, data_names, scales, config) print("warmup done") # test time = 0 count = 0 for idx, im_name in enumerate(image_names): data_batch = mx.io.DataBatch( data=[data[idx]], label=[], pad=0, index=idx, provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]], provide_label=[None]) scales = [ 
data_batch.data[i][1].asnumpy()[0, 2] for i in range(len(data_batch.data)) ] tic() if idx % key_frame_interval == 0: scores, boxes, data_dict, feat = im_detect(key_predictor, data_batch, data_names, scales, config) else: data_batch.data[0][-1] = feat data_batch.provide_data[0][-1] = ('feat_key', feat.shape) scores, boxes, data_dict, _ = im_detect(cur_predictor, data_batch, data_names, scales, config) time += toc() count += 1 print('testing {} {:.4f}s'.format(im_name, time / count)) boxes = boxes[0].astype('f') scores = scores[0].astype('f') dets_nms = [] for j in range(1, scores.shape[1]): cls_scores = scores[:, j, np.newaxis] cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4] cls_dets = np.hstack((cls_boxes, cls_scores)) keep = nms(cls_dets) cls_dets = cls_dets[keep, :] cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :] dets_nms.append(cls_dets) # visualize im = cv2.imread(im_name) im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) # show_boxes(im, dets_nms, classes, 1) out_im = draw_boxes(im, dets_nms, classes, 1) _, filename = os.path.split(im_name) cv2.imwrite(output_dir + filename, out_im) print('done')
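# The DFF loop above runs the full key_predictor only every key_frame_interval frames and
# feeds the cached key feature to the cheaper cur_predictor in between. A minimal sketch of
# that dispatch pattern (extract_feature and propagate_with_flow are hypothetical stand-ins,
# not functions from this repo):
import numpy as np

def extract_feature(frame):
    return frame.mean()                      # hypothetical "expensive" key-frame feature

def propagate_with_flow(key_feat, frame):
    return key_feat                          # hypothetical cheap propagation for non-key frames

def process_sequence(frames, key_frame_interval=10):
    results, key_feat = [], None
    for idx, frame in enumerate(frames):
        if idx % key_frame_interval == 0:
            key_feat = extract_feature(frame)              # full network on key frames
            feat = key_feat
        else:
            feat = propagate_with_flow(key_feat, frame)    # reuse the cached key feature
        results.append(feat)
    return results

# process_sequence([np.full((2, 2), i, dtype=np.float32) for i in range(25)])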
def main(tempFileList, fileOp): # get symbol pprint.pprint(config) config.symbol = 'resnet_v1_101_rfcn_dcn' sym_instance = eval(config.symbol + '.' + config.symbol)() sym = sym_instance.get_symbol(config, is_train=False) out_dir = os.path.join( cur_path, 'demo/output/terror-det-rg-data-output/terror-det-v0.9-test/JPEGImages' ) if not os.path.exists(out_dir): os.makedirs(out_dir) # set up class names num_classes = 7 classes = [ 'tibetan flag', 'guns', 'knives', 'not terror', 'islamic flag', 'isis flag' ] # load demo data image_names = tempFileList data = [] for im_name in image_names: im_file = im_name print(im_file) im = cv2.imread(im_file, cv2.IMREAD_COLOR) target_size = config.SCALES[0][0] max_size = config.SCALES[0][1] im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE) im_tensor = transform(im, config.network.PIXEL_MEANS) im_info = np.array( [[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32) data.append({'data': im_tensor, 'im_info': im_info}) # get predictor data_names = ['data', 'im_info'] label_names = [] data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))] max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]] provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))] provide_label = [None for i in xrange(len(data))] arg_params, aux_params = load_param(cur_path + '/demo/models/' + ('rfcn_voc'), 10, process=True) #modify by zxt #mx.model.save_checkpoint('f1/final', 10, sym, arg_params, aux_params) predictor = Predictor(sym, data_names, label_names, context=[mx.gpu(0)], max_data_shapes=max_data_shape, provide_data=provide_data, provide_label=provide_label, arg_params=arg_params, aux_params=aux_params) nms = gpu_nms_wrapper(config.TEST.NMS, 0) # warm up for j in xrange(2): data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0, provide_data=[[ (k, v.shape) for k, v in zip(data_names, data[0]) ]], provide_label=[None]) scales = [ data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data)) ] scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config) # test # fileOp = open(os.path.join(cur_path, 'terror-det-rg-test-result.txt'), 'w') fileOp = fileOp for idx, im_name in enumerate(image_names): print("begining process %s" % (im_name)) data_batch = mx.io.DataBatch( data=[data[idx]], label=[], pad=0, index=idx, provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]], provide_label=[None]) scales = [ data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data)) ] tic() scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config) boxes = boxes[0].astype('f') scores = scores[0].astype('f') dets_nms = [] for j in range(1, scores.shape[1]): cls_scores = scores[:, j, np.newaxis] cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4] cls_dets = np.hstack((cls_boxes, cls_scores)) keep = nms(cls_dets) cls_dets = cls_dets[keep, :] cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :] dets_nms.append(cls_dets) print 'testing {} {:.4f}s'.format(im_name, toc()) # visualize im = cv2.imread(im_name) #im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) im_result = show_boxes(fileOp, im_name, im, dets_nms, classes, 1) cv2.imwrite(out_dir + im_name.split('/')[-1], im_result) print 'done'
def forward(self, is_train, req, in_data, out_data, aux): before_pyramid_proposal = datetime.now() nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id) batch_size = in_data[0].shape[0] if batch_size > 1: raise ValueError( "Sorry, multiple images each device is not implemented") # for each (H, W) location i # generate A anchor boxes centered on cell i # apply predicted bbox deltas at cell i to each of the A anchors # clip predicted boxes to image # remove predicted boxes with either height or width < threshold # sort all (proposal, score) pairs by score from highest to lowest # take top pre_nms_topN proposals before NMS # apply NMS with threshold 0.7 to remaining proposals # take after_nms_topN proposals after NMS # return the top proposals (-> RoIs top, scores top) LAYER_NUM = len(in_data) / 2 LAYER_NUM = 11 if LAYER_NUM == 7: cls_prob_dict = { 'stride64': in_data[6], 'stride32': in_data[5], 'stride16': in_data[4], 'stride8': in_data[3], 'stride4': in_data[2], 'stride2': in_data[1], 'stride1': in_data[0], } bbox_pred_dict = { 'stride64': in_data[13], 'stride32': in_data[12], 'stride16': in_data[11], 'stride8': in_data[10], 'stride4': in_data[9], 'stride2': in_data[8], 'stride1': in_data[7], } elif LAYER_NUM == 6: cls_prob_dict = { 'stride64': in_data[5], 'stride32': in_data[4], 'stride16': in_data[3], 'stride8': in_data[2], 'stride4': in_data[1], 'stride2': in_data[0], } bbox_pred_dict = { 'stride64': in_data[11], 'stride32': in_data[10], 'stride16': in_data[9], 'stride8': in_data[8], 'stride4': in_data[7], 'stride2': in_data[6], } elif LAYER_NUM == 5: cls_prob_dict = { 'stride64': in_data[4], 'stride32': in_data[3], 'stride16': in_data[2], 'stride8': in_data[1], 'stride4': in_data[0], } bbox_pred_dict = { 'stride64': in_data[9], 'stride32': in_data[8], 'stride16': in_data[7], 'stride8': in_data[6], 'stride4': in_data[5], } elif LAYER_NUM == 2: cls_prob_dict = { 'stride64': in_data[4], 'stride32': in_data[3], } bbox_pred_dict = { 'stride64': in_data[9], 'stride32': in_data[8], } elif LAYER_NUM == 11: cls_prob_dict = { 'stride64': in_data[0], } bbox_pred_dict = { 'stride64': in_data[1], } elif LAYER_NUM == 1: cls_prob_dict = { 'stride1': in_data[0], } bbox_pred_dict = { 'stride1': in_data[1], } elif LAYER_NUM == 3: cls_prob_dict = { 'stride64': in_data[2], 'stride32': in_data[1], 'stride1': in_data[0], } bbox_pred_dict = { 'stride64': in_data[5], 'stride32': in_data[4], 'stride1': in_data[3], } ''' cls_prob_dict = { 'stride8': in_data[3], 'stride4': in_data[2], 'stride2': in_data[1], 'stride1': in_data[0], } bbox_pred_dict = { 'stride8': in_data[7], 'stride4': in_data[6], 'stride2': in_data[5], 'stride1': in_data[4], } ''' ''' cls_prob_dict = { 'stride2': in_data[1], 'stride1': in_data[0], } bbox_pred_dict = { 'stride2': in_data[3], 'stride1': in_data[2], } ''' pre_nms_topN = self._rpn_pre_nms_top_n post_nms_topN = self._rpn_post_nms_top_n min_size = self._rpn_min_size proposal_list = [] score_list = [] channel_list = [] before_feat = datetime.now() for s in self._feat_stride: stride = int(s) sub_anchors = generate_anchors(base_size=stride, scales=self._scales, ratios=self._ratios) #print "cls_prob_dict['stride' + str(s)].shape:"+str(cls_prob_dict['stride' + str(s)].shape) scores = cls_prob_dict['stride' + str(s)].asnumpy()[:, self._num_anchors:, :, :] if DEBUG: scores1 = cls_prob_dict['stride' + str(s)].asnumpy() print "scores.shape:" + str(scores.shape) print "scores1.shape:" + str(scores1.shape) #print "scores.shape:"+str(scores.shape) bbox_deltas = 
bbox_pred_dict['stride' + str(s)].asnumpy() #print "bbox_deltas.shape:"+str(bbox_deltas.shape) im_info = in_data[-1].asnumpy()[0, :] # 1. Generate proposals from bbox_deltas and shifted anchors # use real image size instead of padded feature map sizes height, width = int(im_info[0] / stride), int(im_info[1] / stride) # Enumerate all shifts shift_x = np.arange(0, width) * stride shift_y = np.arange(0, height) * stride shift_x, shift_y = np.meshgrid(shift_x, shift_y) shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() # Enumerate all shifted anchors: # # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors before_enume = datetime.now() A = self._num_anchors K = shifts.shape[0] anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape( (1, K, 4)).transpose((1, 0, 2)) anchors = anchors.reshape((K * A, 4)) after_enume = datetime.now() #print "enume time:"+str((after_enume-before_enume).seconds) # Transpose and reshape predicted bbox transformations to get them # into the same order as the anchors: # # bbox deltas will be (1, 4 * A, H, W) format # transpose to (1, H, W, 4 * A) # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a) # in slowest to fastest order bbox_deltas = self._clip_pad(bbox_deltas, (height, width)) bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4)) # Same story for the scores: # # scores are (1, A, H, W) format # transpose to (1, H, W, A) # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a) scores = self._clip_pad(scores, (height, width)) scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) if DEBUG: print "scores[:100]:" + str(scores[:50]) channels = np.ones((scores.shape)) * stride # Convert anchors into proposals via bbox transformations before_pred = datetime.now() proposals = bbox_pred(anchors, bbox_deltas) after_pred = datetime.now() #print "pred_time:" #print (after_pred-before_pred).seconds # 2. clip predicted boxes to image proposals = clip_boxes(proposals, im_info[:2]) # 3. remove predicted boxes with either height or width < threshold # (NOTE: convert min_size to input image scale stored in im_info[2]) if DEBUG: print str(min_size) print str(im_info[2]) keep = self._filter_boxes(proposals, min_size * im_info[2]) proposals = proposals[keep, :] if DEBUG: print "proposals3:" + str(proposals[0:10]) scores = scores[keep] channels = channels[keep] proposal_list.append(proposals) score_list.append(scores) channel_list.append(channels) after_feat = datetime.now() #print "feat time:" #print (after_feat-before_feat).seconds proposals = np.vstack(proposal_list) scores = np.vstack(score_list) channels = np.vstack(channel_list) # 4. sort all (proposal, score) pairs by score from highest to lowest # 5. take top pre_nms_topN (e.g. 6000) before_sort = datetime.now() order = scores.ravel().argsort()[::-1] after_sort = datetime.now() #print "sort time:" #print (after_sort-before_sort).seconds if pre_nms_topN > 0: order = order[:pre_nms_topN] proposals = proposals[order, :] scores = scores[order] channels = channels[order] if DEBUG: print '-------1-------' print channels.shape for s in self._feat_stride: print "stride:" + str(s) print len(np.where(channels == float(s))[0]) print "proposals:" + str(proposals[0:20]) # 6. apply nms (e.g. threshold = 0.7) # 7. take after_nms_topN (e.g. 300) # 8. 
return the top proposals (-> RoIs top) det = np.hstack((proposals, scores)).astype(np.float32) keep = nms(det) if post_nms_topN > 0: keep = keep[:post_nms_topN] # pad to ensure output size remains unchanged if len(keep) < post_nms_topN: pad = npr.choice(keep, size=post_nms_topN - len(keep)) keep = np.hstack((keep, pad)) proposals = proposals[keep, :] scores = scores[keep] channels = channels[keep] if DEBUG: print '-------2-------' print channels.shape for s in self._feat_stride: print "stride:" + str(s) print len(np.where(channels == float(s))[0]) print "proposals:" + str(proposals[0:20]) print "scores:" + str(scores[0:20]) f_chan = open('channels.txt', 'w') for ii in range(channels.shape[0]): f_chan.write(str(channels[ii][0]) + ' ') f_chan.close() # Output rois array # Our RPN implementation only supports a single input image, so all # batch inds are 0 batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32) blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) # if is_train: self.assign(out_data[0], req[0], blob) #print "out_data[0].shape"+str(out_data[0].shape) if self._output_score: self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False)) after_pyramid_proposal = datetime.now()
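# The proposal layers above enumerate all shifted anchors by broadcasting A base anchors of
# shape (1, A, 4) against K feature-map cell shifts of shape (K, 1, 4) and reshaping to
# (K*A, 4). A minimal NumPy sketch with two hand-written base anchors (the real base anchors
# come from generate_anchors):
import numpy as np

stride = 16
height, width = 2, 3                                    # tiny feature map
base_anchors = np.array([[-8, -8, 8, 8],                # toy anchors, not the real set
                         [-16, -8, 16, 8]], dtype=np.float32)

shift_x = np.arange(0, width) * stride
shift_y = np.arange(0, height) * stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()   # (K, 4)

A, K = base_anchors.shape[0], shifts.shape[0]
anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
anchors = anchors.reshape((K * A, 4))                   # (K*A, 4), rows ordered by (h, w, a)
# anchors.shape == (12, 4)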
def forward(self, is_train, req, in_data, out_data, aux): nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id) batch_size = in_data[0].shape[0] if batch_size > 1: raise ValueError( "Sorry, multiple images each device is not implemented") # for each (H, W) location i # generate A anchor boxes centered on cell i # apply predicted bbox deltas at cell i to each of the A anchors # clip predicted boxes to image # remove predicted boxes with either height or width < threshold # sort all (proposal, score) pairs by score from highest to lowest # take top pre_nms_topN proposals before NMS # apply NMS with threshold 0.7 to remaining proposals # take after_nms_topN proposals after NMS # return the top proposals (-> RoIs top, scores top) if LAYER_NUM == 7: cls_prob_dict = { 'stride64': in_data[6], 'stride32': in_data[5], 'stride16': in_data[4], 'stride8': in_data[3], 'stride4': in_data[2], 'stride2': in_data[1], 'stride1': in_data[0], } bbox_pred_dict = { 'stride64': in_data[13], 'stride32': in_data[12], 'stride16': in_data[11], 'stride8': in_data[10], 'stride4': in_data[9], 'stride2': in_data[8], 'stride1': in_data[7], } elif LAYER_NUM == 6: cls_prob_dict = { 'stride64': in_data[5], 'stride32': in_data[4], 'stride16': in_data[3], 'stride8': in_data[2], 'stride4': in_data[1], 'stride2': in_data[0], } bbox_pred_dict = { 'stride64': in_data[11], 'stride32': in_data[10], 'stride16': in_data[9], 'stride8': in_data[8], 'stride4': in_data[7], 'stride2': in_data[6], } elif LAYER_NUM == 5: cls_prob_dict = { 'stride64': in_data[4], 'stride32': in_data[3], 'stride16': in_data[2], 'stride8': in_data[1], 'stride4': in_data[0], } bbox_pred_dict = { 'stride64': in_data[9], 'stride32': in_data[8], 'stride16': in_data[7], 'stride8': in_data[6], 'stride4': in_data[5], } ''' cls_prob_dict = { 'stride8': in_data[3], 'stride4': in_data[2], 'stride2': in_data[1], 'stride1': in_data[0], } bbox_pred_dict = { 'stride8': in_data[7], 'stride4': in_data[6], 'stride2': in_data[5], 'stride1': in_data[4], } ''' ''' cls_prob_dict = { 'stride2': in_data[1], 'stride1': in_data[0], } bbox_pred_dict = { 'stride2': in_data[3], 'stride1': in_data[2], } ''' pre_nms_topN = self._rpn_pre_nms_top_n post_nms_topN = self._rpn_post_nms_top_n min_size = self._rpn_min_size proposal_list = [] score_list = [] channel_record_list = [] crop_nums = 9 for s in self._feat_stride: stride = int(s) sub_anchors = generate_anchors(base_size=stride, scales=self._scales, ratios=self._ratios) #print "cls_prob_dict['stride' + str(s)].shape:"+str(cls_prob_dict['stride' + str(s)].shape) #print cls_prob_dict['stride' + str(s)].asnumpy().shape scores = cls_prob_dict['stride' + str(s)].asnumpy()[:, self._num_anchors:, :, :] #print "scores.shape:"+str(scores.shape) bbox_deltas = bbox_pred_dict['stride' + str(s)].asnumpy() #print "bbox_deltas.shape:"+str(bbox_deltas.shape) im_info = in_data[-1].asnumpy()[0, :] # 1. 
Generate proposals from bbox_deltas and shifted anchors # use real image size instead of padded feature map sizes height, width = int(im_info[0] / stride), int(im_info[1] / stride) # Enumerate all shifts shift_x = np.arange(0, width) * stride shift_y = np.arange(0, height) * stride shift_x, shift_y = np.meshgrid(shift_x, shift_y) shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() # Enumerate all shifted anchors: # # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = self._num_anchors K = shifts.shape[0] temp_anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape( (1, K, 4)).transpose((1, 0, 2)) temp_anchors = temp_anchors.reshape((K * A, 4)) anchors = np.zeros((0, 4)) channel_records = np.zeros((0, 1)) for channel in range(crop_nums): anchors = np.vstack((anchors, temp_anchors)) channels = np.ones(K * A) * channel channels = channels.reshape((-1, 1)) channel_records = np.vstack((channel_records, channels)) # Transpose and reshape predicted bbox transformations to get them # into the same order as the anchors: # # bbox deltas will be (1, 4 * A, H, W) format # transpose to (1, H, W, 4 * A) # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a) # in slowest to fastest order bbox_deltas = self._clip_pad(bbox_deltas, (height, width)) bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4)) # Same story for the scores: # # scores are (1, A, H, W) format # transpose to (1, H, W, A) # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a) scores = self._clip_pad(scores, (height, width)) scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) # Convert anchors into proposals via bbox transformations proposals = bbox_pred(anchors, bbox_deltas) # 2. clip predicted boxes to image proposals = clip_boxes(proposals, im_info[:2]) # 3. remove predicted boxes with either height or width < threshold # (NOTE: convert min_size to input image scale stored in im_info[2]) #print "proposals.shape" #print proposals.shape keep = self._filter_boxes(proposals, min_size * im_info[2]) proposals = proposals[keep, :] #print "scores.shape" #print scores.shape scores = scores[keep] channel_records = channel_records[keep] proposal_list.append(proposals) score_list.append(scores) channel_record_list.append(channel_records) channel_records = np.vstack(channel_record_list) proposals = np.vstack(proposal_list) scores = np.vstack(score_list) # 4. sort all (proposal, score) pairs by score from highest to lowest # 5. take top pre_nms_topN (e.g. 6000) order = scores.ravel().argsort()[::-1] if pre_nms_topN > 0: order = order[:pre_nms_topN] proposals = proposals[order, :] scores = scores[order] channel_records = channel_records[order] #print "channel_records:" #print channel_records #print channel_records.shape # 6. apply nms (e.g. threshold = 0.7) # 7. take after_nms_topN (e.g. 300) # 8. return the top proposals (-> RoIs top) # 9. 
nms on different channel keeps = np.zeros(0) avg_post_nms_topN = int(post_nms_topN / crop_nums) for i in range(crop_nums): channel_index = np.where(channel_records == i)[0] temp_ch_proposals = proposals[channel_index, :] #print proposals.shape #print temp_ch_proposals.shape temp_scores = scores[channel_index] #print temp_scores.shape det = np.hstack( (temp_ch_proposals, temp_scores)).astype(np.float32) #print det.shape #keep = np.zeros(1) if det.shape[0] > 0: keep = nms(det) if avg_post_nms_topN > 0: keep = keep[:avg_post_nms_topN] # pad to ensure output size remains unchanged if len(keep) < avg_post_nms_topN: pad = npr.choice(keep, size=avg_post_nms_topN - len(keep)) keep = np.hstack((keep, pad)) keeps = np.hstack((keeps, channel_index[keep])).astype(np.int) proposals = proposals[keeps, :] scores = scores[keeps] channel_records = channel_records[keeps] #proposals.hstack((proposals,channel_records)) #print channel_records.shape # Output rois array # Our RPN implementation only supports a single input image, so all # batch inds are 0 batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32) blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) # if is_train: self.assign(out_data[0], req[0], blob) #print "out_data[0].shape"+str(out_data[0].shape) if self._output_score: self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
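# Both proposal layers above pad the NMS keep list by re-sampling kept indices so the output
# blob always has exactly post_nms_topN rows (a custom operator's output shape must stay
# fixed). A minimal sketch of that padding step:
import numpy as np
import numpy.random as npr

def pad_keep(keep, post_nms_topN):
    keep = np.asarray(keep)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
        if len(keep) < post_nms_topN:
            # re-sample (with replacement) from the kept indices to fill the quota
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
            keep = np.hstack((keep, pad))
    return keep

# pad_keep([4, 9, 1], post_nms_topN=6) -> 6 indices drawn from {4, 9, 1}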
def forward(self, is_train, req, in_data, out_data, aux): nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id) batch_size = in_data[0].shape[0] if batch_size > 1: raise ValueError( "Sorry, multiple images each device is not implemented") # for each (H, W) location i # generate A anchor boxes centered on cell i # apply predicted bbox deltas at cell i to each of the A anchors # clip predicted boxes to image # remove predicted boxes with either height or width < threshold # sort all (proposal, score) pairs by score from highest to lowest # take top pre_nms_topN proposals before NMS # apply NMS with threshold 0.7 to remaining proposals # take after_nms_topN proposals after NMS # return the top proposals (-> RoIs top, scores top) pre_nms_topN = self._rpn_pre_nms_top_n post_nms_topN = self._rpn_post_nms_top_n min_size = self._rpn_min_size # the first set of anchors are background probabilities # keep the second part scores = in_data[0].asnumpy()[:, self._num_anchors:, :, :] bbox_deltas = in_data[1].asnumpy() im_info = in_data[2].asnumpy()[0, :] if DEBUG: print('im_size: ({}, {})'.format(im_info[0], im_info[1])) print('scale: {}'.format(im_info[2])) # 1. Generate proposals from bbox_deltas and shifted anchors # use real image size instead of padded feature map sizes height, width = int(im_info[0] / self._feat_stride), int( im_info[1] / self._feat_stride) if DEBUG: print('score map size: {}'.format(scores.shape)) print("resudial: {}".format( (scores.shape[2] - height, scores.shape[3] - width))) # Enumerate all shifts shift_x = np.arange(0, width) * self._feat_stride shift_y = np.arange(0, height) * self._feat_stride shift_x, shift_y = np.meshgrid(shift_x, shift_y) shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() # Enumerate all shifted anchors: # # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = self._num_anchors K = shifts.shape[0] anchors = self._anchors.reshape((1, A, 4)) + shifts.reshape( (1, K, 4)).transpose((1, 0, 2)) anchors = anchors.reshape((K * A, 4)) # Transpose and reshape predicted bbox transformations to get them # into the same order as the anchors: # # bbox deltas will be (1, 4 * A, H, W) format # transpose to (1, H, W, 4 * A) # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a) # in slowest to fastest order bbox_deltas = self._clip_pad(bbox_deltas, (height, width)) bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4)) # Same story for the scores: # # scores are (1, A, H, W) format # transpose to (1, H, W, A) # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a) scores = self._clip_pad(scores, (height, width)) scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) # Convert anchors into proposals via bbox transformations proposals = bbox_pred(anchors, bbox_deltas) # 2. clip predicted boxes to image proposals = clip_boxes(proposals, im_info[:2]) # 3. remove predicted boxes with either height or width < threshold # (NOTE: convert min_size to input image scale stored in im_info[2]) keep = self._filter_boxes(proposals, min_size * im_info[2]) proposals = proposals[keep, :] scores = scores[keep] # 4. sort all (proposal, score) pairs by score from highest to lowest # 5. take top pre_nms_topN (e.g. 6000) order = scores.ravel().argsort()[::-1] if pre_nms_topN > 0: order = order[:pre_nms_topN] proposals = proposals[order, :] scores = scores[order] # 6. apply nms (e.g. threshold = 0.7) # 7. 
take after_nms_topN (e.g. 300) # 8. return the top proposals (-> RoIs top) det = np.hstack((proposals, scores)).astype(np.float32) keep = nms(det) if post_nms_topN > 0: keep = keep[:post_nms_topN] # pad to ensure output size remains unchanged if len(keep) < post_nms_topN: pad = npr.choice(keep, size=post_nms_topN - len(keep)) keep = np.hstack((keep, pad)) proposals = proposals[keep, :] scores = scores[keep] # Output rois array # Our RPN implementation only supports a single input image, so all # batch inds are 0 batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32) blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) self.assign(out_data[0], req[0], blob) if self._output_score: self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
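# The (1, 4*A, H, W) bbox deltas and (1, A, H, W) scores above are transposed to put the
# spatial dimensions first and then flattened, so row k of each flattened array corresponds
# to the same (h, w, a) cell as row k of the shifted-anchor array. A minimal NumPy sketch of
# that reordering:
import numpy as np

A, H, W = 3, 2, 2
bbox_deltas = np.random.rand(1, 4 * A, H, W).astype(np.float32)
scores = np.random.rand(1, A, H, W).astype(np.float32)

# (1, 4A, H, W) -> (1, H, W, 4A) -> (H*W*A, 4), rows ordered by (h, w, a)
bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
# (1, A, H, W) -> (1, H, W, A) -> (H*W*A, 1), same ordering
scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
# bbox_deltas.shape == (12, 4); scores.shape == (12, 1)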
def main(): # get symbol pprint.pprint(config) config.symbol = 'impression_network_dynamic_offset_sparse' model = '/../local_run_output/impression_dynamic_offset-lr-10000-times-neighbor-4-dense-4' first_sym_instance = eval(config.symbol + '.' + config.symbol)() key_sym_instance = eval(config.symbol + '.' + config.symbol)() cur_sym_instance = eval(config.symbol + '.' + config.symbol)() first_sym = first_sym_instance.get_first_test_symbol_impression(config) key_sym = key_sym_instance.get_key_test_symbol_impression(config) cur_sym = cur_sym_instance.get_cur_test_symbol_impression(config) # set up class names num_classes = 31 classes = [ 'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car', 'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda', 'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle', 'rabbit', 'red_panda', 'sheep', 'snake', 'squirrel', 'tiger', 'train', 'turtle', 'watercraft', 'whale', 'zebra' ] # load demo data image_names = glob.glob(cur_path + '/../demo/ILSVRC2015_val_00011005/*.JPEG') output_dir = cur_path + '/../demo/motion-prior-output-00011005/' if not os.path.exists(output_dir): os.makedirs(output_dir) key_frame_interval = 10 image_names.sort() data = [] for idx, im_name in enumerate(image_names): assert os.path.exists(im_name), ('%s does not exist'.format(im_name)) im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) target_size = config.SCALES[0][0] max_size = config.SCALES[0][1] im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE) im_tensor = transform(im, config.network.PIXEL_MEANS) im_info = np.array( [[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32) if idx % key_frame_interval == 0: if idx == 0: data_oldkey = im_tensor.copy() data_newkey = im_tensor.copy() data_cur = im_tensor.copy() else: data_oldkey = data_newkey.copy() data_newkey = im_tensor else: data_cur = im_tensor shape = im_tensor.shape infer_height = int(np.ceil(shape[2] / 16.0)) infer_width = int(np.ceil(shape[3] / 16.0)) data.append({ 'data_oldkey': data_oldkey, 'data_newkey': data_newkey, 'data_cur': data_cur, 'im_info': im_info, 'impression': np.zeros( (1, config.network.DFF_FEAT_DIM, infer_height, infer_width)), 'key_feat_task': np.zeros( (1, config.network.DFF_FEAT_DIM, infer_height, infer_width)) }) # get predictor data_names = [ 'data_oldkey', 'data_cur', 'data_newkey', 'im_info', 'impression', 'key_feat_task' ] label_names = [] data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))] max_data_shape = [[ ('data_oldkey', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES]))), ('data_newkey', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES]))), ('data_cur', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES]))), ('impression', (1, 1024, 38, 63)), ('key_feat_task', (1, 1024, 38, 63)) ]] provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))] provide_label = [None for i in xrange(len(data))] arg_params, aux_params = load_param(cur_path + model, 4, process=True) first_predictor = Predictor(first_sym, data_names, label_names, context=[mx.gpu(0)], max_data_shapes=max_data_shape, provide_data=provide_data, provide_label=provide_label, arg_params=arg_params, aux_params=aux_params) key_predictor = Predictor(key_sym, data_names, label_names, context=[mx.gpu(0)], max_data_shapes=max_data_shape, provide_data=provide_data, provide_label=provide_label, 
arg_params=arg_params, aux_params=aux_params) cur_predictor = Predictor(cur_sym, data_names, label_names, context=[mx.gpu(0)], max_data_shapes=max_data_shape, provide_data=provide_data, provide_label=provide_label, arg_params=arg_params, aux_params=aux_params) nms = gpu_nms_wrapper(config.TEST.NMS, 0) # warm up for j in xrange(2): data_batch = mx.io.DataBatch(data=[data[j]], label=[], pad=0, index=0, provide_data=[[ (k, v.shape) for k, v in zip(data_names, data[j]) ]], provide_label=[None]) scales = [ data_batch.data[i][3].asnumpy()[0, 2] for i in xrange(len(data_batch.data)) ] if j % key_frame_interval == 0: # keyframe if j == 0: # first frame scores, boxes, data_dict, conv_feat, _, _, _ = im_detect_impression_online( first_predictor, data_batch, data_names, scales, config) feat_task = conv_feat impression = conv_feat else: # keyframe data_batch.data[0][-2] = impression data_batch.provide_data[0][-2] = ('impression', impression.shape) scores, boxes, data_dict, conv_feat, impression, feat_task = im_detect_impression_online( key_predictor, data_batch, data_names, scales, config) else: # current frame data_batch.data[0][-1] = feat_task data_batch.provide_data[0][-1] = ('key_feat_task', feat_task.shape) scores, boxes, data_dict, _, _, _, _ = im_detect_impression_online( cur_predictor, data_batch, data_names, scales, config) print "warmup done" # test time = 0 count = 0 for idx, im_name in enumerate(image_names): data_batch = mx.io.DataBatch( data=[data[idx]], label=[], pad=0, index=idx, provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]], provide_label=[None]) scales = [ data_batch.data[i][3].asnumpy()[0, 2] for i in xrange(len(data_batch.data)) ] tic() print(idx) if idx % key_frame_interval == 0: # keyframe if idx == 0: # first frame scores, boxes, data_dict, conv_feat, _, _, _ = im_detect_impression_online( first_predictor, data_batch, data_names, scales, config) feat_task = conv_feat impression = conv_feat feat_task_numpy = feat_task.asnumpy() np.save("features/impression_%s.npy" % (idx), feat_task_numpy) else: # keyframe data_batch.data[0][-2] = impression data_batch.provide_data[0][-2] = ('impression', impression.shape) scores, boxes, data_dict, conv_feat, impression, feat_task, _ = im_detect_impression_online( key_predictor, data_batch, data_names, scales, config) feat_task_key_numpy = feat_task.asnumpy() np.save("features/impression_%s.npy" % (idx), feat_task_key_numpy) else: # current frame data_batch.data[0][-1] = feat_task data_batch.provide_data[0][-1] = ('key_feat_task', feat_task.shape) scores, boxes, data_dict, _, _, _, feat_task_cur = im_detect_impression_online( cur_predictor, data_batch, data_names, scales, config) if idx >= 1: feat_task_cur_numpy = feat_task_cur.asnumpy() np.save("features/impression_%s.npy" % (idx), feat_task_cur_numpy) #import pdb;pdb.set_trace() time += toc() count += 1 print 'testing {} {:.4f}s'.format(im_name, time / count) boxes = boxes[0].astype('f') scores = scores[0].astype('f') dets_nms = [] for j in range(1, scores.shape[1]): cls_scores = scores[:, j, np.newaxis] cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4] cls_dets = np.hstack((cls_boxes, cls_scores)) keep = nms(cls_dets) cls_dets = cls_dets[keep, :] cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :] dets_nms.append(cls_dets) # visualize im = cv2.imread(im_name) #im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) # show_boxes(im, dets_nms, classes, 1) out_im = draw_boxes(im, dets_nms, classes, 1) _, filename = os.path.split(im_name) cv2.imwrite(output_dir 
+ filename, out_im) print 'done'
# provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
# provide_label = [None for i in xrange(len(data))]
# arg_params, aux_params = load_param(cur_path + model, 0, process=True)
# key_predictor = Predictor(key_sym, data_names, label_names,
#                           context=[ctx], max_data_shapes=max_data_shape,
#                           provide_data=provide_data, provide_label=provide_label,
#                           arg_params=arg_params, aux_params=aux_params)
# cur_predictor = Predictor(cur_sym, data_names, label_names,
#                           context=[ctx], max_data_shapes=max_data_shape,
#                           provide_data=provide_data, provide_label=provide_label,
#                           arg_params=arg_params, aux_params=aux_params)

if device_name == 'cpu':
    nms = cpu_nms_wrapper(config.TEST.NMS)
else:
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

# print data[0]['data'].shape

#%%
from collections import namedtuple

BatchKeyFeat = namedtuple('BatchKeyFeat', ['data'])
BatchKeyRpn = namedtuple('BatchKeyRpn', ['conv_feat', 'im_info'])
BatchKey = namedtuple('BatchKey', ['data', 'im_info', 'data_key', 'feat_key'])

# lists to store running time
time_list_key_feat = []
time_list_key_rpn = []
time_list_key = []
time_list_cur_flow = []
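# The snippet above selects cpu_nms_wrapper or gpu_nms_wrapper depending on the target
# device. A minimal sketch of the same selection behind a small factory (the nms.nms import
# path is assumed to match this repo's layout; the fallback logic here is an illustration,
# not repo code):
def make_nms(threshold, device_name='gpu', device_id=0):
    if device_name == 'cpu':
        from nms.nms import cpu_nms_wrapper
        return cpu_nms_wrapper(threshold)
    from nms.nms import gpu_nms_wrapper
    return gpu_nms_wrapper(threshold, device_id)

# nms = make_nms(0.3, device_name='cpu')   # e.g. on machines without a CUDA build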
def inference_rcnn_UADETRAC(cfg, dataset, image_set, root_path, dataset_path, ctx, prefix, epoch, vis, ignore_cache, shuffle, has_rpn, proposal, thresh, logger=None, output_path=None): if not logger: assert False, 'require a logger' # print cfg pprint.pprint(cfg) logger.info('testing cfg:{}\n'.format(pprint.pformat(cfg))) # load symbol and testing data if has_rpn: sym_instance = eval(cfg.symbol + '.' + cfg.symbol)() sym = sym_instance.get_symbol(cfg, is_train=False) imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path) #roidb = imdb.gt_roidb_Shuo() roidb = imdb.gt_roidb() else: sym_instance = eval(cfg.symbol + '.' + cfg.symbol)() sym = sym_instance.get_symbol_rfcn(cfg, is_train=False) imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path) gt_roidb = imdb.gt_roidb_Shuo() roidb = eval('imdb.' + proposal + '_roidb')(gt_roidb) print 'len(roidb):', len(roidb) # get test data iter test_data = TestLoader(roidb, cfg, batch_size=len(ctx), shuffle=shuffle, has_rpn=has_rpn) # load model arg_params, aux_params = load_param(prefix, epoch, process=True) print 'inferring: ', prefix, ' epoch: ', epoch """# write parameters to file print 'type(arg_params):',type(arg_params) print 'type(aux_params):',type(aux_params) thefile1 = open('/raid10/home_ext/Deformable-ConvNets/data/data_Shuo/UADETRAC/arg_params.txt','w') thefile2 = open('/raid10/home_ext/Deformable-ConvNets/data/data_Shuo/UADETRAC/aux_params.txt','w') for item_arg in arg_params.items(): thefile1.write(item_arg[0] + str(type(item_arg[1])) + str(item_arg[1].shape)+'\n') for item_aux in aux_params.items(): thefile2.write(item_aux[0] + str(type(item_aux[1])) + str(item_aux[1].shape)+'\n') """ # infer shape data_shape_dict = dict(test_data.provide_data_single) sym_instance.infer_shape(data_shape_dict) sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False) # decide maximum shape data_names = [k[0] for k in test_data.provide_data_single] label_names = None max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES])))]] if not has_rpn: max_data_shape.append( ('rois', (cfg.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5))) # create predictor predictor = Predictor(sym, data_names, label_names, context=ctx, max_data_shapes=max_data_shape, provide_data=test_data.provide_data, provide_label=test_data.provide_label, arg_params=arg_params, aux_params=aux_params) nms = gpu_nms_wrapper(cfg.TEST.NMS, 0) # start detection # pred_eval(predictor, test_data, imdb, cfg, vis=vis, ignore_cache=ignore_cache, thresh=thresh, logger=logger) print 'test_data.size', test_data.size print 'test_data:', test_data print 'data_names:', data_names print 'test_data.provide_data:', test_data.provide_data print 'test_data.provide_label:', test_data.provide_label nnn = 0 classes = ['__background', 'vehicle'] #num_classes = 10 #classes = ['__DontCare__','Car','Suv','SmallTruck','MediumTruck','LargeTruck','Pedestrian','Bus','Van','GroupofPeople'] for im_info, data_batch in test_data: print nnn #print 'roidb[nnn]:',roidb[nnn]['image'] image_name = roidb[nnn]['image'] tic() scales = [iim_info[0, 2] for iim_info in im_info] scores_all, boxes_all, data_dict_all = im_detect( predictor, data_batch, data_names, scales, cfg) boxes = boxes_all[0].astype('f') scores = scores_all[0].astype('f') dets_nms = [] for j in range(1, scores.shape[1]): cls_scores = scores[:, j, np.newaxis] cls_boxes = boxes[:, 4:8] if cfg.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4] cls_dets = 
np.hstack((cls_boxes, cls_scores)) keep = nms(cls_dets) cls_dets = cls_dets[keep, :] #cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :] dets_nms.append(cls_dets) print 'testing {} {:.4f}s'.format(image_name, toc()) # visualize im = cv2.imread(image_name) im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) #print 'cls_dets:',cls_dets #show_boxes(im, dets_nms, classes, 1) nnn = nnn + 1 image_name_length = len(image_name.split('/')) sequence_name = image_name.split('/')[image_name_length - 2] output_file = os.path.join( '/raid10/home_ext/Deformable-ConvNets/data/data_Shuo/UADETRAC', 'Outputs', sequence_name + '_Det_DFCN.txt') frame_id = int(image_name.split('/')[image_name_length - 1][3:8]) thefile = open(output_file, 'a') det_id = 0 for x_small, y_small, x_large, y_large, prob in dets_nms[0]: det_id += 1 thefile.write( str(frame_id) + ',' + str(det_id) + ',' + str(x_small) + ',' + str(y_small) + ',' + str(max(x_large - x_small, 0.001)) + ',' + str(max(y_large - y_small, 0.001)) + ',' + str(prob) + '\n')
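# Detections above are appended to a per-sequence text file, one line per box in the form
# frame_id,det_id,x,y,width,height,score, with widths and heights floored at 0.001. A minimal
# sketch of that writer (toy detections; the output path is illustrative, not the hard-coded
# path above):
import numpy as np

def write_detrac_dets(output_file, frame_id, dets):
    # dets rows: [x1, y1, x2, y2, score]
    with open(output_file, 'a') as f:
        for det_id, (x1, y1, x2, y2, score) in enumerate(dets, start=1):
            w = max(x2 - x1, 0.001)          # guard against degenerate boxes, as above
            h = max(y2 - y1, 0.001)
            f.write('{},{},{},{},{},{},{}\n'.format(frame_id, det_id, x1, y1, w, h, score))

# write_detrac_dets('seq_Det_DFCN.txt', 1, np.array([[10, 20, 110, 220, 0.95]]))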
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_flownet_deeplab'
    model1 = '/../model/rfcn_dff_flownet_vid'
    model2 = '/../model/deeplab_dcn_cityscapes'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    key_sym = sym_instance.get_key_test_symbol(config)
    cur_sym = sym_instance.get_cur_test_symbol(config)

    # settings
    num_classes = 19
    interv = args.interval
    num_ex = args.num_ex

    # load demo data
    image_names = sorted(
        glob.glob(cur_path + '/../demo/cityscapes_data/cityscapes_frankfurt_all_i' +
                  str(interv) + '/*.png'))
    image_names = image_names[:interv * num_ex]
    label_files = sorted(
        glob.glob(cur_path + '/../demo/cityscapes_data/cityscapes_frankfurt_labels_all/*.png'))
    output_dir = cur_path + '/../demo/deeplab_dff/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    key_frame_interval = interv

    # pack per-frame inputs; every key frame refreshes data_key (the key image tensor)
    data = []
    key_im_tensor = None
    for idx, im_name in enumerate(image_names):
        assert os.path.exists(im_name), '{} does not exist'.format(im_name)
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        if idx % key_frame_interval == 0:
            key_im_tensor = im_tensor
        data.append({
            'data': im_tensor,
            'im_info': im_info,
            'data_key': key_im_tensor,
            'feat_key': np.zeros((1, config.network.DFF_FEAT_DIM, 1, 1))
        })

    # get predictor
    data_names = ['data', 'data_key', 'feat_key']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[
        ('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES]))),
        ('data_key', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES]))),
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]

    # models: rfcn_dff_flownet_vid, deeplab_cityscapes
    arg_params, aux_params = load_param_multi(cur_path + model1, cur_path + model2, 0, process=True)
    key_predictor = Predictor(key_sym, data_names, label_names,
                              context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                              provide_data=provide_data, provide_label=provide_label,
                              arg_params=arg_params, aux_params=aux_params)
    cur_predictor = Predictor(cur_sym, data_names, label_names,
                              context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                              provide_data=provide_data, provide_label=provide_label,
                              arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[j]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[j])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        if j % key_frame_interval == 0:
            # scores, boxes, data_dict, feat = im_detect(key_predictor, data_batch, data_names, scales, config)
            output_all, feat = im_segment(key_predictor, data_batch)
            output_all = [mx.ndarray.argmax(output['croped_score_output'], axis=1).asnumpy()
                          for output in output_all]
        else:
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            # scores, boxes, data_dict, _ = im_detect(cur_predictor, data_batch, data_names, scales, config)
            output_all, _ = im_segment(cur_predictor, data_batch)
            output_all = [mx.ndarray.argmax(output['croped_score_output'], axis=1).asnumpy()
                          for output in output_all]
    print "warmup done"

    # test
    time = 0
    count = 0
    hist = np.zeros((num_classes, num_classes))
    lb_idx = 0
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]], label=[], pad=0, index=idx,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        tic()
        if idx % key_frame_interval == 0:
            print '\nframe {} (key)'.format(idx)
            # scores, boxes, data_dict, feat = im_detect(key_predictor, data_batch, data_names, scales, config)
            output_all, feat = im_segment(key_predictor, data_batch)
            output_all = [mx.ndarray.argmax(output['croped_score_output'], axis=1).asnumpy()
                          for output in output_all]
        else:
            print '\nframe {} (intermediate)'.format(idx)
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            # scores, boxes, data_dict, _ = im_detect(cur_predictor, data_batch, data_names, scales, config)
            output_all, _ = im_segment(cur_predictor, data_batch)
            output_all = [mx.ndarray.argmax(output['croped_score_output'], axis=1).asnumpy()
                          for output in output_all]
        elapsed = toc()
        time += elapsed
        count += 1
        print 'testing {} {:.4f}s [{:.4f}s]'.format(im_name, elapsed, time / count)

        # save the colourised segmentation result
        pred = np.uint8(np.squeeze(output_all))
        segmentation_result = Image.fromarray(pred)
        pallete = getpallete(256)
        segmentation_result.putpalette(pallete)
        _, im_filename = os.path.split(im_name)
        segmentation_result.save(output_dir + '/seg_' + im_filename)

        # if annotation is available for this frame, accumulate mIoU
        label = None
        _, lb_filename = os.path.split(label_files[lb_idx])
        im_comps = im_filename.split('_')
        lb_comps = lb_filename.split('_')
        if im_comps[1] == lb_comps[1] and im_comps[2] == lb_comps[2]:
            print 'label {}'.format(lb_filename)
            label = np.asarray(Image.open(label_files[lb_idx]))
            if lb_idx < len(label_files) - 1:
                lb_idx += 1
        if label is not None:
            curr_hist = fast_hist(pred.flatten(), label.flatten(), num_classes)
            hist += curr_hist
            print 'mIoU {mIoU:.3f}'.format(
                mIoU=round(np.nanmean(per_class_iu(curr_hist)) * 100, 2))
            print '(cum) mIoU {mIoU:.3f}'.format(
                mIoU=round(np.nanmean(per_class_iu(hist)) * 100, 2))

    ious = per_class_iu(hist) * 100
    print ' '.join('{:.03f}'.format(i) for i in ious)
    print '===> final mIoU {mIoU:.3f}'.format(mIoU=round(np.nanmean(ious), 2))
    print 'done'
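# For reference, a minimal sketch of the confusion-matrix helpers behind the mIoU
# numbers printed above, following the common FCN-style evaluation code. The
# fast_hist / per_class_iu actually imported by this script may differ in detail
# (e.g. how the ignore label is filtered); this version assumes label values
# outside [0, num_classes) are ignored.
import numpy as np

def fast_hist(pred, label, num_classes):
    # keep only valid pixels, then bin (label, pred) pairs into an n x n confusion matrix
    k = (label >= 0) & (label < num_classes)
    return np.bincount(num_classes * label[k].astype(int) + pred[k],
                       minlength=num_classes ** 2).reshape(num_classes, num_classes)

def per_class_iu(hist):
    hist = hist.astype(np.float64)
    # IoU per class = diagonal / (row sum + column sum - diagonal)
    return np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))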
def process_one_batch_images_fun(isUrlFlag=False, one_batch_images_list=None,
                                 init_model_param=None, fileOp=None, vis=False):
    # init_model_param list: [sym, arg_params, aux_params]
    num_classes = 11  # class 0 is background
    # classes = ['tibetan flag', 'guns', 'knives', 'not terror', 'islamic flag', 'isis flag']
    classes = [
        'islamic flag', 'isis flag', 'tibetan flag', 'knives_true', 'guns_true',
        'knives_false', 'knives_kitchen', 'guns_anime', 'guns_tools', 'not terror'
    ]
    image_names = one_batch_images_list
    if len(image_names) <= 0:
        return
    all_can_read_image = []
    data = []
    for im_name in image_names:
        # print("process : %s" % (im_name))
        im = readImage_fun(isUrlFlag=isUrlFlag, imagePath=im_name)
        # skip images that cannot be read
        if np.shape(im) == ():
            print("ReadImageError : %s" % (im_name))
            continue
        all_can_read_image.append(im_name)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    predictor = Predictor(init_model_param[0], data_names, label_names,
                          context=[mx.gpu(int(args.gpuId))], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=init_model_param[1], aux_params=init_model_param[2])
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    for idx, im_name in enumerate(all_can_read_image):
        data_batch = mx.io.DataBatch(
            data=[data[idx]], label=[], pad=0, index=idx,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > args.threshold, :]
            dets_nms.append(cls_dets)
        print('testing {} {:.4f}s'.format(im_name, toc()))
        show_boxes(isUrlFlag=isUrlFlag, im_name=im_name, dets=dets_nms, classes=classes,
                   scale=1, vis=vis, fileOp=fileOp, flag=args.outputFileFlag)
    print('process one batch images done')
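# process_one_batch_images_fun expects a modest list of images per call, so a
# driver would typically chunk a long image list into fixed-size batches. A
# minimal, hypothetical chunking helper; the batch size, all_image_paths and
# result_file names below are illustrative, not from the original script.
def split_into_batches(image_list, batch_size=64):
    return [image_list[i:i + batch_size] for i in range(0, len(image_list), batch_size)]

# Hypothetical usage:
# for one_batch in split_into_batches(all_image_paths, batch_size=64):
#     process_one_batch_images_fun(isUrlFlag=False, one_batch_images_list=one_batch,
#                                  init_model_param=[sym, arg_params, aux_params],
#                                  fileOp=result_file, vis=False)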
def main():
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names; the background is not counted here, even though it is treated as label '0'
    num_classes = 4
    classes = ['vehicle', 'pedestrian', 'cyclist', 'traffic lights']

    # load demo data
    image_path = './data/RoadImages/test/'
    image_names = glob.glob(image_path + '*.jpg')
    print("Image amount {}".format(len(image_names)))
    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), '{} does not exist'.format(im_name)
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][1]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(
        './output/rfcn/road_obj/road_train_all/all/' + 'rfcn_road', 19, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # test
    notation_dict = {}
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]], label=[], pad=0, index=idx,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print 'testing {} {:.4f}s'.format(im_name, toc())
        # notation_list.append(get_notation(im_name, dets_nms, classes, scale=1.0, gen_bbox_pic=True))
        notation_dict.update(
            get_notation(im_name, dets_nms, classes, scale=1.0, gen_bbox_pic=True))
    save_notation_file(notation_dict)
    print 'done'
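# The per-class post-processing above (class-agnostic box slice, NMS, score
# threshold) is repeated in several of these demos. A minimal sketch of it as a
# standalone helper, assuming the (scores, boxes) layout returned by im_detect
# and an nms callable produced by gpu_nms_wrapper; the function name is
# illustrative, not part of the repo.
import numpy as np

def postprocess_detections(scores, boxes, nms, class_agnostic=True, thresh=0.7):
    """scores: (N, num_classes); boxes: (N, 8) if class-agnostic else (N, 4 * num_classes)."""
    dets_per_class = []
    for j in range(1, scores.shape[1]):  # skip background class 0
        cls_scores = scores[:, j, np.newaxis]
        cls_boxes = boxes[:, 4:8] if class_agnostic else boxes[:, j * 4:(j + 1) * 4]
        cls_dets = np.hstack((cls_boxes, cls_scores)).astype(np.float32)
        keep = nms(cls_dets)
        cls_dets = cls_dets[keep, :]
        dets_per_class.append(cls_dets[cls_dets[:, -1] > thresh, :])
    return dets_per_class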
def main():
    # settings
    num_classes = 19
    snip_len = 30
    version = str(args.version)
    interv = args.interval
    num_ex = args.num_ex
    avg_acc = args.avg_acc

    # validate params
    if version not in ['18', '34', '50', '101']:
        raise ValueError(
            "Invalid Accel version '%s' - must be one of Accel-{18,34,50,101}" % version)
    if interv < 1:
        raise ValueError("Invalid interval %d - must be >=1" % interv)
    if num_ex < 1:
        raise ValueError("Invalid num_ex %d - must be >=1" % num_ex)

    # get symbol
    pprint.pprint(config)
    config.symbol = 'accel_' + version
    model1 = '/../model/rfcn_dff_flownet_vid'
    model2 = '/../model/accel-' + version
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    key_sym = sym_instance.get_key_test_symbol(config)
    cur_sym = sym_instance.get_cur_test_symbol(config)

    path_demo_data = '/ebs/Accel/data/cityscapes/'
    path_demo_labels = '/ebs/Accel/data/cityscapes/'
    if path_demo_data == '' or path_demo_labels == '':
        raise ValueError("Must set path to demo data + labels")

    # load demo data
    image_names = sorted(glob.glob(path_demo_data + 'leftImg8bit_sequence/val/frankfurt/*.png'))
    image_names += sorted(glob.glob(path_demo_data + 'leftImg8bit_sequence/val/lindau/*.png'))
    image_names += sorted(glob.glob(path_demo_data + 'leftImg8bit_sequence/val/munster/*.png'))
    image_names = image_names[:snip_len * num_ex]
    label_files = sorted(glob.glob(path_demo_labels + 'gtFine/val/frankfurt/*trainIds.png'))
    label_files += sorted(glob.glob(path_demo_labels + 'gtFine/val/lindau/*trainIds.png'))
    label_files += sorted(glob.glob(path_demo_labels + 'gtFine/val/munster/*trainIds.png'))
    output_dir = cur_path + '/../demo/deeplab_dff/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    key_frame_interval = interv

    # select, from each 30-frame snippet, an interv-frame window positioned
    # relative to the labelled frame (index 19)
    lb_pos = 19
    image_names_trunc = []
    for i in range(num_ex):
        snip_pos = i * snip_len
        if avg_acc:
            offset = i % interv
        else:
            offset = interv - 1
        start_pos = lb_pos - offset
        image_names_trunc.extend(image_names[snip_pos + start_pos:snip_pos + start_pos + interv])
    image_names = image_names_trunc

    data = []
    key_im_tensor = None
    prev_im_tensor = None
    for idx, im_name in enumerate(image_names):
        assert os.path.exists(im_name), '{} does not exist'.format(im_name)
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        if idx % key_frame_interval == 0:
            key_im_tensor = im_tensor
        if prev_im_tensor is None:
            prev_im_tensor = im_tensor
        data.append({
            'data': im_tensor,
            'im_info': im_info,
            'data_key': prev_im_tensor,
            'feat_key': np.zeros((1, config.network.DFF_FEAT_DIM, 1, 1))
        })
        prev_im_tensor = im_tensor

    # get predictor
    data_names = ['data', 'data_key', 'feat_key']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[
        ('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES]))),
        ('data_key', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES]))),
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + model1, 0, process=True)
    arg_params_dcn, aux_params_dcn = load_param(cur_path + model2, 0, process=True)
    arg_params.update(arg_params_dcn)
    aux_params.update(aux_params_dcn)
    key_predictor = Predictor(key_sym, data_names, label_names,
                              context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                              provide_data=provide_data, provide_label=provide_label,
                              arg_params=arg_params, aux_params=aux_params)
    cur_predictor = Predictor(cur_sym, data_names, label_names,
                              context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                              provide_data=provide_data, provide_label=provide_label,
                              arg_params=arg_params, aux_params=aux_params)
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[j]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[j])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        if j % key_frame_interval == 0:
            output_all, feat = im_segment(key_predictor, data_batch)
            output_all = [mx.ndarray.argmax(output['croped_score_output'], axis=1).asnumpy()
                          for output in output_all]
        else:
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            output_all, feat = im_segment(cur_predictor, data_batch)
            output_key = 'croped_score_output' if version == '101' else 'correction_output'
            output_all = [mx.ndarray.argmax(output[output_key], axis=1).asnumpy()
                          for output in output_all]
    print "warmup done"

    # test
    time = 0
    count = 0
    hist = np.zeros((num_classes, num_classes))
    lb_idx = 0
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]], label=[], pad=0, index=idx,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        tic()
        if idx % key_frame_interval == 0:
            print '\n\nframe {} (key)'.format(idx)
            output_all, feat = im_segment(key_predictor, data_batch)
            output_all = [mx.ndarray.argmax(output['croped_score_output'], axis=1).asnumpy()
                          for output in output_all]
        else:
            print '\nframe {} (intermediate)'.format(idx)
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            output_all, feat = im_segment(cur_predictor, data_batch)
            output_key = 'croped_score_output' if version == '101' else 'correction_output'
            output_all = [mx.ndarray.argmax(output[output_key], axis=1).asnumpy()
                          for output in output_all]
        elapsed = toc()
        time += elapsed
        count += 1
        print 'testing {} {:.4f}s [{:.4f}s]'.format(im_name, elapsed, time / count)

        pred = np.uint8(np.squeeze(output_all))
        segmentation_result = Image.fromarray(pred)
        pallete = getpallete(256)
        segmentation_result.putpalette(pallete)
        _, im_filename = os.path.split(im_name)
        segmentation_result.save(output_dir + '/seg_' + im_filename)

        # compute accuracy
        label = None
        _, lb_filename = os.path.split(label_files[lb_idx])
        im_comps = im_filename.split('_')
        lb_comps = lb_filename.split('_')
        # check if annotation is available for this frame
        if im_comps[1] == lb_comps[1] and im_comps[2] == lb_comps[2]:
            print 'label {}'.format(lb_filename)
            label = np.asarray(Image.open(label_files[lb_idx]))
            if lb_idx < len(label_files) - 1:
                lb_idx += 1
        if label is not None:
            curr_hist = fast_hist(pred.flatten(), label.flatten(), num_classes)
            hist += curr_hist
            print 'mIoU {mIoU:.3f}'.format(
                mIoU=round(np.nanmean(per_class_iu(curr_hist)) * 100, 2))
            print '(cum) mIoU {mIoU:.3f}'.format(
                mIoU=round(np.nanmean(per_class_iu(hist)) * 100, 2))

    ious = per_class_iu(hist) * 100
    print ' '.join('{:.03f}'.format(i) for i in ious)
    print '===> final mIoU {mIoU:.3f}'.format(mIoU=round(np.nanmean(ious), 2))
    print 'done'
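# The key/current alternation above reduces to a simple rule: frame idx runs
# through the full key network when idx % key_frame_interval == 0 (refreshing the
# cached key feature), and through the cheaper current-frame network otherwise
# (reusing that cached feature). A tiny, self-contained helper that makes the
# schedule explicit; the function name is illustrative, not part of the repo.
def frame_schedule(num_frames, key_frame_interval):
    """Return a list of 'key' / 'cur' tags, one per frame index."""
    return ['key' if idx % key_frame_interval == 0 else 'cur'
            for idx in range(num_frames)]

# Example: frame_schedule(8, 3)
# -> ['key', 'cur', 'cur', 'key', 'cur', 'cur', 'key', 'cur']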