def load_rpn_data(self, proposal_path='data/proposals', full=False):
    """
    Load RPN proposals for this dataset, running NMS once and caching the result.

    If ``<proposal_path>/<self.name>_rpn_after_nms.pkl`` exists it is unpickled
    and returned as-is. Otherwise the raw proposals are read from
    ``<proposal_path>/<self.name>_rpn.pkl``, each image's proposals are filtered
    with ``nmsp`` in a pool of worker processes, and the filtered proposals are
    written to the cache file before being returned.

    :param proposal_path: directory that holds the proposal pickles
    :param full: unused by this method; kept for interface compatibility
    :return: ``(boxes, maps)``; ``boxes`` holds one array of kept proposals per
             image, ``maps`` is whatever the cache stored (an empty list when
             the cache is built by this call)
    :raises AssertionError: if neither the cache nor the raw proposal file exists
    """
    rpn_file = os.path.join(proposal_path, self.name + '_rpn.pkl')
    nms_cache_file = os.path.join(proposal_path, self.name + '_rpn_after_nms.pkl')

    if os.path.isfile(nms_cache_file):
        print('Reading cached proposals after ***NMS**** from {}'.format(nms_cache_file))
        # NOTE(review): unpickling runs arbitrary code; only point this at trusted files.
        with open(nms_cache_file, 'rb') as cache_in:
            boxes, maps = cPickle.load(cache_in)
        print('Done!')
        return boxes, maps

    print(rpn_file)
    print('Loading {}....'.format(rpn_file))
    assert os.path.exists(rpn_file), 'rpn data not found at {}'.format(rpn_file)
    with open(rpn_file, 'rb') as raw_in:
        box_list = cPickle.load(raw_in)
    print('Done!')

    print('Applying NMS...')
    per_image = [np.array(image_boxes) for image_boxes in box_list]
    pool = Pool(32)
    try:
        keeps = pool.map(nmsp, per_image)
    finally:
        # Always release the worker processes, even when a worker raised.
        pool.close()
        pool.join()
    print('Done!')

    boxes = [image_boxes[keep] for image_boxes, keep in zip(per_image, keeps)]
    maps = []  # nothing is computed for maps here; kept so the cache layout stays [boxes, maps]

    print('Caching proposals after NMS to {}'.format(nms_cache_file))
    with open(nms_cache_file, 'wb') as cache_out:
        cPickle.dump([boxes, maps], cache_out, cPickle.HIGHEST_PROTOCOL)
    print('Done!')
    return boxes, maps
def pred_eval(predictor, test_data, imdb, cfg, vis=False, thresh=1e-3, logger=None, ignore_cache=True):
    """
    Run detection over ``test_data``, apply per-class NMS and evaluate on ``imdb``.

    When a detections pickle already exists under ``imdb.result_path`` and
    ``ignore_cache`` is false, detection is skipped and the cached boxes are
    evaluated instead. Otherwise the detections are computed, written to that
    pickle and then evaluated.

    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle (unless ``vis`` is set)
    :param imdb: image database
    :param cfg: config; TEST.NMS, TEST.max_per_image and CLASS_AGNOSTIC are read
    :param vis: controls visualization
    :param thresh: valid detection threshold
    :param logger: optional logger that mirrors the progress and result messages
    :param ignore_cache: when True, never reuse a cached detections file
    :return: None
    """
    det_file = os.path.join(imdb.result_path, imdb.name + '_detections.pkl')

    if not ignore_cache and os.path.exists(det_file):
        with open(det_file, 'rb') as cached:
            all_boxes = cPickle.load(cached)
        info_str = imdb.evaluate_detections(all_boxes)
        if logger:
            logger.info('evaluate detections: \n{}'.format(info_str))
        return

    assert vis or not test_data.shuffle
    data_names = [entry[0] for entry in test_data.provide_data[0]]

    if not isinstance(test_data, PrefetchingIter):
        test_data = PrefetchingIter(test_data)

    nms = py_nms_wrapper(cfg.TEST.NMS)

    # limit detections to max_per_image over all classes
    max_per_image = cfg.TEST.max_per_image
    num_images = imdb.num_images
    fg_classes = range(1, imdb.num_classes)  # class 0 is never scored

    # all detections are collected into:
    #     all_boxes[cls][image] = N x 5 array of detections in
    #     (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    idx = 0
    data_time, net_time, post_time = 0.0, 0.0, 0.0
    tick = time.time()
    for im_info, data_batch in test_data:
        load_elapsed = time.time() - tick
        tick = time.time()

        scales = [iim_info[0, 2] for iim_info in im_info]
        scores_all, boxes_all, data_dict_all = im_detect(predictor, data_batch, data_names, scales, cfg)

        net_elapsed = time.time() - tick
        tick = time.time()

        for delta, (scores, boxes, data_dict) in enumerate(zip(scores_all, boxes_all, data_dict_all)):
            image_idx = idx + delta

            for j in fg_classes:
                indexes = np.where(scores[:, j] > thresh)[0]
                cls_scores = scores[indexes, j, np.newaxis]
                if cfg.CLASS_AGNOSTIC:
                    cls_boxes = boxes[indexes, 4:8]
                else:
                    cls_boxes = boxes[indexes, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j][image_idx] = cls_dets[keep, :]

            if max_per_image > 0:
                image_scores = np.hstack([all_boxes[j][image_idx][:, -1] for j in fg_classes])
                if len(image_scores) > max_per_image:
                    # score of the max_per_image-th best detection in this image
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in fg_classes:
                        current = all_boxes[j][image_idx]
                        keep = np.where(current[:, -1] >= image_thresh)[0]
                        all_boxes[j][image_idx] = current[keep, :]

            if vis:
                boxes_this_image = [[]] + [all_boxes[j][image_idx] for j in fg_classes]
                vis_all_detection(data_dict['data'].asnumpy(), boxes_this_image, imdb.classes, scales[delta], cfg)

        batch_size = test_data.batch_size
        idx += batch_size
        post_elapsed = time.time() - tick
        tick = time.time()
        data_time += load_elapsed
        net_time += net_elapsed
        post_time += post_elapsed

        progress = 'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(
            idx, imdb.num_images,
            data_time / idx * batch_size,
            net_time / idx * batch_size,
            post_time / idx * batch_size)
        print(progress)
        if logger:
            logger.info(progress)

    with open(det_file, 'wb') as out:
        cPickle.dump(all_boxes, out, protocol=cPickle.HIGHEST_PROTOCOL)

    info_str = imdb.evaluate_detections(all_boxes)
    if logger:
        logger.info('evaluate detections: \n{}'.format(info_str))
def main():
    """
    Run FCIS over a directory of images and write the detected 'person'
    instances (box, score, RLE mask) to a JSON file.

    Command line: ``indir`` (directory of .jpg/.png/.jpeg images), ``outfile``
    (JSON path) and ``-d/--device`` (GPU id, default 0).

    Steps: load and preprocess every image, build the predictor, run two
    warm-up passes, then for each image run detection, apply either per-class
    NMS or GPU mask voting (``config.TEST.USE_MASK_MERGE``), keep detections
    scoring above 0.7, and append one prediction dict per 'person' instance.

    :return: None
    :raises IOError: if an image file cannot be decoded
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("indir", type=lambda s: unicode(s, 'utf8'),
                        help="Directory containing list of images")
    parser.add_argument("outfile", type=lambda s: unicode(s, 'utf8'),
                        help="Path to write predictions")
    parser.add_argument("-d", "--device", type=int, default=0,
                        help="Device ID to use")
    args = parser.parse_args()
    params = vars(args)

    # ---------------------------------------------------------- Read config
    # Apply the --device override BEFORE deriving ctx_id, otherwise the flag
    # has no effect on the context the predictor and mask voting run on.
    config['gpus'] = str(params['device'])
    ctx_id = [int(i) for i in config.gpus.split(',')]
    pprint.pprint(config)
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names (80 foreground names; num_classes counts background too)
    num_classes = 81
    classes = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
        'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
        'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
        'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]

    # ---------------------------------------------------------- Load Images
    scale_factor = 1.0
    det_thresh = 0.7
    img_dir = osp.abspath(params['indir'])
    predictions = []

    # Load abs paths of images
    image_path_list = [
        osp.join(img_dir, f) for f in sorted(os.listdir(img_dir))
        if osp.splitext(f)[1] in ('.jpg', '.png', '.jpeg')
    ]

    if not image_path_list:
        # Nothing to predict: still produce a valid (empty) output file
        # instead of failing later while indexing the first image.
        print('No images found in {!r}'.format(img_dir))
        with open(params['outfile'], 'wb') as wf:
            json.dump(predictions, wf, indent=2)
        return

    print('Loading {} images into memory...'.format(len(image_path_list)))
    data = []
    for image_path in image_path_list:
        im = cv2.imread(image_path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if im is None:
            raise IOError('Failed to read image {!r}'.format(image_path))
        height, width = im.shape[:2]
        im = cv2.resize(im, (int(scale_factor * width), int(scale_factor * height)))
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})
    print('Loaded {} images'.format(len(image_path_list)))

    # ---------------------------------------------------------- Predict
    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(item[name]) for name in data_names] for item in data]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, item)] for item in data]
    provide_label = [None for _ in data]
    # NOTE(review): hard-coded absolute model path; only valid on the original machine.
    arg_params, aux_params = load_param(
        '/BS/orekondy2/work/opt/FCIS/model/fcis_coco', 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(ctx_id[0])],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = py_nms_wrapper(config.TEST.NMS)
    # Missing CLASS_AGNOSTIC is treated as False, as the original fallback did.
    class_agnostic = getattr(config, 'CLASS_AGNOSTIC', False)

    # warm up
    for _ in range(2):
        data_batch = mx.io.DataBatch(
            data=[data[0]], label=[], pad=0, index=0,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
            provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2]
                  for i in range(len(data_batch.data))]
        _, _, _, _ = im_detect(predictor, data_batch, data_names, scales, config)

    # test
    person_idx = classes.index('person')
    scale = 1.0
    for idx, image_path in enumerate(image_path_list):
        data_batch = mx.io.DataBatch(
            data=[data[idx]], label=[], pad=0, index=idx,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2]
                  for i in range(len(data_batch.data))]

        tic()
        scores, boxes, masks, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
        im_shapes = [data_batch.data[i][0].shape[2:4]
                     for i in range(len(data_batch.data))]

        if not config.TEST.USE_MASK_MERGE:
            all_boxes = [[] for _ in range(num_classes)]
            all_masks = [[] for _ in range(num_classes)]
            for j in range(1, num_classes):
                indexes = np.where(scores[0][:, j] > 0.7)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                if class_agnostic:
                    cls_boxes = boxes[0][indexes, :]
                else:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            dets = [all_boxes[j] for j in range(1, num_classes)]
            masks = [all_masks[j] for j in range(1, num_classes)]
        else:
            masks = masks[0][:, 1:, :, :]
            im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
            im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
            print((im_height, im_width))
            boxes = clip_boxes(boxes[0], (im_height, im_width))
            result_masks, result_dets = gpu_mask_voting(
                masks, boxes, scores[0], num_classes, 100, im_width, im_height,
                config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                config.BINARY_THRESH, ctx_id[0])
            dets = [result_dets[j] for j in range(1, num_classes)]
            masks = [result_masks[j][:, 0, :, :] for j in range(1, num_classes)]
        print('{} testing {} {:.4f}s'.format(idx, image_path, toc()))

        # keep only confident detections of every class
        for i in range(len(dets)):
            keep = np.where(dets[i][:, -1] > det_thresh)
            dets[i] = dets[i][keep]
            masks[i] = masks[i][keep]

        # The original image is decoded once; only its shape/dtype are needed
        # to lay out the full-size instance mask.
        im = cv2.imread(image_path)
        if im is None:
            raise IOError('Failed to read image {!r}'.format(image_path))
        org_height, org_width = im.shape[:2]

        # dets / masks are indexed by position in `classes` (background removed)
        person_dets = dets[person_idx]
        person_masks = masks[person_idx]
        for det, msk in zip(person_dets, person_masks):
            bbox = det[:4] * scale
            cod = bbox.astype(int)
            if im[cod[1]:cod[3], cod[0]:cod[2], 0].size <= 0:
                continue  # degenerate box, nothing to paste

            # Resize the mask to the box (cv2 wants (width, height)) and binarize.
            msk = cv2.resize(msk, im[cod[1]:cod[3] + 1, cod[0]:cod[2] + 1, 0].T.shape)
            bimsk = (msk >= config.BINARY_THRESH).astype('uint8')

            # ------- Create bit-mask for this instance
            inst_arr = np.zeros_like(im[:, :, 0])
            inst_arr[cod[1]:cod[3] + 1, cod[0]:cod[2] + 1] = bimsk
            rs_inst_arr = scipy.misc.imresize(inst_arr, (org_height, org_width))
            rle = mask.encode(np.asfortranarray(rs_inst_arr))

            predictions.append({
                'image_path': image_path,
                'label': 'person',
                'segmentation': rle,
                'bbox': bbox.tolist(),
                # plain float: NumPy scalars are not reliably JSON-serializable
                'score': float(det[-1]),
            })

    print('Created {} predictions'.format(len(predictions)))

    # ---------------------------------------------------------- Write output
    with open(params['outfile'], 'wb') as wf:
        json.dump(predictions, wf, indent=2)
def write_vid_results_multiprocess(self, detection, gpu_id):
    """
    Re-score detections video by video with seq-NMS, then write the ImageNet
    VID results file.

    Frames are grouped into videos by locating each frame id in the cumulative
    sum of ``self.frame_seg_len``. Every group is passed through ``seq_nms``
    followed by per-frame NMS (threshold 0.3), and the results replace the
    entries of ``detection[0]`` in place.

    :param detection: ``[all_boxes, frame_ids]``; ``all_boxes[cls][i]`` holds the
                      detections ``[bbox, confidence]`` for ``frame_ids[i]``
    :param gpu_id: forwarded to ``self.get_result_file_template``
    :return: None
    """
    print('Writing {} ImageNetVID results file'.format('all'))
    filename = self.get_result_file_template(gpu_id).format('all')
    nms = py_nms_wrapper(0.3)
    all_boxes = detection[0]
    frame_ids = detection[1]
    num_frames = len(frame_ids)
    fg_classes = range(1, self.num_classes)

    def rescore_video(start, stop):
        # Run seq-NMS + NMS on frames [start, stop) and store the result in place.
        video = [all_boxes[j][start:stop] for j in fg_classes]
        dets_all = seq_nms(video)
        for j in fg_classes:
            for frame_ind, dets in enumerate(dets_all[j - 1]):
                keep = nms(dets)
                all_boxes[j][frame_ind + start] = dets[keep, :]

    if num_frames > 0:
        data_time = 0
        start_idx = 0
        sum_frame_ids = np.cumsum(self.frame_seg_len)
        start_video = np.searchsorted(sum_frame_ids, frame_ids[0])

        for im_ind in range(1, num_frames):
            t = time.time()
            video_index = np.searchsorted(sum_frame_ids, frame_ids[im_ind])
            if video_index != start_video:  # represents a new video
                t1 = time.time()
                rescore_video(start_idx, im_ind)
                start_idx = im_ind
                start_video = video_index
                t2 = time.time()
                print('video_index= {}  time= {}'.format(video_index, t2 - t1))
            data_time += time.time() - t
            if im_ind % 100 == 0:
                print('{} seq_nms testing {} data {:.4f}s'.format(
                    frame_ids[im_ind - 1], im_ind, data_time / im_ind))

        # the last video: the slice must run to num_frames, otherwise the very
        # last frame is never re-scored (and a single-frame input would fail).
        rescore_video(start_idx, num_frames)

    with open(filename, 'wt') as f:
        for im_ind in range(num_frames):
            for cls_ind, cls in enumerate(self.classes):
                if cls == '__background__':
                    continue
                dets = all_boxes[cls_ind][im_ind]
                if len(dets) == 0:
                    continue
                # the imagenet expects 0-based indices
                for k in range(dets.shape[0]):
                    f.write('{:d} {:d} {:.4f} {:.2f} {:.2f} {:.2f} {:.2f}\n'.format(
                        frame_ids[im_ind], cls_ind, dets[k, -1],
                        dets[k, 0], dets[k, 1], dets[k, 2], dets[k, 3]))
    return
def main():
    """
    Run the flow-guided feature aggregation (FGFA) R-FCN demo on the bundled
    ImageNet VID clip and save the visualised detections.

    A sliding window of ``2 * KEY_FRAME_INTERVAL + 1`` frames/features is kept
    in two deques. The window is padded with copies of the first frame at the
    start and of the last frame at the end, so every input frame produces
    exactly one detection result. With ``cfg.TEST.SEQ_NMS`` set, images are
    only written after seq-NMS has re-scored the whole clip.

    :return: None
    """
    # get symbol
    pprint.pprint(cfg)
    cfg.symbol = 'resnet_v1_101_flownet_rfcn'
    model = '/../model/rfcn_fgfa_flownet_vid'
    key_interval = cfg.TEST.KEY_FRAME_INTERVAL
    all_frame_interval = key_interval * 2 + 1
    max_per_image = cfg.TEST.max_per_image
    feat_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    aggr_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()

    feat_sym = feat_sym_instance.get_feat_symbol(cfg)
    aggr_sym = aggr_sym_instance.get_aggregation_symbol(cfg)

    # set up class names
    num_classes = 31
    classes = [
        '__background__', 'airplane', 'antelope', 'bear', 'bicycle', 'bird',
        'bus', 'car', 'cattle', 'dog', 'domestic_cat', 'elephant', 'fox',
        'giant_panda', 'hamster', 'horse', 'lion', 'lizard', 'monkey',
        'motorcycle', 'rabbit', 'red_panda', 'sheep', 'snake', 'squirrel',
        'tiger', 'train', 'turtle', 'watercraft', 'whale', 'zebra'
    ]

    # load demo data
    image_names = sorted(glob.glob(cur_path + '/../demo/ILSVRC2015_val_00007010/*.JPEG'))
    output_dir = cur_path + '/../demo/rfcn_fgfa/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    if not image_names:
        # Without frames there is no "first frame" to prime the window with.
        print('no demo images found')
        return

    # Defined up front: it is needed for max_data_shape regardless of the loop below.
    feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), ('{} does not exist'.format(im_name))
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = cfg.SCALES[0][0]
        max_size = cfg.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=cfg.network.IMAGE_STRIDE)
        im_tensor = transform(im, cfg.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        # data_cache / feat_cache are placeholders here; prepare_data is called
        # on the batch before each aggregation pass.
        data.append({'data': im_tensor, 'im_info': im_info,
                     'data_cache': im_tensor, 'feat_cache': im_tensor})

    # get predictor
    print('get-predictor')
    data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
    label_names = []

    t1 = time.time()
    data = [[mx.nd.array(item[name]) for name in data_names] for item in data]
    max_h = max([v[0] for v in cfg.SCALES])
    max_w = max([v[1] for v in cfg.SCALES])
    # NOTE(review): 19 looks like the largest supported window length
    # (KEY_FRAME_INTERVAL = 9) -- confirm before changing the interval.
    max_data_shape = [[
        ('data', (1, 3, max_h, max_w)),
        ('data_cache', (19, 3, max_h, max_w)),
        ('feat_cache', (19, cfg.network.FGFA_FEAT_DIM,
                        np.ceil(max_h / feat_stride).astype(int),
                        np.ceil(max_w / feat_stride).astype(int))),
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, item)] for item in data]
    provide_label = [None for _ in data]

    arg_params, aux_params = load_param(cur_path + model, 0, process=True)

    feat_predictors = Predictor(feat_sym, data_names, label_names,
                                context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                                provide_data=provide_data, provide_label=provide_label,
                                arg_params=arg_params, aux_params=aux_params)
    aggr_predictors = Predictor(aggr_sym, data_names, label_names,
                                context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                                provide_data=provide_data, provide_label=provide_label,
                                arg_params=arg_params, aux_params=aux_params)
    nms = py_nms_wrapper(cfg.TEST.NMS)

    # First frame of the video
    idx = 0
    data_batch = mx.io.DataBatch(
        data=[data[idx]], label=[], pad=0, index=idx,
        provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
        provide_label=[None])
    scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in range(len(data_batch.data))]
    all_boxes = [[[] for _ in range(len(data))] for _ in range(num_classes)]
    data_list = deque(maxlen=all_frame_interval)
    feat_list = deque(maxlen=all_frame_interval)
    image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
    # append cfg.TEST.KEY_FRAME_INTERVAL padding images in the front (first frame)
    while len(data_list) < key_interval:
        data_list.append(image)
        feat_list.append(feat)

    vis = False
    file_idx = 0
    thresh = 1e-3
    for idx, element in enumerate(data):
        data_batch = mx.io.DataBatch(
            data=[element], label=[], pad=0, index=idx,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, element)]],
            provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in range(len(data_batch.data))]

        if idx != len(data) - 1:
            if len(data_list) < all_frame_interval - 1:
                # still filling the window: only extract features
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)
            else:
                #################################################
                # main part of the loop
                #################################################
                file_name = '{:06d}'.format(file_idx)
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)

                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch, data_names, scales, cfg)

                # drop the references to the cached window held by the batch
                data_batch.data[0][-2] = None
                data_batch.provide_data[0][-2] = ('data_cache', None)
                data_batch.data[0][-1] = None
                data_batch.provide_data[0][-1] = ('feat_cache', None)

                out_im = process_pred_result(
                    classes, pred_result, num_classes, thresh, cfg, nms, all_boxes,
                    file_idx, max_per_image, vis,
                    data_list[key_interval].asnumpy(), scales)
                total_time = time.time() - t1
                if not cfg.TEST.SEQ_NMS:
                    save_image(output_dir, file_name, out_im)
                print('testing {} {:.4f}s'.format(file_name + '.JPEG', total_time / (file_idx + 1)))
                file_idx += 1
        else:
            #################################################
            # end part of a video
            #################################################
            image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
            for _ in range(key_interval + 1):
                # Recompute the name for every flushed frame; reusing the name
                # from the top of the outer loop would overwrite one file.
                file_name = '{:06d}'.format(file_idx)
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch, data_names, scales, cfg)

                out_im = process_pred_result(
                    classes, pred_result, num_classes, thresh, cfg, nms, all_boxes,
                    file_idx, max_per_image, vis,
                    data_list[key_interval].asnumpy(), scales)
                total_time = time.time() - t1
                if not cfg.TEST.SEQ_NMS:
                    save_image(output_dir, file_name, out_im)
                print('testing {} {:.4f}s'.format(file_name + '.JPEG', total_time / (file_idx + 1)))
                file_idx += 1

    if cfg.TEST.SEQ_NMS:
        video = [all_boxes[j][:] for j in range(1, num_classes)]
        dets_all = seq_nms(video)
        for cls_ind, dets_cls in enumerate(dets_all):
            for frame_ind, dets in enumerate(dets_cls):
                keep = nms(dets)
                all_boxes[cls_ind + 1][frame_ind] = dets[keep, :]
        for idx in range(len(data)):
            file_name = '{:06d}'.format(idx)
            boxes_this_image = [[]] + [all_boxes[j][idx] for j in range(1, num_classes)]
            out_im = draw_all_detection(data[idx][0].asnumpy(), boxes_this_image,
                                        classes, scales[0], cfg)
            save_image(output_dir, file_name, out_im)

    print('done')
def main():
    """
    Run the R-FCN (optionally deformable) COCO demo on the two bundled images.

    The symbol and checkpoint are chosen from the module-level
    ``args.rfcn_only`` flag. Each image is preprocessed, passed through the
    predictor (after two warm-up passes on the first image), filtered with
    per-class NMS and a 0.7 score threshold, and handed to ``show_boxes``.

    :return: None
    :raises AssertionError: if a demo image is missing
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_rfcn_dcn' if not args.rfcn_only else 'resnet_v1_101_rfcn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names (80 foreground names; num_classes counts background too)
    num_classes = 81
    classes = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
        'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
        'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
        'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]

    # load demo data
    image_names = ['COCO_test2015_000000000891.jpg', 'COCO_test2015_000000001669.jpg']
    demo_dir = cur_path + '/../demo/'
    data = []
    for im_name in image_names:
        im_path = demo_dir + im_name
        # '{}' (not '%s') so the path really appears in the failure message
        assert os.path.exists(im_path), ('{} does not exist'.format('../demo/' + im_name))
        im = cv2.imread(im_path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(item[name]) for name in data_names] for item in data]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, item)] for item in data]
    provide_label = [None for _ in data]
    arg_params, aux_params = load_param(
        cur_path + '/../model/' + ('rfcn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'),
        0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.cpu()], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    nms = py_nms_wrapper(config.TEST.NMS)

    # warm up
    for _ in range(2):
        data_batch = mx.io.DataBatch(
            data=[data[0]], label=[], pad=0, index=0,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
            provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in range(len(data_batch.data))]
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]], label=[], pad=0, index=idx,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in range(len(data_batch.data))]

        tic()
        scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config)
        boxes = boxes[0].astype('f')
        scores = scores[0].astype('f')
        dets_nms = []
        for j in range(1, scores.shape[1]):
            cls_scores = scores[:, j, np.newaxis]
            if config.CLASS_AGNOSTIC:
                cls_boxes = boxes[:, 4:8]
            else:
                cls_boxes = boxes[:, j * 4:(j + 1) * 4]
            cls_dets = np.hstack((cls_boxes, cls_scores))
            keep = nms(cls_dets)
            cls_dets = cls_dets[keep, :]
            # only confident detections are shown
            cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :]
            dets_nms.append(cls_dets)
        print('testing {} {:.4f}s'.format(im_name, toc()))

        # visualize
        im = cv2.imread(demo_dir + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_boxes(im, dets_nms, classes, 1, "marked_{}".format(im_name))

    print('done')
def pred_eval_quadrangle(predictor, test_data, imdb, cfg, vis=False, draw=False, thresh=1e-3, logger=None,
                         ignore_cache=True):
    """
    Run quadrangle detection over ``test_data``, apply per-class NMS and
    evaluate on ``imdb``.

    NMS is computed on the axis-aligned bounding rectangle of each quadrangle,
    but the stored detections keep all four corner points. When a detections
    pickle already exists under ``imdb.result_path`` and ``ignore_cache`` is
    false, detection is skipped and the cached boxes are drawn and evaluated.

    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle (unless ``vis`` is set)
    :param imdb: image database
    :param cfg: config; TEST.NMS, TEST.max_per_image, TEST.save_img_path and
                CLASS_AGNOSTIC are read
    :param vis: controls visualization
    :param draw: when True, write one annotated image per test image to
                 ``cfg.TEST.save_img_path``
    :param thresh: valid detection threshold
    :param logger: optional logger that mirrors the progress and result messages
    :param ignore_cache: when True, never reuse a cached detections file
    :return: None
    """
    det_file = os.path.join(imdb.result_path, imdb.name + '_detections.pkl')
    if os.path.exists(det_file) and not ignore_cache:
        with open(det_file, 'rb') as fid:
            all_boxes = cPickle.load(fid)
        imdb.check_transform()
        imdb.draw_gt_and_detections(all_boxes, thresh=0.1)
        info_str = imdb.evaluate_detections(all_boxes)
        if logger:
            logger.info('evaluate detections: \n{}'.format(info_str))
        return

    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data[0]]

    if not isinstance(test_data, PrefetchingIter):
        test_data = PrefetchingIter(test_data)

    nms = py_nms_wrapper(cfg.TEST.NMS)

    # limit detections to max_per_image over all classes
    max_per_image = cfg.TEST.max_per_image
    num_images = imdb.num_images
    fg_classes = range(1, imdb.num_classes)

    # all detections are collected into:
    #     all_boxes[cls][image] = N x 9 array of detections in
    #     (x1, y1, x2, y2, x3, y3, x4, y4, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    idx = 0
    data_time, net_time, post_time = 0.0, 0.0, 0.0
    t = time.time()
    for im_info, data_batch in test_data:
        t1 = time.time() - t
        t = time.time()

        scales = [iim_info[0, 2] for iim_info in im_info]
        scores_all, boxes_all, data_dict_all = im_detect_quadrangle(
            predictor, data_batch, data_names, scales, cfg)

        t2 = time.time() - t
        t = time.time()
        for delta, (scores, boxes, data_dict) in enumerate(zip(scores_all, boxes_all, data_dict_all)):
            image_idx = idx + delta

            for j in fg_classes:
                indexes = np.where(scores[:, j] > thresh)[0]
                cls_scores = scores[indexes, j, np.newaxis]
                if cfg.CLASS_AGNOSTIC:
                    cls_boxes = boxes[indexes, 8:16]
                else:
                    cls_boxes = boxes[indexes, j * 8:(j + 1) * 8]

                # Axis-aligned bounding rectangle of each quadrangle, used
                # only to decide which detections NMS keeps.
                xs = cls_boxes[:, 0::2]
                ys = cls_boxes[:, 1::2]
                rect_boxes = np.zeros((cls_boxes.shape[0], 4), dtype=cls_boxes.dtype)
                rect_boxes[:, 0] = np.amin(xs, axis=1)
                rect_boxes[:, 1] = np.amin(ys, axis=1)
                rect_boxes[:, 2] = np.amax(xs, axis=1)
                rect_boxes[:, 3] = np.amax(ys, axis=1)

                cls_dets = np.hstack((rect_boxes, cls_scores))
                cls_quadrangle_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j][image_idx] = cls_quadrangle_dets[keep, :]

            if max_per_image > 0:
                image_scores = np.hstack([all_boxes[j][image_idx][:, -1] for j in fg_classes])
                if len(image_scores) > max_per_image:
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in fg_classes:
                        keep = np.where(all_boxes[j][image_idx][:, -1] >= image_thresh)[0]
                        all_boxes[j][image_idx] = all_boxes[j][image_idx][keep, :]

            if vis:
                boxes_this_image = [[]] + [all_boxes[j][image_idx] for j in fg_classes]
                vis_all_detection(data_dict['data'].asnumpy(), boxes_this_image,
                                  imdb.classes, scales[delta], cfg)

            if draw:
                if not os.path.isdir(cfg.TEST.save_img_path):
                    # makedirs: the configured path may be nested
                    os.makedirs(cfg.TEST.save_img_path)
                # Name by the image index, not the batch start, so images of
                # the same batch do not overwrite each other.
                path = os.path.join(cfg.TEST.save_img_path, str(image_idx) + '.jpg')
                boxes_this_image = [[]] + [all_boxes[j][image_idx] for j in fg_classes]
                im = draw_all_quadrangle_detection(data_dict['data'].asnumpy(), boxes_this_image,
                                                   imdb.classes, scales[delta], cfg, threshold=0.2)
                print(path)
                cv2.imwrite(path, im)

        batch_size = test_data.batch_size
        idx += batch_size
        t3 = time.time() - t
        t = time.time()
        data_time += t1
        net_time += t2
        post_time += t3

        progress = 'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(
            idx, imdb.num_images,
            data_time / idx * batch_size,
            net_time / idx * batch_size,
            post_time / idx * batch_size)
        print(progress)
        if logger:
            logger.info(progress)

    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, protocol=cPickle.HIGHEST_PROTOCOL)

    imdb.draw_gt_and_detections(all_boxes, thresh=0.1)
    info_str = imdb.evaluate_detections(all_boxes)
    if logger:
        logger.info('evaluate detections: \n{}'.format(info_str))
def net_inference(model):
    """
    Run detection on every image under ``/tmp/eval/init/images``.

    generate data_batch -> im_detect -> post process

    Any exception other than an ``ErrorBase`` raised while loading an image
    aborts the run: the traceback is printed and the results gathered so far
    are returned.

    :param model: dict with 'predictor', 'classes', 'threshold' (score threshold
                  shared by all classes) and 'thresholds' (per-class thresholds,
                  used when it has at least ``len(classes)`` entries)
    :return: list with one dict per processed image: ``code``/``message`` for an
             image that failed to load, otherwise ``code=0``, the image path as
             ``message`` and the detections JSON-encoded under ``result``
    """
    predictor = model['predictor']
    classes = model['classes']
    threshold = model['threshold']
    thresholds = model['thresholds']
    # The condition does not change per class, so decide once.
    use_class_thresholds = len(classes) <= len(thresholds)
    image_dir = "/tmp/eval/init/images"
    rets = []
    nms = py_nms_wrapper(config.TEST.NMS)
    try:
        for image_name in sorted(os.listdir(image_dir)):
            imageFile = os.path.join(image_dir, image_name)
            print('-*-' * 50)
            print(imageFile)
            _t1 = time.time()
            try:
                im = load_image(imageFile, 50.0)
            except ErrorBase as e:
                rets.append({"code": e.code, "message": e.message})
                continue

            data_batch, data_names, im_scale = generate_batch(im)
            scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, im_scale, config)

            det_ret = []
            # classes[0] is skipped; an entry with more than one element is
            # read as (class index, class name), otherwise as (class name,).
            for cls_index, cls in enumerate(classes[1:], start=1):
                if len(cls) > 1:
                    cls_ind = int(cls[0])
                    cls_name = cls[1]
                else:
                    cls_ind = cls_index
                    cls_name = cls[0]

                if config.CLASS_AGNOSTIC:
                    cls_boxes = boxes[0][:, 4:8]
                else:
                    cls_boxes = boxes[0][:, 4 * cls_ind:4 * (cls_ind + 1)]
                cls_scores = scores[0][:, cls_ind, np.newaxis]

                if use_class_thresholds:
                    threshold = thresholds[cls_ind]
                keep = np.where(cls_scores >= threshold)[0]
                dets = np.hstack((cls_boxes, cls_scores)).astype(np.float32)[keep, :]
                keep = nms(dets)
                det_ret.extend(_build_result(det, cls_name, cls_ind) for det in dets[keep, :])

            _t2 = time.time()
            print("inference image time : %f" % (_t2 - _t1))
            rets.append(dict(code=0, message=imageFile,
                             result=json.dumps(dict(detections=det_ret))))
    except Exception:
        # Best effort: report the failure and keep whatever was produced.
        print(traceback.format_exc())
    # Previously the collected results were dropped on the floor.
    return rets
def pred_double_eval(predictor, test_data, imdb, cfg, vis=False, thresh=1e-3, logger=None, ignore_cache=True, show_gt=False):
    """
    Offline evaluation for a detector that is run on two frames at a time
    (the current frame and a 'ref' frame) through im_double_detect.

    Only the current-frame detections (all_boxes) are cached to disk and passed
    to imdb.evaluate_detections; the ref-frame detections are collected for
    visualization and for the DEBUG counters.

    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle (unless vis is set)
    :param imdb: image database
    :param cfg: configuration; cfg.TEST.{NMS, SOFTNMS, LEARN_NMS, FIRST_N,
        max_per_image} and cfg.CLASS_AGNOSTIC are read here
    :param vis: controls visualization
    :param thresh: valid detection threshold
    :param logger: optional logger for progress and evaluation output
    :param ignore_cache: when False and a cached detection file exists, the
        cached detections are evaluated and inference is skipped
    :param show_gt: with vis, also stack ground-truth boxes into the display
    :return: None
    """
    det_file = os.path.join(imdb.result_path, imdb.name + '_detections.pkl')
    if os.path.exists(det_file) and not ignore_cache:
        with open(det_file, 'rb') as fid:
            all_boxes = cPickle.load(fid)
        info_str = imdb.evaluate_detections(all_boxes)
        if logger:
            logger.info('evaluate detections: \n{}'.format(info_str))
        return

    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data[0]]
    label_names = [k[0] for k in test_data.provide_label[0]]
    num_images = test_data.size

    if not isinstance(test_data, PrefetchingIter):
        test_data = PrefetchingIter(test_data)

    if cfg.TEST.SOFTNMS:
        nms = py_softnms_wrapper(cfg.TEST.NMS)
    else:
        nms = py_nms_wrapper(cfg.TEST.NMS)

    # limit detections to max_per_image over all classes
    max_per_image = cfg.TEST.max_per_image

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]
    ref_all_boxes = [[[] for _ in range(num_images)]
                     for _ in range(imdb.num_classes)]

    valid_tally = 0
    valid_sum = 0

    idx = 0
    t = time.time()
    inference_count = 0
    all_inference_time = []
    post_processing_time = []
    nms_full_count = []
    nms_pos_count = []
    is_max_count = []
    all_count = []
    for im_info, ref_im_info, data_batch in test_data:
        t1 = time.time() - t
        t = time.time()

        scales = [iim_info[0, 2] for iim_info in im_info]
        (scores_all, boxes_all, ref_scores_all, ref_boxes_all, data_dict_all,
         label_dict_all) = im_double_detect(predictor, data_batch, data_names,
                                            label_names, scales, cfg)

        t2 = time.time() - t
        t = time.time()

        nms_full_count_per_batch = 0
        nms_pos_count_per_batch = 0
        # num_of_is_full_max is a module-level value that is only read here,
        # so no `global` declaration is needed.
        is_max_count_per_batch = num_of_is_full_max[0]
        all_count_per_batch = 0
        for delta, (scores, boxes, ref_scores, ref_boxes, data_dict,
                    label_dict) in enumerate(
                        zip(scores_all, boxes_all, ref_scores_all,
                            ref_boxes_all, data_dict_all, label_dict_all)):
            # ---------------- current frame ----------------
            if cfg.TEST.LEARN_NMS:
                for j in range(1, imdb.num_classes):
                    indexes = np.where(scores[:, j - 1, 0] > thresh)[0]
                    cls_scores = scores[indexes, j - 1, :]
                    cls_boxes = boxes[indexes, j - 1, :]
                    cls_dets = np.hstack((cls_boxes, cls_scores))
                    # count the valid ground truth
                    if len(cls_scores) > 0:
                        valid_tally += len(cls_scores)
                        valid_sum += len(scores)
                    all_boxes[j][idx + delta] = cls_dets

                    if DEBUG:
                        # Count how many plain-NMS survivors match the
                        # learned-NMS targets for this class.
                        keep = nms(cls_dets)
                        nms_cls_dets = cls_dets[keep, :]
                        target = label_dict['nms_multi_target']
                        target_indices = np.where(target[:, 4] == j - 1)
                        target = target[target_indices]
                        nms_full_count_per_batch += bbox_equal_count(
                            nms_cls_dets, target)

                        gt_boxes = label_dict['gt_boxes'][0].asnumpy()
                        gt_boxes = gt_boxes[np.where(gt_boxes[:, 4] == j)[0], :4]
                        gt_boxes /= scales[delta]

                        if len(cls_boxes) != 0 and len(gt_boxes) != 0:
                            # `float` instead of the removed `np.float` alias.
                            overlap_mat = bbox_overlaps(
                                cls_boxes.astype(float),
                                gt_boxes.astype(float))
                            pos_rows = np.where(overlap_mat > 0.5)[0]
                            keep = nms(cls_dets[pos_rows])
                            nms_cls_dets = cls_dets[pos_rows][keep]
                            nms_pos_count_per_batch += bbox_equal_count(
                                nms_cls_dets, target)
                        all_count_per_batch += len(target)
            else:
                for j in range(1, imdb.num_classes):
                    indexes = np.where(scores[:, j] > thresh)[0]
                    if cfg.TEST.FIRST_N > 0:
                        # todo: check whether the order affects the result
                        sort_indices = np.argsort(
                            scores[:, j])[-cfg.TEST.FIRST_N:]
                        indexes = np.intersect1d(sort_indices, indexes)

                    cls_scores = scores[indexes, j, np.newaxis]
                    if cfg.CLASS_AGNOSTIC:
                        cls_boxes = boxes[indexes, 4:8]
                    else:
                        cls_boxes = boxes[indexes, j * 4:(j + 1) * 4]
                    # count the valid ground truth
                    if len(cls_scores) > 0:
                        valid_tally += len(cls_scores)
                        valid_sum += len(scores)
                    cls_dets = np.hstack((cls_boxes, cls_scores))

                    if cfg.TEST.SOFTNMS:
                        # the soft-NMS wrapper returns detections directly
                        all_boxes[j][idx + delta] = nms(cls_dets)
                    else:
                        keep = nms(cls_dets)
                        all_boxes[j][idx + delta] = cls_dets[keep, :]

            if max_per_image > 0:
                image_scores = np.hstack([
                    all_boxes[j][idx + delta][:, -1]
                    for j in range(1, imdb.num_classes)
                ])
                if len(image_scores) > max_per_image:
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in range(1, imdb.num_classes):
                        keep = np.where(
                            all_boxes[j][idx + delta][:, -1] >= image_thresh)[0]
                        all_boxes[j][idx + delta] = \
                            all_boxes[j][idx + delta][keep, :]

            if vis:
                boxes_this_image = [[]] + [
                    all_boxes[j][idx + delta]
                    for j in range(1, imdb.num_classes)
                ]
                if show_gt:
                    gt_boxes = label_dict['gt_boxes'][0]
                    for gt_box in gt_boxes:
                        gt_box = gt_box.asnumpy()
                        gt_cls = int(gt_box[4])
                        gt_box = gt_box / scales[delta]
                        # the class column is reused as a display score
                        gt_box[4] = 1
                        if cfg.TEST.LEARN_NMS:
                            gt_box = np.append(gt_box, 1)
                        boxes_this_image[gt_cls] = np.vstack(
                            (boxes_this_image[gt_cls], gt_box))

                    if cfg.TEST.LEARN_NMS:
                        target_boxes = label_dict['nms_multi_target']
                        for target_box in target_boxes:
                            print("cur", target_box * scales[delta])
                            target_cls = int(target_box[4]) + 1
                            target_box[4] = 2 + target_box[5]
                            target_box[5] = target_box[6]
                            target_box = target_box[:6]
                            boxes_this_image[target_cls] = np.vstack(
                                (boxes_this_image[target_cls], target_box))

            # ---------------- reference frame ----------------
            if cfg.TEST.LEARN_NMS:
                for j in range(1, imdb.num_classes):
                    indexes = np.where(ref_scores[:, j - 1, 0] > thresh)[0]
                    cls_scores = ref_scores[indexes, j - 1, :]
                    cls_boxes = ref_boxes[indexes, j - 1, :]
                    cls_dets = np.hstack((cls_boxes, cls_scores))
                    # count the valid ground truth
                    if len(cls_scores) > 0:
                        valid_tally += len(cls_scores)
                        valid_sum += len(ref_scores)
                    ref_all_boxes[j][idx + delta] = cls_dets

                    if DEBUG:
                        keep = nms(cls_dets)
                        nms_cls_dets = cls_dets[keep, :]
                        target = label_dict['ref_nms_multi_target']
                        target_indices = np.where(target[:, 4] == j - 1)
                        target = target[target_indices]
                        nms_full_count_per_batch += bbox_equal_count(
                            nms_cls_dets, target)

                        gt_boxes = label_dict['ref_gt_boxes'][0].asnumpy()
                        gt_boxes = gt_boxes[np.where(gt_boxes[:, 4] == j)[0], :4]
                        gt_boxes /= scales[delta]

                        if len(cls_boxes) != 0 and len(gt_boxes) != 0:
                            overlap_mat = bbox_overlaps(
                                cls_boxes.astype(float),
                                gt_boxes.astype(float))
                            pos_rows = np.where(overlap_mat > 0.5)[0]
                            keep = nms(cls_dets[pos_rows])
                            nms_cls_dets = cls_dets[pos_rows][keep]
                            nms_pos_count_per_batch += bbox_equal_count(
                                nms_cls_dets, target)
                        all_count_per_batch += len(target)
            else:
                for j in range(1, imdb.num_classes):
                    indexes = np.where(ref_scores[:, j] > thresh)[0]
                    if cfg.TEST.FIRST_N > 0:
                        # todo: check whether the order affects the result
                        sort_indices = np.argsort(
                            ref_scores[:, j])[-cfg.TEST.FIRST_N:]
                        indexes = np.intersect1d(sort_indices, indexes)

                    cls_scores = ref_scores[indexes, j, np.newaxis]
                    if cfg.CLASS_AGNOSTIC:
                        cls_boxes = ref_boxes[indexes, 4:8]
                    else:
                        cls_boxes = ref_boxes[indexes, j * 4:(j + 1) * 4]
                    # count the valid ground truth
                    if len(cls_scores) > 0:
                        valid_tally += len(cls_scores)
                        valid_sum += len(ref_scores)
                    cls_dets = np.hstack((cls_boxes, cls_scores))

                    if cfg.TEST.SOFTNMS:
                        ref_all_boxes[j][idx + delta] = nms(cls_dets)
                    else:
                        keep = nms(cls_dets)
                        ref_all_boxes[j][idx + delta] = cls_dets[keep, :]

            if max_per_image > 0:
                image_scores = np.hstack([
                    ref_all_boxes[j][idx + delta][:, -1]
                    for j in range(1, imdb.num_classes)
                ])
                if len(image_scores) > max_per_image:
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in range(1, imdb.num_classes):
                        keep = np.where(
                            ref_all_boxes[j][idx + delta][:, -1] >= image_thresh)[0]
                        ref_all_boxes[j][idx + delta] = \
                            ref_all_boxes[j][idx + delta][keep, :]

            if vis:
                ref_boxes_this_image = [[]] + [
                    ref_all_boxes[j][idx + delta]
                    for j in range(1, imdb.num_classes)
                ]
                if show_gt:
                    gt_boxes = label_dict['ref_gt_boxes'][0]
                    for gt_box in gt_boxes:
                        gt_box = gt_box.asnumpy()
                        gt_cls = int(gt_box[4])
                        gt_box = gt_box / scales[delta]
                        gt_box[4] = 1
                        if cfg.TEST.LEARN_NMS:
                            gt_box = np.append(gt_box, 1)
                        ref_boxes_this_image[gt_cls] = np.vstack(
                            (ref_boxes_this_image[gt_cls], gt_box))

                    if cfg.TEST.LEARN_NMS:
                        target_boxes = label_dict['ref_nms_multi_target']
                        for target_box in target_boxes:
                            print("ref", target_box * scales[delta])
                            target_cls = int(target_box[4]) + 1
                            target_box[4] = 2 + target_box[5]
                            target_box[5] = target_box[6]
                            target_box = target_box[:6]
                            ref_boxes_this_image[target_cls] = np.vstack(
                                (ref_boxes_this_image[target_cls], target_box))

                vis_double_all_detection(data_dict['data'].asnumpy(),
                                         boxes_this_image,
                                         data_dict['ref_data'].asnumpy(),
                                         ref_boxes_this_image, imdb.classes,
                                         scales[delta], cfg)

        if DEBUG:
            nms_full_count.append(nms_full_count_per_batch)
            nms_pos_count.append(nms_pos_count_per_batch)
            is_max_count.append(is_max_count_per_batch)
            all_count.append(all_count_per_batch)
            total_targets = sum(all_count)
            # No targets are counted unless LEARN_NMS is on; skip the ratios
            # rather than divide by zero.
            if total_targets > 0:
                print("full:{} pos:{} max:{}".format(
                    1.0 * sum(nms_full_count) / total_targets,
                    1.0 * sum(nms_pos_count) / total_targets,
                    1.0 * sum(is_max_count) / total_targets))

        idx += test_data.batch_size
        t3 = time.time() - t
        t = time.time()
        post_processing_time.append(t3)
        all_inference_time.append(t1 + t2 + t3)
        inference_count += 1
        if inference_count % 200 == 0:
            # average over at most the last 500 batches
            valid_count = 500 if inference_count > 500 else inference_count
            print("--->> running-average inference time per batch: {}".format(
                float(sum(all_inference_time[-valid_count:])) / valid_count))
            print("--->> running-average post processing time per batch: {}".
                  format(
                      float(sum(post_processing_time[-valid_count:])) /
                      valid_count))
        print('testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(
            idx, num_images, t1, t2, t3))
        if logger:
            logger.info(
                'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(
                    idx, num_images, t1, t2, t3))

    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, protocol=cPickle.HIGHEST_PROTOCOL)

    info_str = imdb.evaluate_detections(all_boxes)
    if logger:
        logger.info('evaluate detections: \n{}'.format(info_str))
        # The per-class image lookup (class_lut) is disabled in this function,
        # so the 'valid class ratio' line that depended on it could only raise
        # NameError and is no longer emitted.
        logger.info('valid score ratio:{}'.format(
            float(valid_tally) / float(valid_sum + 0.01)))
def predict_on_image_names( image_names, config, model_path_id="/home/data/output/resnet_v1_101_coco_fcis_end2end_ohem-nebraska/train-nebraska/e2e", epoch=8): import argparse import os import sys import logging import pprint import cv2 from utils.image import resize, transform import numpy as np # get config os.environ['PYTHONUNBUFFERED'] = '1' os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0' os.environ['MXNET_ENABLE_GPU_P2P'] = '0' cur_path = os.path.abspath(".") sys.path.insert( 0, os.path.join(cur_path, '../external/mxnet', config.MXNET_VERSION)) import mxnet as mx print("use mxnet at", mx.__file__) from core.tester import im_detect, Predictor from symbols import * from utils.load_model import load_param from utils.show_masks import show_masks from utils.tictoc import tic, toc from nms.nms import py_nms_wrapper from mask.mask_transform import gpu_mask_voting, cpu_mask_voting # get symbol ctx_id = [int(i) for i in config.gpus.split(',')] sym_instance = eval(config.symbol)() sym = sym_instance.get_symbol(config, is_train=False) # set up class names num_classes = 2 classes = ['cp'] # load demo data data = [] for im_name in image_names: assert os.path.exists(im_name), ('%s does not exist'.format(im_name)) im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) target_size = config.SCALES[0][0] max_size = config.SCALES[0][1] im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE) im_tensor = transform(im, config.network.PIXEL_MEANS) im_info = np.array( [[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32) data.append({'data': im_tensor, 'im_info': im_info}) # get predictor data_names = ['data', 'im_info'] label_names = [] data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))] max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]] provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in 
xrange(len(data))] provide_label = [None for i in xrange(len(data))] # loading the last epoch that was trained, 8 arg_params, aux_params = load_param(model_path_id, epoch, process=True) predictor = Predictor(sym, data_names, label_names, context=[mx.gpu(ctx_id[0])], max_data_shapes=max_data_shape, provide_data=provide_data, provide_label=provide_label, arg_params=arg_params, aux_params=aux_params) all_classes = [] all_configs = [] all_masks = [] all_dets = [] all_ims = [] # warm up for i in xrange(2): data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0, provide_data=[[ (k, v.shape) for k, v in zip(data_names, data[0]) ]], provide_label=[None]) scales = [ data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data)) ] _, _, _, _ = im_detect(predictor, data_batch, data_names, scales, config) # test for idx, im_name in enumerate(image_names): data_batch = mx.io.DataBatch( data=[data[idx]], label=[], pad=0, index=idx, provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]], provide_label=[None]) scales = [ data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data)) ] tic() scores, boxes, masks, data_dict = im_detect(predictor, data_batch, data_names, scales, config) im_shapes = [ data_batch.data[i][0].shape[2:4] for i in xrange(len(data_batch.data)) ] if not config.TEST.USE_MASK_MERGE: all_boxes = [[] for _ in xrange(num_classes)] all_masks = [[] for _ in xrange(num_classes)] nms = py_nms_wrapper(config.TEST.NMS) for j in range(1, num_classes): indexes = np.where(scores[0][:, j] > 0.7)[0] cls_scores = scores[0][indexes, j, np.newaxis] cls_masks = masks[0][indexes, 1, :, :] try: if config.CLASS_AGNOSTIC: cls_boxes = boxes[0][indexes, :] else: raise Exception() except: cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4] cls_dets = np.hstack((cls_boxes, cls_scores)) keep = nms(cls_dets) all_boxes[j] = cls_dets[keep, :] all_masks[j] = cls_masks[keep, :] dets = [all_boxes[j] for j in range(1, num_classes)] masks = 
[all_masks[j] for j in range(1, num_classes)] else: masks = masks[0][:, 1:, :, :] im_height = np.round(im_shapes[0][0] / scales[0]).astype('int') im_width = np.round(im_shapes[0][1] / scales[0]).astype('int') print(im_height, im_width) boxes = clip_boxes(boxes[0], (im_height, im_width)) result_masks, result_dets = gpu_mask_voting( masks, boxes, scores[0], num_classes, 100, im_width, im_height, config.TEST.NMS, config.TEST.MASK_MERGE_THRESH, config.BINARY_THRESH, ctx_id[0]) dets = [result_dets[j] for j in range(1, num_classes)] masks = [ result_masks[j][:, 0, :, :] for j in range(1, num_classes) ] print('testing {} {:.4f}s'.format(im_name, toc())) # visualize for i in xrange(len(dets)): keep = np.where(dets[i][:, -1] > 0.7) dets[i] = dets[i][keep] masks[i] = masks[i][keep] all_classes.append(classes) all_configs.append(config) all_masks.append(masks) all_dets.append(dets) im = cv2.imread(im_name) all_ims.append(im) return all_ims, all_dets, all_masks, all_configs, all_classes
def net_inference(model, reqs):
    """
    Run detection on a batch of inference requests.

    :param model: dict providing 'predictor', 'classes' and 'thresholds'
        (per-class score thresholds)
    :param reqs: iterable of requests; each is indexed as
        req['data']['uri'] and req['data']['body']
    :return: (rets, code, message). On success rets has one dict per request
        ({'code', 'message', 'result'}) and code is 0. If an unexpected
        exception occurs the whole call returns ([], 599, str(exception)).
    """
    CTX.logger.info("inference begin...")

    predictor = model['predictor']
    classes = model['classes']
    # default threshold, used when no per-class table covers the classes
    threshold = 0.7
    thresholds = model['thresholds']
    rets = []
    nms = py_nms_wrapper(config.TEST.NMS)

    try:
        for data in reqs:
            try:
                im = load_image(data['data']['uri'], body=data['data']['body'])
            except ErrorBase as e:
                rets.append({"code": e.code, "message": e.message, "result": None})
                continue

            # Reject extremely elongated images.
            long_side = max(im.shape[0], im.shape[1])
            short_side = min(im.shape[0], im.shape[1])
            if short_side > 0 and float(long_side) / float(short_side) > 50.0:
                msg = "aspect ration is too large, long_size:short_side should not larger than 50.0"
                rets.append({"code": 400, "message": msg, "result": None})
                continue

            data_batch, data_names, im_scale = generate_batch(im)
            scores, boxes, data_dict = im_detect(predictor, data_batch, data_names,
                                                 im_scale, config)
            det_ret = []
            # classes[0] is skipped; presumably the background entry.
            for cls_index, cls in enumerate(classes[1:], start=1):
                # A class entry is either (index, name) or just (name,).
                if len(cls) > 1:
                    cls_ind = int(cls[0])
                    cls_name = cls[1]
                else:
                    cls_ind = cls_index
                    cls_name = cls[0]
                if config.CLASS_AGNOSTIC:
                    cls_boxes = boxes[0][:, 4:8]
                else:
                    # Exactly the four box columns of this class. The slice
                    # used to end at 4 * 4 * (cls_ind + 1), which pulled in
                    # the columns of the following classes as well.
                    cls_boxes = boxes[0][:, 4 * cls_ind:4 * (cls_ind + 1)]
                cls_scores = scores[0][:, cls_ind, np.newaxis]
                if len(classes) <= len(thresholds):
                    threshold = thresholds[cls_ind]
                else:
                    CTX.logger.info("Not set threshold for this %s"%(cls_index))
                keep = np.where(cls_scores >= threshold)[0]
                dets = np.hstack((cls_boxes, cls_scores)).astype(np.float32)[keep, :]
                keep = nms(dets)
                det_ret.extend(_build_result(det, cls_name, cls_ind)
                               for det in dets[keep, :])

            rets.append(
                dict(
                    code=0,
                    message='',
                    result=dict(detections=det_ret)))

    except Exception as e:
        CTX.logger.info("inference error:%s"%(traceback.format_exc()))
        return [], 599, str(e)
    return rets, 0, ''
def main():
    """
    Video detection demo: run the feature network on every frame found in
    args.input, aggregate features over a sliding window of
    2 * cfg.TEST.KEY_FRAME_INTERVAL + 1 frames, detect on the window's centre
    frame and write the rendered frames to an output directory under
    /data2/output.

    Reads the module-level `args` (input, dataset, epoc, threshold) and `cfg`.
    """
    # get symbol
    pprint.pprint(cfg)
    cfg.symbol = 'resnet_v1_101_flownet_rfcn'
    model = '/data2/output/fgfa_rfcn/jrdb/resnet_v1_101_flownet_jrdb/VID_train_15frames/fgfa_rfcn_vid'

    all_frame_interval = cfg.TEST.KEY_FRAME_INTERVAL * 2 + 1
    max_per_image = cfg.TEST.max_per_image
    # NOTE(review): eval() of a string built from cfg.symbol; the value is
    # hard-coded just above, so this is not fed by untrusted input here.
    feat_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    aggr_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()

    feat_sym = feat_sym_instance.get_feat_symbol(cfg)
    aggr_sym = aggr_sym_instance.get_aggregation_symbol(cfg)

    # set up class names
    classes = ['__background__', 'p']
    num_classes = len(classes)

    image_names = sorted(glob.glob(args.input + '/*'))
    print('num of images {}'.format(len(image_names)))
    if not image_names:
        # The first-frame setup below needs at least one image.
        raise ValueError('no input images found in {}'.format(args.input))

    output_dir_tmp = '%s_%s_epoc_%d' % (
        args.dataset, os.path.basename(args.input), args.epoc)
    output_dir = os.path.join("/data2", "output", output_dir_tmp)
    print(output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), (
            '{} does not exist'.format(im_name))
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = cfg.SCALES[0][0]
        max_size = cfg.SCALES[0][1]
        im, im_scale = resize(im,
                              target_size,
                              max_size,
                              stride=cfg.network.IMAGE_STRIDE)
        im_tensor = transform(im, cfg.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        # data_cache / feat_cache start as placeholders; prepare_data is
        # called on the batch before every aggregated detection.
        data.append({
            'data': im_tensor,
            'im_info': im_info,
            'data_cache': im_tensor,
            'feat_cache': im_tensor
        })

    # get predictor
    print('get-predictor')
    data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
    label_names = []

    t1 = time.time()
    data = [[mx.nd.array(item[name]) for name in data_names] for item in data]
    max_height = max([v[0] for v in cfg.SCALES])
    max_width = max([v[1] for v in cfg.SCALES])
    # `int` replaces the removed `np.int` alias.
    max_data_shape = [[
        ('data', (1, 3, max_height, max_width)),
        ('data_cache', (19, 3, max_height, max_width)),
        ('feat_cache', ((19, cfg.network.FGFA_FEAT_DIM,
                         np.ceil(max_height / feat_stride).astype(int),
                         np.ceil(max_width / feat_stride).astype(int))))
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, item)]
                    for item in data]
    provide_label = [None for _ in data]

    arg_params, aux_params = load_param(model, args.epoc, process=True)

    feat_predictors = Predictor(feat_sym,
                                data_names,
                                label_names,
                                context=[mx.gpu(0)],
                                max_data_shapes=max_data_shape,
                                provide_data=provide_data,
                                provide_label=provide_label,
                                arg_params=arg_params,
                                aux_params=aux_params)
    aggr_predictors = Predictor(aggr_sym,
                                data_names,
                                label_names,
                                context=[mx.gpu(0)],
                                max_data_shapes=max_data_shape,
                                provide_data=provide_data,
                                provide_label=provide_label,
                                arg_params=arg_params,
                                aux_params=aux_params)
    nms = py_nms_wrapper(cfg.TEST.NMS)

    # First frame of the video
    idx = 0
    data_batch = mx.io.DataBatch(
        data=[data[idx]],
        label=[],
        pad=0,
        index=idx,
        provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
        provide_label=[None])
    scales = [entry[1].asnumpy()[0, 2] for entry in data_batch.data]
    all_boxes = [[[] for _ in range(len(data))] for _ in range(num_classes)]
    data_list = deque(maxlen=all_frame_interval)
    feat_list = deque(maxlen=all_frame_interval)
    image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
    # append cfg.TEST.KEY_FRAME_INTERVAL padding images in the front (first frame)
    while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
        data_list.append(image)
        feat_list.append(feat)

    vis = False
    file_idx = 0
    thresh = args.threshold
    for idx, element in enumerate(data):
        data_batch = mx.io.DataBatch(
            data=[element],
            label=[],
            pad=0,
            index=idx,
            provide_data=[[(k, v.shape) for k, v in zip(data_names, element)]],
            provide_label=[None])
        scales = [entry[1].asnumpy()[0, 2] for entry in data_batch.data]

        if idx != len(data) - 1:
            if len(data_list) < all_frame_interval - 1:
                # still filling the window
                image, feat = get_resnet_output(feat_predictors, data_batch,
                                                data_names)
                data_list.append(image)
                feat_list.append(feat)
            else:
                #################################################
                # main part of the loop
                #################################################
                image, feat = get_resnet_output(feat_predictors, data_batch,
                                                data_names)
                data_list.append(image)
                feat_list.append(feat)

                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch,
                                        data_names, scales, cfg)

                # drop the cache entries from the batch after detection
                data_batch.data[0][-2] = None
                data_batch.provide_data[0][-2] = ('data_cache', None)
                data_batch.data[0][-1] = None
                data_batch.provide_data[0][-1] = ('feat_cache', None)

                out_im = process_pred_result(
                    classes, pred_result, num_classes, thresh, cfg, nms,
                    all_boxes, file_idx, max_per_image, vis,
                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                total_time = time.time() - t1
                if not cfg.TEST.SEQ_NMS:
                    save_image(output_dir, file_idx, out_im)
                print('testing {} {:.4f}s'.format(
                    str(file_idx) + '.JPEG', total_time / (file_idx + 1)))
                file_idx += 1
        else:
            #################################################
            # end part of a video                           #
            #################################################
            # pad the tail by re-appending the last frame so the remaining
            # centre frames can still be detected
            end_counter = 0
            image, feat = get_resnet_output(feat_predictors, data_batch,
                                            data_names)
            while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch,
                                        data_names, scales, cfg)

                out_im = process_pred_result(
                    classes, pred_result, num_classes, thresh, cfg, nms,
                    all_boxes, file_idx, max_per_image, vis,
                    data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                total_time = time.time() - t1
                if not cfg.TEST.SEQ_NMS:
                    save_image(output_dir, file_idx, out_im)
                print('testing {} {:.4f}s'.format(
                    str(file_idx) + '.JPEG', total_time / (file_idx + 1)))
                file_idx += 1
                end_counter += 1

    if cfg.TEST.SEQ_NMS:
        # With sequence NMS, frames are rendered only after the whole video
        # has been rescored.
        video = [all_boxes[j][:] for j in range(1, num_classes)]
        dets_all = seq_nms(video)
        for cls_ind, dets_cls in enumerate(dets_all):
            for frame_ind, dets in enumerate(dets_cls):
                keep = nms(dets)
                all_boxes[cls_ind + 1][frame_ind] = dets[keep, :]
        for idx in range(len(data)):
            boxes_this_image = [[]] + [
                all_boxes[j][idx] for j in range(1, num_classes)
            ]
            out_im = draw_all_detection(data[idx][0].asnumpy(),
                                        boxes_this_image, classes, scales[0],
                                        cfg)
            save_image(output_dir, idx, out_im)

    print('done')
def pred_eval(predictor, test_data, imdb, cfg, vis=False, thresh=1e-3, logger=None, ignore_cache=True):
    """
    Offline evaluation driver: run the detector over the whole test iterator,
    gather per-class detections, cache them on disk and hand them to
    imdb.evaluate_detections.

    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle (unless vis is set)
    :param imdb: image database
    :param cfg: configuration object
    :param vis: controls visualization
    :param thresh: valid detection threshold
    :param logger: optional logger for progress and evaluation output
    :param ignore_cache: when False and a cached detection file exists, only
        the cached detections are evaluated
    :return: None
    """
    cache_path = os.path.join(imdb.result_path, imdb.name + '_detections.pkl')
    if not ignore_cache and os.path.exists(cache_path):
        with open(cache_path, 'rb') as cache_in:
            all_boxes = cPickle.load(cache_in)
        info_str = imdb.evaluate_detections(all_boxes)
        if logger:
            logger.info('evaluate detections: \n{}'.format(info_str))
        return

    assert vis or not test_data.shuffle
    data_names = [entry[0] for entry in test_data.provide_data[0]]

    if not isinstance(test_data, PrefetchingIter):
        test_data = PrefetchingIter(test_data)

    make_nms = py_softnms_wrapper if cfg.TEST.SOFTNMS else py_nms_wrapper
    nms = make_nms(cfg.TEST.NMS)

    # cap on the number of detections kept per image, over all classes
    max_per_image = cfg.TEST.max_per_image

    num_images = imdb.num_images
    # all_boxes[cls][image] = N x 5 array of detections in
    # (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]
    # class_lut[cls] lists the image slots with at least one kept score
    class_lut = [[] for _ in range(imdb.num_classes)]
    valid_tally = 0
    valid_sum = 0

    idx = 0
    inference_count = 0
    all_inference_time = []
    post_processing_time = []
    t = time.time()
    for im_info, data_batch in test_data:
        t1 = time.time() - t
        t = time.time()

        scales = [info[0, 2] for info in im_info]
        scores_all, boxes_all, data_dict_all = im_detect(
            predictor, data_batch, data_names, scales, cfg)

        t2 = time.time() - t
        t = time.time()
        per_image = zip(scores_all, boxes_all, data_dict_all)
        for delta, (scores, boxes, data_dict) in enumerate(per_image):
            slot = idx + delta
            if cfg.TEST.LEARN_NMS:
                for j in range(1, imdb.num_classes):
                    picked = np.where(scores[:, j - 1] > thresh)[0]
                    cls_scores = scores[picked, j - 1:j]
                    cls_boxes = boxes[picked, j - 1, :]
                    cls_dets = np.hstack((cls_boxes, cls_scores))
                    # count the valid ground truth
                    if len(cls_scores) > 0:
                        class_lut[j].append(slot)
                        valid_tally += len(cls_scores)
                        valid_sum += len(scores)
                    all_boxes[j][slot] = cls_dets
            else:
                for j in range(1, imdb.num_classes):
                    picked = np.where(scores[:, j] > thresh)[0]
                    if cfg.TEST.FIRST_N > 0:
                        # todo: check whether the order affects the result
                        top_n = np.argsort(scores[:, j])[-cfg.TEST.FIRST_N:]
                        picked = np.intersect1d(top_n, picked)

                    cls_scores = scores[picked, j, np.newaxis]
                    if cfg.CLASS_AGNOSTIC:
                        cls_boxes = boxes[picked, 4:8]
                    else:
                        cls_boxes = boxes[picked, j * 4:(j + 1) * 4]
                    # count the valid ground truth
                    if len(cls_scores) > 0:
                        class_lut[j].append(slot)
                        valid_tally += len(cls_scores)
                        valid_sum += len(scores)
                    cls_dets = np.hstack((cls_boxes, cls_scores))

                    if cfg.TEST.SOFTNMS:
                        # the soft-NMS wrapper returns detections directly
                        all_boxes[j][slot] = nms(cls_dets)
                    else:
                        survivors = nms(cls_dets)
                        all_boxes[j][slot] = cls_dets[survivors, :]

            if max_per_image > 0:
                image_scores = np.hstack([
                    all_boxes[j][slot][:, -1]
                    for j in range(1, imdb.num_classes)
                ])
                if len(image_scores) > max_per_image:
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in range(1, imdb.num_classes):
                        survivors = np.where(
                            all_boxes[j][slot][:, -1] >= image_thresh)[0]
                        all_boxes[j][slot] = all_boxes[j][slot][survivors, :]

            if vis:
                boxes_this_image = [[]]
                boxes_this_image += [
                    all_boxes[j][slot] for j in range(1, imdb.num_classes)
                ]
                vis_all_detection(data_dict['data'].asnumpy(),
                                  boxes_this_image, imdb.classes,
                                  scales[delta], cfg)

        idx += test_data.batch_size
        t3 = time.time() - t
        t = time.time()
        post_processing_time.append(t3)
        all_inference_time.append(t1 + t2 + t3)
        inference_count += 1
        if inference_count % 200 == 0:
            # average over at most the last 500 batches
            valid_count = min(inference_count, 500)
            mean_total = float(sum(all_inference_time[-valid_count:])) / valid_count
            mean_post = float(sum(post_processing_time[-valid_count:])) / valid_count
            print("--->> running-average inference time per batch: {}".format(
                mean_total))
            print("--->> running-average post processing time per batch: {}".
                  format(mean_post))
        progress = 'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(
            idx, imdb.num_images, t1, t2, t3)
        print(progress)
        if logger:
            logger.info(progress)

    with open(cache_path, 'wb') as cache_out:
        cPickle.dump(all_boxes, cache_out, protocol=cPickle.HIGHEST_PROTOCOL)

    info_str = imdb.evaluate_detections(all_boxes)
    if logger:
        logger.info('evaluate detections: \n{}'.format(info_str))
        num_valid_classes = [len(slots) for slots in class_lut]
        logger.info('valid class ratio:{}'.format(
            np.sum(num_valid_classes) / float(num_images)))
        logger.info('valid score ratio:{}'.format(
            float(valid_tally) / float(valid_sum + 0.01)))
def forward(self, is_train, req, in_data, out_data, aux):
    """
    Turn per-level RPN outputs into a fixed-size set of RoIs.

    For every stride in self._feat_stride: build the shifted anchors, apply
    the predicted box deltas, clip to the image and drop boxes that are too
    small. The proposals of all levels are then merged, sorted by score,
    cut to the pre-NMS limit, passed through NMS and cut (or padded by random
    re-sampling) to the post-NMS limit.

    in_data[0:5] are read as class probabilities for strides 4..64,
    in_data[5:10] as the matching box deltas and in_data[-1] as im_info.
    Writes the RoIs to out_data[0] and, when self._output_score is set, the
    scores to out_data[1].
    """
    nms = py_nms_wrapper(self._threshold)

    if in_data[0].shape[0] > 1:
        raise ValueError("Sorry, multiple images each device is not implemented")

    level_names = ('stride4', 'stride8', 'stride16', 'stride32', 'stride64')
    prob_by_level = {name: in_data[k] for k, name in enumerate(level_names)}
    delta_by_level = {name: in_data[k + 5] for k, name in enumerate(level_names)}

    pre_limit = self._rpn_pre_nms_top_n
    post_limit = self._rpn_post_nms_top_n
    min_size = self._rpn_min_size
    num_anchors = self._num_anchors

    # (height, width, scale) of the single image in the batch
    im_info = in_data[-1].asnumpy()[0, :]

    level_proposals = []
    level_scores = []
    for s in self._feat_stride:
        stride = int(s)
        level_key = 'stride' + str(s)
        base_anchors = generate_anchors(base_size=stride, scales=self._scales,
                                        ratios=self._ratios)
        # the second half of the channels is read as the per-anchor score
        scores = prob_by_level[level_key].asnumpy()[:, num_anchors:, :, :]
        bbox_deltas = delta_by_level[level_key].asnumpy()

        # 1. Generate proposals from bbox_deltas and shifted anchors,
        #    using the real image size instead of the padded feature map
        height = int(im_info[0] / stride)
        width = int(im_info[1] / stride)

        # one (x, y, x, y) shift per feature-map cell
        grid_x, grid_y = np.meshgrid(np.arange(0, width) * stride,
                                     np.arange(0, height) * stride)
        flat_x = grid_x.ravel()
        flat_y = grid_y.ravel()
        shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()

        # A anchors (1, A, 4) + K shifts (K, 1, 4) -> (K, A, 4) -> (K*A, 4)
        num_cells = shifts.shape[0]
        anchors = base_anchors.reshape((1, num_anchors, 4)) + shifts[:, np.newaxis, :]
        anchors = anchors.reshape((num_cells * num_anchors, 4))

        # deltas: (1, 4*A, H, W) -> (1, H, W, 4*A) -> (H*W*A, 4), so the rows
        # follow the same (h, w, a) order as the anchors
        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # scores: (1, A, H, W) -> (1, H, W, A) -> (H*W*A, 1)
        scores = self._clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # anchors + deltas -> proposals
        proposals = bbox_pred(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        #    (min_size is converted to the input image scale in im_info[2])
        big_enough = self._filter_boxes(proposals, min_size * im_info[2])
        level_proposals.append(proposals[big_enough, :])
        level_scores.append(scores[big_enough])

    proposals = np.vstack(level_proposals)
    scores = np.vstack(level_scores)

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN
    ranking = scores.ravel().argsort()[::-1]
    if pre_limit > 0:
        ranking = ranking[:pre_limit]
    proposals = proposals[ranking, :]
    scores = scores[ranking]

    # 6. apply nms
    # 7. take after_nms_topN
    # 8. return the top proposals (-> RoIs top)
    candidates = np.hstack((proposals, scores)).astype(np.float32)
    keep = nms(candidates)

    if post_limit > 0:
        keep = keep[:post_limit]
    # pad to ensure output size remains unchanged
    missing = post_limit - len(keep)
    if missing > 0:
        keep = np.hstack((keep, npr.choice(keep, size=missing)))
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois array: only a single input image is supported, so all
    # batch indices are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    self.assign(out_data[0], req[0], blob)

    if self._output_score:
        self.assign(out_data[1], req[1], scores.astype(np.float32, copy=False))
def det(mod, fn):
    """
    Run the detector on a single image file and display the class-1 boxes.

    The decoded image is brought to a DST_SIZE x DST_SIZE canvas (gray
    padding; squared and shrunk first when either side exceeds DST_SIZE),
    resized for the network, and pushed through ``mod``.  Detections scoring
    above 0.1 go through NMS (0.3) and the survivors are drawn in red.  The
    picture is shown in an OpenCV window and the call blocks on a key press.

    :param mod: module providing ``forward``, ``output_names`` and ``get_outputs``
    :param fn: path of the image file to decode
    :return: None
    """
    gray = (128, 128, 128)
    raw_img = cv2.imdecode(np.fromfile(fn, dtype=np.uint8), -1)
    h, w = raw_img.shape[:2]

    if h > DST_SIZE or w > DST_SIZE:
        # Pad the shorter side so the picture is square, then shrink it.
        if h > w:
            raw_img = cv2.copyMakeBorder(raw_img, 0, 0, 0, h - w,
                                         cv2.BORDER_CONSTANT, value=gray)
        else:
            raw_img = cv2.copyMakeBorder(raw_img, 0, w - h, 0, 0,
                                         cv2.BORDER_CONSTANT, value=gray)
        raw_img = cv2.resize(raw_img, (DST_SIZE, DST_SIZE))
    else:
        # Both sides already fit: pad bottom/right up to the canvas size.
        raw_img = cv2.copyMakeBorder(raw_img, 0, DST_SIZE - h, 0, DST_SIZE - w,
                                     cv2.BORDER_CONSTANT, value=gray)

    im_shape = [IMG_H, IMG_W]  # reverse order
    img = cv2.resize(raw_img, (IMG_H, IMG_W))
    im_tensor = image.transform(img, [103.06, 115.90, 123.15])
    im_info = np.array([[IMG_H, IMG_W, 4.18300658e-01]])
    batch = mx.io.DataBatch([mx.nd.array(im_tensor), mx.nd.array(im_info)])

    start = time.time()
    mod.forward(batch)
    names = mod.output_names
    tensors = mod.get_outputs()
    print("time", time.time() - start, "secs.")

    output = dict(zip(names, tensors))
    rois = output['rois_output'].asnumpy()[:, 1:]
    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
    pred_boxes = clip_boxes(bbox_pred(rois, bbox_deltas), im_shape[-2:])

    num_classes = 2
    nms = py_nms_wrapper(0.3)
    all_cls_dets = [[] for _ in range(num_classes)]
    for cls in range(1, num_classes):
        picked = np.where(scores[:, cls] > 0.1)[0]
        candidates = np.hstack((pred_boxes[picked, cls * 4:(cls + 1) * 4],
                                scores[picked, cls, np.newaxis])).copy()
        all_cls_dets[cls] = candidates[nms(candidates), :]

    for x1, y1, x2, y2 in all_cls_dets[1][:, :4]:
        cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)),
                      (0, 0, 255), 4)

    # Shrink very large results so the window fits on screen.
    if img.shape[0] > 1024 or img.shape[1] > 1024:
        img = cv2.resize(img, (0, 0), fx=0.3, fy=0.3)
    cv2.imshow("w", img)
    cv2.waitKey()
def pred_eval(gpu_id, feat_predictors, aggr_predictors, test_data, imdb, cfg, orig_pred,
              vis=False, thresh=1e-3, logger=None, ignore_cache=True):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all threshold are set by hand

    Frames are buffered in a sliding window of
    ``2 * KEY_FRAME_INTERVAL * sample_stride + 1`` entries; every
    ``sample_stride``-th entry of the window is fed to the aggregation network
    and the detections are stored for the window's center frame.  The iterator
    tags each frame with ``key_frame_flag``: 0 starts a new video (the window is
    reset and front-padded with the first frame), 2 is a regular frame, and 1 is
    the last frame of a video (the window is back-padded with it while the
    remaining center frames are predicted).

    The collected ``(all_boxes, frame_ids)`` tuple is pickled to
    ``<imdb.result_path>/<imdb.name>_<gpu_id>`` (suffix ``_raw`` when
    ``cfg.TEST.SEQ_NMS`` is on).

    :param gpu_id: id appended to the detection file name
    :param feat_predictors: predictor passed to get_resnet_output for per-frame features
    :param aggr_predictors: predictor passed to im_detect for the aggregated window
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param cfg: configuration; reads cfg.TEST.{SEQ_NMS, NMS, max_per_image,
                KEY_FRAME_INTERVAL, sample_stride, video_shuffle}
    :param orig_pred: if true use process_pred_result, otherwise process_pred_result_v2
    :param vis: controls visualization
    :param thresh: valid detection threshold
    :param logger: optional logger that receives the progress lines
    :param ignore_cache: unused here; kept for signature compatibility
    :return: (all_boxes, frame_ids)
    """
    det_file = os.path.join(imdb.result_path, imdb.name + '_' + str(gpu_id))
    if cfg.TEST.SEQ_NMS == True:
        det_file += '_raw'

    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data[0]]
    num_images = test_data.size

    # init frame id for each video (all videos level unique id)
    # for epic kitchen, the frame id is not continuous, so a per-frame
    # 'unique_ids' table takes precedence over the scalar 'frame_id'
    roidb_frame_ids = [x['unique_ids'] if 'unique_ids' in x else x['frame_id']
                       for x in test_data.roidb]
    roidb_frame_seg_lens = [x['frame_seg_len'] if 'frame_seg_len' in x else x['video_len']
                            for x in test_data.roidb]

    if not isinstance(test_data, PrefetchingIter):
        test_data = PrefetchingIter(test_data)

    nms = py_nms_wrapper(cfg.TEST.NMS)

    # limit detections to max_per_image over all classes
    max_per_image = cfg.TEST.max_per_image

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]
    # builtin int: the np.int alias was removed in NumPy 1.24
    frame_ids = np.zeros(num_images, dtype=int)

    roidb_idx = -1
    roidb_offset = -1
    idx = 0
    # index of the center (predicted) frame inside a full window
    center = cfg.TEST.KEY_FRAME_INTERVAL * cfg.TEST.sample_stride
    all_frame_interval = center * 2 + 1
    strided_idx = [_i for _i in range(all_frame_interval)
                   if _i % cfg.TEST.sample_stride == 0]
    print(strided_idx, len(strided_idx))
    print('all_frame_interval', all_frame_interval, 'sample stride', cfg.TEST.sample_stride)

    def push_frame(buffers, frame):
        # append one (image, feat, path, offset) record to the four window deques
        for buf, item in zip(buffers, frame):
            buf.append(item)

    data_time, net_time, post_time = 0.0, 0.0, 0.0
    t = time.time()

    # loop through all the test data
    for frame_offset, img_path, im_info, key_frame_flag, data_batch in test_data:
        # for drawing: keep the last three path components and make sure the
        # two directory levels exist under the result path
        img_path = img_path.split('/')[-3:]
        draw_dir = os.path.join(imdb.result_path, *img_path[:2])
        if not os.path.exists(draw_dir):
            os.makedirs(draw_dir)

        t1 = time.time() - t
        t = time.time()

        # how many center frames get predicted for this incoming frame
        num_predictions = 0

        if key_frame_flag == 0:
            # new video: reset the window and pad the front with the first frame
            roidb_idx += 1
            roidb_offset = -1
            roidb_frame_seg_len = roidb_frame_seg_lens[roidb_idx]
            data_list = deque(maxlen=all_frame_interval)
            feat_list = deque(maxlen=all_frame_interval)
            img_paths_list = deque(maxlen=all_frame_interval)
            frame_offset_list = deque(maxlen=all_frame_interval)
            buffers = (data_list, feat_list, img_paths_list, frame_offset_list)

            image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
            frame = (image, feat, img_path, frame_offset)
            while len(data_list) < center + 1:
                push_frame(buffers, frame)
            # with a zero interval the window is a single frame, so the first
            # frame is predicted right away (features computed above are reused)
            if cfg.TEST.KEY_FRAME_INTERVAL == 0:
                num_predictions = 1

        elif key_frame_flag == 2:
            # main part: keep filling the window, predict once it is full
            image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
            frame = (image, feat, img_path, frame_offset)
            if len(data_list) < all_frame_interval - 1:
                push_frame(buffers, frame)
            else:
                num_predictions = 1

        elif key_frame_flag == 1:
            # end part: pad short videos up to a full window with the last
            # frame, then flush the remaining center frames
            image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
            frame = (image, feat, img_path, frame_offset)
            while len(data_list) < all_frame_interval - 1:
                push_frame(buffers, frame)
            num_predictions = min(roidb_frame_seg_len, center + 1)

        if num_predictions > 0:
            scales = [iim_info[0, 2] for iim_info in im_info]

        done = 0
        while done < num_predictions:
            push_frame(buffers, frame)

            strided_data_list = [data_list[_idx] for _idx in strided_idx]
            strided_feat_list = [feat_list[_idx] for _idx in strided_idx]
            prepare_data(strided_data_list, strided_feat_list, data_batch)
            pred_result, _aggr_feat, _nonlocal_weights = im_detect(
                aggr_predictors, data_batch, data_names, scales, cfg)

            roidb_offset += 1
            # shuffled videos carry their own per-frame offset; otherwise the
            # running offset inside the current video is used
            if cfg.TEST.video_shuffle:
                offset = frame_offset_list[center]
            else:
                offset = roidb_offset
            video_ids = roidb_frame_ids[roidb_idx]
            if isinstance(video_ids, int):
                frame_ids[idx] = video_ids + offset
            else:
                frame_ids[idx] = video_ids[offset]

            t2 = time.time() - t
            t = time.time()

            process = process_pred_result if orig_pred else process_pred_result_v2
            out_im = process(pred_result, imdb, thresh, cfg, nms, all_boxes, idx,
                             max_per_image, vis, data_list[center].asnumpy(), scales)
            if vis:
                output_dir = os.path.join(imdb.result_path, *img_paths_list[center])
                cv2.imwrite(os.path.join(root_path, output_dir), out_im)

            idx += test_data.batch_size
            t3 = time.time() - t
            t = time.time()
            data_time += t1
            net_time += t2
            post_time += t3

            progress = 'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(
                idx, num_images,
                data_time / idx * test_data.batch_size,
                net_time / idx * test_data.batch_size,
                post_time / idx * test_data.batch_size)
            print(progress)
            if logger:
                logger.info(progress)

            done += 1

    with open(det_file, 'wb') as f:
        cPickle.dump((all_boxes, frame_ids), f, protocol=cPickle.HIGHEST_PROTOCOL)

    return all_boxes, frame_ids
def pred_eval(gpu_id, key_predictor, cur_predictor, test_data, imdb, cfg,
              vis=False, thresh=1e-4, logger=None, ignore_cache=True):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all threshold are set by hand

    Frames whose ``key_frame_flag`` is not 2 are run through ``key_predictor``,
    which also returns a feature map; frames flagged 2 reuse that feature map
    (written into the last data slot as ``feat_key``) and are run through
    ``cur_predictor``.  A flag of 0 additionally marks the first frame of a new
    roidb entry.  Results are pickled to
    ``<imdb.result_path>/<imdb.name>_<gpu_id>_detections.pkl``.

    :param gpu_id: id embedded in the detection file name
    :param key_predictor: predictor used for key frames
    :param cur_predictor: predictor used for non-key frames (flag == 2)
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param cfg: configuration; reads cfg.TEST.NMS, cfg.TEST.max_per_image, cfg.CLASS_AGNOSTIC
    :param vis: controls visualization
    :param thresh: valid detection threshold
    :param logger: optional logger that receives the progress lines
    :param ignore_cache: when False an existing detection file is loaded and returned
    :return: (all_boxes, frame_ids)
    """
    det_file = os.path.join(imdb.result_path,
                            imdb.name + '_' + str(gpu_id) + '_detections.pkl')
    if os.path.exists(det_file) and not ignore_cache:
        # NOTE: unpickling runs arbitrary code; only load caches written by
        # this pipeline
        with open(det_file, 'rb') as fid:
            all_boxes, frame_ids = cPickle.load(fid)
        return all_boxes, frame_ids

    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data[0]]
    num_images = test_data.size
    roidb_frame_ids = [x['frame_id'] for x in test_data.roidb]

    if not isinstance(test_data, PrefetchingIter):
        test_data = PrefetchingIter(test_data)

    nms = py_nms_wrapper(cfg.TEST.NMS)

    # limit detections to max_per_image over all classes
    max_per_image = cfg.TEST.max_per_image

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]
    # builtin int: the np.int alias was removed in NumPy 1.24
    frame_ids = np.zeros(num_images, dtype=int)

    roidb_idx = -1
    roidb_offset = -1
    idx = 0
    data_time, net_time, post_time = 0.0, 0.0, 0.0
    t = time.time()
    for im_info, key_frame_flag, data_batch in test_data:
        t1 = time.time() - t
        t = time.time()

        scales = [iim_info[0, 2] for iim_info in im_info]
        if key_frame_flag != 2:
            scores_all, boxes_all, data_dict_all, feat = im_detect(
                key_predictor, data_batch, data_names, scales, cfg)
        else:
            # non-key frame: plug in the feature map of the latest key frame
            # (assumes a key frame was seen before the first flag-2 frame)
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            scores_all, boxes_all, data_dict_all, _ = im_detect(
                cur_predictor, data_batch, data_names, scales, cfg)

        if key_frame_flag == 0:
            roidb_idx += 1
            roidb_offset = 0
        else:
            roidb_offset += 1

        frame_ids[idx] = roidb_frame_ids[roidb_idx] + roidb_offset

        t2 = time.time() - t
        t = time.time()
        for delta, (scores, boxes, data_dict) in enumerate(
                zip(scores_all, boxes_all, data_dict_all)):
            slot = idx + delta
            for j in range(1, imdb.num_classes):
                indexes = np.where(scores[:, j] > thresh)[0]
                cls_scores = scores[indexes, j, np.newaxis]
                if cfg.CLASS_AGNOSTIC:
                    cls_boxes = boxes[indexes, 4:8]
                else:
                    cls_boxes = boxes[indexes, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j][slot] = cls_dets[keep, :]

            if max_per_image > 0:
                # keep only the max_per_image best-scoring boxes of this image
                image_scores = np.hstack([all_boxes[j][slot][:, -1]
                                          for j in range(1, imdb.num_classes)])
                if len(image_scores) > max_per_image:
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in range(1, imdb.num_classes):
                        keep = np.where(all_boxes[j][slot][:, -1] >= image_thresh)[0]
                        all_boxes[j][slot] = all_boxes[j][slot][keep, :]

            if vis:
                boxes_this_image = [[]] + [all_boxes[j][slot]
                                           for j in range(1, imdb.num_classes)]
                vis_all_detection(data_dict['data'].asnumpy(), boxes_this_image,
                                  imdb.classes, scales[delta], cfg)

        idx += test_data.batch_size
        t3 = time.time() - t
        t = time.time()
        data_time += t1
        net_time += t2
        post_time += t3
        progress = 'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(
            idx, num_images,
            data_time / idx * test_data.batch_size,
            net_time / idx * test_data.batch_size,
            post_time / idx * test_data.batch_size)
        # single-argument call form prints identically on Python 2 and 3
        print(progress)
        if logger:
            logger.info(progress)

    with open(det_file, 'wb') as f:
        cPickle.dump((all_boxes, frame_ids), f, protocol=cPickle.HIGHEST_PROTOCOL)

    return all_boxes, frame_ids
def pred_eval(predictor, test_data, imdb, cfg, vis=False, thresh=1e-3, logger=None, ignore_cache=False):
    """
    Run box + mask detection over test_data (or reuse cached results) and
    evaluate them with ``imdb.evaluate_sds``.

    Detections and masks are cached in ``<imdb.name>_detections.pkl`` and
    ``<imdb.name>_masks.pkl`` under ``imdb.result_path``; both files must exist
    for the cache to be used.

    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param cfg: configuration; reads cfg.TEST.{NMS, USE_MASK_MERGE,
                USE_GPU_MASK_MERGE, MASK_MERGE_THRESH}, cfg.BINARY_THRESH and,
                when present, cfg.CLASS_AGNOSTIC
    :param vis: controls visualization
    :param thresh: valid detection threshold
    :param logger: optional logger that receives progress and evaluation output
    :param ignore_cache: when True, cached result files are not read
    :return: None
    """
    det_file = os.path.join(imdb.result_path, imdb.name + '_detections.pkl')
    seg_file = os.path.join(imdb.result_path, imdb.name + '_masks.pkl')

    if os.path.exists(det_file) and os.path.exists(seg_file) and not ignore_cache:
        # NOTE: unpickling runs arbitrary code; only load caches written by
        # this pipeline
        with open(det_file, 'rb') as f:
            all_boxes = cPickle.load(f)
        with open(seg_file, 'rb') as f:
            all_masks = cPickle.load(f)
    else:
        assert vis or not test_data.shuffle
        data_names = [k[0] for k in test_data.provide_data[0]]

        if not isinstance(test_data, PrefetchingIter):
            test_data = PrefetchingIter(test_data)

        # function pointers
        nms = py_nms_wrapper(cfg.TEST.NMS)
        mask_voting = gpu_mask_voting if cfg.TEST.USE_GPU_MASK_MERGE else cpu_mask_voting

        max_per_image = 100 if cfg.TEST.USE_MASK_MERGE else -1
        num_images = imdb.num_images
        all_boxes = [[[] for _ in range(num_images)]
                     for _ in range(imdb.num_classes)]
        all_masks = [[[] for _ in range(num_images)]
                     for _ in range(imdb.num_classes)]

        idx = 0
        t = time.time()
        for data_batch in test_data:
            t1 = time.time() - t
            t = time.time()

            scales = [item[1].asnumpy()[0, 2] for item in data_batch.data]
            scores_all, boxes_all, masks_all, data_dict_all = im_detect(
                predictor, data_batch, data_names, scales, cfg)
            im_shapes = [item[0].shape[2:4] for item in data_batch.data]

            t2 = time.time() - t
            t = time.time()

            # post processing
            for delta, (scores, boxes, masks, data_dict) in enumerate(
                    zip(scores_all, boxes_all, masks_all, data_dict_all)):
                slot = idx + delta

                if not cfg.TEST.USE_MASK_MERGE:
                    # a config without CLASS_AGNOSTIC means class-specific boxes
                    try:
                        class_agnostic = bool(cfg.CLASS_AGNOSTIC)
                    except (AttributeError, KeyError):
                        class_agnostic = False

                    for j in range(1, imdb.num_classes):
                        indexes = np.where(scores[:, j] > thresh)[0]
                        cls_scores = scores[indexes, j, np.newaxis]
                        cls_masks = masks[indexes, 1, :, :]
                        if class_agnostic:
                            cls_boxes = boxes[indexes, :]
                        else:
                            cls_boxes = boxes[indexes, j * 4:(j + 1) * 4]
                        cls_dets = np.hstack((cls_boxes, cls_scores))
                        keep = nms(cls_dets)
                        all_boxes[j][slot] = cls_dets[keep, :]
                        all_masks[j][slot] = cls_masks[keep, :]
                else:
                    masks = masks[:, 1:, :, :]
                    # size of the image before it was scaled for the network
                    im_height = np.round(im_shapes[delta][0] / scales[delta]).astype('int')
                    im_width = np.round(im_shapes[delta][1] / scales[delta]).astype('int')
                    boxes = clip_boxes(boxes, (im_height, im_width))
                    result_mask, result_box = mask_voting(
                        masks, boxes, scores, imdb.num_classes, max_per_image,
                        im_width, im_height, cfg.TEST.NMS,
                        cfg.TEST.MASK_MERGE_THRESH, cfg.BINARY_THRESH)
                    for j in range(1, imdb.num_classes):
                        all_boxes[j][slot] = result_box[j]
                        all_masks[j][slot] = result_mask[j][:, 0, :, :]

                if vis:
                    boxes_this_image = [[]] + [all_boxes[j][slot]
                                               for j in range(1, imdb.num_classes)]
                    masks_this_image = [[]] + [all_masks[j][slot]
                                               for j in range(1, imdb.num_classes)]
                    vis_all_mask(data_dict['data'].asnumpy(), boxes_this_image,
                                 masks_this_image, imdb.classes, scales[delta], cfg)

            idx += test_data.batch_size
            t3 = time.time() - t
            t = time.time()

            progress = 'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(
                idx, imdb.num_images, t1, t2, t3)
            # single-argument call form prints identically on Python 2 and 3
            print(progress)
            if logger:
                logger.info(progress)

        with open(det_file, 'wb') as f:
            cPickle.dump(all_boxes, f, protocol=cPickle.HIGHEST_PROTOCOL)
        with open(seg_file, 'wb') as f:
            cPickle.dump(all_masks, f, protocol=cPickle.HIGHEST_PROTOCOL)

    info_str = imdb.evaluate_sds(all_boxes, all_masks)
    if logger:
        logger.info('evaluate detections: \n{}'.format(info_str))
def pred_eval(gpu_id, feat_predictors, aggr_predictors, test_data, imdb, cfg,
              vis=False, thresh=1e-3, logger=None, ignore_cache=True):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all threshold are set by hand

    Frames are buffered in a sliding window of ``2 * KEY_FRAME_INTERVAL + 1``
    entries and detections are stored for the window's center frame.  The
    iterator tags each frame with ``key_frame_flag``: 0 starts a new video (the
    window is reset and front-padded with the first frame, no prediction yet),
    2 is a regular frame, and 1 is the last frame of a video, which is appended
    ``KEY_FRAME_INTERVAL + 1`` times to flush the remaining center frames.

    Results are pickled to ``<imdb.result_path>/<imdb.name>_<gpu_id>`` (suffix
    ``_raw`` when ``cfg.TEST.SEQ_NMS`` is on).

    :param gpu_id: id appended to the detection file name
    :param feat_predictors: predictor passed to get_resnet_output for per-frame features
    :param aggr_predictors: predictor passed to im_detect for the aggregated window
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param cfg: configuration; reads cfg.TEST.{SEQ_NMS, NMS, max_per_image, KEY_FRAME_INTERVAL}
    :param vis: controls visualization
    :param thresh: valid detection threshold
    :param logger: optional logger that receives the progress lines
    :param ignore_cache: when False an existing detection file is loaded and returned
    :return: (all_boxes, frame_ids)
    """
    det_file = os.path.join(imdb.result_path, imdb.name + '_' + str(gpu_id))
    if cfg.TEST.SEQ_NMS == True:
        det_file += '_raw'
    print('det_file=', det_file)
    if os.path.exists(det_file) and not ignore_cache:
        # NOTE: unpickling runs arbitrary code; only load caches written by
        # this pipeline
        with open(det_file, 'rb') as fid:
            all_boxes, frame_ids = pickle.load(fid)
        return all_boxes, frame_ids

    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data[0]]
    num_images = test_data.size
    roidb_frame_ids = [x['frame_id'] for x in test_data.roidb]

    if not isinstance(test_data, PrefetchingIter):
        test_data = PrefetchingIter(test_data)

    nms = py_nms_wrapper(cfg.TEST.NMS)

    # limit detections to max_per_image over all classes
    max_per_image = cfg.TEST.max_per_image

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]
    # builtin int: the np.int alias was removed in NumPy 1.24
    frame_ids = np.zeros(num_images, dtype=int)

    roidb_idx = -1
    roidb_offset = -1
    idx = 0
    all_frame_interval = cfg.TEST.KEY_FRAME_INTERVAL * 2 + 1

    data_time, net_time, post_time = 0.0, 0.0, 0.0
    t = time.time()

    # loop through all the test data
    for im_info, key_frame_flag, data_batch in test_data:
        t1 = time.time() - t
        t = time.time()

        # how many center frames get predicted for this incoming frame
        num_predictions = 0

        if key_frame_flag == 0:
            # new video: reset the window and pad the front with the first
            # frame; do not do prediction yet
            roidb_idx += 1
            roidb_offset = -1
            data_list = deque(maxlen=all_frame_interval)
            feat_list = deque(maxlen=all_frame_interval)
            image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
            while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                data_list.append(image)
                feat_list.append(feat)

        elif key_frame_flag == 2:
            # main part: keep filling the window, predict once it is full
            image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
            if len(data_list) < all_frame_interval - 1:
                data_list.append(image)
                feat_list.append(feat)
            else:
                num_predictions = 1

        elif key_frame_flag == 1:
            # end part: the last frame is appended repeatedly to flush the
            # center frames that are still waiting in the window
            image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
            num_predictions = cfg.TEST.KEY_FRAME_INTERVAL + 1

        if num_predictions > 0:
            # always taken from the current batch; previously the end-of-video
            # branch reused whatever value an earlier frame had left behind
            # (or failed when no earlier frame had set it)
            scales = [iim_info[0, 2] for iim_info in im_info]

        done = 0
        while done < num_predictions:
            data_list.append(image)
            feat_list.append(feat)
            prepare_data(data_list, feat_list, data_batch)
            pred_result = im_detect(aggr_predictors, data_batch, data_names, scales, cfg)

            roidb_offset += 1
            frame_ids[idx] = roidb_frame_ids[roidb_idx] + roidb_offset

            t2 = time.time() - t
            t = time.time()
            process_pred_result(pred_result, imdb, thresh, cfg, nms, all_boxes, idx,
                                max_per_image, vis,
                                data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
            idx += test_data.batch_size
            t3 = time.time() - t
            t = time.time()
            data_time += t1
            net_time += t2
            post_time += t3

            progress = 'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(
                idx, num_images,
                data_time / idx * test_data.batch_size,
                net_time / idx * test_data.batch_size,
                post_time / idx * test_data.batch_size)
            print(progress)
            if logger:
                logger.info(progress)

            done += 1

    with open(det_file, 'wb') as f:
        pickle.dump((all_boxes, frame_ids), f, protocol=pickle.HIGHEST_PROTOCOL)

    return all_boxes, frame_ids
def cpu_mask_voting(masks, boxes, scores, num_classes, max_per_image, im_width, im_height,
                    nms_thresh, merge_thresh, binary_thresh=0.4):
    """
    Wrapper function for mask voting, note we already know the class of boxes and masks

    For every foreground class the boxes are reduced with NMS, capped to the
    ``max_per_image`` best scores over all classes, and each surviving box gets
    a mask merged (via ``mask_aggregation``) from all input boxes that overlap
    it by at least ``merge_thresh``, weighted by their class scores.

    :param masks: array indexed as masks[i, 0] -> square mask of box i
    :param boxes: array with one row of 4 box coordinates per detection
    :param scores: array indexed as scores[i, c] -> score of box i for class c
    :param num_classes: number of classes including background (index 0)
    :param max_per_image: cap on detections kept per class and per image
    :param im_width: image width passed to mask_aggregation
    :param im_height: image height passed to mask_aggregation
    :param nms_thresh: overlap threshold for the NMS wrapper
    :param merge_thresh: minimum overlap for a box to take part in a vote
    :param binary_thresh: binarization threshold passed to mask_aggregation
    :return: (list_result_mask, list_result_box), both lists indexed by class;
             entry 0 is left as an empty list, entry c holds an
             (n, 1, mask_size, mask_size) mask array resp. an (n, 5) array of
             box coordinates plus score
    """
    masks = masks.astype(np.float32)
    mask_size = masks.shape[-1]
    nms = py_nms_wrapper(nms_thresh)

    # apply nms and sort to get first images according to their scores
    # Intermediate results
    t_boxes = [[] for _ in range(num_classes)]
    t_scores = [[] for _ in range(num_classes)]
    t_all_scores = []
    for i in range(1, num_classes):
        dets = np.hstack((boxes.astype(np.float32), scores[:, i:i + 1]))
        inds = nms(dets)
        num_keep = min(len(inds), max_per_image)
        inds = inds[:num_keep]
        t_boxes[i] = boxes[inds]
        t_scores[i] = scores[inds, i]
        t_all_scores.extend(scores[inds, i])

    sorted_scores = np.sort(t_all_scores)[::-1]
    if len(sorted_scores) == 0:
        # nothing survived NMS: fall back to the score floor instead of
        # indexing an empty array
        thresh = 1e-3
    else:
        num_keep = min(len(sorted_scores), max_per_image)
        thresh = max(sorted_scores[num_keep - 1], 1e-3)

    for i in range(1, num_classes):
        keep = np.where(t_scores[i] >= thresh)
        t_boxes[i] = t_boxes[i][keep]
        t_scores[i] = t_scores[i][keep]

    # resize every input mask to the pixel size of its (rounded) box
    num_detect = boxes.shape[0]
    res_mask = [[] for _ in range(num_detect)]
    for i in range(num_detect):
        box = np.round(boxes[i]).astype(int)
        res_mask[i] = cv2.resize(masks[i, 0].astype(np.float32),
                                 (box[2] - box[0] + 1, box[3] - box[1] + 1))

    # np.float was an alias of the builtin float (float64) and was removed in
    # NumPy 1.24; the conversion is loop-invariant, so do it once
    boxes_f64 = boxes.astype(np.float64)

    list_result_box = [[] for _ in range(num_classes)]
    list_result_mask = [[] for _ in range(num_classes)]
    for c in range(1, num_classes):
        num_boxes = len(t_boxes[c])
        masks_ar = np.zeros((num_boxes, 1, mask_size, mask_size))
        boxes_ar = np.zeros((num_boxes, 4))
        for i in range(num_boxes):
            # Get weights according to their segmentation scores
            cur_ov = bbox_overlaps(boxes_f64,
                                   t_boxes[c][i, np.newaxis].astype(np.float64))
            cur_inds = np.where(cur_ov >= merge_thresh)[0]
            cur_weights = scores[cur_inds, c]
            cur_weights = cur_weights / sum(cur_weights)
            # Re-format mask when passing it to mask_aggregation
            p_mask = [res_mask[j] for j in list(cur_inds)]
            # do mask aggregation
            orig_mask, boxes_ar[i] = mask_aggregation(
                boxes[cur_inds], p_mask, cur_weights, im_width, im_height, binary_thresh)
            masks_ar[i, 0] = cv2.resize(orig_mask.astype(np.float32),
                                        (mask_size, mask_size))
        boxes_scored_ar = np.hstack((boxes_ar, t_scores[c][:, np.newaxis]))
        list_result_box[c] = boxes_scored_ar
        list_result_mask[c] = masks_ar
    return list_result_mask, list_result_box
def pred_eval(predictor, test_data, imdb, cfg, vis=False, thresh=1e-3, logger=None, ignore_cache=True):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all threshold are set by hand

    After per-class NMS, every image's detections are cut at a fixed score of
    0.5.  (Earlier revisions zeroed ``max_per_image`` inside the image loop,
    which silently limited that cut to the first image only.)  Results are
    pickled to ``<imdb.result_path>/<imdb.name>_detections.pkl`` and passed to
    ``imdb.evaluate_detections``.

    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param cfg: configuration; reads cfg.TEST.NMS and cfg.CLASS_AGNOSTIC
    :param vis: controls visualization
    :param thresh: valid detection threshold
    :param logger: optional logger that receives progress and evaluation output
    :param ignore_cache: when False an existing detection file is evaluated instead
    :return: None
    """
    # debug banner kept from the original implementation
    print("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$")
    det_file = os.path.join(imdb.result_path, imdb.name + '_detections.pkl')
    if os.path.exists(det_file) and not ignore_cache:
        # NOTE: unpickling runs arbitrary code; only load caches written by
        # this pipeline
        with open(det_file, 'rb') as fid:
            all_boxes = cPickle.load(fid)
        info_str = imdb.evaluate_detections(all_boxes)
        if logger:
            logger.info('evaluate detections: \n{}'.format(info_str))
        return

    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data[0]]

    if not isinstance(test_data, PrefetchingIter):
        test_data = PrefetchingIter(test_data)

    nms = py_nms_wrapper(cfg.TEST.NMS)

    # detections scoring below this are dropped from every image
    score_floor = 0.5

    num_images = imdb.num_images
    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    idx = 0
    data_time, net_time, post_time = 0.0, 0.0, 0.0
    t = time.time()
    for im_info, data_batch in test_data:
        t1 = time.time() - t
        t = time.time()

        scales = [iim_info[0, 2] for iim_info in im_info]
        scores_all, boxes_all, data_dict_all = im_detect(
            predictor, data_batch, data_names, scales, cfg)

        t2 = time.time() - t
        t = time.time()
        for delta, (scores, boxes, data_dict) in enumerate(
                zip(scores_all, boxes_all, data_dict_all)):
            slot = idx + delta
            for j in range(1, imdb.num_classes):
                indexes = np.where(scores[:, j] > thresh)[0]
                cls_scores = scores[indexes, j, np.newaxis]
                if cfg.CLASS_AGNOSTIC:
                    cls_boxes = boxes[indexes, 4:8]
                else:
                    cls_boxes = boxes[indexes, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j][slot] = cls_dets[keep, :]

            # apply the fixed score cut whenever the image has detections;
            # no loop-carried state, so every image is treated the same
            image_scores = np.hstack([all_boxes[j][slot][:, -1]
                                      for j in range(1, imdb.num_classes)])
            if len(image_scores) > 0:
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][slot][:, -1] >= score_floor)[0]
                    all_boxes[j][slot] = all_boxes[j][slot][keep, :]

            # debug output kept from the original implementation
            print(len(all_boxes))

            if vis:
                boxes_this_image = [[]] + [all_boxes[j][slot]
                                           for j in range(1, imdb.num_classes)]
                vis_all_detection(data_dict['data'].asnumpy(), boxes_this_image,
                                  imdb.classes, scales[delta], cfg)

        idx += test_data.batch_size
        t3 = time.time() - t
        t = time.time()
        data_time += t1
        net_time += t2
        post_time += t3
        progress = 'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(
            idx, imdb.num_images,
            data_time / idx * test_data.batch_size,
            net_time / idx * test_data.batch_size,
            post_time / idx * test_data.batch_size)
        # single-argument call form prints identically on Python 2 and 3
        print(progress)
        if logger:
            logger.info(progress)

    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, protocol=cPickle.HIGHEST_PROTOCOL)

    info_str = imdb.evaluate_detections(all_boxes)
    if logger:
        logger.info('evaluate detections: \n{}'.format(info_str))
def main():
    """Run the mask demo on every .jpg/.png image found in the test directory.

    Images are read from ``cur_path + '/../' + test_dir``, pushed through the
    network given by ``config.symbol`` with weights loaded from ``model_dir``,
    filtered at a fixed score of 0.7, drawn with ``show_masks`` and written to
    ``result_dir`` under the same file name.
    """
    # get symbol
    ctx_id = [int(i) for i in config.gpus.split(',')]
    pprint.pprint(config)
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 2
    classes = [
        '__background__',  # always index 0
        '1'
    ]

    # load demo data
    image_dir = cur_path + '/../' + test_dir
    image_names = [name for name in os.listdir(image_dir)
                   if name.endswith(('.jpg', '.png'))]
    if not image_names:
        # Nothing to warm up or test on; previously this surfaced as an
        # IndexError on data[0] further down.
        print('no .jpg/.png images found in {}'.format(image_dir))
        return

    data = []
    for im_name in image_names:
        im_path = image_dir + im_name
        assert os.path.exists(im_path), '{} does not exist'.format(im_path)
        # 128 is the numeric value the original code ORs in via long(128)
        # NOTE(review): presumably cv2.IMREAD_IGNORE_ORIENTATION - confirm.
        im = cv2.imread(im_path, cv2.IMREAD_COLOR | long(128))
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(entry[name]) for name in data_names]
            for entry in data]
    max_data_shape = [[('data', (1, 3,
                                 max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, entry)]
                    for entry in data]
    provide_label = [None for _ in data]
    arg_params, aux_params = load_param(cur_path + '/../' + model_dir, 0,
                                        process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(ctx_id[0])],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # warm up
    for _ in xrange(2):
        data_batch = mx.io.DataBatch(
            data=[data[0]], label=[], pad=0, index=0,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[0])]],
            provide_label=[None])
        scales = [batch[1].asnumpy()[0, 2] for batch in data_batch.data]
        _, _, _, _ = im_detect(predictor, data_batch, data_names, scales,
                               config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]], label=[], pad=0, index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [batch[1].asnumpy()[0, 2] for batch in data_batch.data]

        tic()
        # Detection is run with a unit scale; the real resize factor is
        # handed to show_masks below instead.
        scores, boxes, masks, data_dict = im_detect(
            predictor, data_batch, data_names, [1.0], config)
        im_shapes = [batch[0].shape[2:4] for batch in data_batch.data]

        if not config.TEST.USE_MASK_MERGE:
            all_boxes = [[] for _ in xrange(num_classes)]
            all_masks = [[] for _ in xrange(num_classes)]
            nms = py_nms_wrapper(config.TEST.NMS)
            # A config without CLASS_AGNOSTIC is treated as class-specific.
            class_agnostic = getattr(config, 'CLASS_AGNOSTIC', False)
            for j in range(1, num_classes):
                indexes = np.where(scores[0][:, j] > 0.7)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                if class_agnostic:
                    cls_boxes = boxes[0][indexes, :]
                else:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            dets = [all_boxes[j] for j in range(1, num_classes)]
            masks = [all_masks[j] for j in range(1, num_classes)]
        else:
            masks = masks[0][:, 1:, :, :]
            result_masks, result_dets = gpu_mask_voting(
                masks, boxes[0], scores[0], num_classes, 100,
                im_shapes[0][1], im_shapes[0][0], config.TEST.NMS,
                config.TEST.MASK_MERGE_THRESH, config.BINARY_THRESH,
                ctx_id[0])
            dets = [result_dets[j] for j in range(1, num_classes)]
            masks = [result_masks[j][:, 0, :, :]
                     for j in range(1, num_classes)]
        print('testing {} {:.4f}s'.format(im_name, toc()))

        # visualize
        for i in xrange(len(dets)):
            keep = np.where(dets[i][:, -1] > 0.7)
            dets[i] = dets[i][keep]
            masks[i] = masks[i][keep]
        im = cv2.imread(image_dir + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_masks(im, dets, masks, classes, config, 1.0 / scales[0], False)

        # Save img (the same channel swap turns RGB back into BGR for imwrite)
        cv2.imwrite(cur_path + '/../' + result_dir + im_name,
                    cv2.cvtColor(im, cv2.COLOR_BGR2RGB))

    print('done')
def aggregateSingle(self, scale_cls_dets, scale_cls_masks, vis=False, cache_name='cache', vis_path=None, vis_name=None, vis_ext='.png'):
    """Merge per-scale detections and masks, run NMS and cap detections per image.

    :param scale_cls_dets: one entry per test scale, indexed as [class][image]
    :param scale_cls_masks: masks laid out like ``scale_cls_dets``
    :param vis: when true, render every image through ``visualize_masks``
    :param cache_name: sub-directory of ``self.result_path`` where the boxes are
        pickled; a falsy value skips saving
    :param vis_path: output directory for visualizations; defaults to
        ``cfg.TEST.VISUALIZATION_PATH/cache_name``
    :param vis_name: file stem for visualizations; defaults to the image index
    :param vis_ext: file extension for visualizations
    :return: ``(all_boxes, all_masks)``, both indexed as [class][image]
    """
    valid_ranges = self.cfg.TEST.VALID_RANGES
    n_scales = len(scale_cls_dets)
    assert n_scales == len(valid_ranges), \
        'A valid range should be specified for each test scale'

    all_boxes = [[[] for _ in range(self.num_images)]
                 for _ in range(self.num_classes)]
    all_masks = [[[] for _ in range(self.num_images)]
                 for _ in range(self.num_classes)]

    if n_scales > 1:
        self.show_info(
            'Aggregating detections from multiple scales and applying NMS...')
    else:
        self.show_info('Performing NMS on detections...')

    # TODO: change the hard code here, change it to soft_nms or mask_nms
    nms = py_nms_wrapper(0.3)

    # Apply ranges and store detections per category
    for im_idx in range(self.num_images):
        for cls_idx in range(1, self.num_classes):
            merged_dets = np.empty((0, 5), dtype=np.float32)
            # masks are accumulated as 28x28 maps (hard-coded size)
            merged_masks = np.empty((0, 28, 28), dtype=np.float32)
            for per_scale_dets, per_scale_masks, valid_range in zip(
                    scale_cls_dets, scale_cls_masks, valid_ranges):
                dets = per_scale_dets[cls_idx][im_idx]
                masks = per_scale_masks[cls_idx][im_idx]

                span_x = dets[:, 2] - dets[:, 0]
                span_y = dets[:, 3] - dets[:, 1]
                areas = span_y * span_x

                lower, upper = valid_range[0], valid_range[1]
                if lower > 0:
                    above_min = np.where(areas > lower * lower)[0]
                else:
                    above_min = np.arange(len(areas))
                if upper > 0:
                    below_max = np.where(areas <= upper * upper)[0]
                else:
                    below_max = np.arange(len(areas))
                in_range = np.intersect1d(above_min, below_max)

                # NOTE(review): when no detection falls inside the range, all
                # of them are kept rather than none - confirm this is intended.
                if len(in_range) > 0:
                    dets = dets[in_range, :]
                    masks = masks[in_range, :, :]

                merged_dets = np.vstack(
                    (merged_dets, dets.astype(np.float32)))
                merged_masks = np.concatenate((merged_masks, masks), axis=0)

            survivors = nms(merged_dets)
            all_boxes[cls_idx][im_idx] = merged_dets[survivors, :]
            all_masks[cls_idx][im_idx] = merged_masks[survivors, :]

    # Limit number of detections to MAX_PER_IMAGE if requested and visualize if vis is True
    for im_idx in range(self.num_images):
        max_dets = self.cfg.TEST.MAX_PER_IMAGE
        if max_dets > 0:
            image_scores = np.hstack([
                all_boxes[cls_idx][im_idx][:, -1]
                for cls_idx in range(1, self.num_classes)
            ])
            if len(image_scores) > max_dets:
                # score of the max_dets-th best detection in this image
                cutoff = np.sort(image_scores)[-max_dets]
                for cls_idx in range(1, self.num_classes):
                    strong = np.where(
                        all_boxes[cls_idx][im_idx][:, -1] >= cutoff)[0]
                    all_boxes[cls_idx][im_idx] = \
                        all_boxes[cls_idx][im_idx][strong, :]
                    all_masks[cls_idx][im_idx] = \
                        all_masks[cls_idx][im_idx][strong, :]

        if not vis:
            continue

        visualization_path = vis_path or os.path.join(
            self.cfg.TEST.VISUALIZATION_PATH, cache_name)
        if not os.path.isdir(visualization_path):
            os.makedirs(visualization_path)
        import cv2
        bgr = cv2.imread(self.roidb[im_idx]['image'])
        im = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        boxes_for_image = [[]] + [all_boxes[cls_idx][im_idx]
                                  for cls_idx in range(1, self.num_classes)]
        masks_for_image = [[]] + [all_masks[cls_idx][im_idx]
                                  for cls_idx in range(1, self.num_classes)]
        out_file = os.path.join(
            visualization_path, '{}{}'.format(vis_name or im_idx, vis_ext))
        visualize_masks(im, boxes_for_image, masks_for_image, 1.0,
                        self.cfg.network.PIXEL_MEANS, self.class_names,
                        threshold=0.5, save_path=out_file, transform=False)

    if cache_name:
        cache_dir = os.path.join(self.result_path, cache_name)
        if not os.path.isdir(cache_dir):
            os.makedirs(cache_dir)
        cache_path = os.path.join(cache_dir, 'detections.pkl')
        self.show_info(
            'Done! Saving detections into: {}'.format(cache_path))
        # only the boxes are cached; masks are returned but not saved
        with open(cache_path, 'wb') as detfile:
            cPickle.dump(all_boxes, detfile)

    return all_boxes, all_masks
def pred_eval(predictor, test_data, imdb, cfg, vis=False, thresh=1e-3, logger=None, ignore_cache=True):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all threshold are set by hand

    Detection is run once per entry of cfg.TEST_SCALES; each scale's result is
    pickled to its own file, then all scales are merged per class and image,
    passed through NMS (or soft-NMS) and capped at cfg.TEST.max_per_image.
    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param cfg: configuration; cfg.SCALES is overwritten with each test scale
        and is not restored afterwards
    :param vis: controls visualization
    :param thresh: valid detection threshold
    :param logger: optional logger that receives the evaluation summary
    :param ignore_cache: when False and a merged detection file already exists,
        that file is evaluated instead of running the network
    :return: None
    """
    det_file = os.path.join(imdb.result_path, imdb.name + '_detections.pkl')
    if os.path.exists(det_file) and not ignore_cache:
        # NOTE: unpickling runs arbitrary code; only load files this pipeline wrote.
        with open(det_file, 'rb') as fid:
            all_boxes = cPickle.load(fid)
        info_str = imdb.evaluate_detections(all_boxes)
        if logger:
            logger.info('evaluate detections: \n{}'.format(info_str))
        return

    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data[0]]

    if not isinstance(test_data, PrefetchingIter):
        test_data = PrefetchingIter(test_data)

    # limit detections to max_per_image over all classes
    max_per_image = cfg.TEST.max_per_image
    num_images = imdb.num_images

    for test_scale_index, test_scale in enumerate(cfg.TEST_SCALES):
        det_file_single_scale = os.path.join(
            imdb.result_path,
            imdb.name + '_detections_' + str(test_scale_index) + '.pkl')
        cfg.SCALES = [test_scale]
        test_data.reset()

        # all detections are collected into:
        #    all_boxes[cls][image] = N x 5 array of detections in
        #    (x1, y1, x2, y2, score)
        all_boxes_single_scale = [[[] for _ in range(num_images)]
                                  for _ in range(imdb.num_classes)]
        detect_at_single_scale(predictor, data_names, imdb, test_data, cfg,
                               thresh, vis, all_boxes_single_scale, logger)

        with open(det_file_single_scale, 'wb') as f:
            cPickle.dump(all_boxes_single_scale, f,
                         protocol=cPickle.HIGHEST_PROTOCOL)

    # merge the per-scale results, same [cls][image] layout as above
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]
    for test_scale_index, test_scale in enumerate(cfg.TEST_SCALES):
        det_file_single_scale = os.path.join(
            imdb.result_path,
            imdb.name + '_detections_' + str(test_scale_index) + '.pkl')
        if not os.path.exists(det_file_single_scale):
            continue
        with open(det_file_single_scale, 'rb') as fid:
            all_boxes_single_scale = cPickle.load(fid)
        for idx_class in range(1, imdb.num_classes):
            for idx_im in range(0, num_images):
                scale_dets = all_boxes_single_scale[idx_class][idx_im]
                if len(all_boxes[idx_class][idx_im]) == 0:
                    all_boxes[idx_class][idx_im] = scale_dets
                else:
                    all_boxes[idx_class][idx_im] = np.vstack(
                        (all_boxes[idx_class][idx_im], scale_dets))

    # The suppression wrapper does not depend on class or image, so it is
    # built once instead of once per (class, image) pair.
    use_softnms = cfg.TEST.USE_SOFTNMS
    if use_softnms:
        soft_nms = py_softnms_wrapper(cfg.TEST.SOFTNMS_THRESH,
                                      max_dets=max_per_image)
    else:
        nms = py_nms_wrapper(cfg.TEST.NMS)

    for idx_class in range(1, imdb.num_classes):
        for idx_im in range(0, num_images):
            dets = all_boxes[idx_class][idx_im]
            if use_softnms:
                all_boxes[idx_class][idx_im] = soft_nms(dets)
            else:
                keep = nms(dets)
                all_boxes[idx_class][idx_im] = dets[keep, :]

    if max_per_image > 0:
        for idx_im in range(0, num_images):
            image_scores = np.hstack([all_boxes[j][idx_im][:, -1]
                                      for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                # score of the max_per_image-th best detection in this image
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(
                        all_boxes[j][idx_im][:, -1] >= image_thresh)[0]
                    all_boxes[j][idx_im] = all_boxes[j][idx_im][keep, :]

    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, protocol=cPickle.HIGHEST_PROTOCOL)

    info_str = imdb.evaluate_detections(all_boxes)
    if logger:
        logger.info('evaluate detections: \n{}'.format(info_str))
def pred_eval(predictor, test_data, imdb, cfg, vis=False, thresh=1e-3, logger=None, ignore_cache=True):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all threshold are set by hand

    For every scale in cfg.TEST_SCALES the detector is run and its output is
    pickled to a per-scale file.  The per-scale files are then stacked per
    class and image, filtered with NMS or soft-NMS, limited to
    cfg.TEST.max_per_image detections per image, saved and evaluated.
    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param cfg: configuration; cfg.SCALES is reassigned for each test scale and
        left at the last one
    :param vis: controls visualization
    :param thresh: valid detection threshold
    :param logger: optional logger that receives the evaluation summary
    :param ignore_cache: when False and a merged detection file already exists,
        that file is evaluated instead of running the network
    :return: None
    """
    det_file = os.path.join(imdb.result_path, imdb.name + '_detections.pkl')
    if os.path.exists(det_file) and not ignore_cache:
        # NOTE: unpickling runs arbitrary code; only load files this pipeline wrote.
        with open(det_file, 'rb') as fid:
            all_boxes = cPickle.load(fid)
        info_str = imdb.evaluate_detections(all_boxes)
        if logger:
            logger.info('evaluate detections: \n{}'.format(info_str))
        return

    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data[0]]

    if not isinstance(test_data, PrefetchingIter):
        test_data = PrefetchingIter(test_data)

    # limit detections to max_per_image over all classes
    max_per_image = cfg.TEST.max_per_image
    num_images = imdb.num_images

    # one detection file per test scale, in cfg.TEST_SCALES order
    scale_det_files = [
        os.path.join(imdb.result_path,
                     imdb.name + '_detections_' + str(scale_index) + '.pkl')
        for scale_index in range(len(cfg.TEST_SCALES))
    ]

    for det_file_single_scale, test_scale in zip(scale_det_files,
                                                 cfg.TEST_SCALES):
        cfg.SCALES = [test_scale]
        test_data.reset()

        # all detections are collected into:
        #    all_boxes[cls][image] = N x 5 array of detections in
        #    (x1, y1, x2, y2, score)
        all_boxes_single_scale = [[[] for _ in range(num_images)]
                                  for _ in range(imdb.num_classes)]
        detect_at_single_scale(predictor, data_names, imdb, test_data, cfg,
                               thresh, vis, all_boxes_single_scale, logger)

        with open(det_file_single_scale, 'wb') as f:
            cPickle.dump(all_boxes_single_scale, f,
                         protocol=cPickle.HIGHEST_PROTOCOL)

    # stack the detections of all scales, same [cls][image] layout as above
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]
    for det_file_single_scale in scale_det_files:
        if not os.path.exists(det_file_single_scale):
            continue
        with open(det_file_single_scale, 'rb') as fid:
            all_boxes_single_scale = cPickle.load(fid)
        for idx_class in range(1, imdb.num_classes):
            for idx_im in range(0, num_images):
                incoming = all_boxes_single_scale[idx_class][idx_im]
                if len(all_boxes[idx_class][idx_im]) == 0:
                    all_boxes[idx_class][idx_im] = incoming
                else:
                    all_boxes[idx_class][idx_im] = np.vstack(
                        (all_boxes[idx_class][idx_im], incoming))

    # Build the suppression wrapper a single time: it is the same for every
    # class and image, so re-creating it inside the nested loop was wasted work.
    use_softnms = cfg.TEST.USE_SOFTNMS
    if use_softnms:
        soft_nms = py_softnms_wrapper(cfg.TEST.SOFTNMS_THRESH,
                                      max_dets=max_per_image)
    else:
        nms = py_nms_wrapper(cfg.TEST.NMS)

    for idx_class in range(1, imdb.num_classes):
        for idx_im in range(0, num_images):
            merged = all_boxes[idx_class][idx_im]
            if use_softnms:
                all_boxes[idx_class][idx_im] = soft_nms(merged)
            else:
                keep = nms(merged)
                all_boxes[idx_class][idx_im] = merged[keep, :]

    if max_per_image > 0:
        for idx_im in range(0, num_images):
            image_scores = np.hstack([
                all_boxes[j][idx_im][:, -1]
                for j in range(1, imdb.num_classes)
            ])
            if len(image_scores) > max_per_image:
                # score of the max_per_image-th best detection in this image
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(
                        all_boxes[j][idx_im][:, -1] >= image_thresh)[0]
                    all_boxes[j][idx_im] = all_boxes[j][idx_im][keep, :]

    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, protocol=cPickle.HIGHEST_PROTOCOL)

    info_str = imdb.evaluate_detections(all_boxes)
    if logger:
        logger.info('evaluate detections: \n{}'.format(info_str))
def _write_crop(directory, file_name, image):
    """Write ``image`` to ``directory/file_name``, creating ``directory`` if missing."""
    if not os.path.exists(directory):
        os.makedirs(directory)
    cv2.imwrite(os.path.join(directory, file_name), image)


def main():
    """Detect receipt regions in the listed images, crop them and OCR the crops.

    For each image named in ``ImageSets/image.txt`` the detector is run at every
    scale in ``config.SCALES``; detections are merged, filtered by (soft-)NMS and
    ``CONFIDENCE_THRESHOLD``, drawn on the image and cropped to per-class
    folders.  Item, price and row crops are read with pytesseract and matched
    item/price pairs are appended to the results text file.  Detections are also
    written through ``convertToXML`` and, optionally, as images / Pascal VOC
    annotations.
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_fpn_dcn_rcnn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)
    max_per_image = config.TEST.max_per_image

    # Print the test scales
    print("Train scales: %s" % str(config.SCALES))
    print("Test scales: %s" % str(config.TEST_SCALES))

    # load demo data
    #dataBaseDir = '/b_test/pkhan/datasets/Receipts/data/'
    dataBaseDir = '/netscratch/queling/data/'
    outputBaseDir = '/netscratch/queling/Deformable/output/fpn/deep_receipt/results/' + EXPERIMENT_NAME
    #outputBaseDir = '/b_test/pkhan/Code/Deformable/output/' + EXPERIMENT_NAME
    resultsTextPath = "/netscratch/queling/Deformable/fpn/results.txt"

    # start from an empty output directory
    if os.path.exists(outputBaseDir):
        shutil.rmtree(outputBaseDir)
    os.mkdir(outputBaseDir)

    incorrectDetectionResultsPath = os.path.join(outputBaseDir, 'IncorrectDetections')
    detectionResultsPath = os.path.join(outputBaseDir, 'Detections')
    annotationResultsPath = os.path.join(outputBaseDir, 'Annotations')

    # Weights and the suppression wrapper do not depend on the image or the
    # scale, so they are prepared once instead of once per image and scale.
    arg_params, aux_params = load_param(MODEL_PATH, MODEL_EPOCH, process=True)
    use_softnms = config.TEST.USE_SOFTNMS
    if use_softnms:
        soft_nms = py_softnms_wrapper(config.TEST.SOFTNMS_THRESH,
                                      max_dets=max_per_image)
    else:
        nms = py_nms_wrapper(config.TEST.NMS)

    outputFile = open(os.path.join(outputBaseDir, 'output.txt'), 'w')
    errorStatsFile = open(
        os.path.join(outputBaseDir, 'incorrect-detections.txt'), 'w')
    try:
        outputFile.write('<?xml version="1.0" encoding="UTF-8"?>\n')

        for resultDir in (incorrectDetectionResultsPath,
                          detectionResultsPath, annotationResultsPath):
            if not os.path.exists(resultDir):
                os.mkdir(resultDir)

        # NOTE(review): only the commented-out computeStatistics call uses this
        # table in the code visible here.
        statistics = {}
        for cls in CLASSES:
            statistics[cls] = {}
            for thresh in IoU_THRESHOLDS:
                statistics[cls][thresh] = {
                    "truePositives": 0,
                    "falsePositives": 0,
                    "falseNegatives": 0,
                    "precision": 0,
                    "recall": 0,
                    "fMeasure": 0,
                }

        # test.txt for whole dataset, image.txt for one
        with open(os.path.join(dataBaseDir, 'ImageSets/image.txt'), 'r') as im_names_file:
            im_names = [line.strip() for line in im_names_file]

        for im_name in im_names:
            # print ("Processing file: %s" % (im_name))
            found = False
            for ext in IMAGE_EXTENSIONS:
                im_name_with_ext = im_name + ext
                # Images for whole dataset, Test for one
                im_path = os.path.join(dataBaseDir, 'Test', im_name_with_ext)
                if os.path.exists(im_path):
                    found = True
                    break
            if not found:
                print("Error: Unable to locate file %s" % (im_name))
                exit(-1)

            # Load GT annotations
            #gtBBoxes = loadGTAnnotationsFromXML(xml_path)

            tic()
            # dets_nms[c] collects the N x 5 detections of class c + 1
            dets_nms = [[] for _ in range(len(TOTAL_CLASSES) - 1)]
            for testScale in config.SCALES:
                im = cv2.imread(im_path, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
                target_size = testScale[0]
                max_size = testScale[1]
                im, im_scale = resize(im, target_size, max_size,
                                      stride=config.network.IMAGE_STRIDE)
                im_tensor = transform(im, config.network.PIXEL_MEANS)
                im_info = np.array(
                    [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                    dtype=np.float32)

                # get predictor
                data_names = ['data', 'im_info']
                label_names = []
                data = [[mx.nd.array(im_tensor), mx.nd.array(im_info)]]
                max_data_shape = [[('data', (1, 3, testScale[0], testScale[1]))]]
                provide_data = [[(k, v.shape)
                                 for k, v in zip(data_names, data[0])]]
                provide_label = [None]
                predictor = Predictor(sym, data_names, label_names,
                                      context=[mx.gpu(0)],
                                      max_data_shapes=max_data_shape,
                                      provide_data=provide_data,
                                      provide_label=provide_label,
                                      arg_params=arg_params,
                                      aux_params=aux_params)

                data_batch = mx.io.DataBatch(
                    data=[data[0]], label=[], pad=0, index=0,
                    provide_data=[[(k, v.shape)
                                   for k, v in zip(data_names, data[0])]],
                    provide_label=[None])
                scales = [batch[1].asnumpy()[0, 2]
                          for batch in data_batch.data]

                # warm up
                for _ in xrange(2):
                    im_detect(predictor, data_batch, data_names, scales,
                              config)

                # test
                scores, boxes, data_dict = im_detect(
                    predictor, data_batch, data_names, scales, config)
                boxes = boxes[0].astype('f')
                scores = scores[0].astype('f')

                for j in range(1, scores.shape[1]):
                    cls_scores = scores[:, j, np.newaxis]
                    if config.CLASS_AGNOSTIC:
                        cls_boxes = boxes[:, 4:8]
                    else:
                        cls_boxes = boxes[:, j * 4:(j + 1) * 4]
                    cls_dets = np.hstack((cls_boxes, cls_scores))
                    if len(dets_nms[j - 1]) == 0:
                        dets_nms[j - 1] = cls_dets
                    else:
                        # Stack the rows of the extra scale; "+=" on arrays
                        # would add coordinates and scores element-wise.
                        dets_nms[j - 1] = np.vstack(
                            (dets_nms[j - 1], cls_dets))

            for clsIter in range(len(dets_nms)):
                if use_softnms:
                    dets_nms[clsIter] = soft_nms(dets_nms[clsIter])
                else:
                    keep = nms(dets_nms[clsIter])
                    dets_nms[clsIter] = dets_nms[clsIter][keep, :]
                dets_nms[clsIter] = dets_nms[clsIter][
                    dets_nms[clsIter][:, -1] > CONFIDENCE_THRESHOLD, :]

            print('Processing image: {} {:.4f}s'.format(im_name, toc()))

            # Add detections on the image
            im = cv2.imread(im_path)  # Reload the image since the previous one was scaled
            item = 0
            price = 0
            asd = 0  # number used in the flat per-detection crop file name
            row = 0
            for cls_idx, cls_name in enumerate(CONCERNED_ERRORS):
                cls_dets = dets_nms[cls_idx]
                for det in cls_dets:
                    predictedBBox = det[:4]
                    cv2.rectangle(im,
                                  (int(predictedBBox[0]), int(predictedBBox[1])),
                                  (int(predictedBBox[2]), int(predictedBBox[3])),
                                  (0, 0, 255), 1)
                    w = predictedBBox[2] - predictedBBox[0]
                    cv2.putText(im, cls_name,
                                (int(predictedBBox[0] + (w / 2.0) - 100),
                                 int(predictedBBox[1] - 5)),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 255, 0), 1)
                    # The crop is taken after drawing, so it can contain the
                    # box outline and label drawn above.
                    crop_im = im[int(predictedBBox[1]):int(predictedBBox[3]),
                                 int(predictedBBox[0]):int(predictedBBox[2])]
                    gray = cv2.cvtColor(crop_im, cv2.COLOR_BGR2GRAY)

                    if cls_name == "price":
                        price = price + 1
                        asd = price
                        _write_crop(outputBaseDir + "/price/",
                                    cls_name + str(asd) + ".jpg", crop_im)
                    elif cls_name == "item_name":
                        item = item + 1
                        asd = item
                        _write_crop(outputBaseDir + "/item/",
                                    cls_name + str(asd) + ".jpg",
                                    cv2.medianBlur(gray, 3))
                    elif cls_name == "row":
                        row = row + 1
                        asd = row
                        _write_crop(outputBaseDir + "/row/",
                                    cls_name + str(asd) + ".jpg",
                                    cv2.medianBlur(gray, 3))
                    elif cls_name == 'total_price':
                        print("Found Total")
                        _write_crop(outputBaseDir + "/total/",
                                    cls_name + ".jpg",
                                    cv2.medianBlur(gray, 3))
                    elif cls_name == 'header':
                        _write_crop(outputBaseDir + "/header/",
                                    cls_name + ".jpg",
                                    cv2.medianBlur(gray, 3))

                    # flat copy of the colour crop in the output root
                    cv2.imwrite(os.path.join(outputBaseDir,
                                             cls_name + str(asd) + ".jpg"),
                                crop_im)

            # Crops are numbered 1..count, so the upper bound is inclusive.
            items = []
            for k in range(1, item + 1):
                path_item = outputBaseDir + "/item/item_name" + str(k) + ".jpg"
                text_item = pytesseract.image_to_string(Image.open(path_item))
                #text_item = spellCheck.main(text_item, "product")
                print(str(k) + ": " + text_item)
                if text_item == "":
                    print("empty and not relevant")
                else:
                    items.append(text_item)
            print("-------------------------------------------------------------")

            prices = []
            for k in range(1, price + 1):
                path_item = outputBaseDir + "/price/price" + str(k) + ".jpg"
                text_item = pytesseract.image_to_string(Image.open(path_item),
                                                        config="--psm 13")
                print(str(k) + ": " + text_item)
                if text_item == "":
                    print("empty and not relevant")
                else:
                    prices.append(text_item)
                print("-------------------------------------------------------------")

            rows = []
            for k in range(1, row + 1):
                path_item = outputBaseDir + "/row/row" + str(k) + ".jpg"
                text_item = pytesseract.image_to_string(Image.open(path_item))
                if text_item == "":
                    print("empty and not relevant")
                else:
                    rows.append(text_item)
                print(str(k) + ": " + text_item)

            # write total in result.txt
            path_item = outputBaseDir + "/total/total_price.jpg"
            text_item = pytesseract.image_to_string(Image.open(path_item))
            with open(resultsTextPath, "a") as f:
                f.write(text_item + "\n")

            #path_item = outputBaseDir+"/header/header.jpg"
            #text_item = pytesseract.image_to_string(Image.open(path_item))
            #print("Header: "+text_item)

            # Pair each item with every price that shares a row text with it.
            for item_text in items:
                found = False
                item_ascii = item_text.encode('ascii', 'ignore')
                for row_text in rows:
                    row_ascii = row_text.encode('ascii', 'ignore')
                    if item_ascii not in row_ascii:
                        continue
                    for price_text in prices:
                        if price_text.encode('ascii', 'ignore') in row_ascii:
                            #item_text = spellCheck.main(item_text, "product")
                            with open(resultsTextPath, "a") as f:
                                f.write(item_text + "\n")
                                f.write(str(price_text) + "\n")
                            found = True
                # Product not found in row
                if not found:
                    with open(resultsTextPath, "a") as f:
                        f.write(item_text + "\n")
                        f.write(" " + "\n")

            # Add gt annotations
            #for bbox in gtBBoxes:
            #    if bbox[5] in CONCERNED_ERRORS:
            #        cv2.rectangle(im, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 1)

            # Computate the statistics for the current image
            #statistics, classificationErrorMessage = computeStatistics(dets_nms, gtBBoxes, statistics, IoU_THRESHOLDS)
            #if classificationErrorMessage is not None:
            #    print ("Writing incorrect image: %s" % (im_name))
            #    errorStatsFile.write("%s: %s\n" % (im_name, classificationErrorMessage))
            #    cv2.imwrite(os.path.join(incorrectDetectionResultsPath, im_name + '.jpg'), im)

            # Write the output in ICDAR Format
            outputFile.write(convertToXML(im_name_with_ext, dets_nms))

            if WRITE_DETECTION_RESULTS:
                outputImagePath = os.path.join(detectionResultsPath,
                                               im_name + ".jpg")
                print("Writing image: %s" % (outputImagePath))
                cv2.imwrite(outputImagePath, im)

            if WRITE_ANNOTATION_RESULTS:
                exportToPascalVOCFormat(im_name, im_path, dets_nms,
                                        annotationResultsPath)
    finally:
        outputFile.close()
        errorStatsFile.close()

    total_classes = 0
    total_F_Meausere = 0
    average_F_Meausere = 0

    # Compute final precision and recall
    # NOTE(review): nothing is written to the statistics file in the code
    # visible here; it is only created (truncated), so it is closed right away.
    outputFile = open(
        os.path.join(outputBaseDir,
                     'output-stats-' + EXPERIMENT_NAME + '.txt'), 'w')
    outputFile.close()
def main():
    """Run the FCIS demo on the four bundled COCO test images.

    Each image under ``cur_path + '/../demo/'`` is resized, pushed through the
    network given by ``config.symbol`` with the ``fcis_coco`` weights, filtered
    at a fixed score of 0.7 and displayed with ``show_masks``.
    """
    # get symbol
    ctx_id = [int(i) for i in config.gpus.split(',')]
    pprint.pprint(config)
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names
    num_classes = 81
    classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
               'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
               'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
               'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
               'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
               'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
               'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
               'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book',
               'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

    # load demo data
    image_names = ['COCO_test2015_000000000275.jpg', 'COCO_test2015_000000001412.jpg',
                   'COCO_test2015_000000073428.jpg', 'COCO_test2015_000000393281.jpg']
    demo_dir = cur_path + '/../demo/'
    data = []
    for im_name in image_names:
        assert os.path.exists(demo_dir + im_name), \
            '{} does not exist'.format('../demo/' + im_name)
        im = cv2.imread(demo_dir + im_name, cv2.IMREAD_COLOR)  # | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size,
                              stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(entry[name]) for name in data_names]
            for entry in data]
    max_data_shape = [[('data', (1, 3,
                                 max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, entry)]
                    for entry in data]
    provide_label = [None for _ in data]
    arg_params, aux_params = load_param(cur_path + '/../model/fcis_coco', 0,
                                        process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(ctx_id[0])],
                          max_data_shapes=max_data_shape,
                          provide_data=provide_data,
                          provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # warm up
    for _ in xrange(2):
        data_batch = mx.io.DataBatch(
            data=[data[0]], label=[], pad=0, index=0,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[0])]],
            provide_label=[None])
        scales = [batch[1].asnumpy()[0, 2] for batch in data_batch.data]
        _, _, _, _ = im_detect(predictor, data_batch, data_names, scales,
                               config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(
            data=[data[idx]], label=[], pad=0, index=idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, data[idx])]],
            provide_label=[None])
        scales = [batch[1].asnumpy()[0, 2] for batch in data_batch.data]

        tic()
        scores, boxes, masks, data_dict = im_detect(
            predictor, data_batch, data_names, scales, config)
        im_shapes = [batch[0].shape[2:4] for batch in data_batch.data]

        if not config.TEST.USE_MASK_MERGE:
            all_boxes = [[] for _ in xrange(num_classes)]
            all_masks = [[] for _ in xrange(num_classes)]
            nms = py_nms_wrapper(config.TEST.NMS)
            # A config without CLASS_AGNOSTIC is treated as class-specific.
            class_agnostic = getattr(config, 'CLASS_AGNOSTIC', False)
            for j in range(1, num_classes):
                indexes = np.where(scores[0][:, j] > 0.7)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                cls_masks = masks[0][indexes, 1, :, :]
                if class_agnostic:
                    cls_boxes = boxes[0][indexes, :]
                else:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]
                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            dets = [all_boxes[j] for j in range(1, num_classes)]
            masks = [all_masks[j] for j in range(1, num_classes)]
        else:
            masks = masks[0][:, 1:, :, :]
            # network input size divided by the resize factor
            im_height = np.round(im_shapes[0][0] / scales[0]).astype('int')
            im_width = np.round(im_shapes[0][1] / scales[0]).astype('int')
            print((im_height, im_width))
            boxes = clip_boxes(boxes[0], (im_height, im_width))
            result_masks, result_dets = gpu_mask_voting(
                masks, boxes, scores[0], num_classes, 100, im_width,
                im_height, config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                config.BINARY_THRESH, ctx_id[0])
            dets = [result_dets[j] for j in range(1, num_classes)]
            masks = [result_masks[j][:, 0, :, :]
                     for j in range(1, num_classes)]
        print('testing {} {:.4f}s'.format(im_name, toc()))

        # visualize
        for i in xrange(len(dets)):
            keep = np.where(dets[i][:, -1] > 0.7)
            dets[i] = dets[i][keep]
            masks[i] = masks[i][keep]
        im = cv2.imread(demo_dir + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_masks(im, dets, masks, classes, config)

    print('done')
def main():
    """Dump flow-field quiver plots for one hard-coded validation clip.

    The function loads every ``*.png`` frame of the clip, builds two
    predictors from the ``resnet_v1_101_flownet_rfcn`` symbol (one for
    per-frame features, one for the "plot" symbol), and then slides a
    window of recent frames/features over the clip.  For each full window
    it calls ``plot_feature``; when the running output index reaches 20 it
    saves one quiver plot per returned flow array into the output directory
    and stops processing the clip.

    Raises:
        IOError: if the frame glob matches no files.
    """
    pprint.pprint(cfg)
    cfg.symbol = 'resnet_v1_101_flownet_rfcn'
    model = '/../model/fgfa_rfcn_vid_s'

    # Original note (translated): "key-frame interval * 2 + 1 is the span of
    # all frames; the paper sets KEY_FRAME_INTERVAL to 10".
    # NOTE(review): the code below uses KEY_FRAME_INTERVAL + 1, not * 2 + 1,
    # and the cache shapes further down are hard-coded to 6 -- confirm that
    # this one-sided window is intentional.
    all_frame_interval = cfg.TEST.KEY_FRAME_INTERVAL + 1

    # NOTE: eval() resolves "<module>.<class>" from cfg.symbol, which is set
    # to a constant just above; do not feed it externally supplied strings.
    symbol_cls = eval(cfg.symbol + '.' + cfg.symbol)
    feat_sym = symbol_cls().get_feat_symbol(cfg)
    aggr_sym = symbol_cls().get_plot_symbol(cfg)

    # load demo data
    frame_pattern = cur_path + '/../data/IR_smoke/Data/VID/val/8/*.png'
    image_names = sorted(glob.glob(frame_pattern))
    if not image_names:
        # Fail here with a clear message instead of a NameError/IndexError
        # further down.
        raise IOError('no frames found matching {}'.format(frame_pattern))

    output_dir = cur_path + '/../demo/rfcn_fgfa_8_agg_1/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Loop-invariant preprocessing parameters.
    target_size = cfg.SCALES[0][0]
    max_size = cfg.SCALES[0][1]
    feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)

    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), '{} does not exist'.format(im_name)
        im = cv2.imread(im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        im, im_scale = resize(im, target_size, max_size,
                              stride=cfg.network.IMAGE_STRIDE)
        im_tensor = transform(im, cfg.network.PIXEL_MEANS)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)
        # The cache slots start out as the frame tensor itself; they are
        # presumably overwritten by prepare_data() -- verify against it.
        data.append({
            'data': im_tensor,
            'im_info': im_info,
            'data_cache': im_tensor,
            'feat_cache': im_tensor,
        })

    # get predictor
    print('get-predictor')
    data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
    label_names = []
    data = [[mx.nd.array(frame[name]) for name in data_names]
            for frame in data]

    max_scale_0 = max(v[0] for v in cfg.SCALES)
    max_scale_1 = max(v[1] for v in cfg.SCALES)
    max_data_shape = [[
        ('data', (1, 3, max_scale_0, max_scale_1)),
        ('data_cache', (6, 3, max_scale_0, max_scale_1)),
        ('feat_cache', (6, cfg.network.FGFA_FEAT_DIM,
                        np.ceil(max_scale_0 / feat_stride).astype(int),
                        np.ceil(max_scale_1 / feat_stride).astype(int))),
    ]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, frame)]
                    for frame in data]
    provide_label = [None] * len(data)

    arg_params, aux_params = load_param(cur_path + model, 1, process=True)
    feat_predictors = Predictor(feat_sym, data_names, label_names,
                                context=[mx.gpu(0)],
                                max_data_shapes=max_data_shape,
                                provide_data=provide_data,
                                provide_label=provide_label,
                                arg_params=arg_params,
                                aux_params=aux_params)
    aggr_predictors = Predictor(aggr_sym, data_names, label_names,
                                context=[mx.gpu(0)],
                                max_data_shapes=max_data_shape,
                                provide_data=provide_data,
                                provide_label=provide_label,
                                arg_params=arg_params,
                                aux_params=aux_params)

    def make_batch(frame_idx, frame):
        # Wrap a single frame's NDArrays into a one-element DataBatch.
        return mx.io.DataBatch(
            data=[frame], label=[], pad=0, index=frame_idx,
            provide_data=[[(k, v.shape)
                           for k, v in zip(data_names, frame)]],
            provide_label=[None])

    data_list = deque(maxlen=all_frame_interval)
    feat_list = deque(maxlen=all_frame_interval)

    # Pad the front of the window with KEY_FRAME_INTERVAL copies of the
    # first frame of the video.
    image, feat = get_resnet_output(feat_predictors,
                                    make_batch(0, data[0]), data_names)
    while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
        data_list.append(image)
        feat_list.append(feat)

    file_idx = 0
    last_idx = len(data) - 1
    for idx, element in enumerate(data):
        data_batch = make_batch(idx, element)
        image, feat = get_resnet_output(feat_predictors, data_batch,
                                        data_names)

        if idx != last_idx:
            # Decide *before* appending whether the window is still filling.
            still_filling = len(data_list) < all_frame_interval - 1
            data_list.append(image)
            feat_list.append(feat)
            if still_filling:
                continue

            #################################################
            # main part of the loop
            #################################################
            prepare_data(data_list, feat_list, data_batch)
            flow = plot_feature(aggr_predictors, data_batch)

            if file_idx == 20:
                # Only this one output index is visualised; the clip is
                # abandoned afterwards.
                for i, flow_i in enumerate(flow):
                    print(flow_i.shape)
                    # NOTE(review): 19 x 24 is a hard-coded grid size, and
                    # reshape() (rather than a transpose) assumes the two
                    # flow components are already the last axis -- confirm
                    # against what plot_feature() returns.
                    flow_i = flow_i.reshape(19, 24, -1)
                    print(flow_i.shape)
                    step = 2
                    plt.quiver(np.arange(0, flow_i.shape[1], step),
                               np.arange(flow_i.shape[0], -1, -step),
                               flow_i[::step, ::step, 0],
                               flow_i[::step, ::step, 1])
                    plt.savefig(os.path.join(output_dir, str(i) + '.png'))
                    plt.cla()
                break

            print('testing {} '.format(str(file_idx) + '.png'))
            file_idx += 1
        else:
            #################################################
            # end part of a video
            #################################################
            # Pad the tail of the window by re-appending the last frame
            # KEY_FRAME_INTERVAL + 1 times; the flow result is not used here.
            for _ in range(cfg.TEST.KEY_FRAME_INTERVAL + 1):
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                plot_feature(aggr_predictors, data_batch)
                file_idx += 1

    print('done')