def main():
    """Demo deformable PSRoI pooling: run the R-FCN head on four demo images,
    using the ground-truth boxes as RoIs, and visualise the learned
    position-sensitive offsets per class."""
    # get symbol
    pprint.pprint(config)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol_rfcn(config, is_train=False)

    # load demo data
    image_names = ['000057.jpg', '000149.jpg', '000351.jpg', '002535.jpg']
    image_all = []
    # ground truth boxes (xmin, ymin, xmax, ymax) and their class ids
    gt_boxes_all = [np.array([[132, 52, 384, 357]]), np.array([[113, 1, 350, 360]]),
                    np.array([[0, 27, 329, 155]]), np.array([[8, 40, 499, 289]])]
    gt_classes_all = [np.array([3]), np.array([16]), np.array([7]), np.array([12])]
    data = []
    for idx, im_name in enumerate(image_names):
        # BUG FIX: original used '%s does not exist'.format(...), which mixes a
        # %-placeholder with str.format and never interpolates the file name.
        assert os.path.exists(cur_path + '/../demo/deform_psroi/' + im_name), \
            ('{} does not exist'.format('../demo/deform_psroi/' + im_name))
        im = cv2.imread(cur_path + '/../demo/deform_psroi/' + im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        image_all.append(im)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        # scale gt boxes to the resized image; prepend a batch-index column of zeros
        gt_boxes = gt_boxes_all[idx]
        gt_boxes = np.round(gt_boxes * im_scale)
        data.append({'data': im_tensor,
                     'rois': np.hstack((np.zeros((gt_boxes.shape[0], 1)), gt_boxes))})

    # get predictor
    data_names = ['data', 'rois']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../model/deform_psroi', 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # test: forward each image and show the predicted PSRoI offsets
    for idx, _ in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        output = predictor.predict(data_batch)
        cls_offset = output[0]['rfcn_cls_offset_output'].asnumpy()

        im = image_all[idx]
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        boxes = gt_boxes_all[idx]
        show_dpsroi_offset(im, boxes, cls_offset, gt_classes_all[idx])
def main():
    """Demo deformable convolution: run the detection network on four demo
    images and visualise the res5a/b/c branch2b deformable-conv offsets."""
    # get symbol
    pprint.pprint(config)
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # load demo data
    image_names = ['000240.jpg', '000437.jpg', '004072.jpg', '007912.jpg']
    image_all = []
    data = []
    for im_name in image_names:
        # BUG FIX: original used '%s does not exist'.format(...), which mixes a
        # %-placeholder with str.format and never interpolates the file name.
        assert os.path.exists(cur_path + '/../demo/deform_conv/' + im_name), \
            ('{} does not exist'.format('../demo/deform_conv/' + im_name))
        im = cv2.imread(cur_path + '/../demo/deform_conv/' + im_name,
                        cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        image_all.append(im)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../model/deform_conv', 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # test: forward each image and show the sampled offsets per stage
    for idx, _ in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        output = predictor.predict(data_batch)
        res5a_offset = output[0]['res5a_branch2b_offset_output'].asnumpy()
        res5b_offset = output[0]['res5b_branch2b_offset_output'].asnumpy()
        res5c_offset = output[0]['res5c_branch2b_offset_output'].asnumpy()

        im = image_all[idx]
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_dconv_offset(im, [res5c_offset, res5b_offset, res5a_offset])
def get_predictor(sym, sym_instance, cfg, arg_params, aux_params, test_data, ctx, max_data_shape):
    """Validate the loaded parameters against the test-data shapes and wrap
    everything in a ready-to-run Predictor.

    Returns the constructed Predictor bound to *ctx*.
    """
    # shape inference + sanity check of the checkpoint against the data layout
    shape_dict = dict(test_data.provide_data_single)
    sym_instance.infer_shape(shape_dict)
    sym_instance.check_parameter_shapes(arg_params, aux_params, shape_dict, is_train=False)

    # input blob names come straight from the loader; no labels at test time
    input_names = [item[0] for item in test_data.provide_data_single]

    return Predictor(sym, input_names, None, context=ctx,
                     max_data_shapes=max_data_shape,
                     provide_data=test_data.provide_data,
                     provide_label=test_data.provide_label,
                     arg_params=arg_params, aux_params=aux_params)
def loadModel(self):
    """Build the detection symbol and a GPU Predictor from the pre-trained
    checkpoint at ``config.PREMODEL``.

    Returns:
        (ctx_id, data_names, predictor) — the GPU id list, the input blob
        names, and the constructed Predictor.
    """
    # get symbol
    ctx_id = [int(i) for i in config.gpus.split(',')]
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    # FIX: replaced Python-2-only long literals (1L, 3L) with plain ints —
    # identical value, portable syntax.
    provide_data = [[max_data_shape[0][0], ('im_info', (1, 3))]]
    provide_label = [None]
    arg_params, aux_params = load_param(cur_path + config.PREMODEL, 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(ctx_id[0])], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    return ctx_id, data_names, predictor
def test_rcnn(cfg, dataset, image_set, root_path, dataset_path, ctx, prefix, epoch, vis,
              ignore_cache, shuffle, has_rpn, proposal, thresh, logger=None, output_path=None):
    """Evaluate an RPN-based R-CNN model on *image_set* with ``pred_eval``.

    Only the ``has_rpn`` path is supported; a proposal-file path is not
    implemented here.
    """
    # BUG FIX: logger.info was called unconditionally although logger defaults
    # to None, which raised AttributeError; require a logger up front,
    # consistent with the other test_rcnn variant in this file.
    if not logger:
        assert False, 'require a logger'
    pprint.pprint(cfg)
    logger.info('testing cfg:{}\n'.format(pprint.pformat(cfg)))

    # load symbol and roidb
    if has_rpn:
        sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
        sym = sym_instance.get_symbol(cfg, is_train=False)
        imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path)
        roidb = imdb.get_roidb()
    else:
        assert False, 'do not support'

    test_data = TestLoader(roidb, cfg, batch_size=len(ctx), shuffle=shuffle, has_rpn=has_rpn)
    arg_params, aux_params = load_param(prefix, epoch, process=True)

    # infer shape
    data_shape_dict = dict(test_data.provide_data)
    sym_instance.infer_shape(data_shape_dict)

    data_names = [k[0] for k in test_data.provide_data]
    label_names = None
    # width is rounded down to a multiple of 16 (network stride)
    max_data_shape = [('data', (1, 3, max([v[0] for v in cfg.SCALES]),
                                max([int(v[1] // 16 * 16) for v in cfg.SCALES])))]
    predictor = Predictor(sym, data_names, label_names, context=ctx,
                          max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data,
                          provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    pred_eval(predictor, test_data, imdb, cfg, vis=vis, ignore_cache=ignore_cache,
              thresh=thresh, logger=logger)
def get_predictor_impression_offline(sym, sym_instance, cfg, arg_params, aux_params, test_data, ctx):
    """Build a Predictor for the offline impression-network variant, whose
    inputs include old/new key frames, the current frame, and cached
    impression / key-task feature maps."""
    # infer shape and verify the checkpoint matches the data layout
    data_shape_dict = dict(test_data.provide_data_single)
    sym_instance.infer_shape(data_shape_dict)
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False)
    # decide maximum shape
    data_names = [k[0] for k in test_data.provide_data_single]
    label_names = None
    # NOTE(review): the (1, 1024, 38, 63) feature-map shapes are hard-coded;
    # presumably they correspond to the fixed test SCALES at the backbone
    # stride — confirm before changing SCALES.
    max_data_shape = [[('data_oldkey', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('data_newkey', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('data_cur', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('impression', (1, 1024, 38, 63)),
                       ('key_feat_task', (1, 1024, 38, 63))]]
    # create predictor (debug print of the loader-provided shapes)
    print 'provide_data', test_data.provide_data
    predictor = Predictor(sym, data_names, label_names,
                          context=ctx, max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data, provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    return predictor
def __init__(self):
    """Set up the deformable R-FCN detector: symbol, checkpoint, Predictor
    bound to GPU 0, and the GPU NMS operator."""
    instance = eval(config.symbol + '.' + config.symbol)()
    self.symbol = instance.get_symbol(config, is_train=False)

    self.classes = ['box', 'robot']
    logging.debug("Classes: {}".format(self.classes))

    self.scales = config.SCALES[0]
    logging.debug("Scales: {}".format(self.scales))

    # one fixed input shape serves as both max and provided shape
    self.data_shape_conf = [[('data', (1, 3, self.scales[0], self.scales[1])),
                             ('im_info', (1, 3))]]

    model_prefix = os.path.join(cur_path, '..', 'models', "rfcn_voc")
    self.arg_params, self.aux_params = load_param(model_prefix, 0, process=True)

    self.data_names = ['data', 'im_info']
    self.predictor = Predictor(self.symbol, ['data', 'im_info'], [],
                               context=[mx.gpu(0)],
                               max_data_shapes=self.data_shape_conf,
                               provide_data=self.data_shape_conf,
                               provide_label=[None],
                               arg_params=self.arg_params,
                               aux_params=self.aux_params)
    self.nms = gpu_nms_wrapper(config.TEST.NMS, 0)
    logging.info("Deformable detector initialized")
def test_rcnn(cfg, dataset, image_set, root_path, dataset_path, ctx, prefix, epoch, vis,
              ignore_cache, shuffle, has_rpn, proposal, thresh, logger=None, output_path=None):
    """Run full detection evaluation, either end-to-end (RPN) or on
    precomputed proposals named by *proposal*."""
    if not logger:
        assert False, 'require a logger'
    pprint.pprint(cfg)
    logger.info('testing cfg:{}\n'.format(pprint.pformat(cfg)))

    # symbol + roidb; both paths share the same network class and imdb
    sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path)
    if has_rpn:
        sym = sym_instance.get_symbol(cfg, is_train=False)
        roidb = imdb.gt_roidb()
    else:
        sym = sym_instance.get_symbol_rcnn(cfg, is_train=False)
        gt_roidb = imdb.gt_roidb()
        # dispatch to e.g. imdb.selective_search_roidb / imdb.rpn_roidb
        roidb = eval('imdb.' + proposal + '_roidb')(gt_roidb)

    test_data = TestLoader(roidb, cfg, batch_size=len(ctx), shuffle=shuffle, has_rpn=has_rpn)
    arg_params, aux_params = load_param(prefix, epoch, process=True)

    # shape inference + checkpoint sanity check
    shape_dict = dict(test_data.provide_data_single)
    sym_instance.infer_shape(shape_dict)
    sym_instance.check_parameter_shapes(arg_params, aux_params, shape_dict, is_train=False)

    input_names = [item[0] for item in test_data.provide_data_single]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]),
                                 max([v[1] for v in cfg.SCALES])))]]
    if not has_rpn:
        # leave headroom over the proposal cap for the rois blob
        max_data_shape.append(('rois', (cfg.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5)))

    predictor = Predictor(sym, input_names, None, context=ctx,
                          max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data,
                          provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    pred_eval(predictor, test_data, imdb, cfg, vis=vis, ignore_cache=ignore_cache,
              thresh=thresh, logger=logger)
def test_rpn(cfg, dataset, image_set, root_path, dataset_path, ctx, prefix, epoch, vis,
             shuffle, thresh, logger=None, output_path=None):
    """Generate RPN proposals over *image_set* and report recall."""
    # fall back to a default stdout logger when the caller supplies none
    if not logger:
        logging.basicConfig()
        logger = logging.getLogger()
        logger.setLevel(logging.INFO)

    # this entry point always runs the RPN
    cfg.TEST.HAS_RPN = True
    pprint.pprint(cfg)
    logger.info('testing rpn cfg:{}\n'.format(pprint.pformat(cfg)))

    sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    sym = sym_instance.get_symbol_rpn(cfg, is_train=False)

    imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path)
    roidb = imdb.gt_roidb()
    test_data = TestLoader(roidb, cfg, batch_size=len(ctx), shuffle=shuffle, has_rpn=True)

    arg_params, aux_params = load_param(prefix, epoch)

    # shape inference + checkpoint sanity check
    shape_dict = dict(test_data.provide_data_single)
    sym_instance.infer_shape(shape_dict)
    sym_instance.check_parameter_shapes(arg_params, aux_params, shape_dict, is_train=False)

    input_names = [item[0] for item in test_data.provide_data[0]]
    if test_data.provide_label[0] is None:
        output_label_names = None
    else:
        output_label_names = [item[0] for item in test_data.provide_label[0]]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]),
                                 max([v[1] for v in cfg.SCALES])))]]

    predictor = Predictor(sym, input_names, output_label_names, context=ctx,
                          max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data,
                          provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # generate proposals, then evaluate recall against the ground truth
    imdb_boxes = generate_proposals(predictor, test_data, imdb, cfg, vis=vis, thresh=thresh)
    all_log_info = imdb.evaluate_recall(roidb, candidate_boxes=imdb_boxes)
    logger.info(all_log_info)
def test_deeplab():
    """Evaluate the DeepLab segmentation model configured by the global
    ``config``/``args`` on its test image set."""
    epoch = config.TEST.test_epoch
    ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]

    image_set = config.dataset.test_image_set
    root_path = config.dataset.root_path
    dataset = config.dataset.dataset
    dataset_path = config.dataset.dataset_path

    logger, final_output_path, experiments_path, _ = create_env(config.output_path, args.cfg, image_set)
    # checkpoint prefix lives next to the training image-set directory
    train_sets = '_'.join([iset for iset in config.dataset.image_set.split('+')])
    prefix = os.path.join(final_output_path, '..', train_sets, config.TRAIN.model_prefix)

    logger.info('testing config:{}\n'.format(pprint.pformat(config)))

    # symbol and ground-truth segmentation database
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)
    imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=experiments_path)
    segdb = imdb.gt_segdb()

    # test data iterator (one batch of BATCH_IMAGES per device)
    batch_size = (config.TEST.BATCH_IMAGES) * len(ctx)
    test_data = TestDataLoader(segdb, config=config, batch_size=batch_size,
                               shuffle=False, ctx=ctx, has_label=imdb.has_label)

    # shape inference, then checkpoint sanity check
    shape_dict = dict(test_data.provide_data_single)
    sym_instance.infer_shape(shape_dict)
    arg_params, aux_params = load_param(prefix, epoch, process=True)
    sym_instance.check_parameter_shapes(arg_params, aux_params, shape_dict, is_train=False)

    input_names = [item[0] for item in test_data.provide_data_single]
    max_data_shape = [[('data', (config.TEST.BATCH_IMAGES, 3,
                                 max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]

    predictor = Predictor(sym, input_names, ['label'], context=ctx,
                          max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data,
                          provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # always recompute predictions
    args.ignore_cache = True
    pred_eval(predictor, test_data, imdb, vis=args.vis, ignore_cache=args.ignore_cache, logger=logger)
def get_net(cfg, ctx, prefix, epoch, has_rpn):
    """Load checkpoint *prefix*-*epoch* and build a Predictor for it.

    Returns the Predictor, or None if construction failed (the traceback is
    printed).
    """
    try:
        if has_rpn:
            sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
            sym = sym_instance.get_symbol(cfg, is_train=False)
        else:
            sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
            sym = sym_instance.get_symbol_rcnn(cfg, is_train=False)

        # load model
        arg_params, aux_params = load_param(prefix, epoch, process=True)

        # infer shape against a fixed single-image input
        SHORT_SIDE = config.SCALES[0][0]
        LONG_SIDE = config.SCALES[0][1]
        DATA_NAMES = ['data', 'im_info']
        LABEL_NAMES = None
        DATA_SHAPES = [('data', (1, 3, LONG_SIDE, SHORT_SIDE)), ('im_info', (1, 3))]
        LABEL_SHAPES = None
        data_shape_dict = dict(DATA_SHAPES)
        sym_instance.infer_shape(data_shape_dict)
        sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False)

        # decide maximum shape
        max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]),
                                     max([v[1] for v in cfg.SCALES])))]]
        if not has_rpn:
            max_data_shape.append(('rois', (cfg.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5)))

        # create predictor
        predictor = Predictor(sym, DATA_NAMES, LABEL_NAMES, context=ctx,
                              max_data_shapes=max_data_shape,
                              provide_data=[DATA_SHAPES], provide_label=[LABEL_SHAPES],
                              arg_params=arg_params, aux_params=aux_params)
    # FIX: 'except Exception, e' is Python-2-only syntax and the bound name was
    # unused; the bare 'except Exception:' form works on both 2 and 3.
    except Exception:
        print(traceback.format_exc())
        predictor = None
    # BUG FIX: the original never returned, so callers always received None
    # even on success.
    return predictor
def test_fcis(config, dataset, image_set, root_path, dataset_path, ctx, prefix, epoch, vis,
              ignore_cache, shuffle, has_rpn, proposal, thresh, logger=None, output_path=None):
    """Evaluate an FCIS instance-segmentation model; only the RPN path is
    implemented."""
    if not logger:
        assert False, 'require a logger'
    pprint.pprint(config)
    logger.info('testing config:{}\n'.format(pprint.pformat(config)))

    # symbol + sds database (RPN path only)
    if has_rpn:
        sym_instance = eval(config.symbol)()
        sym = sym_instance.get_symbol(config, is_train=False)
        imdb = eval(dataset)(image_set, root_path, dataset_path, result_path=output_path,
                             binary_thresh=config.BINARY_THRESH, mask_size=config.MASK_SIZE)
        sdsdb = imdb.gt_sdsdb()
    else:
        raise NotImplementedError

    test_data = TestLoader(sdsdb, config, batch_size=len(ctx), shuffle=shuffle, has_rpn=has_rpn)
    arg_params, aux_params = load_param(prefix, epoch, process=True)

    # shape inference + checkpoint sanity check
    shape_dict = dict(test_data.provide_data_single)
    sym_instance.infer_shape(shape_dict)
    sym_instance.check_parameter_shapes(arg_params, aux_params, shape_dict, is_train=False)

    input_names = [item[0] for item in test_data.provide_data_single]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    if not has_rpn:
        raise NotImplementedError()

    predictor = Predictor(sym, input_names, [], context=ctx,
                          max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data,
                          provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    pred_eval(predictor, test_data, imdb, config, vis=vis, ignore_cache=ignore_cache,
              thresh=thresh, logger=logger)
def get_predictor(sym, image, arg_params, aux_params):
    """Preprocess a single *image* and build a one-image GPU Predictor for
    symbol *sym* using the given parameters."""
    # resize/normalise the image into the network input layout
    short_side = config.SCALES[0][0]
    long_side = config.SCALES[0][1]
    resized, scale = resize(image, short_side, long_side, stride=config.network.IMAGE_STRIDE)
    tensor = transform(resized, config.network.PIXEL_MEANS)
    info = np.array([[tensor.shape[2], tensor.shape[3], scale]], dtype=np.float32)

    data = [{'data': tensor, 'im_info': info}]
    data_names = ['data', 'im_info']

    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]

    return Predictor(sym, data_names, [], context=[mx.gpu(0)],
                     max_data_shapes=max_data_shape,
                     provide_data=provide_data, provide_label=provide_label,
                     arg_params=arg_params, aux_params=aux_params)
def get_net(cfg, ctx, arg_params, aux_params, has_rpn):
    """Build a Predictor for already-loaded parameters, choosing the RPN or
    RCNN-only symbol variant."""
    # pylint: disable=eval-used
    sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    if has_rpn:
        sym = sym_instance.get_symbol(cfg, is_train=False)
    else:
        sym = sym_instance.get_symbol_rcnn(cfg, is_train=False)

    # fixed single-image input used for shape inference
    SHORT_SIDE = config.SCALES[0][0]
    LONG_SIDE = config.SCALES[0][1]
    DATA_NAMES = ['data', 'im_info']
    LABEL_NAMES = None
    DATA_SHAPES = [('data', (1, 3, LONG_SIDE, SHORT_SIDE)), ('im_info', (1, 3))]
    LABEL_SHAPES = None

    shape_dict = dict(DATA_SHAPES)
    sym_instance.infer_shape(shape_dict)
    sym_instance.check_parameter_shapes(arg_params, aux_params, shape_dict, is_train=False)

    # largest input the executor must accommodate
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]),
                                 max([v[1] for v in cfg.SCALES])))]]
    if not has_rpn:
        max_data_shape.append(('rois', (cfg.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5)))

    return Predictor(sym, DATA_NAMES, LABEL_NAMES, context=ctx,
                     max_data_shapes=max_data_shape,
                     provide_data=[DATA_SHAPES], provide_label=[LABEL_SHAPES],
                     arg_params=arg_params, aux_params=aux_params)
def load_data_and_get_predictor(self, image_names):
    """Read and preprocess *image_names*, build ``self.predictor`` and
    ``self.nms``, and return the per-image input tensors."""
    # load demo data
    data = []
    for im_name in image_names:
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        short_side = config.SCALES[0][0]
        long_side = config.SCALES[0][1]
        im, im_scale = resize(im, short_side, long_side, stride=config.network.IMAGE_STRIDE)
        tensor = transform(im, config.network.PIXEL_MEANS)
        info = np.array([[tensor.shape[2], tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': tensor, 'im_info': info})

    # get predictor
    self.data_names = ['data', 'im_info']
    data = [[mx.nd.array(data[i][name]) for name in self.data_names]
            for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(self.data_names, data[i])]
                    for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    # NOTE(review): the predictor runs on gpu(1) while NMS is built for
    # device 0 — looks intentional for a two-GPU setup, but confirm.
    self.predictor = Predictor(self.sym, self.data_names, [],
                               context=[mx.gpu(1)], max_data_shapes=max_data_shape,
                               provide_data=provide_data, provide_label=provide_label,
                               arg_params=self.arg_params, aux_params=self.aux_params)
    self.nms = gpu_nms_wrapper(config.TEST.NMS, 0)
    return data
def load_rfcn_model(cfg, has_rpn, prefix, epoch, ctx):
    """Load an R-FCN checkpoint and wrap it in a Predictor whose batch size
    equals the number of devices in *ctx*."""
    sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    if has_rpn:
        sym = sym_instance.get_symbol(cfg, is_train=False)
    else:
        sym = sym_instance.get_symbol_rcnn(cfg, is_train=False)

    # load model
    arg_params, aux_params = load_param(prefix, epoch, process=True)
    num_gpu = len(ctx)

    # batch dimension matches the device count
    SHORT_SIDE = config.SCALES[0][0]
    LONG_SIDE = config.SCALES[0][1]
    DATA_NAMES = ['data', 'im_info']
    LABEL_NAMES = None
    DATA_SHAPES = [('data', (num_gpu, 3, LONG_SIDE, SHORT_SIDE)), ('im_info', (num_gpu, 3))]
    LABEL_SHAPES = None

    shape_dict = dict(DATA_SHAPES)
    sym_instance.infer_shape(shape_dict)
    sym_instance.check_parameter_shapes(arg_params, aux_params, shape_dict, is_train=False)

    # largest input the executor must accommodate
    max_data_shape = [[('data', (num_gpu, 3, max([v[0] for v in cfg.SCALES]),
                                 max([v[1] for v in cfg.SCALES])))]]
    if not has_rpn:
        max_data_shape.append(('rois', (cfg.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5)))

    return Predictor(sym, DATA_NAMES, LABEL_NAMES, context=ctx,
                     max_data_shapes=max_data_shape,
                     provide_data=[DATA_SHAPES], provide_label=[LABEL_SHAPES],
                     arg_params=arg_params, aux_params=aux_params)
def demo_rfcn(cfg, dataset, image_set, root_path, dataset_path, ctx, prefix, epoch, vis,
              has_rpn, thresh, use_box_voting):
    """Build a Predictor for checkpoint *prefix*-*epoch* and run the demo
    over *image_set* via ``demo_net``."""
    sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    if has_rpn:
        sym = sym_instance.get_symbol(cfg, is_train=False)
    else:
        sym = sym_instance.get_symbol_rcnn(cfg, is_train=False)

    # load model
    arg_params, aux_params = load_param(prefix, epoch, process=True)

    # fixed single-image input used for shape inference
    SHORT_SIDE = config.SCALES[0][0]
    LONG_SIDE = config.SCALES[0][1]
    DATA_NAMES = ['data', 'im_info']
    LABEL_NAMES = None
    DATA_SHAPES = [('data', (1, 3, LONG_SIDE, SHORT_SIDE)), ('im_info', (1, 3))]
    LABEL_SHAPES = None

    shape_dict = dict(DATA_SHAPES)
    sym_instance.infer_shape(shape_dict)
    sym_instance.check_parameter_shapes(arg_params, aux_params, shape_dict, is_train=False)

    # largest input the executor must accommodate
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]),
                                 max([v[1] for v in cfg.SCALES])))]]
    if not has_rpn:
        max_data_shape.append(('rois', (cfg.TEST.PROPOSAL_POST_NMS_TOP_N + 30, 5)))

    predictor = Predictor(sym, DATA_NAMES, LABEL_NAMES, context=ctx,
                          max_data_shapes=max_data_shape,
                          provide_data=[DATA_SHAPES], provide_label=[LABEL_SHAPES],
                          arg_params=arg_params, aux_params=aux_params)

    demo_net(predictor, dataset, image_set, root_path, dataset_path, thresh, vis, use_box_voting)
def init_net(self):
    """Construct the FCIS symbol and Predictor from ``self.model_path`` and
    warm the executor up with two dummy forward passes."""
    config = self.cfg

    # symbol
    sym_instance = resnet_v1_101_fcis()
    sym = sym_instance.get_symbol(config, is_train=False)

    # key parameters
    data_names = ['data', 'im_info']
    label_names = []
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    target_size = config.SCALES[0][0]
    max_size = config.SCALES[0][1]
    self.data_batch_wr = DataBatchWrapper(target_size, max_size,
                                          image_stride=config.network.IMAGE_STRIDE,
                                          pixel_means=config.network.PIXEL_MEANS,
                                          data_names=data_names, label_names=label_names)

    # dummy image at the maximum input size drives shape setup and warm-up
    im = np.zeros((target_size, max_size, 3))
    data_tensor_info = self.data_batch_wr.get_data_tensor_info(im)

    # get predictor
    arg_params, aux_params = load_param_file(self.model_path, process=True)
    print("\nLoaded model %s\n"%(self.model_path))
    self.net = Predictor(sym, data_names, label_names,
                         context=[mx.gpu(self.ctx_id[0])],
                         max_data_shapes=max_data_shape,
                         provide_data=[[(k, v.shape) for k, v in zip(data_names, data_tensor_info)]],
                         provide_label=[None],
                         arg_params=arg_params, aux_params=aux_params)
    self.data_names = data_names

    # warm up predictor with two throw-away detections
    for _ in xrange(2):
        data_batch = self.data_batch_wr.get_data_batch(im)
        scales = [data_batch.data[i][1].asnumpy()[0, 2]
                  for i in xrange(len(data_batch.data))]
        _, _, _, _ = im_detect(self.net, data_batch, data_names, scales, config)
def get_predictor(sym, sym_instance, cfg, arg_params, aux_params, test_data, ctx):
    """Build a Predictor for a video model whose inputs include a sliding
    window of cached frames ('data_cache') around the key frame.

    Returns the constructed Predictor bound to *ctx*.
    """
    # infer shape and verify the checkpoint matches the data layout
    data_shape_dict = dict(test_data.provide_data_single)
    sym_instance.infer_shape(data_shape_dict)
    sym_instance.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False)

    # decide maximum shape
    data_names = [k[0] for k in test_data.provide_data_single]
    label_names = None
    # CLEANUP: removed dead locals H and W (feature-map extents) that were
    # computed via RPN_FEAT_STRIDE but never used.
    # window length: KEY_FRAME_INTERVAL frames on each side of the key frame
    T = cfg.TEST.KEY_FRAME_INTERVAL * 2 + 1
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]),
                                 max([v[1] for v in cfg.SCALES]))),
                       ('data_cache', (T, 3, max([v[0] for v in cfg.SCALES]),
                                       max([v[1] for v in cfg.SCALES]))),
                       ]]

    # create predictor
    predictor = Predictor(sym, data_names, label_names, context=ctx,
                          max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data,
                          provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)
    return predictor
def test_net(args):
    """Evaluate an FCIS model described by ``args`` (config path, model
    params, dataset flags) with ``pred_eval``."""
    # init config
    cfg_path = args.cfg
    update_config(cfg_path)

    # test parameters
    has_rpn = config.TEST.HAS_RPN
    if not has_rpn:
        # FIX: 'raise NotImplementedError, "..."' is Python-2-only statement
        # syntax; the call form behaves identically on both 2 and 3.
        raise NotImplementedError("Network without RPN is not implemented")

    # load model
    model_path = args.model
    if '.params' not in model_path:
        model_path += ".params"
    assert osp.exists(model_path), ("Could not find model path %s" % (model_path))
    arg_params, aux_params = load_param_file(model_path, process=True)
    print("\nLoaded model %s\n" % (model_path))

    # gpu stuff
    ctx = [mx.gpu(int(i)) for i in config.gpus.split(',')]

    # load test dataset
    cfg_ds = config.dataset
    ds_name = cfg_ds.dataset
    ds_path = cfg_ds.dataset_path
    test_image_set = cfg_ds.test_image_set

    # logger
    logger, output_path = create_logger(config.output_path, args.cfg, config.dataset.test_image_set)
    logger.info('testing config:{}\n'.format(pprint.pformat(config)))

    if ds_name.lower() == "labelme":
        imdb = labelme(test_image_set, ds_path, cfg_ds.root_path,
                       mask_size=config.MASK_SIZE,
                       binary_thresh=config.BINARY_THRESH,
                       classes=cfg_ds.CLASSES)
    else:
        imdb = eval(ds_name)(test_image_set, cfg_ds.root_path, ds_path,
                             result_path=output_path,
                             binary_thresh=config.BINARY_THRESH,
                             mask_size=config.MASK_SIZE)
    sdsdb = imdb.gt_sdsdb()

    # load network
    network = resnet_v1_101_fcis()
    sym = network.get_symbol(config, is_train=False)

    # get test data iter
    test_data = TestLoader(sdsdb, config, batch_size=len(ctx), shuffle=args.shuffle, has_rpn=has_rpn)

    # infer shape and validate the checkpoint
    data_shape_dict = dict(test_data.provide_data_single)
    network.infer_shape(data_shape_dict)
    network.check_parameter_shapes(arg_params, aux_params, data_shape_dict, is_train=False)

    # decide maximum shape
    data_names = [k[0] for k in test_data.provide_data_single]
    label_names = []
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]

    # create predictor
    predictor = Predictor(sym, data_names, label_names, context=ctx,
                          max_data_shapes=max_data_shape,
                          provide_data=test_data.provide_data,
                          provide_label=test_data.provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # start detection
    pred_eval(predictor, test_data, imdb, config, vis=args.vis,
              ignore_cache=args.ignore_cache, thresh=args.thresh, logger=logger)
def main():
    """Chip a large (xView-style) input image into square tiles, run FPN-DCN
    detection on every tile, map boxes back to full-image coordinates, and
    write them to ``args.output`` as 'xmin ymin xmax ymax class score'."""
    global classes
    # BUG FIX: original used '%s does not exist'.format(args.input), which
    # mixes a %-placeholder with str.format and never interpolates the path.
    assert os.path.exists(args.input), ('{} does not exist'.format(args.input))
    im = cv2.imread(args.input, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
    arr = np.array(im)
    # NOTE(review): np.array(im).shape is (rows, cols, ch), so 'origin_width'
    # is actually the row count — naming kept for compatibility; verify.
    origin_width, origin_height, _ = arr.shape
    portion = smart_chipping(origin_width, origin_height)

    # manually update the configuration: the SCALE entry has three values,
    # all forced to the chip size
    config.SCALES[0] = (portion, portion, portion)

    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_fpn_dcn_rcnn'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # load demo data: pad the image up to a multiple of the chip size
    data = []
    cwn, chn = (portion, portion)
    wn, hn = (int(origin_width / cwn), int(origin_height / chn))
    padding_y = int(math.ceil(float(origin_height) / chn) * chn - origin_height)
    padding_x = int(math.ceil(float(origin_width) / cwn) * cwn - origin_width)
    print("padding_y,padding_x, origin_height, origin_width", padding_y,
          padding_x, origin_height, origin_width)
    # top, bottom, left, right - border width in pixels in each direction
    im = cv2.copyMakeBorder(im, 0, padding_x, 0, padding_y,
                            cv2.BORDER_CONSTANT, value=[0, 0, 0])

    # re-calculate sizes and tile counts for the padded image
    arr = np.array(im)
    width, height, _ = arr.shape
    cwn, chn = (portion, portion)
    wn, hn = (int(width / cwn), int(height / chn))

    image_list = chip_image(im, (portion, portion))
    for im in image_list:
        target_size = portion
        max_size = portion
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
                           dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]),
                                 max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(
        cur_path + '/../model/' + ('fpn_dcn_xview_480_640_800_alltrain'), 11, process=True)
    print("loading parameter done")
    if args.cpu_only:
        predictor = Predictor(sym, data_names, label_names, context=[mx.cpu()],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data, provide_label=provide_label,
                              arg_params=arg_params, aux_params=aux_params)
        nms = py_nms_wrapper(config.TEST.NMS)
    else:
        predictor = Predictor(sym, data_names, label_names,
                              context=[mx.gpu(args.gpu_index)],
                              max_data_shapes=max_data_shape,
                              provide_data=provide_data, provide_label=provide_label,
                              arg_params=arg_params, aux_params=aux_params)
        nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # detections per tile scale with the chip size
    num_preds = int(5000 * math.ceil(float(portion) / 400))

    # test
    boxes, scores, classes = generate_detections(data, data_names, predictor, config,
                                                 nms, image_list, num_preds)

    # Process boxes to be full-sized: offset each tile's boxes by its grid
    # position, then clip to the original image bounds
    print("boxes shape is", boxes.shape, "wn, hn", wn, hn, "width, height", width, height)
    bfull = boxes.reshape((wn, hn, num_preds, 4))
    for i in range(wn):
        for j in range(hn):
            bfull[i, j, :, 0] += j * cwn
            bfull[i, j, :, 2] += j * cwn
            bfull[i, j, :, 1] += i * chn
            bfull[i, j, :, 3] += i * chn
            # clip values
            bfull[i, j, :, 0] = np.clip(bfull[i, j, :, 0], 0, origin_height)
            bfull[i, j, :, 2] = np.clip(bfull[i, j, :, 2], 0, origin_height)
            bfull[i, j, :, 1] = np.clip(bfull[i, j, :, 1], 0, origin_width)
            bfull[i, j, :, 3] = np.clip(bfull[i, j, :, 3], 0, origin_width)
    bfull = bfull.reshape((hn * wn, num_preds, 4))
    scores = scores.reshape((hn * wn, num_preds))
    classes = classes.reshape((hn * wn, num_preds))

    # write every non-background prediction (class 0 is background)
    with open(args.output, 'w') as f:
        for i in range(bfull.shape[0]):
            for j in range(bfull[i].shape[0]):
                # box is xmin ymin xmax ymax
                box = bfull[i, j]
                class_prediction = classes[i, j]
                score_prediction = scores[i, j]
                if int(class_prediction) != 0:
                    f.write('%d %d %d %d %d %f \n' % \
                            (box[0], box[1], box[2], box[3],
                             int(class_prediction), score_prediction))
    print('done')
def main():
    """FGFA (flow-guided feature aggregation) R-FCN video-detection demo.

    Extracts per-frame ResNet/FlowNet features, aggregates them over a sliding
    window of ``all_frame_interval`` frames, detects on every frame of the demo
    snippet and saves the visualized frames (plus optional seq-NMS pass).
    """
    # get symbol
    pprint.pprint(cfg)
    cfg.symbol = 'resnet_v1_101_flownet_rfcn'
    model = '/../model/rfcn_fgfa_flownet_vid'
    # window = KEY_FRAME_INTERVAL frames on each side of the centre frame
    all_frame_interval = cfg.TEST.KEY_FRAME_INTERVAL * 2 + 1
    max_per_image = cfg.TEST.max_per_image
    # two symbol instances: per-frame feature network and the aggregation network
    feat_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    aggr_sym_instance = eval(cfg.symbol + '.' + cfg.symbol)()
    feat_sym = feat_sym_instance.get_feat_symbol(cfg)
    aggr_sym = aggr_sym_instance.get_aggregation_symbol(cfg)

    # set up class names (30 ImageNet-VID classes + background)
    num_classes = 31
    classes = ['__background__','airplane', 'antelope', 'bear', 'bicycle',
               'bird', 'bus', 'car', 'cattle',
               'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda',
               'hamster', 'horse', 'lion',
               'lizard', 'monkey', 'motorcycle', 'rabbit', 'red_panda',
               'sheep', 'snake', 'squirrel', 'tiger',
               'train', 'turtle', 'watercraft', 'whale', 'zebra']

    # load demo data
    image_names = glob.glob(cur_path + '/../demo/ILSVRC2015_val_00007010/*.JPEG')
    output_dir = cur_path + '/../demo/rfcn_fgfa/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    data = []
    for im_name in image_names:
        assert os.path.exists(im_name), ('%s does not exist'.format(im_name))
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = cfg.SCALES[0][0]
        max_size = cfg.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=cfg.network.IMAGE_STRIDE)
        im_tensor = transform(im, cfg.network.PIXEL_MEANS)
        # im_info: (height, width, scale) of the resized tensor
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
        # data_cache/feat_cache start as placeholders; prepare_data() below
        # swaps in the real sliding-window contents — TODO confirm
        data.append({'data': im_tensor, 'im_info': im_info, 'data_cache': im_tensor, 'feat_cache': im_tensor})

    # get predictor
    print 'get-predictor'
    data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
    label_names = []

    t1 = time.time()
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    # NOTE(review): the caches are sized for 19 frames — presumably
    # all_frame_interval with the default KEY_FRAME_INTERVAL = 9; confirm
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('data_cache', (19, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('feat_cache', ((19, cfg.network.FGFA_FEAT_DIM,
                                        np.ceil(max([v[0] for v in cfg.SCALES]) / feat_stride).astype(np.int),
                                        np.ceil(max([v[1] for v in cfg.SCALES]) / feat_stride).astype(np.int))))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for _ in xrange(len(data))]

    arg_params, aux_params = load_param(cur_path + model, 0, process=True)

    feat_predictors = Predictor(feat_sym, data_names, label_names,
                                context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                                provide_data=provide_data, provide_label=provide_label,
                                arg_params=arg_params, aux_params=aux_params)
    aggr_predictors = Predictor(aggr_sym, data_names, label_names,
                                context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                                provide_data=provide_data, provide_label=provide_label,
                                arg_params=arg_params, aux_params=aux_params)
    nms = py_nms_wrapper(cfg.TEST.NMS)

    # First frame of the video
    idx = 0
    data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                 provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                 provide_label=[None])
    # im_info column 2 holds the resize scale for each sample
    scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
    # all_boxes[class][frame] accumulates detections for optional seq-NMS
    all_boxes = [[[] for _ in range(len(data))] for _ in range(num_classes)]
    # bounded deques implement the sliding window; old frames fall off the left
    data_list = deque(maxlen=all_frame_interval)
    feat_list = deque(maxlen=all_frame_interval)
    image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
    # append cfg.TEST.KEY_FRAME_INTERVAL padding images in the front (first frame)
    while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
        data_list.append(image)
        feat_list.append(feat)

    vis = False
    file_idx = 0
    thresh = 1e-3
    for idx, element in enumerate(data):
        data_batch = mx.io.DataBatch(data=[element], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, element)]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        if(idx != len(data)-1):
            if len(data_list) < all_frame_interval - 1:
                # still filling the window: extract features, no detection yet
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)
            else:
                #################################################
                # main part of the loop
                #################################################
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)

                # fill data_cache / feat_cache of the batch from the window
                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch, data_names, scales, cfg)

                # drop the large cache blobs so the next iteration rebuilds them
                data_batch.data[0][-2] = None
                data_batch.provide_data[0][-2] = ('data_cache', None)
                data_batch.data[0][-1] = None
                data_batch.provide_data[0][-1] = ('feat_cache', None)

                # detection is for the window's centre frame, not `idx`
                out_im = process_pred_result(classes, pred_result, num_classes, thresh, cfg, nms,
                                             all_boxes, file_idx, max_per_image, vis,
                                             data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                total_time = time.time()-t1
                if (cfg.TEST.SEQ_NMS==False):
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(str(file_idx)+'.JPEG', total_time /(file_idx+1))
                file_idx += 1
        else:
            #################################################
            # end part of a video                           #
            #################################################
            # last frame: keep re-appending it to flush the remaining
            # KEY_FRAME_INTERVAL+1 centre frames out of the window
            end_counter = 0
            image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
            while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result = im_detect(aggr_predictors, data_batch, data_names, scales, cfg)

                out_im = process_pred_result(classes, pred_result, num_classes, thresh, cfg, nms,
                                             all_boxes, file_idx, max_per_image, vis,
                                             data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                total_time = time.time() - t1
                if (cfg.TEST.SEQ_NMS == False):
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(str(file_idx)+'.JPEG', total_time / (file_idx+1))
                file_idx += 1
                end_counter+=1

    # optional sequence-level NMS over the whole video, then re-render frames
    if(cfg.TEST.SEQ_NMS):
        video = [all_boxes[j][:] for j in range(1, num_classes)]
        dets_all = seq_nms(video)
        for cls_ind, dets_cls in enumerate(dets_all):
            for frame_ind, dets in enumerate(dets_cls):
                keep = nms(dets)
                all_boxes[cls_ind + 1][frame_ind] = dets[keep, :]
        for idx in range(len(data)):
            boxes_this_image = [[]] + [all_boxes[j][idx] for j in range(1, num_classes)]
            out_im = draw_all_detection(data[idx][0].asnumpy(), boxes_this_image, classes, scales[0], cfg)
            save_image(output_dir, idx, out_im)

    print 'done'
def main():
    """FCIS instance-segmentation demo over every .jpg/.png in ``test_dir``.

    Detects and segments COCO classes, visualizes the masks and writes the
    annotated images to ``result_dir``.
    """
    # get symbol
    ctx_id = [int(i) for i in config.gpus.split(',')]
    pprint.pprint(config)
    sym_instance = eval(config.symbol)()
    sym = sym_instance.get_symbol(config, is_train=False)

    # set up class names (80 COCO classes; index 0 is background)
    num_classes = 81
    classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
               'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
               'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
               'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
               'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
               'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
               'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
               'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
               'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
               'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
               'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
               'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
               'scissors', 'teddy bear', 'hair drier', 'toothbrush']

    # load demo data: every .jpg/.png directly under test_dir
    image_names = []
    names_dirs = os.listdir(cur_path + '/../' + test_dir)
    for im_name in names_dirs:
        if im_name[-4:] == '.jpg' or im_name[-4:] == '.png':
            image_names.append(im_name)

    data = []
    for im_name in image_names:
        assert os.path.exists(cur_path + '/../' + test_dir + im_name), (
            '%s does not exist'.format('../demo/' + im_name))
        # long(128) is cv2.IMREAD_IGNORE_ORIENTATION spelled numerically — TODO confirm
        im = cv2.imread(cur_path + '/../' + test_dir + im_name, cv2.IMREAD_COLOR | long(128))
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        #print "before scale: "
        #print im.shape
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        #print "after scale: "
        #print im.shape
        #im_scale = 1.0
        #print "scale ratio: "
        #print im_scale
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        #print im_tensor.shape
        # im_info: (height, width, scale) of the resized tensor
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        data.append({'data': im_tensor, 'im_info': im_info})

    # get predictor
    data_names = ['data', 'im_info']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]
    arg_params, aux_params = load_param(cur_path + '/../' + model_dir, 0, process=True)
    predictor = Predictor(sym, data_names, label_names,
                          context=[mx.gpu(ctx_id[0])], max_data_shapes=max_data_shape,
                          provide_data=provide_data, provide_label=provide_label,
                          arg_params=arg_params, aux_params=aux_params)

    # warm up: two throwaway runs on the first image to absorb bind/JIT cost
    for i in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        _, _, _, _ = im_detect(predictor, data_batch, data_names, scales, config)

    # test
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        tic()
        # NOTE(review): scale [1.0] is passed here although `scales` was computed;
        # visualization later divides by scales[0] — verify this pairing is intended
        scores, boxes, masks, data_dict = im_detect(predictor, data_batch, data_names, [1.0], config)
        im_shapes = [data_batch.data[i][0].shape[2:4] for i in xrange(len(data_batch.data))]
        #print im_shapes

        if not config.TEST.USE_MASK_MERGE:
            all_boxes = [[] for _ in xrange(num_classes)]
            all_masks = [[] for _ in xrange(num_classes)]
            nms = py_nms_wrapper(config.TEST.NMS)
            for j in range(1, num_classes):
                indexes = np.where(scores[0][:, j] > 0.7)[0]
                cls_scores = scores[0][indexes, j, np.newaxis]
                # channel 1 — presumably the foreground mask channel; confirm
                cls_masks = masks[0][indexes, 1, :, :]
                # try/except used as control flow: class-agnostic boxes when the
                # flag is set, otherwise fall through to per-class box columns
                try:
                    if config.CLASS_AGNOSTIC:
                        cls_boxes = boxes[0][indexes, :]
                    else:
                        raise Exception()
                except:
                    cls_boxes = boxes[0][indexes, j * 4:(j + 1) * 4]

                cls_dets = np.hstack((cls_boxes, cls_scores))
                keep = nms(cls_dets)
                all_boxes[j] = cls_dets[keep, :]
                all_masks[j] = cls_masks[keep, :]
            dets = [all_boxes[j] for j in range(1, num_classes)]
            # `masks` is rebound from raw network output to per-class kept masks
            masks = [all_masks[j] for j in range(1, num_classes)]
        else:
            # GPU mask voting merges overlapping mask hypotheses per class
            masks = masks[0][:, 1:, :, :]
            result_masks, result_dets = gpu_mask_voting(masks, boxes[0], scores[0], num_classes,
                                                        100, im_shapes[0][1], im_shapes[0][0],
                                                        config.TEST.NMS, config.TEST.MASK_MERGE_THRESH,
                                                        config.BINARY_THRESH, ctx_id[0])
            dets = [result_dets[j] for j in range(1, num_classes)]
            masks = [result_masks[j][:, 0, :, :] for j in range(1, num_classes)]
        print 'testing {} {:.4f}s'.format(im_name, toc())

        # visualize: keep only confident detections
        for i in xrange(len(dets)):
            keep = np.where(dets[i][:, -1] > 0.7)
            dets[i] = dets[i][keep]
            masks[i] = masks[i][keep]
        im = cv2.imread(cur_path + '/../' + test_dir + im_name)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        show_masks(im, dets, masks, classes, config, 1.0 / scales[0], False)
        # Save img (convert back to BGR for cv2.imwrite)
        cv2.imwrite(cur_path + '/../' + result_dir + im_name, cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
    print 'done'
def batch_extract(self, multiple=True, gt_dir=None, epoch=0): """ :param multiple: :param gt_dir: :return: """ if len(self.img_list) % self.batch_size != 0: batch = len(self.img_list) / self.batch_size + 1 else: batch = len(self.img_list) / self.batch_size for i in xrange(batch): if i < batch - 1: self.batch_list = self.img_list[i * self.batch_size:(i + 1) * self.batch_size] else: self.batch_list = self.img_list[i * self.batch_size:] print '\nMini-batch %d\t' % (i + 1) tmp_data = [] target_size = config.SCALES[0][0] max_size = config.SCALES[0][1] tic() for img in self.batch_list: assert os.path.exists(img), ('%s does not exist.'.format(img)) im = cv2.imread( img, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE) im_tensor = transform(im, config.network.PIXEL_MEANS) # im_info: height, width, scale im_info = np.array( [[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32) tmp_data.append({ self.data_names[0]: im_tensor, self.data_names[1]: im_info }) self.ctx = [int(i) for i in config.gpus.split(',')] self.data = [[ mx.nd.array(tmp_data[i][name], mx.gpu(self.ctx[0])) for name in self.data_names ] for i in xrange(len(tmp_data))] max_data_shape = [[(self.data_names[0], (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]] provide_data = [[(k, v.shape) for k, v in zip(self.data_names, self.data[i])] for i in xrange(len(self.data))] provide_label = [None for i in xrange(len(self.data))] arg_params, aux_params = load_param(self.model_dir, epoch, process=True) self.predictor = Predictor(self.sym, self.data_names, self.label_name, context=[mx.gpu(self.ctx[0])], max_data_shapes=max_data_shape, provide_data=provide_data, provide_label=provide_label, arg_params=arg_params, aux_params=aux_params) print 'preparation: %.4fs' % toc() if i == 0: self.warmup() self.forward(multiple=multiple, gt_dir=gt_dir) self.cleaner()
def predict(self, images, feat_output, aggr_feat_output):
    """FGFA sliding-window detection over a list of decoded frames.

    Mirrors the stand-alone FGFA demo ``main``: builds feature/aggregation
    predictors, runs the window over ``images`` and writes visualized frames
    to a per-call output directory.

    :param images: list of BGR frames (cv2 images) forming one video snippet
    :param feat_output: NOTE(review): unused in this body — confirm caller contract
    :param aggr_feat_output: NOTE(review): unused in this body — confirm caller contract
    """
    model = self.model
    all_frame_interval = self.all_frame_interval
    feat_sym = self.feat_sym
    aggr_sym = self.aggr_sym
    num_classes = self.num_classes
    classes = self.classes
    max_per_image = self.max_per_image

    # fresh output directory per invocation, numbered by self.index
    output_dir = cur_path + '/../demo/rfcn_fgfa_{}/'.format(self.index)
    self.index += 1
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    data = []
    for im in images:
        target_size = cfg.SCALES[0][0]
        max_size = cfg.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=cfg.network.IMAGE_STRIDE)
        im_tensor = transform(im, cfg.network.PIXEL_MEANS)
        # im_info: (height, width, scale) of the resized tensor
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        feat_stride = float(cfg.network.RCNN_FEAT_STRIDE)
        # data_cache/feat_cache start as placeholders; prepare_data() fills
        # the real sliding-window contents later — TODO confirm
        data.append({'data': im_tensor, 'im_info': im_info,
                     'data_cache': im_tensor, 'feat_cache': im_tensor})

    # get predictor
    print 'get-predictor'
    data_names = ['data', 'im_info', 'data_cache', 'feat_cache']
    label_names = []

    t1 = time.time()
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    # NOTE(review): caches sized for 11 frames — presumably all_frame_interval
    # with KEY_FRAME_INTERVAL = 5 here; confirm against self.all_frame_interval
    max_data_shape = [[('data', (1, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('data_cache', (11, 3, max([v[0] for v in cfg.SCALES]), max([v[1] for v in cfg.SCALES]))),
                       ('feat_cache', ((11, cfg.network.FGFA_FEAT_DIM,
                                        np.ceil(max([v[0] for v in cfg.SCALES]) / feat_stride).astype(np.int),
                                        np.ceil(max([v[1] for v in cfg.SCALES]) / feat_stride).astype(np.int))))]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for _ in xrange(len(data))]

    arg_params, aux_params = load_param(cur_path + model, 0, process=True)

    feat_predictors = Predictor(feat_sym, data_names, label_names,
                                context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                                provide_data=provide_data, provide_label=provide_label,
                                arg_params=arg_params, aux_params=aux_params)
    aggr_predictors = Predictor(aggr_sym, data_names, label_names,
                                context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                                provide_data=provide_data, provide_label=provide_label,
                                arg_params=arg_params, aux_params=aux_params)
    nms = py_nms_wrapper(cfg.TEST.NMS)

    # First frame of the video
    idx = 0
    data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                 provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                 provide_label=[None])
    # im_info column 2 holds the resize scale
    scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
    # all_boxes[class][frame] accumulates detections for optional seq-NMS
    all_boxes = [[[] for _ in range(len(data))] for _ in range(num_classes)]
    # bounded deques implement the sliding window; old entries fall off the left
    data_list = deque(maxlen=all_frame_interval)
    feat_list = deque(maxlen=all_frame_interval)
    image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
    # append cfg.TEST.KEY_FRAME_INTERVAL padding images in the front (first frame)
    while len(data_list) < cfg.TEST.KEY_FRAME_INTERVAL:
        data_list.append(image)
        feat_list.append(feat)

    vis = False
    file_idx = 0
    thresh = 1e-3
    for idx, element in enumerate(data):
        data_batch = mx.io.DataBatch(data=[element], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, element)]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        if (idx != len(data) - 1):
            if len(data_list) < all_frame_interval - 1:
                # still filling the window: extract features only
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)
            else:
                #################################################
                # main part of the loop
                #################################################
                image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
                data_list.append(image)
                feat_list.append(feat)

                # fill data_cache / feat_cache of the batch from the window
                prepare_data(data_list, feat_list, data_batch)
                pred_result, aggr_feat = im_detect(aggr_predictors, data_batch, data_names, scales, cfg)
                assert len(aggr_feat) == 1

                # drop the large cache blobs so the next iteration rebuilds them
                data_batch.data[0][-2] = None
                data_batch.provide_data[0][-2] = ('data_cache', None)
                data_batch.data[0][-1] = None
                data_batch.provide_data[0][-1] = ('feat_cache', None)

                # detection is for the window's centre frame, not `idx`
                out_im = process_pred_result(classes, pred_result, num_classes, thresh, cfg, nms,
                                             all_boxes, file_idx, max_per_image, vis,
                                             data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                total_time = time.time() - t1
                if (cfg.TEST.SEQ_NMS == False):
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(str(file_idx) + '.JPEG', total_time / (file_idx + 1))
                file_idx += 1
        else:
            #################################################
            # end part of a video                           #
            #################################################
            # last frame: keep re-appending it to flush the remaining
            # KEY_FRAME_INTERVAL+1 centre frames out of the window
            end_counter = 0
            image, feat = get_resnet_output(feat_predictors, data_batch, data_names)
            while end_counter < cfg.TEST.KEY_FRAME_INTERVAL + 1:
                data_list.append(image)
                feat_list.append(feat)
                prepare_data(data_list, feat_list, data_batch)
                pred_result, aggr_feat = im_detect(aggr_predictors, data_batch, data_names, scales, cfg)
                assert len(aggr_feat) == 1

                out_im = process_pred_result(classes, pred_result, num_classes, thresh, cfg, nms,
                                             all_boxes, file_idx, max_per_image, vis,
                                             data_list[cfg.TEST.KEY_FRAME_INTERVAL].asnumpy(), scales)
                total_time = time.time() - t1
                if (cfg.TEST.SEQ_NMS == False):
                    save_image(output_dir, file_idx, out_im)
                print 'testing {} {:.4f}s'.format(str(file_idx) + '.JPEG', total_time / (file_idx + 1))
                file_idx += 1
                end_counter += 1

    # optional sequence-level NMS over the whole snippet, then re-render frames
    if (cfg.TEST.SEQ_NMS):
        video = [all_boxes[j][:] for j in range(1, num_classes)]
        dets_all = seq_nms(video)
        for cls_ind, dets_cls in enumerate(dets_all):
            for frame_ind, dets in enumerate(dets_cls):
                keep = nms(dets)
                all_boxes[cls_ind + 1][frame_ind] = dets[keep, :]
        for idx in range(len(data)):
            boxes_this_image = [[]] + [all_boxes[j][idx] for j in range(1, num_classes)]
            out_im = draw_all_detection(data[idx][0].asnumpy(), boxes_this_image, classes, scales[0], cfg)
            save_image(output_dir, idx, out_im)

    print 'done'
def run_detection(im_root, result_root, conf_threshold): # get symbol pprint.pprint(config) config.symbol = 'resnet_v1_101_rfcn_dcn' if not args.rfcn_only else 'resnet_v1_101_rfcn' sym_instance = eval(config.symbol + '.' + config.symbol)() sym = sym_instance.get_symbol(config, is_train=False) # set up class names num_classes = 81 classes = [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush' ] print('detection in {}'.format(im_root)) im_names = sorted(os.listdir(im_root)) # get predictor data_names = ['data', 'im_info'] label_names = [] data = [] for idx, im_name in enumerate(im_names[:2]): im_file = os.path.join(im_root, im_name) im = cv2.imread(im_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) target_size = config.SCALES[0][0] max_size = config.SCALES[0][1] im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE) im_tensor = transform(im, config.network.PIXEL_MEANS) im_info = np.array( [[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32) data.append({'data': im_tensor, 'im_info': im_info}) data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))] max_data_shape = [[('data', (1, 3, 
max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]] provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))] provide_label = [None for i in xrange(len(data))] arg_params, aux_params = load_param( cur_path + '/../model/' + ('rfcn_dcn_coco' if not args.rfcn_only else 'rfcn_coco'), config.TEST.test_epoch, process=True) predictor = Predictor(sym, data_names, label_names, context=[mx.gpu(0)], max_data_shapes=max_data_shape, provide_data=provide_data, provide_label=provide_label, arg_params=arg_params, aux_params=aux_params) # nms = gpu_nms_wrapper(config.TEST.NMS, 0) # nms = soft_nms_wrapper(config.TEST.NMS, method=2) nms = gpu_soft_nms_wrapper(config.TEST.NMS, method=2, device_id=0) nms_t = Timer() for idx, im_name in enumerate(im_names): im_file = os.path.join(im_root, im_name) im = cv2.imread(im_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) origin_im = im.copy() target_size = config.SCALES[0][0] max_size = config.SCALES[0][1] im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE) im_tensor = transform(im, config.network.PIXEL_MEANS) im_info = np.array( [[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32) # input data = [mx.nd.array(im_tensor), mx.nd.array(im_info)] data_batch = mx.io.DataBatch(data=[data], label=[], pad=0, index=idx, provide_data=[[ (k, v.shape) for k, v in zip(data_names, data) ]], provide_label=[None]) scales = [ data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data)) ] tic() scores, boxes, data_dict = im_detect(predictor, data_batch, data_names, scales, config) boxes = boxes[0].astype('f') scores = scores[0].astype('f') dets_nms = [] for j in range(1, scores.shape[1]): cls_scores = scores[:, j, np.newaxis] cls_boxes = boxes[:, 4:8] if config.CLASS_AGNOSTIC else boxes[:, j * 4:(j + 1) * 4] cls_dets = np.hstack((cls_boxes, cls_scores)) nms_t.tic() keep = nms(cls_dets) nms_t.toc() cls_dets = cls_dets[keep, :] 
cls_dets = cls_dets[cls_dets[:, -1] > 0.7, :] dets_nms.append(cls_dets) print 'testing {} {:.2f}ms'.format(im_name, toc() * 1000) print 'nms: {:.2f}ms'.format(nms_t.total_time * 1000) nms_t.clear() # save results person_dets = dets_nms[0] with open(os.path.join(result_root, '{:04d}.txt'.format(idx)), 'w') as f: f.write('{}\n'.format(len(person_dets))) for det in person_dets: x1, y1, x2, y2, s = det w = x2 - x1 h = y2 - y1 f.write('0 {} {} {} {} {}\n'.format(s, w, h, x1, y1)) # visualize im = origin_im # im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) im = show_boxes_cv2(im, dets_nms, classes, 1) cv2.imshow('det', im) cv2.waitKey(1)
def main():
    """DFF (deep feature flow) semantic-segmentation demo on Cityscapes frames.

    Runs the full segmentation network on key frames and propagates the key
    features (via FlowNet) to intermediate frames, saving colorized results
    and reporting per-frame and cumulative mIoU where labels exist.
    """
    # get symbol
    pprint.pprint(config)
    config.symbol = 'resnet_v1_101_flownet_deeplab'
    model1 = '/../model/rfcn_dff_flownet_vid'
    model2 = '/../model/deeplab_dcn_cityscapes'
    sym_instance = eval(config.symbol + '.' + config.symbol)()
    # separate symbols: full network for key frames, flow-propagation for the rest
    key_sym = sym_instance.get_key_test_symbol(config)
    cur_sym = sym_instance.get_cur_test_symbol(config)

    # settings
    num_classes = 19
    interv = args.interval
    num_ex = args.num_ex

    # load demo data
    image_names = sorted(glob.glob(cur_path + '/../demo/cityscapes_data/cityscapes_frankfurt_all_i' + str(interv) + '/*.png'))
    image_names = image_names[:interv * num_ex]
    label_files = sorted(glob.glob(cur_path + '/../demo/cityscapes_data/cityscapes_frankfurt_labels_all/*.png'))
    output_dir = cur_path + '/../demo/deeplab_dff/'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    key_frame_interval = interv

    # preprocess all frames; each sample carries its most recent key frame
    data = []
    key_im_tensor = None
    for idx, im_name in enumerate(image_names):
        assert os.path.exists(im_name), ('%s does not exist'.format(im_name))
        im = cv2.imread(im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        target_size = config.SCALES[0][0]
        max_size = config.SCALES[0][1]
        im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE)
        im_tensor = transform(im, config.network.PIXEL_MEANS)
        # im_info: (height, width, scale) of the resized tensor
        im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
        if idx % key_frame_interval == 0:
            key_im_tensor = im_tensor
        # feat_key starts as a dummy; real key features are patched in per batch
        data.append({'data': im_tensor, 'im_info': im_info, 'data_key': key_im_tensor,
                     'feat_key': np.zeros((1, config.network.DFF_FEAT_DIM, 1, 1))})

    # get predictor
    data_names = ['data', 'data_key', 'feat_key']
    label_names = []
    data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))]
    max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES]))),
                       ('data_key', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES]))),]]
    provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))]
    provide_label = [None for i in xrange(len(data))]

    # models: rfcn_dff_flownet_vid, deeplab_cityscapes
    arg_params, aux_params = load_param_multi(cur_path + model1, cur_path + model2, 0, process=True)
    key_predictor = Predictor(key_sym, data_names, label_names,
                              context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                              provide_data=provide_data, provide_label=provide_label,
                              arg_params=arg_params, aux_params=aux_params)
    cur_predictor = Predictor(cur_sym, data_names, label_names,
                              context=[mx.gpu(0)], max_data_shapes=max_data_shape,
                              provide_data=provide_data, provide_label=provide_label,
                              arg_params=arg_params, aux_params=aux_params)
    # NOTE(review): nms is created but never used in this segmentation demo
    nms = gpu_nms_wrapper(config.TEST.NMS, 0)

    # warm up: run a key frame then an intermediate frame (requires >= 2 frames)
    for j in xrange(2):
        data_batch = mx.io.DataBatch(data=[data[j]], label=[], pad=0, index=0,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[j])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]
        if j % key_frame_interval == 0:
            # scores, boxes, data_dict, feat = im_detect(key_predictor, data_batch, data_names, scales, config)
            output_all, feat = im_segment(key_predictor, data_batch)
            output_all = [mx.ndarray.argmax(output['croped_score_output'], axis=1).asnumpy() for output in output_all]
        else:
            # patch the cached key-frame features into the batch before propagation
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            # scores, boxes, data_dict, _ = im_detect(cur_predictor, data_batch, data_names, scales, config)
            output_all, _ = im_segment(cur_predictor, data_batch)
            output_all = [mx.ndarray.argmax(output['croped_score_output'], axis=1).asnumpy() for output in output_all]

    print "warmup done"
    # test
    # NOTE(review): `time` here shadows the time module within this function
    time = 0
    count = 0
    hist = np.zeros((num_classes, num_classes))
    lb_idx = 0
    for idx, im_name in enumerate(image_names):
        data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx,
                                     provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]],
                                     provide_label=[None])
        scales = [data_batch.data[i][1].asnumpy()[0, 2] for i in xrange(len(data_batch.data))]

        tic()
        if idx % key_frame_interval == 0:
            # key frame: full segmentation network; keep `feat` for propagation
            print '\nframe {} (key)'.format(idx)
            # scores, boxes, data_dict, feat = im_detect(key_predictor, data_batch, data_names, scales, config)
            output_all, feat = im_segment(key_predictor, data_batch)
            output_all = [mx.ndarray.argmax(output['croped_score_output'], axis=1).asnumpy() for output in output_all]
        else:
            # intermediate frame: warp the key features instead of recomputing
            print '\nframe {} (intermediate)'.format(idx)
            data_batch.data[0][-1] = feat
            data_batch.provide_data[0][-1] = ('feat_key', feat.shape)
            # scores, boxes, data_dict, _ = im_detect(cur_predictor, data_batch, data_names, scales, config)
            output_all, _ = im_segment(cur_predictor, data_batch)
            output_all = [mx.ndarray.argmax(output['croped_score_output'], axis=1).asnumpy() for output in output_all]
        elapsed = toc()
        time += elapsed
        count += 1
        print 'testing {} {:.4f}s [{:.4f}s]'.format(im_name, elapsed, time / count)

        # colorize the per-pixel class ids and save
        pred = np.uint8(np.squeeze(output_all))
        segmentation_result = Image.fromarray(pred)
        pallete = getpallete(256)
        segmentation_result.putpalette(pallete)
        _, im_filename = os.path.split(im_name)
        segmentation_result.save(output_dir + '/seg_' + im_filename)

        label = None
        _, lb_filename = os.path.split(label_files[lb_idx])
        # Cityscapes file names: city_sequence_frame_...; compare sequence+frame ids
        im_comps = im_filename.split('_')
        lb_comps = lb_filename.split('_')
        # if annotation available for frame
        if im_comps[1] == lb_comps[1] and im_comps[2] == lb_comps[2]:
            print 'label {}'.format(lb_filename)
            label = np.asarray(Image.open(label_files[lb_idx]))
            if lb_idx < len(label_files) - 1:
                lb_idx += 1

        if label is not None:
            # accumulate the confusion histogram for mIoU reporting
            curr_hist = fast_hist(pred.flatten(), label.flatten(), num_classes)
            hist += curr_hist
            print 'mIoU {mIoU:.3f}'.format(mIoU=round(np.nanmean(per_class_iu(curr_hist)) * 100, 2))
            print '(cum) mIoU {mIoU:.3f}'.format(mIoU=round(np.nanmean(per_class_iu(hist)) * 100, 2))

    ious = per_class_iu(hist) * 100
    print ' '.join('{:.03f}'.format(i) for i in ious)
    print '===> final mIoU {mIoU:.3f}'.format(mIoU=round(np.nanmean(ious), 2))
    print 'done'
def main(): # get symbol pprint.pprint(config) config.symbol = 'resnet_v1_101_deeplab_dcn' if not args.deeplab_only else 'resnet_v1_101_deeplab' sym_instance = eval(config.symbol + '.' + config.symbol)() sym = sym_instance.get_symbol(config, is_train=False) # set up class names num_classes = 19 # load demo data image_names = ['frankfurt_000001_073088_leftImg8bit.png', 'lindau_000024_000019_leftImg8bit.png'] data = [] for im_name in image_names: assert os.path.exists(cur_path + '/../demo/' + im_name), ('%s does not exist'.format('../demo/' + im_name)) im = cv2.imread(cur_path + '/../demo/' + im_name, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) target_size = config.SCALES[0][0] max_size = config.SCALES[0][1] im, im_scale = resize(im, target_size, max_size, stride=config.network.IMAGE_STRIDE) im_tensor = transform(im, config.network.PIXEL_MEANS) im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32) data.append({'data': im_tensor, 'im_info': im_info}) # get predictor data_names = ['data'] label_names = ['softmax_label'] data = [[mx.nd.array(data[i][name]) for name in data_names] for i in xrange(len(data))] max_data_shape = [[('data', (1, 3, max([v[0] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]] provide_data = [[(k, v.shape) for k, v in zip(data_names, data[i])] for i in xrange(len(data))] provide_label = [None for i in xrange(len(data))] arg_params, aux_params = load_param(cur_path + '/../model/' + ('deeplab_dcn_cityscapes' if not args.deeplab_only else 'deeplab_cityscapes'), 0, process=True) predictor = Predictor(sym, data_names, label_names, context=[mx.gpu(0)], max_data_shapes=max_data_shape, provide_data=provide_data, provide_label=provide_label, arg_params=arg_params, aux_params=aux_params) # warm up for j in xrange(2): data_batch = mx.io.DataBatch(data=[data[0]], label=[], pad=0, index=0, provide_data=[[(k, v.shape) for k, v in zip(data_names, data[0])]], provide_label=[None]) output_all = 
predictor.predict(data_batch) output_all = [mx.ndarray.argmax(output['softmax_output'], axis=1).asnumpy() for output in output_all] # test for idx, im_name in enumerate(image_names): data_batch = mx.io.DataBatch(data=[data[idx]], label=[], pad=0, index=idx, provide_data=[[(k, v.shape) for k, v in zip(data_names, data[idx])]], provide_label=[None]) tic() output_all = predictor.predict(data_batch) output_all = [mx.ndarray.argmax(output['softmax_output'], axis=1).asnumpy() for output in output_all] pallete = getpallete(256) segmentation_result = np.uint8(np.squeeze(output_all)) segmentation_result = Image.fromarray(segmentation_result) segmentation_result.putpalette(pallete) print 'testing {} {:.4f}s'.format(im_name, toc()) pure_im_name, ext_im_name = os.path.splitext(im_name) segmentation_result.save(cur_path + '/../demo/seg_' + pure_im_name + '.png') # visualize im_raw = cv2.imread(cur_path + '/../demo/' + im_name) seg_res = cv2.imread(cur_path + '/../demo/seg_' + pure_im_name + '.png') cv2.imshow('Raw Image', im_raw) cv2.imshow('segmentation_result', seg_res) cv2.waitKey(0) print 'done'