def run_model_pb(args, net, init_net, im, check_blobs):
    workspace.ResetWorkspace()
    workspace.RunNetOnce(init_net)
    mutils.create_input_blobs_for_net(net.Proto())
    workspace.CreateNet(net)

    # input_blobs, _ = core_test._get_blobs(im, None)
    input_blobs = _prepare_blobs(
        im,
        cfg.PIXEL_MEANS,
        cfg.TEST.SCALE, cfg.TEST.MAX_SIZE
    )
    gpu_blobs = []
    if args.device == 'gpu':
        gpu_blobs = ['data']
    for k, v in input_blobs.items():
        workspace.FeedBlob(
            core.ScopedName(k),
            v,
            mutils.get_device_option_cuda() if k in gpu_blobs else
            mutils.get_device_option_cpu()
        )

    try:
        workspace.RunNet(net.Proto().name)
        scores = workspace.FetchBlob('score_nms')
        classids = workspace.FetchBlob('class_nms')
        boxes = workspace.FetchBlob('bbox_nms')
    except Exception as e:
        print('Running pb model failed.\n{}'.format(e))
        # may not detect anything at all
        R = 0
        scores = np.zeros((R,), dtype=np.float32)
        boxes = np.zeros((R, 4), dtype=np.float32)
        classids = np.zeros((R,), dtype=np.float32)

    boxes = np.column_stack((boxes, scores))

    # sort the results based on score for comparison
    boxes, _, _, classids = _sort_results(
        boxes, None, None, classids)

    # write final result back to workspace
    workspace.FeedBlob('result_boxes', boxes)
    workspace.FeedBlob('result_classids', classids)

    ret = _get_result_blobs(check_blobs)

    return ret
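# The _prepare_blobs helper called above is not shown in this section. The
# sketch below is an assumption of what a Detectron-style implementation looks
# like (resize the short side to the target scale, cap the long side at
# max_size, subtract the pixel means, convert HWC -> NCHW, and emit an im_info
# blob of [height, width, scale]). The name _prepare_blobs_sketch and the
# details are guesses based on similar Detectron tooling, not the original
# helper.
import cv2
import numpy as np


def _prepare_blobs_sketch(im, pixel_means, target_size, max_size):
    # im is an HxWx3 BGR image, as returned by cv2.imread
    im = im.astype(np.float32, copy=False) - pixel_means
    im_size_min = np.min(im.shape[0:2])
    im_size_max = np.max(im.shape[0:2])

    # scale the short side to target_size, but never let the long side
    # exceed max_size
    im_scale = float(target_size) / float(im_size_min)
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
                    interpolation=cv2.INTER_LINEAR)

    # HWC -> NCHW with a leading batch dimension of 1
    blob = im[np.newaxis, :, :, :].transpose((0, 3, 1, 2))

    return {
        'data': blob.astype(np.float32, copy=False),
        'im_info': np.array(
            [[blob.shape[2], blob.shape[3], im_scale]], dtype=np.float32),
    }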
def convert_model_gpu(args, net, init_net): assert args.device == 'gpu' ret_net = copy.deepcopy(net) ret_init_net = copy.deepcopy(init_net) cdo_cuda = mutils.get_device_option_cuda() cdo_cpu = mutils.get_device_option_cpu() CPU_OPS = [ ["CollectAndDistributeFpnRpnProposals", None], ["GenerateProposals", None], ["BBoxTransform", None], ["BoxWithNMSLimit", None], ] CPU_BLOBS = ["im_info", "anchor"] @op_filter() def convert_op_gpu(op): for x in CPU_OPS: if mutils.filter_op(op, type=x[0], inputs=x[1]): return None op.device_option.CopyFrom(cdo_cuda) return [op] @op_filter() def convert_init_op_gpu(op): if op.output[0] in CPU_BLOBS: op.device_option.CopyFrom(cdo_cpu) else: op.device_option.CopyFrom(cdo_cuda) return [op] convert_op_in_proto(ret_init_net.Proto(), convert_init_op_gpu) convert_op_in_proto(ret_net.Proto(), convert_op_gpu) ret = core.InjectDeviceCopiesAmongNets([ret_init_net, ret_net]) return [ret[0][1], ret[0][0]]
def convert_model_gpu(args, net, init_net): assert args.device == 'gpu' ret_net = copy.deepcopy(net) ret_init_net = copy.deepcopy(init_net) cdo_cuda = mutils.get_device_option_cuda() cdo_cpu = mutils.get_device_option_cpu() CPU_OPS = [ ["GenerateProposals", None], ["BBoxTransform", None], ["BoxWithNMSLimit", None], ] CPU_BLOBS = ["im_info", "anchor"] @op_filter() def convert_op_gpu(op): for x in CPU_OPS: if mutils.filter_op(op, type=x[0], inputs=x[1]): return None op.device_option.CopyFrom(cdo_cuda) return [op] @op_filter() def convert_init_op_gpu(op): if op.output[0] in CPU_BLOBS: op.device_option.CopyFrom(cdo_cpu) else: op.device_option.CopyFrom(cdo_cuda) return [op] convert_op_in_proto(ret_init_net.Proto(), convert_init_op_gpu) convert_op_in_proto(ret_net.Proto(), convert_op_gpu) ret = core.InjectDeviceCopiesAmongNets([ret_init_net, ret_net]) return [ret[0][1], ret[0][0]]
# Variant of run_model_pb above: runs under a CUDA name scope, casts most
# weight blobs to float16 after the init net runs, and times repeated forward
# passes.
def run_model_pb(args, net, init_net, im, check_blobs):
    workspace.ResetWorkspace()
    with c2_utils.NamedCudaScope(0):
        workspace.RunNetOnce(init_net)
        # cast weight blobs to float16, except the blobs that must stay fp32
        for k in workspace.Blobs():
            if k not in ['data', 'im_info', 'anchor2', 'anchor3', 'anchor4',
                         'anchor5', 'anchor6', 'bbox_pred', 'rpn_rois',
                         'bbox_pred_w', 'bbox_pred_b']:
                a = workspace.FetchBlob(k)
                workspace.FeedBlob(k, a.astype(np.float16))
            # else:
            #     a = workspace.FetchBlob(k)
            #     workspace.FeedBlob(core.ScopedName(k), a)
        mutils.create_input_blobs_for_net(net.Proto())
        workspace.CreateNet(net)

        tt1 = time.time()
        for i in range(10):
            # input_blobs, _ = core_test._get_blobs(im, None)
            input_blobs = _prepare_blobs(
                im,
                cfg.PIXEL_MEANS,
                cfg.TEST.SCALES, cfg.TEST.MAX_SIZE
            )
            gpu_blobs = []
            if args.device == 'gpu':
                gpu_blobs = ['data']
            for k, v in input_blobs.items():
                workspace.FeedBlob(
                    k,  # core.ScopedName(k)
                    v,
                    mutils.get_device_option_cuda() if k in gpu_blobs else
                    mutils.get_device_option_cpu()
                )

            try:
                workspace.RunNet(net)
                scores = workspace.FetchBlob('score_nms')
                classids = workspace.FetchBlob('class_nms')
                boxes = workspace.FetchBlob('bbox_nms')
            except Exception as e:
                print('Running pb model failed.\n{}'.format(e))
                # may not detect anything at all
                R = 0
                scores = np.zeros((R,), dtype=np.float32)
                boxes = np.zeros((R, 4), dtype=np.float32)
                classids = np.zeros((R,), dtype=np.float32)
        tt2 = time.time() - tt1
        print("model2 10 runs total time {}s".format(tt2))

        boxes = np.column_stack((boxes, scores))

        # sort the results based on score for comparison
        boxes, _, _, classids = _sort_results(
            boxes, None, None, classids)

        # write final result back to workspace
        workspace.FeedBlob(core.ScopedName('result_boxes'), boxes)
        workspace.FeedBlob(core.ScopedName('result_classids'), classids)

        ret = _get_result_blobs(check_blobs)

    return ret
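# A minimal driver sketch showing how the pieces above might be wired together.
# It assumes the model was exported as two serialized NetDef protobufs; the
# load_model_pb helper, the file names, the args namespace, and the check_blobs
# list are assumptions for illustration, and the final print assumes
# _get_result_blobs returns a dict keyed by blob name.
import argparse

import cv2
from caffe2.proto import caffe2_pb2
from caffe2.python import core, workspace


def load_model_pb(net_file, init_file):
    # Parse the serialized NetDef protobufs and wrap them as core.Net objects.
    net_def = caffe2_pb2.NetDef()
    with open(net_file, 'rb') as f:
        net_def.ParseFromString(f.read())
    init_def = caffe2_pb2.NetDef()
    with open(init_file, 'rb') as f:
        init_def.ParseFromString(f.read())
    return core.Net(net_def), core.Net(init_def)


if __name__ == '__main__':
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    args = argparse.Namespace(device='gpu')  # assumed args layout
    net, init_net = load_model_pb('model.pb', 'model_init.pb')
    # note the return order of convert_model_gpu: predict net first, then init net
    net, init_net = convert_model_gpu(args, net, init_net)

    im = cv2.imread('input.jpg')  # BGR image, as _prepare_blobs expects
    check_blobs = ['result_boxes', 'result_classids']
    results = run_model_pb(args, net, init_net, im, check_blobs)
    for name in check_blobs:
        print(name, results[name])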