def ConvertTensorProtosToInitNet(net_params, input_name):
    """Takes the net_params returned from TranslateModel, and wraps it as an
    init net that contains GivenTensorFill ops.

    This is a very simple feature that only works with float tensors, and is
    only intended to be used in an environment where you want a single
    initialization file - for more complex cases, use a db to store the
    parameters.
    """
    init_net = caffe2_pb2.NetDef()
    for tensor in net_params.protos:
        if len(tensor.float_data) == 0:
            raise RuntimeError(
                "Only float tensors are supported in this util.")
        op = core.CreateOperator(
            "GivenTensorFill", [], [tensor.name],
            arg=[
                utils.MakeArgument("shape", list(tensor.dims)),
                utils.MakeArgument("values", tensor.float_data)
            ])
        init_net.op.extend([op])
    init_net.op.extend(
        [core.CreateOperator("ConstantFill", [], [input_name], shape=[1])])
    return init_net
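# A minimal usage sketch (assumption: `net_params` holds the TensorProtos
# returned by a TranslateModel call, and "data" is the external input blob
# the trailing ConstantFill should create). It writes the generated init net
# to the single initialization file the docstring above describes.
init_net = ConvertTensorProtosToInitNet(net_params, "data")
with open("init_net.pb", "wb") as f:
    f.write(init_net.SerializeToString())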
def check_set_pb_arg(pb, arg_name, arg_attr, arg_value, allow_override=False):
    arg = get_pb_arg(pb, arg_name)
    if arg is None:
        arg = putils.MakeArgument(arg_name, arg_value)
        assert hasattr(arg, arg_attr)
        pb.arg.extend([arg])
    if allow_override and getattr(arg, arg_attr) != arg_value:
        logger.warning(
            "Override argument {}: {} -> {}".format(
                arg_name, getattr(arg, arg_attr), arg_value))
        setattr(arg, arg_attr, arg_value)
    else:
        assert arg is not None
        assert getattr(arg, arg_attr) == arg_value, \
            "Existing value {}, new value {}".format(
                getattr(arg, arg_attr), arg_value)
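# Hedged usage sketch for check_set_pb_arg: for a string-typed argument the
# proto attribute is "s" and the value is bytes. The operator/net proto `pb`
# and the exact call site are assumptions; only the helper above is given.
check_set_pb_arg(pb, "order", "s", b"NCHW", allow_override=True)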
def run_on_device(self, device_opts):
    np.random.seed(0)

    proposal_count = 5000
    input_names = []
    inputs = []

    for lvl in range(RPN_MIN_LEVEL, RPN_MAX_LEVEL + 1):
        rpn_roi = (ROI_CANONICAL_SCALE *
                   np.random.rand(proposal_count, 5).astype(np.float32))
        for i in range(proposal_count):
            # Make RoIs have positive area, since they
            # are in the format [[batch_idx, x0, y0, x1, y1], ...]
            rpn_roi[i][3] += rpn_roi[i][1]
            rpn_roi[i][4] += rpn_roi[i][2]
        input_names.append('rpn_rois_fpn{}'.format(lvl))
        inputs.append(rpn_roi)
    for lvl in range(RPN_MIN_LEVEL, RPN_MAX_LEVEL + 1):
        rpn_roi_score = np.random.rand(proposal_count).astype(np.float32)
        input_names.append('rpn_roi_probs_fpn{}'.format(lvl))
        inputs.append(rpn_roi_score)

    output_names = [
        'rois',
    ]
    for lvl in range(ROI_MIN_LEVEL, ROI_MAX_LEVEL + 1):
        output_names.append('rois_fpn{}'.format(lvl))
    output_names.append('rois_idx_restore')

    op = core.CreateOperator(
        'CollectAndDistributeFpnRpnProposals',
        input_names,
        output_names,
        arg=[
            utils.MakeArgument("roi_canonical_scale", ROI_CANONICAL_SCALE),
            utils.MakeArgument("roi_canonical_level", ROI_CANONICAL_LEVEL),
            utils.MakeArgument("roi_max_level", ROI_MAX_LEVEL),
            utils.MakeArgument("roi_min_level", ROI_MIN_LEVEL),
            utils.MakeArgument("rpn_max_level", RPN_MAX_LEVEL),
            utils.MakeArgument("rpn_min_level", RPN_MIN_LEVEL),
            utils.MakeArgument("post_nms_topN", RPN_POST_NMS_TOP_N),
        ],
        device_option=device_opts)

    self.assertReferenceChecks(
        device_option=device_opts,
        op=op,
        inputs=inputs,
        reference=collect_and_distribute_fpn_rpn_ref,
    )
def test_collect_and_dist(
    self,
    proposal_count,
    rpn_min_level, rpn_num_levels,
    roi_min_level, roi_num_levels,
    rpn_post_nms_topN,
    roi_canonical_scale, roi_canonical_level,
    gc, dc
):
    input_names, inputs = self._create_input(
        proposal_count, rpn_min_level, rpn_num_levels, roi_canonical_scale)

    output_names = [
        'rois',
    ]
    for lvl in range(roi_num_levels):
        output_names.append('rois_fpn{}'.format(lvl + roi_min_level))
    output_names.append('rois_idx_restore')

    op = core.CreateOperator(
        'CollectAndDistributeFpnRpnProposals',
        input_names,
        output_names,
        arg=[
            utils.MakeArgument("roi_canonical_scale", roi_canonical_scale),
            utils.MakeArgument("roi_canonical_level", roi_canonical_level),
            utils.MakeArgument(
                "roi_max_level", roi_min_level + roi_num_levels - 1),
            utils.MakeArgument("roi_min_level", roi_min_level),
            utils.MakeArgument(
                "rpn_max_level", rpn_min_level + rpn_num_levels - 1),
            utils.MakeArgument("rpn_min_level", rpn_min_level),
            utils.MakeArgument("rpn_post_nms_topN", rpn_post_nms_topN),
        ],
        device_option=gc)

    args = {
        'rpn_min_level': rpn_min_level,
        'rpn_num_levels': rpn_num_levels,
        'roi_min_level': roi_min_level,
        'roi_num_levels': roi_num_levels,
        'rpn_post_nms_topN': rpn_post_nms_topN,
        'roi_canonical_scale': roi_canonical_scale,
        'roi_canonical_level': roi_canonical_level,
    }

    self.assertReferenceChecks(
        device_option=gc,
        op=op,
        inputs=inputs + [args],
        reference=collect_and_distribute_fpn_rpn_ref,
    )
def test_alias_with_name_op(self, shape, dtype, dc, gc):
    test_input = (100 * np.random.random(shape)).astype(dtype)
    test_inputs = [test_input]

    alias_op = core.CreateOperator(
        "AliasWithName",
        ["input"],
        ["output"],
        device_option=gc,
    )
    alias_op.arg.add().CopyFrom(utils.MakeArgument("name", "whatever_name"))

    def reference_func(x):
        return (x,)

    self.assertReferenceChecks(gc, alias_op, test_inputs, reference_func)
def setUp(self):
    super(TestHeatmapMaxKeypointOp, self).setUp()
    np.random.seed(0)

    # initial coordinates and interpolate HEATMAP_SIZE from it
    HEATMAP_SMALL_SIZE = 4
    bboxes_in = 500 * np.random.rand(NUM_TEST_ROI, 4).astype(np.float32)
    # ensure each bbox lists its smaller coordinates first
    # (x0 <= x1 and y0 <= y1)
    for i in range(NUM_TEST_ROI):
        if bboxes_in[i][0] > bboxes_in[i][2]:
            tmp = bboxes_in[i][2]
            bboxes_in[i][2] = bboxes_in[i][0]
            bboxes_in[i][0] = tmp
        if bboxes_in[i][1] > bboxes_in[i][3]:
            tmp = bboxes_in[i][3]
            bboxes_in[i][3] = bboxes_in[i][1]
            bboxes_in[i][1] = tmp

    # initialize randomized low-resolution heatmaps and expand them with
    # interpolation
    init = np.random.rand(
        NUM_TEST_ROI, NUM_KEYPOINTS,
        HEATMAP_SMALL_SIZE, HEATMAP_SMALL_SIZE).astype(np.float32)
    heatmaps_in = np.zeros(
        (NUM_TEST_ROI, NUM_KEYPOINTS,
         HEATMAP_SIZE, HEATMAP_SIZE)).astype(np.float32)
    for roi in range(NUM_TEST_ROI):
        for keyp in range(NUM_KEYPOINTS):
            f = interpolate.interp2d(
                np.arange(0, 1, 1.0 / HEATMAP_SMALL_SIZE),
                np.arange(0, 1, 1.0 / HEATMAP_SMALL_SIZE),
                init[roi][keyp],
                kind='cubic')
            heatmaps_in[roi][keyp] = f(
                np.arange(0, 1, 1.0 / HEATMAP_SIZE),
                np.arange(0, 1, 1.0 / HEATMAP_SIZE))

    self.heatmaps_in = heatmaps_in
    self.bboxes_in = bboxes_in

    self.op = core.CreateOperator(
        'HeatmapMaxKeypoint',
        ['heatmaps_in', 'bboxes_in'],
        ['keypoints_out'],
        arg=[utils.MakeArgument("should_output_softmax", True)],
        device_option=caffe2_pb2.DeviceOption())
def CreateOperator(
    operator_type,
    inputs,
    outputs,
    name='',
    device_option=None,
    arg=None,
    engine=None,
    **kwargs
):
    """A function wrapper that allows one to create operators based on the
    operator type. The type should be a string corresponding to an operator
    registered with Caffe2.
    """
    operator = caffe2_pb2.OperatorDef()
    operator.type = operator_type
    operator.name = name
    # Add rectified inputs and outputs
    inputs = _RectifyInputOutput(inputs)
    outputs = _RectifyInputOutput(outputs)
    operator.input.extend([str(i) for i in inputs])
    operator.output.extend([str(o) for o in outputs])
    # Set the device option:
    # (1) If device_option is explicitly set, use device_option.
    # (2) If not, but _DEVICESCOPE is set, use _DEVICESCOPE.
    # (3) Otherwise, do not set the device option.
    if device_option is not None:
        operator.device_option.CopyFrom(device_option)
    elif _DEVICESCOPE is not None:
        operator.device_option.CopyFrom(_DEVICESCOPE)
    if engine is not None:
        operator.engine = engine
    # The random seed is defined in the device option, so it needs special
    # care.
    if 'random_seed' in kwargs:
        operator.device_option.random_seed = kwargs['random_seed']
        del kwargs['random_seed']
    # Add given arguments that do not need parsing
    if arg is not None:
        operator.arg.extend(arg)
    # Add all other arguments
    for key, value in kwargs.items():
        operator.arg.add().CopyFrom(utils.MakeArgument(key, value))
    return operator
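# For reference, the two calling styles used throughout these snippets build
# identical protos: keyword arguments are routed through utils.MakeArgument
# inside CreateOperator, while a prebuilt Argument list goes in via `arg`.
# A minimal sketch (blob names "X"/"Y" are placeholders):
op_kw = CreateOperator("Softmax", ["X"], ["Y"], axis=1)
op_arg = CreateOperator(
    "Softmax", ["X"], ["Y"],
    arg=[utils.MakeArgument("axis", 1)])
assert op_kw.arg[0] == op_arg.arg[0]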
def get_max(self, op, tensor, tensor_idx, tensor_name, max_name):
    global iteration_idx
    name = max_name + "_" + str(tensor_idx)
    op_hist_name = tensor_name + "_" + max_name + "_" + str(tensor_idx)

    arg = self.get_arg(op, name)
    if iteration_idx < self.kl_iter_num_for_range:
        max_min = np.array([np.max(tensor), np.min(tensor)]).astype(np.float32)
        if arg is not None:
            orig_max = arg.floats[0]
            orig_min = arg.floats[1]
            cur_max = max(orig_max, max_min[0])
            cur_min = min(orig_min, max_min[1])
            max_min = np.array([cur_max, cur_min]).astype(np.float32)
            self.remove_arg(op, name)
        # save max values in predict_def as operator arguments
        max_arg = utils.MakeArgument(name, max_min)
        op.arg.extend([max_arg])
    else:
        assert arg is not None
        max_val = arg.floats[0]
        min_val = arg.floats[1]
        self.get_kl_hist(tensor, min_val, max_val, op_hist_name)
def update_max(self, op, max_name, tensor_idx, tensor_name):
    """Update the max data of the collected data."""
    global hist
    global hist_edges
    global iteration_idx
    name = max_name + "_" + str(tensor_idx)
    hist_name = tensor_name + "_" + max_name + "_" + str(tensor_idx)
    P_sum = iteration_idx - self.kl_iter_num_for_range
    arg = self.get_arg(op, name)
    assert arg is not None
    max_val = arg.floats[0]
    min_val = arg.floats[1]
    hist_iter = hist[hist_name]
    hist_edges_iter = hist_edges[hist_name]
    layer_max = self.get_optimal_scaling_factor(
        hist_iter, hist_edges_iter, P_sum, max_val, min_val)
    self.remove_arg(op, name)
    max_arg = utils.MakeArgument(name, np.array([layer_max]).astype(np.float32))
    # save max values in predict_def as operator arguments
    op.arg.extend([max_arg])
def testArgsToDict(self):
    args = [
        utils.MakeArgument("int1", 3),
        utils.MakeArgument("float1", 4.0),
        utils.MakeArgument("string1", "foo"),
        utils.MakeArgument("intlist1", np.array([3, 4])),
        utils.MakeArgument("floatlist1", np.array([5.0, 6.0])),
        utils.MakeArgument("stringlist1", np.array(["foo", "bar"])),
    ]
    dict_ = utils.ArgsToDict(args)
    expected = {
        "int1": 3,
        "float1": 4.0,
        "string1": b"foo",
        "intlist1": [3, 4],
        "floatlist1": [5.0, 6.0],
        "stringlist1": [b"foo", b"bar"],
    }
    self.assertEqual(
        dict_, expected,
        "dictionary version of arguments doesn't match original")
def add_bbox_ops(args, net, blobs):
    new_ops = []
    new_external_outputs = []

    # Operators for bboxes
    op_box = core.CreateOperator(
        "BBoxTransform",
        ["rpn_rois", "bbox_pred", "im_info"],
        ["pred_bbox"],
        weights=cfg.MODEL.BBOX_REG_WEIGHTS,
        apply_scale=False,
        correct_transform_coords=True,
    )
    new_ops.extend([op_box])

    blob_prob = "cls_prob"
    blob_box = "pred_bbox"
    op_nms = core.CreateOperator(
        "BoxWithNMSLimit",
        [blob_prob, blob_box],
        ["score_nms", "bbox_nms", "class_nms"],
        arg=[
            putils.MakeArgument("score_thresh", cfg.TEST.SCORE_THRESH),
            putils.MakeArgument("nms", cfg.TEST.NMS),
            putils.MakeArgument("detections_per_im", cfg.TEST.DETECTIONS_PER_IM),
            putils.MakeArgument("soft_nms_enabled", cfg.TEST.SOFT_NMS.ENABLED),
            putils.MakeArgument("soft_nms_method", cfg.TEST.SOFT_NMS.METHOD),
            putils.MakeArgument("soft_nms_sigma", cfg.TEST.SOFT_NMS.SIGMA),
        ],
    )
    new_ops.extend([op_nms])
    new_external_outputs.extend(["score_nms", "bbox_nms", "class_nms"])

    net.Proto().op.extend(new_ops)
    net.Proto().external_output.extend(new_external_outputs)
# ./tests/unittests/caffe2ImporterTest.cpp
# Run $>python gen_caffe2_model.py to get the model files.

from caffe2.proto import caffe2_pb2
from caffe2.python import utils
from google.protobuf import text_format

# Define a weights network
weights = caffe2_pb2.NetDef()
weights.name = "init"

op = caffe2_pb2.OperatorDef()
op.type = "GivenTensorFill"
op.output.extend(["conv_w"])
op.arg.extend([utils.MakeArgument("shape", [1, 1, 2, 2])])
op.arg.extend([utils.MakeArgument("values", [1.0 for i in range(4)])])
weights.op.extend([op])

op = caffe2_pb2.OperatorDef()
op.type = "GivenTensorFill"
op.output.extend(["conv_b"])
op.arg.extend([utils.MakeArgument("shape", [1])])
op.arg.extend([utils.MakeArgument("values", [2.0 for i in range(1)])])
weights.op.extend([op])

weights.external_output.extend(op.output)

# Define an inference net
net = caffe2_pb2.NetDef()
net.name = "predict"
from caffe2.proto import caffe2_pb2
from caffe2.python import utils

# Define a weights network
weights = caffe2_pb2.NetDef()
weights.name = "init"

op = caffe2_pb2.OperatorDef()
op.type = "fake_data_provider"
op.output.extend(["data"])
weights.op.extend([op])
weights.external_output.extend(op.output)

op = caffe2_pb2.OperatorDef()
op.type = "GivenTensorFill"
op.output.extend(["fc_w"])
op.arg.extend([utils.MakeArgument("shape", [1, 4])])
op.arg.extend([utils.MakeArgument("values", [1.0 for i in range(4)])])
weights.op.extend([op])
weights.external_output.extend(op.output)

op = caffe2_pb2.OperatorDef()
op.type = "GivenTensorFill"
op.output.extend(["fc_b"])
op.arg.extend([utils.MakeArgument("shape", [1, 4])])
op.arg.extend([utils.MakeArgument("values", [1.0 for i in range(4)])])
weights.op.extend([op])
weights.external_output.extend(op.output)

# Define an inference net
net = caffe2_pb2.NetDef()
net.name = "predict"
def run_single_kpts(
    net,
    image,
    target_size,
    pixel_means=PIXEL_MEANS_DEFAULT,
    pixel_stds=PIXEL_STDS_DEFAULT,
    max_size=1333,
):
    inputs = utils2.prepare_blobs(
        image,
        target_size=target_size,
        max_size=max_size,
        pixel_means=pixel_means,
        pixel_stds=pixel_stds,
    )

    # Prepare inputs for AABB and Int8AABB operators
    im_info = inputs["im_info"]
    scale = im_info[0][2]
    inputs["im_infoq"] = np.rint(im_info[:, :2] * 8.0).astype(np.uint16)
    inputs["im_info2"] = im_info[:, :2]

    blob_names = []
    ser_blobs = []

    # Serialize inputs for the remote device
    for k, v in inputs.items():
        workspace.FeedBlob(k, v)
        blob_names.append(k)
        ser_blobs.append(workspace.SerializeBlob(k))

    # Serialize output templates for the remote device
    fully_quantized = any(op.type == "Int8AABBRoIProposals" for op in net.op)
    bbox_type = np.uint16 if fully_quantized else np.float32
    output_templates = {
        "score_nms": np.zeros((3,), np.float32),
        "keypoint_rois": np.zeros((3, 4), bbox_type),
        "keypoints_out": np.zeros((3, 17, 2), bbox_type),
        "class_nms": np.zeros((3,), np.int32),
        "keypoints_scores_out": np.zeros((3, 17), np.float32),
    }
    for out_name in net.external_output:
        fake_name = out_name + "_empty_template"
        blob_names.append(out_name)
        workspace.FeedBlob(fake_name, output_templates[out_name])
        ser_blobs.append(workspace.SerializeBlob(fake_name))

    # Package inputs and output templates
    inout_netdef = caffe2_pb2.NetDef()
    inout_netdef.arg.extend([
        utils.MakeArgument("blob_names", blob_names),
        utils.MakeArgument("ser_blobs", ser_blobs),
    ])

    # Send in/out to the remote device
    with tempfile.NamedTemporaryFile() as inout_file:
        inout_file.write(inout_netdef.SerializeToString())
        inout_file.flush()
        subprocess.check_call(
            ["adb", "push", inout_file.name,
             "/data/local/tmp/input_output.pb"])

    try:
        # Run the model
        use_caffe2 = (
            "--use_caffe2_reference true"
            if os.environ.get("USE_CAFFE2_REFERENCE") in ("1", "true", "yes", "on")
            else "")
        subprocess.check_call(
            "adb shell 'cd /data/local/tmp ; "
            "GLOG_logtostderr=true GLOG_v=0 ./nnapi_runner %s "
            "--init_net init_net.pb --predict_net predict_net.pb "
            "--inout_net input_output.pb --out_path output_blobs.pb'"
            % use_caffe2,
            shell=True)

        # Retrieve and deserialize outputs
        with tempfile.TemporaryDirectory() as tmpdir:
            output_file = os.path.join(tmpdir, "output_blobs.pb")
            subprocess.check_call(
                ["adb", "pull", "/data/local/tmp/output_blobs.pb",
                 output_file])
            out_net = caffe2_pb2.NetDef()
            with open(output_file, "rb") as handle:
                out_net.ParseFromString(handle.read())
        all_outputs = utils.ArgsToDict(out_net.arg)["outputs"]
        for output in all_outputs:
            bp = caffe2_pb2.BlobProto()
            bp.ParseFromString(output)
            workspace.DeserializeBlob(bp.name, output)

        scores = workspace.FetchBlob("score_nms")
        boxes = workspace.FetchBlob("keypoint_rois")
        coords_preds = workspace.FetchBlob("keypoints_out")
        scores_preds = workspace.FetchBlob("keypoints_scores_out")
        classids = workspace.FetchBlob("class_nms")
        if boxes.dtype == np.uint16:
            boxes = boxes.astype(np.float32) * 0.125
        # New output format of AABBRoIKeypoints:
        # - XY coordinates are a [num_rois, num_keypoints, 2] array in
        #   keypoints_out
        # - Scores are a [num_rois, num_keypoints] array in
        #   keypoints_scores_out
        if coords_preds.dtype == np.uint16:
            coords_preds = coords_preds.astype(np.float32) * 0.125
        assert coords_preds.shape[:2] == scores_preds.shape
        num_rois, num_keypoints = coords_preds.shape[:2]
        xy_preds = np.concatenate(
            (coords_preds,
             scores_preds.reshape([num_rois, num_keypoints, 1]),
             np.zeros([num_rois, num_keypoints, 1], dtype=np.float32)),
            axis=2)
        assert xy_preds.shape == (num_rois, num_keypoints, 4)
        xy_preds = np.swapaxes(xy_preds, 1, 2)
        assert xy_preds.shape == (num_rois, 4, num_keypoints)
    except Exception as e:
        print(e)
        # may not detect anything at all
        R = 0
        scores = np.zeros((R,), dtype=np.float32)
        boxes = np.zeros((R, 4), dtype=np.float32)
        xy_preds = np.zeros((R, 4, 1), dtype=np.float32)
        classids = np.zeros((R,), dtype=np.float32)

    scale = inputs["im_info"][0][2]
    boxes /= scale
    if xy_preds is not None:
        xy_preds /= scale

    boxes = np.column_stack((boxes, scores))
    return boxes, xy_preds, classids
def test_collect_and_dist(
    self,
    proposal_count,
    rpn_min_level, rpn_num_levels,
    roi_min_level, roi_num_levels,
    rpn_post_nms_topN,
    roi_canonical_scale, roi_canonical_level,
    gc, dc
):
    np.random.seed(0)

    input_names = []
    inputs = []

    for lvl in range(rpn_num_levels):
        rpn_roi = (roi_canonical_scale *
                   np.random.rand(proposal_count, 5).astype(np.float32))
        for i in range(proposal_count):
            # Make RoIs have positive area, since they
            # are in the format [[batch_idx, x0, y0, x1, y1], ...]
            rpn_roi[i][3] += rpn_roi[i][1]
            rpn_roi[i][4] += rpn_roi[i][2]
        input_names.append('rpn_rois_fpn{}'.format(lvl + rpn_min_level))
        inputs.append(rpn_roi)
    for lvl in range(rpn_num_levels):
        rpn_roi_score = np.random.rand(proposal_count).astype(np.float32)
        input_names.append('rpn_roi_probs_fpn{}'.format(lvl + rpn_min_level))
        inputs.append(rpn_roi_score)

    output_names = [
        'rois',
    ]
    for lvl in range(roi_num_levels):
        output_names.append('rois_fpn{}'.format(lvl + roi_min_level))
    output_names.append('rois_idx_restore')

    op = core.CreateOperator(
        'CollectAndDistributeFpnRpnProposals',
        input_names,
        output_names,
        arg=[
            utils.MakeArgument("roi_canonical_scale", roi_canonical_scale),
            utils.MakeArgument("roi_canonical_level", roi_canonical_level),
            utils.MakeArgument(
                "roi_max_level", roi_min_level + roi_num_levels - 1),
            utils.MakeArgument("roi_min_level", roi_min_level),
            utils.MakeArgument(
                "rpn_max_level", rpn_min_level + rpn_num_levels - 1),
            utils.MakeArgument("rpn_min_level", rpn_min_level),
            utils.MakeArgument("rpn_post_nms_topN", rpn_post_nms_topN),
        ],
        device_option=gc)

    args = {
        'proposal_count': proposal_count,
        'rpn_min_level': rpn_min_level,
        'rpn_num_levels': rpn_num_levels,
        'roi_min_level': roi_min_level,
        'roi_num_levels': roi_num_levels,
        'rpn_post_nms_topN': rpn_post_nms_topN,
        'roi_canonical_scale': roi_canonical_scale,
        'roi_canonical_level': roi_canonical_level,
    }

    self.assertReferenceChecks(
        device_option=gc,
        op=op,
        inputs=inputs + [args],
        reference=collect_and_distribute_fpn_rpn_ref,
    )
def Benchmark(model_gen, arg):
    model, input_size = model_gen(arg.order)

    # In order to be able to run everything without feeding more stuff, let's
    # add the data and label blobs to the parameter initialization net as well.
    if arg.order == "NCHW":
        input_shape = [arg.batch_size, 3, input_size, input_size]
    else:
        input_shape = [arg.batch_size, input_size, input_size, 3]
    model.param_init_net.GaussianFill(
        [], "data", shape=input_shape, mean=0.0, std=1.0)
    model.param_init_net.UniformIntFill(
        [], "label", shape=[arg.batch_size, ], min=0, max=999)

    # Note: even when we are running things on CPU, adding a few engine-related
    # arguments will not hurt since the CPU operator registry will simply
    # ignore these options and go down the default path.
    for op in model.net.Proto().op:
        if op.type == 'Conv' or op.type == 'ConvFp16':
            op.engine = 'CUDNN'
            # op.arg.add().CopyFrom(
            #     utils.MakeArgument('ws_nbytes_limit', arg.cudnn_limit))
            op.arg.add().CopyFrom(utils.MakeArgument('exhaustive_search', 1))
            op.arg.add().CopyFrom(
                utils.MakeArgument('shared_ws_name', 'cudnn_workspace'))
        elif op.type in [
            'MaxPool', 'MaxPoolFp16', 'AveragePool', 'AveragePoolFp16',
            'Relu', 'ReluFp16', 'Softmax', 'SoftmaxFp16'
        ]:
            op.engine = 'CUDNN'

    if arg.forward_only:
        print(arg.model, ': running forward only.')
    else:
        print(arg.model, ': running forward-backward.')
        model.AddGradientOperators()
        if arg.order == 'NHWC':
            print(
                '==WARNING==\n'
                'NHWC order with CuDNN may not be supported yet, so I might\n'
                'exit suddenly.')

    if not arg.cpu:
        model.param_init_net.RunAllOnGPU()
        model.net.RunAllOnGPU()

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)
    for i in range(arg.warmup_iterations):
        workspace.RunNet(model.net.Proto().name)

    start = time.time()
    for i in range(arg.iterations):
        workspace.RunNet(model.net.Proto().name)
    print('Spent: ', (time.time() - start) / arg.iterations)
    if arg.layer_wise_benchmark:
        print('Layer-wise benchmark.')
        workspace.BenchmarkNet(model.net.Proto().name, 1, arg.iterations, True)

    # Writes out the pbtxt for benchmarks on e.g. Android
    with open(
            "{0}_init_batch_{1}.pbtxt".format(arg.model, arg.batch_size),
            "w") as fid:
        fid.write(str(model.param_init_net.Proto()))
    with open("{0}.pbtxt".format(arg.model), "w") as fid:
        fid.write(str(model.net.Proto()))
def parse_args(args):
    operator = caffe2_pb2.OperatorDef()
    for k, v in args.items():
        arg = utils.MakeArgument(k, v)
        operator.arg.add().CopyFrom(arg)
    return operator.arg
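# Hedged usage sketch for parse_args: the dict below is a placeholder. The
# helper returns the repeated Argument field of a throwaway OperatorDef, so
# the result can be iterated or passed wherever Argument protos are expected.
args = parse_args({"kernel": 3, "stride": 2, "order": "NCHW"})
for a in args:
    print(a.name)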
def run_single_segms(
    net,
    image,
    target_size,
    pixel_means=PIXEL_MEANS_DEFAULT,
    pixel_stds=PIXEL_STDS_DEFAULT,
    rle_encode=True,
    max_size=1333,
):
    inputs = utils2.prepare_blobs(
        image,
        target_size=target_size,
        max_size=max_size,
        pixel_means=pixel_means,
        pixel_stds=pixel_stds,
    )

    # Prepare inputs for AABB and Int8AABB operators
    im_info = inputs["im_info"]
    scale = im_info[0][2]
    inputs["im_infoq"] = np.rint(im_info[:, :2] * 8.0).astype(np.uint16)
    inputs["im_info2"] = im_info[:, :2]

    blob_names = []
    ser_blobs = []

    # Serialize inputs for the remote device
    for k, v in inputs.items():
        workspace.FeedBlob(k, v)
        blob_names.append(k)
        ser_blobs.append(workspace.SerializeBlob(k))

    # Serialize output templates for the remote device
    fully_quantized = any(op.type == "Int8AABBRoIProposals" for op in net.op)
    bbox_type = np.uint16 if fully_quantized else np.float32
    output_templates = {
        "score_nms": np.zeros((LIMIT,), np.float32),
        "bbox_nms": np.zeros((LIMIT, 4), bbox_type),
        "class_nms": np.zeros((LIMIT,), np.int32),
        "mask_fcn_probs": np.zeros((LIMIT, CLASSES, RES, RES), np.float32),
    }
    for out_name in net.external_output:
        fake_name = out_name + "_empty_template"
        blob_names.append(out_name)
        workspace.FeedBlob(fake_name, output_templates[out_name])
        ser_blobs.append(workspace.SerializeBlob(fake_name))

    # Package inputs and output templates
    inout_netdef = caffe2_pb2.NetDef()
    inout_netdef.arg.extend([
        utils.MakeArgument("blob_names", blob_names),
        utils.MakeArgument("ser_blobs", ser_blobs),
    ])

    # Send in/out to the remote device
    with tempfile.NamedTemporaryFile() as inout_file:
        inout_file.write(inout_netdef.SerializeToString())
        inout_file.flush()
        subprocess.check_call(
            ["adb", "push", inout_file.name,
             "/data/local/tmp/input_output.pb"])

    try:
        # Run the model
        use_caffe2 = (
            "--use_caffe2_reference true"
            if os.environ.get("USE_CAFFE2_REFERENCE") in ("1", "true", "yes", "on")
            else "")
        subprocess.check_call(
            "adb shell 'cd /data/local/tmp ; "
            "GLOG_logtostderr=true GLOG_v=0 ./nnapi_runner %s "
            "--init_net init_net.pb --predict_net predict_net.pb "
            "--inout_net input_output.pb --out_path output_blobs.pb'"
            % use_caffe2,
            shell=True)

        # Retrieve and deserialize outputs
        with tempfile.TemporaryDirectory() as tmpdir:
            output_file = os.path.join(tmpdir, "output_blobs.pb")
            subprocess.check_call(
                ["adb", "pull", "/data/local/tmp/output_blobs.pb",
                 output_file])
            out_net = caffe2_pb2.NetDef()
            with open(output_file, "rb") as handle:
                out_net.ParseFromString(handle.read())
        all_outputs = utils.ArgsToDict(out_net.arg)["outputs"]
        for output in all_outputs:
            bp = caffe2_pb2.BlobProto()
            bp.ParseFromString(output)
            workspace.DeserializeBlob(bp.name, output)

        classids = workspace.FetchBlob("class_nms")
        scores = workspace.FetchBlob("score_nms")  # bbox scores, (R, )
        boxes = workspace.FetchBlob("bbox_nms")  # i.e., boxes, (R, 4*1)
        masks = workspace.FetchBlob("mask_fcn_probs")  # (R, cls, mask_dim, mask_dim)
        if boxes.dtype == np.uint16:
            boxes = boxes.astype(np.float32) * 0.125
        boxes /= scale
    except Exception as e:
        print(e)
        # may not detect anything at all
        R = 0
        scores = np.zeros((R,), dtype=np.float32)
        boxes = np.zeros((R, 4), dtype=np.float32)
        classids = np.zeros((R,), dtype=np.float32)
        masks = np.zeros((R, 1, 1, 1), dtype=np.float32)

    # scaling is included in the model
    # scale = inputs["im_info"][0][2]
    # boxes /= scale

    R = boxes.shape[0]
    im_masks = []
    if R > 0:
        im_dims = image.shape
        im_masks = utils2.compute_segm_results(
            masks, boxes, classids, im_dims[0], im_dims[1],
            rle_encode=rle_encode
        )

    boxes = np.column_stack((boxes, scores))

    ret = {
        "classids": classids,
        "boxes": boxes,
        "masks": masks,
        "im_masks": im_masks,
    }
    return ret
def Train(args):
    # Either use the specified device list or generate one
    if args.gpus is not None:
        gpus = [int(x) for x in args.gpus.split(',')]
        num_gpus = len(gpus)
    else:
        gpus = list(range(args.num_gpus))
        num_gpus = args.num_gpus

    log.info("Running on GPUs: {}".format(gpus))

    # Verify that the batch size is valid
    total_batch_size = args.batch_size
    batch_per_device = total_batch_size // num_gpus
    assert \
        total_batch_size % num_gpus == 0, \
        "Number of GPUs must divide batch size"

    # Round down epoch size to the closest multiple of batch size across
    # machines
    global_batch_size = total_batch_size * args.num_shards
    epoch_iters = int(args.epoch_size / global_batch_size)
    args.epoch_size = epoch_iters * global_batch_size
    log.info("Using epoch size: {}".format(args.epoch_size))

    # Create ModelHelper object
    # train_arg_scope = {
    #     'order': 'NCHW',
    #     'use_cudnn': True,
    #     'cudnn_exhaustive_search': True,
    #     'ws_nbytes_limit': (args.cudnn_workspace_limit_mb * 1024 * 1024),
    # }
    # train_model = model_helper.ModelHelper(
    #     name="mobilenet", arg_scope=train_arg_scope
    # )

    num_shards = args.num_shards
    rendezvous = None

    # Model building functions
    # def create_mobilenet_model_ops(model, loss_scale):
    #     [softmax, loss] = mobilenet.create_mobilenet(
    #         model,
    #         "data",
    #         num_input_channels=args.num_channels,
    #         num_labels=args.num_labels,
    #         label="label",
    #         is_test=True,
    #     )
    #     loss = model.Scale(loss, scale=loss_scale)
    #     brew.accuracy(model, [softmax, "label"], "accuracy")
    #     return [loss]

    # def add_optimizer(model):
    #     stepsz = int(30 * args.epoch_size / total_batch_size / num_shards)
    #     optimizer.add_weight_decay(model, args.weight_decay)
    #     optimizer.build_sgd(
    #         model,
    #         args.base_learning_rate,
    #         momentum=0.9,
    #         nesterov=1,
    #         policy="step",
    #         stepsize=stepsz,
    #         gamma=0.1
    #     )

    # def add_image_input(model):
    #     AddImageInput(
    #         model,
    #         reader,
    #         batch_size=batch_per_device,
    #         img_size=args.image_size,
    #     )

    # def add_post_sync_ops(model):
    #     for param_info in model.GetOptimizationParamInfo(model.GetParams()):
    #         if param_info.blob_copy is not None:
    #             model.param_init_net.HalfToFloat(
    #                 param_info.blob,
    #                 param_info.blob_copy[core.DataType.FLOAT]
    #             )

    test_arg_scope = {
        'order': "NCHW",
        # 'use_cudnn': True,
        # 'cudnn_exhaustive_search': True,
    }
    test_model = model_helper.ModelHelper(
        name="mobilenet_test", arg_scope=test_arg_scope)

    deploy_arg_scope = {'order': "NCHW"}
    deploy_model = model_helper.ModelHelper(
        name="mobilenet_deploy", arg_scope=deploy_arg_scope)
    mobilenet.create_mobilenet(
        deploy_model,
        "data",
        num_input_channels=args.num_channels,
        num_labels=args.num_labels,
        is_test=True,
    )

    # raw_data = np.random.randn(1, 3, 224, 224).astype(np.float32)
    # workspace.FeedBlob("data", raw_data)
    # workspace.RunNetOnce(deploy_model.param_init_net)
    # workspace.CreateNet(deploy_model.net)

    # mobilenet.create_mobilenet(
    #     test_model,
    #     "gpu_0/data",
    #     num_input_channels=args.num_channels,
    #     num_labels=args.num_labels,
    #     is_test=True,
    # )

    # test_reader = test_model.CreateDB(
    #     "test_reader",
    #     db=args.test_data,
    #     db_type=args.db_type,
    # )

    # def test_input_fn(model):
    #     AddImageInput(
    #         model,
    #         test_reader,
    #         batch_size=batch_per_device,
    #         img_size=args.image_size,
    #     )

    # data_parallel_model.Parallelize_GPU(
    #     test_model,
    #     input_builder_fun=test_input_fn,
    #     forward_pass_builder_fun=create_mobilenet_model_ops,
    #     post_sync_builder_fun=add_post_sync_ops,
    #     param_update_builder_fun=None,
    #     devices=gpus,
    # )

    # inputs = np.zeros((32, 3, 224, 224), dtype='f')
    # labels = np.zeros((32,), dtype='f')
    # workspace.FeedBlob("gpu_0/data", inputs)
    # workspace.FeedBlob("gpu_0/label", labels)

    workspace.RunNetOnce(test_model.param_init_net)
    workspace.CreateNet(test_model.net)

    LoadModel(args.load_model_path, test_model)

    prefix = "gpu_0/"
    for value in deploy_model.params:
        workspace.FeedBlob(value, workspace.FetchBlob(prefix + value))

    # SaveModel(args, test_model)

    # workspace.ResetWorkspace()
    # print(workspace.Blobs())
    # print(deploy_model.params)
    # print("=====================")
    # print(test_model.params)
    # print("=====================")
    # print(workspace.FetchBlob("gpu_0/comp_11_spatbn_2_rm"))
    # print(workspace.FetchBlob("comp_11_spatbn_2_rm"))
    # print(deploy_model.net.Proto())
    # print(deploy_model.param_init_net.Proto())
    # exit(0)

    init_net = caffe2_pb2.NetDef()
    # print(len(deploy_model.params))
    # print(deploy_model.param_init_net.Proto())
    # with open("params", 'w') as f:
    #     f.write(str(deploy_model.param_init_net.Proto()))

    tmp_o = np.zeros((1, 1)).astype(np.float32)
    # print(tmp_o.shape)
    # print(type(tmp_o))
    # exit(0)

    init_net.name = "mobilenet_init"
    rm_riv = []
    for value in deploy_model.params:
        tmp = workspace.FetchBlob(prefix + value)
        # print(type(tmp.shape), type(tmp))
        if "spatbn" == str(value)[-10:-4]:
            # print(value)
            if "s" == str(value)[-1]:
                # print(str(value)[:-1] + "rm")
                # init_net.op.extend([core.CreateOperator(
                #     "GivenTensorFill", [], [str(value)[:-1] + "rm"],
                #     arg=[utils.MakeArgument("shape", tmp_o.shape),
                #          utils.MakeArgument("values", tmp_o)])])
                rm_riv.append(
                    core.CreateOperator(
                        "GivenTensorFill", [], [str(value)[:-1] + "rm"],
                        arg=[
                            utils.MakeArgument("shape", tmp_o.shape),
                            utils.MakeArgument("values", tmp_o)
                        ]))
                rm_riv.append(
                    core.CreateOperator(
                        "GivenTensorFill", [], [str(value)[:-1] + "riv"],
                        arg=[
                            utils.MakeArgument("shape", tmp_o.shape),
                            utils.MakeArgument("values", tmp_o)
                        ]))
            # elif "b" == str(value)[-1]:
            #     # print(str(value)[:-1] + "riv")
            #     init_net.op.extend([core.CreateOperator(
            #         "GivenTensorFill", [], [str(value)[:-1] + "riv"],
            #         arg=[utils.MakeArgument("shape", tmp_o.shape),
            #              utils.MakeArgument("values", tmp_o)])])
        init_net.op.extend([
            core.CreateOperator(
                "GivenTensorFill", [], [value],
                arg=[
                    utils.MakeArgument("shape", tmp.shape),
                    utils.MakeArgument("values", tmp)
                ])
        ])
    init_net.op.extend([
        core.CreateOperator(
            "ConstantFill", [], ["data"], shape=(1, 3, 224, 224))
    ])
    # exit(0)
    # for value in rm_riv:
    #     init_net.op.extend([value])

    deploy_model.net._net.external_output.extend(["softmax"])
    predict_net = deploy_model.net._net
    # print(dir(deploy_model.net._net))
    # with open("pparams", 'w') as f:
    #     f.write(str(deploy_model.param_init_net.Proto()))
    # print(workspace.Blobs())
    # for k, value in enumerate(deploy_model.params):
    #     # print(k, value)
    #     name = k + value
    #     name = workspace.FetchBlob(prefix + value)
    # tmp_work = {value: workspace.FetchBlob(prefix + value)
    #             for value in deploy_model.params}
    # # tmp_params = (str(deploy_model.params)
    # workspace.ResetWorkspace()
    # # print(workspace.Blobs())
    # # exit(0)
    # for value in deploy_model.params:
    #     workspace.FeedBlob(value, tmp_work[value])
    # # print(workspace.Blobs())
    # print(workspace.FetchBlob("last_out_b"))
    # exit(0)
    # deploy_model.net._net.external_output.extend(["softmax"])

    # ====================================================================
    # init_net, predict_net = me.Export(
    #     workspace, deploy_model.net, deploy_model.params)
    # # print(dir(predict_net.op.remove))
    # # # print(dir(caffe2_pb2.NetDef))
    # # print("===========")
    # # init_net.op.pop(0)
    # flag_di = []
    # print(len(init_net.op))
    # for k, value in enumerate(init_net.op):
    #     for x in value.output:
    #         if ("data" == str(x)) and ("GivenTensorFill" == str(value.type)):
    #             flag_di.append(k)
    # flag_di = sorted(flag_di)
    # for k, v in enumerate(flag_di):
    #     init_net.op.pop(v - k)
    # print(len(init_net.op))
    # flag_dp = []
    # print(len(predict_net.external_input))
    # for k, value in enumerate(predict_net.external_input):
    #     if "data" == str(value):
    #         flag_dp.append(k)
    # flag_dp = sorted(flag_dp)
    # for k, v in enumerate(flag_dp):
    #     predict_net.external_input.pop(v - k)
    # print(len(predict_net.external_input))
    # predict_net.external_input.extend(["data"])
    # init_net.op.extend([core.CreateOperator(
    #     "ConstantFill", [], ["data"], shape=(1, 3, 224, 224))])
    # ==============================================

    # Note: the text dumps are opened in text mode, since str() output cannot
    # be written to a binary file handle.
    with open("pred_net", 'w') as f:
        f.write(str(predict_net))
    # with open("e_pred_net", 'w') as f:
    #     f.write(str(e_predict_net))
    with open("init_net", 'w') as f:
        f.write(str(init_net))
    with open(output_predict_net, 'wb') as f:
        f.write(predict_net.SerializeToString())
    print(output_predict_net)
    with open(output_init_net, 'wb') as f:
        f.write(init_net.SerializeToString())
    print(output_init_net)
    print("OK!")
def AddArgument(op, key, value):
    """Makes an argument based on the value type."""
    op.arg.extend([utils.MakeArgument(key, value)])
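# Minimal usage sketch for AddArgument (assumption: `op` is a fresh
# caffe2_pb2.OperatorDef; the operator type and argument values are
# placeholders). MakeArgument picks the proto field from the value type.
op = caffe2_pb2.OperatorDef()
op.type = "Conv"
AddArgument(op, "kernel", 3)           # scalar int -> arg.i
AddArgument(op, "pads", [1, 1, 1, 1])  # int list -> arg.ints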
def add_quantization_param_args_(op, q_param):
    op.arg.extend([
        utils.MakeArgument("Y_scale", q_param.scale),
        utils.MakeArgument("Y_zero_point", q_param.zero_point),
    ])
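# Hedged sketch of driving add_quantization_param_args_: the real q_param
# type comes from the surrounding quantization code, so a namedtuple with
# the same `scale`/`zero_point` fields stands in for it here.
import collections

QuantizationParam = collections.namedtuple(
    "QuantizationParam", ["scale", "zero_point"])

op = caffe2_pb2.OperatorDef()
op.type = "Int8Conv"
add_quantization_param_args_(op, QuantizationParam(scale=0.05, zero_point=128))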