def _LoadTest(keep_device, device_type, gpu_id, blobs, loadAll):
    """Run a ``Load`` operator and check the loaded blobs and their device.

    Relies on ``self``, ``tmp_folder``, ``dst_device_option`` and ``arrays``
    from the enclosing scope.

    Args:
        keep_device: Forwarded to the ``Load`` operator as ``keep_device``.
        device_type: Device type every loaded tensor is expected to report.
        gpu_id: Expected device id; only checked for GPU device types.
        blobs: Output blob names passed to the ``Load`` operator.
        loadAll: Forwarded to the ``Load`` operator as ``load_all``.
    """
    load_op = core.CreateOperator(
        "Load",
        [],
        blobs,
        absolute_path=1,
        db=str(tmp_folder / "db"),
        db_type=self._db_type,
        device_option=dst_device_option,
        keep_device=keep_device,
        load_all=loadAll,
    )
    self.assertTrue(workspace.RunOperatorOnce(load_op))

    for idx, expected in enumerate(arrays):
        blob_name = str(idx)

        # The blob must exist and match the reference array exactly.
        self.assertTrue(workspace.HasBlob(blob_name))
        loaded = workspace.FetchBlob(blob_name)
        self.assertEqual(loaded.dtype, expected.dtype)
        np.testing.assert_array_equal(
            workspace.FetchBlob(blob_name), expected)

        # Re-serialize the blob to inspect the device recorded in its proto.
        blob_proto = caffe2_pb2.BlobProto()
        blob_proto.ParseFromString(workspace.SerializeBlob(blob_name))
        self.assertTrue(blob_proto.HasField('tensor'))
        device_detail = blob_proto.tensor.device_detail
        self.assertEqual(device_detail.device_type, device_type)
        if core.IsGPUDeviceType(device_type):
            self.assertEqual(device_detail.device_id, gpu_id)
def _LoadTest(keep_device, device_type, gpu_id):
    """Run a ``Load`` operator and check the loaded blobs and their device.

    Relies on ``self``, ``tmp_folder``, ``dst_device_option`` and ``arrays``
    from the enclosing scope. One blob is loaded per entry of ``arrays``,
    named by its index.

    Args:
        keep_device: Forwarded to the ``Load`` operator as ``keep_device``.
        device_type: Device type every loaded tensor is expected to report.
        gpu_id: Expected CUDA GPU id; only checked when ``device_type`` is
            ``caffe2_pb2.CUDA``.
    """
    blob_names = [str(i) for i in range(len(arrays))]
    load_op = core.CreateOperator(
        "Load",
        [],
        blob_names,
        absolute_path=1,
        db=os.path.join(tmp_folder, "db"),
        db_type=self._db_type,
        device_option=dst_device_option,
        keep_device=keep_device,
    )
    self.assertTrue(workspace.RunOperatorOnce(load_op))

    for idx, expected in enumerate(arrays):
        blob_name = str(idx)

        # The blob must exist and match the reference array exactly.
        self.assertTrue(workspace.HasBlob(blob_name))
        loaded = workspace.FetchBlob(blob_name)
        self.assertEqual(loaded.dtype, expected.dtype)
        np.testing.assert_array_equal(
            workspace.FetchBlob(blob_name), expected)

        # Re-serialize the blob to inspect the device recorded in its proto.
        blob_proto = caffe2_pb2.BlobProto()
        blob_proto.ParseFromString(workspace.SerializeBlob(blob_name))
        self.assertTrue(blob_proto.HasField('tensor'))
        device_detail = blob_proto.tensor.device_detail
        self.assertEqual(device_detail.device_type, device_type)
        if device_type == caffe2_pb2.CUDA:
            self.assertEqual(device_detail.cuda_gpu_id, gpu_id)
os.system(
    'adb shell /data/local/tmp/speed_benchmark '                     # binary to execute
    '--init_net=/data/local/tmp/super_resolution_mobile_init.pb '    # mobile init_net
    '--net=/data/local/tmp/super_resolution_mobile_predict.pb '      # mobile predict_net
    '--input=9 '                                                     # name of our input image blob
    '--input_file=/data/local/tmp/input.blobproto '                  # serialized input image
    '--output_folder=/data/local/tmp '                               # destination folder for saving mobile output
    '--output=27,9 '                                                 # output blobs we are interested in
    '--iter=1 '                                                      # number of net iterations to execute
    '--caffe2_log_level=0 '
)

# get the model output from adb and save to a file
os.system('adb pull /data/local/tmp/27 ./output.blobproto')


# We can recover the output content and post-process the model using same steps as we followed earlier
blob_proto = caffe2_pb2.BlobProto()
# The pulled file is a binary-serialized protobuf: read it as bytes (text mode
# would hand ParseFromString a str, or fail while decoding) and close the
# handle as soon as the payload has been read.
with open('./output.blobproto', 'rb') as output_file:
    blob_proto.ParseFromString(output_file.read())
img_out = utils.Caffe2TensorToNumpyArray(blob_proto.tensor)
# Clamp to the valid 8-bit range before building the luma ("L" mode) image.
img_out_y = Image.fromarray(np.uint8((img_out[0, 0]).clip(0, 255)), mode='L')
# Merge the luma output with the Cb/Cr channels resized to the output size.
final_img = Image.merge(
    "YCbCr", [
        img_out_y,
        img_cb.resize(img_out_y.size, Image.BICUBIC),
        img_cr.resize(img_out_y.size, Image.BICUBIC),
    ]).convert("RGB")
final_img.save("./_static/img/cat_superres_mobile.jpg")

######################################################################
# Now, you can compare the image ``cat_superres.jpg`` (model output from
# pure caffe2 backend execution) and ``cat_superres_mobile.jpg`` (model
# output from mobile execution) and see that both the images look same. If
# they don't look same, something went wrong with execution on mobile and
def run_single_segms(
    net,
    image,
    target_size,
    pixel_means=PIXEL_MEANS_DEFAULT,
    pixel_stds=PIXEL_STDS_DEFAULT,
    rle_encode=True,
    max_size=1333,
):
    """Run a segmentation net on an adb-attached device for one image.

    The prepared input blobs and zero-filled output templates are serialized
    into a ``NetDef``, pushed to ``/data/local/tmp/input_output.pb``, and
    ``./nnapi_runner`` is executed on the device against ``init_net.pb`` and
    ``predict_net.pb`` in that directory. The resulting ``output_blobs.pb`` is
    pulled back and deserialized into the local workspace.

    Setting the ``USE_CAFFE2_REFERENCE`` environment variable to one of
    ``1``/``true``/``yes``/``on`` adds ``--use_caffe2_reference true`` to the
    runner command line.

    Args:
        net: Predict net; its ops select the bbox template dtype and its
            ``external_output`` names select the output templates.
        image: Input image; forwarded to ``utils2.prepare_blobs`` and its
            first two ``shape`` entries are passed to
            ``utils2.compute_segm_results``.
        target_size: Forwarded to ``utils2.prepare_blobs``.
        pixel_means: Forwarded to ``utils2.prepare_blobs``.
        pixel_stds: Forwarded to ``utils2.prepare_blobs``.
        rle_encode: Forwarded to ``utils2.compute_segm_results``.
        max_size: Forwarded to ``utils2.prepare_blobs``.

    Returns:
        A dict with keys ``"classids"``, ``"boxes"`` (box columns followed by
        a score column), ``"masks"`` and ``"im_masks"``. If running on the
        device or reading its outputs fails, the error is printed and empty
        (zero-row) results are returned.
    """
    inputs = utils2.prepare_blobs(
        image,
        target_size=target_size,
        max_size=max_size,
        pixel_means=pixel_means,
        pixel_stds=pixel_stds,
    )

    # Prepare inputs for AABB and Int8AABB operators
    im_info = inputs["im_info"]
    scale = im_info[0][2]
    inputs["im_infoq"] = np.rint(im_info[:, :2] * 8.0).astype(np.uint16)
    inputs["im_info2"] = im_info[:, :2]

    blob_names = []
    ser_blobs = []

    # Serialize inputs for remote device
    for k, v in inputs.items():
        workspace.FeedBlob(k, v)
        blob_names.append(k)
        ser_blobs.append(workspace.SerializeBlob(k))

    # Serialize output templates for remote device
    fully_quantized = any(op.type == "Int8AABBRoIProposals" for op in net.op)
    bbox_type = np.uint16 if fully_quantized else np.float32
    output_templates = {
        "score_nms": np.zeros((LIMIT,), np.float32),
        "bbox_nms": np.zeros((LIMIT, 4), bbox_type),
        "class_nms": np.zeros((LIMIT,), np.int32),
        "mask_fcn_probs": np.zeros((LIMIT, CLASSES, RES, RES), np.float32),
    }
    for out_name in net.external_output:
        # The template is fed under a scratch name so the real output blob is
        # not overwritten locally, but it is registered under the real name.
        fake_name = out_name + "_empty_template"
        blob_names.append(out_name)
        workspace.FeedBlob(fake_name, output_templates[out_name])
        ser_blobs.append(workspace.SerializeBlob(fake_name))

    # Package inputs and output templates
    inout_netdef = caffe2_pb2.NetDef()
    inout_netdef.arg.extend([
        utils.MakeArgument("blob_names", blob_names),
        utils.MakeArgument("ser_blobs", ser_blobs),
    ])

    # Send in/out to the remote device
    with tempfile.NamedTemporaryFile() as inout_file:
        inout_file.write(inout_netdef.SerializeToString())
        # Flush so adb reads the complete payload from disk.
        inout_file.flush()
        subprocess.check_call(
            ["adb", "push", inout_file.name, "/data/local/tmp/input_output.pb"])

    try:
        # Run the model
        use_caffe2 = (
            "--use_caffe2_reference true"
            if os.environ.get("USE_CAFFE2_REFERENCE") in ("1", "true", "yes", "on")
            else ""
        )
        # Passed as an argv list so no local shell is involved; adb receives
        # the device-side command line as a single argument.
        remote_cmd = (
            "cd /data/local/tmp ; GLOG_logtostderr=true GLOG_v=0 "
            "./nnapi_runner %s --init_net init_net.pb "
            "--predict_net predict_net.pb --inout_net input_output.pb "
            "--out_path output_blobs.pb" % use_caffe2
        )
        subprocess.check_call(["adb", "shell", remote_cmd])

        # Retrieve and deserialize outputs
        with tempfile.TemporaryDirectory() as tmpdir:
            output_file = os.path.join(tmpdir, "output_blobs.pb")
            subprocess.check_call(
                ["adb", "pull", "/data/local/tmp/output_blobs.pb", output_file])

            out_net = caffe2_pb2.NetDef()
            with open(output_file, "rb") as handle:
                out_net.ParseFromString(handle.read())

        all_outputs = utils.ArgsToDict(out_net.arg)["outputs"]
        for output in all_outputs:
            # Parse once only to learn the blob name, then hand the raw bytes
            # to the workspace.
            bp = caffe2_pb2.BlobProto()
            bp.ParseFromString(output)
            workspace.DeserializeBlob(bp.name, output)

        classids = workspace.FetchBlob("class_nms")
        scores = workspace.FetchBlob("score_nms")  # bbox scores, (R, )
        boxes = workspace.FetchBlob("bbox_nms")  # i.e., boxes, (R, 4*1)
        masks = workspace.FetchBlob("mask_fcn_probs")  # (R, cls, mask_dim, mask_dim)
        if boxes.dtype == np.uint16:
            # uint16 boxes are multiplied by 0.125 (1/8), matching the 8.0
            # factor used for "im_infoq" above.
            boxes = boxes.astype(np.float32) * 0.125
        # NOTE(review): the rescale is applied for every dtype here, as in
        # run_single_kpts — confirm it was not meant for uint16 boxes only.
        boxes /= scale
    except Exception as e:
        # Deliberate best-effort: report the failure and fall back to empty
        # results instead of aborting the caller.
        print(e)
        # may not detect anything at all
        R = 0
        scores = np.zeros((R,), dtype=np.float32)
        boxes = np.zeros((R, 4), dtype=np.float32)
        classids = np.zeros((R,), dtype=np.float32)
        masks = np.zeros((R, 1, 1, 1), dtype=np.float32)

    # NOTE(review): boxes are already divided by scale in the try block above;
    # confirm whether the remark below is stale.
    # included in the model
    # scale = inputs["im_info"][0][2]
    # boxes /= scale

    R = boxes.shape[0]
    im_masks = []
    if R > 0:
        im_dims = image.shape
        im_masks = utils2.compute_segm_results(
            masks, boxes, classids, im_dims[0], im_dims[1], rle_encode=rle_encode
        )

    # Append the scores as the last column of the boxes.
    boxes = np.column_stack((boxes, scores))

    ret = {"classids": classids, "boxes": boxes, "masks": masks, "im_masks": im_masks}
    return ret
def run_single_kpts(
    net,
    image,
    target_size,
    pixel_means=PIXEL_MEANS_DEFAULT,
    pixel_stds=PIXEL_STDS_DEFAULT,
    max_size=1333,
):
    """Run a keypoint net on an adb-attached device for one image.

    The prepared input blobs and zero-filled output templates are serialized
    into a ``NetDef``, pushed to ``/data/local/tmp/input_output.pb``, and
    ``./nnapi_runner`` is executed on the device against ``init_net.pb`` and
    ``predict_net.pb`` in that directory. The resulting ``output_blobs.pb`` is
    pulled back and deserialized into the local workspace.

    Setting the ``USE_CAFFE2_REFERENCE`` environment variable to one of
    ``1``/``true``/``yes``/``on`` adds ``--use_caffe2_reference true`` to the
    runner command line.

    Args:
        net: Predict net; its ops select the bbox template dtype and its
            ``external_output`` names select the output templates.
        image: Input image, forwarded to ``utils2.prepare_blobs``.
        target_size: Forwarded to ``utils2.prepare_blobs``.
        pixel_means: Forwarded to ``utils2.prepare_blobs``.
        pixel_stds: Forwarded to ``utils2.prepare_blobs``.
        max_size: Forwarded to ``utils2.prepare_blobs``.

    Returns:
        A tuple ``(boxes, xy_preds, classids)``. ``boxes`` holds the box
        columns divided by the image scale, followed by a score column.
        ``xy_preds`` has shape ``(num_rois, 4, num_keypoints)`` with channels
        x, y, keypoint score and a zero channel; only x and y are divided by
        the image scale. If running on the device or reading its outputs
        fails, the error is printed and empty (zero-row) results are returned.
    """
    inputs = utils2.prepare_blobs(
        image,
        target_size=target_size,
        max_size=max_size,
        pixel_means=pixel_means,
        pixel_stds=pixel_stds,
    )

    # Prepare inputs for AABB and Int8AABB operators
    im_info = inputs["im_info"]
    scale = im_info[0][2]
    inputs["im_infoq"] = np.rint(im_info[:, :2] * 8.0).astype(np.uint16)
    inputs["im_info2"] = im_info[:, :2]

    blob_names = []
    ser_blobs = []

    # Serialize inputs for remote device
    for k, v in inputs.items():
        workspace.FeedBlob(k, v)
        blob_names.append(k)
        ser_blobs.append(workspace.SerializeBlob(k))

    # Serialize output templates for remote device
    fully_quantized = any(op.type == "Int8AABBRoIProposals" for op in net.op)
    bbox_type = np.uint16 if fully_quantized else np.float32
    output_templates = {
        "score_nms": np.zeros((3,), np.float32),
        "keypoint_rois": np.zeros((3, 4), bbox_type),
        "keypoints_out": np.zeros((3, 17, 2), bbox_type),
        "class_nms": np.zeros((3,), np.int32),
        "keypoints_scores_out": np.zeros((3, 17), np.float32),
    }
    for out_name in net.external_output:
        # The template is fed under a scratch name so the real output blob is
        # not overwritten locally, but it is registered under the real name.
        fake_name = out_name + "_empty_template"
        blob_names.append(out_name)
        workspace.FeedBlob(fake_name, output_templates[out_name])
        ser_blobs.append(workspace.SerializeBlob(fake_name))

    # Package inputs and output templates
    inout_netdef = caffe2_pb2.NetDef()
    inout_netdef.arg.extend([
        utils.MakeArgument("blob_names", blob_names),
        utils.MakeArgument("ser_blobs", ser_blobs),
    ])

    # Send in/out to the remote device
    with tempfile.NamedTemporaryFile() as inout_file:
        inout_file.write(inout_netdef.SerializeToString())
        # Flush so adb reads the complete payload from disk.
        inout_file.flush()
        subprocess.check_call(
            ["adb", "push", inout_file.name, "/data/local/tmp/input_output.pb"])

    try:
        # Run the model
        use_caffe2 = (
            "--use_caffe2_reference true"
            if os.environ.get("USE_CAFFE2_REFERENCE") in ("1", "true", "yes", "on")
            else ""
        )
        # Passed as an argv list so no local shell is involved; adb receives
        # the device-side command line as a single argument.
        remote_cmd = (
            "cd /data/local/tmp ; GLOG_logtostderr=true GLOG_v=0 "
            "./nnapi_runner %s --init_net init_net.pb "
            "--predict_net predict_net.pb --inout_net input_output.pb "
            "--out_path output_blobs.pb" % use_caffe2
        )
        subprocess.check_call(["adb", "shell", remote_cmd])

        # Retrieve and deserialize outputs
        with tempfile.TemporaryDirectory() as tmpdir:
            output_file = os.path.join(tmpdir, "output_blobs.pb")
            subprocess.check_call(
                ["adb", "pull", "/data/local/tmp/output_blobs.pb", output_file])

            out_net = caffe2_pb2.NetDef()
            with open(output_file, "rb") as handle:
                out_net.ParseFromString(handle.read())

        all_outputs = utils.ArgsToDict(out_net.arg)["outputs"]
        for output in all_outputs:
            # Parse once only to learn the blob name, then hand the raw bytes
            # to the workspace.
            bp = caffe2_pb2.BlobProto()
            bp.ParseFromString(output)
            workspace.DeserializeBlob(bp.name, output)

        scores = workspace.FetchBlob("score_nms")
        boxes = workspace.FetchBlob("keypoint_rois")
        coords_preds = workspace.FetchBlob("keypoints_out")
        scores_preds = workspace.FetchBlob("keypoints_scores_out")
        classids = workspace.FetchBlob("class_nms")

        if boxes.dtype == np.uint16:
            # uint16 values are multiplied by 0.125 (1/8), matching the 8.0
            # factor used for "im_infoq" above.
            boxes = boxes.astype(np.float32) * 0.125

        # New output format of AABBRoIKeypoints:
        # - XY coordinates are [num_rois, num_keypoints, 2] array in keypoints_out
        # - Scores are [num_rois, num_keypoints] array in keypoints_scores_out
        if coords_preds.dtype == np.uint16:
            coords_preds = coords_preds.astype(np.float32) * 0.125
        assert coords_preds.shape[:2] == scores_preds.shape
        num_rois, num_keypoints = coords_preds.shape[:2]
        # Stack (x, y), score and a zero channel along the last axis, then
        # move the channel axis in front of the keypoint axis.
        xy_preds = np.concatenate(
            (
                coords_preds,
                scores_preds.reshape([num_rois, num_keypoints, 1]),
                np.zeros([num_rois, num_keypoints, 1], dtype=np.float32),
            ),
            axis=2,
        )
        assert xy_preds.shape == (num_rois, num_keypoints, 4)
        xy_preds = np.swapaxes(xy_preds, 1, 2)
        assert xy_preds.shape == (num_rois, 4, num_keypoints)

    except Exception as e:
        # Deliberate best-effort: report the failure and fall back to empty
        # results instead of aborting the caller.
        print(e)
        # may not detect anything at all
        R = 0
        scores = np.zeros((R,), dtype=np.float32)
        boxes = np.zeros((R, 4), dtype=np.float32)
        xy_preds = np.zeros((R, 4, 1), dtype=np.float32)
        classids = np.zeros((R,), dtype=np.float32)

    # Undo the image resize on the boxes and on the keypoint coordinates.
    boxes /= scale
    # Only channels 0 and 1 (x, y) are coordinates; channel 2 holds the
    # keypoint scores and must not be divided by the image scale.
    xy_preds[:, :2, :] /= scale

    # Append the scores as the last column of the boxes.
    boxes = np.column_stack((boxes, scores))

    return boxes, xy_preds, classids