def run_inference(runtime, net_id, images, labels, input_binding_info, output_binding_info):
    """Run inference over every loaded image and print the top-5 classes.

    Args:
        runtime: Arm NN runtime.
        net_id: Network ID.
        images: Loaded images to run inference on.
        labels: Loaded labels per class.
        input_binding_info: Network input information.
        output_binding_info: Network output information.

    Returns:
        None
    """
    # Output tensors can be reused across every image
    output_tensors = ann.make_output_tensors([output_binding_info])

    for idx, image in enumerate(images):
        # Wrap the current image in a fresh input tensor
        input_tensors = ann.make_input_tensors([input_binding_info], [image])

        # Run inference
        print("Running inference({0}) ...".format(idx))
        runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)

        # Pull the raw scores out and rank the classes, highest score first
        out_tensor = ann.workload_tensors_to_ndarray(output_tensors)[0]
        ranked = np.argsort(out_tensor)[::-1]
        print_top_n(5, ranked, labels, out_tensor)
def preprocess(frame: np.ndarray, input_binding_info: tuple):
    """Resize a frame, swap its channels and cast its data type so that it
    matches the model input layer, then wrap it in an input tensor.

    Args:
        frame: Captured frame from video.
        input_binding_info: Contains shape and data type of model input layer.

    Returns:
        Input tensor.
    """
    # Swap channels (BGR -> RGB) and scale to the model resolution
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    scaled_frame = resize_with_aspect_ratio(rgb_frame, input_binding_info)

    # Pick the dtype the model expects
    tensor_info = input_binding_info[1]
    if tensor_info.GetDataType() == ann.DataType_Float32:
        data_type = np.float32
    else:
        data_type = np.uint8

    # Add the batch dimension and confirm the result matches the model shape
    batched_frame = np.expand_dims(np.asarray(scaled_frame, dtype=data_type), axis=0)
    assert batched_frame.shape == tuple(tensor_info.GetShape())

    return ann.make_input_tensors([input_binding_info], [batched_frame])
def mock_model_runtime(shared_data_folder):
    """Yield a loaded CpuRef runtime plus ready-made input/output tensors
    for 'mock_model.tflite' found under *shared_data_folder*.
    """
    parser = ann.ITfLiteParser()
    model_path = os.path.join(shared_data_folder, 'mock_model.tflite')
    network = parser.CreateNetworkFromBinaryFile(model_path)
    graph_id = 0

    input_binding_info = parser.GetNetworkInputBindingInfo(graph_id, "input_1")
    input_tensor_data = np.load(os.path.join(shared_data_folder, 'tflite_parser/input_lite.npy'))

    preferred_backends = [ann.BackendId('CpuRef')]
    runtime = ann.IRuntime(ann.CreationOptions())

    opt_network, messages = ann.Optimize(network,
                                         preferred_backends,
                                         runtime.GetDeviceSpec(),
                                         ann.OptimizerOptions())
    print(messages)

    net_id, messages = runtime.LoadNetwork(opt_network)
    print(messages)

    input_tensors = ann.make_input_tensors([input_binding_info], [input_tensor_data])

    # Collect one binding per subgraph output name
    outputs_binding_info = [parser.GetNetworkOutputBindingInfo(graph_id, name)
                            for name in parser.GetSubgraphOutputTensorNames(graph_id)]
    output_tensors = ann.make_output_tensors(outputs_binding_info)

    yield runtime, net_id, input_tensors, output_tensors
def test_caffe_parser_end_to_end(shared_data_folder):
    """Parse 'mock_model.caffemodel', run inference on CpuAcc/CpuRef and
    compare the result against a golden output file.

    Args:
        shared_data_folder: Directory containing the model and .npy fixtures.
    """
    # Fix: the original read `parser = ann.ICaffeParser = ann.ICaffeParser()`,
    # which rebound the *module attribute* ann.ICaffeParser to this instance,
    # clobbering the class for any code that constructs a parser afterwards.
    parser = ann.ICaffeParser()

    # Load the network specifying the inputs and outputs
    input_name = "Placeholder"
    tensor_shape = {input_name: ann.TensorShape((1, 1, 28, 28))}
    requested_outputs = ["output"]
    network = parser.CreateNetworkFromBinaryFile(
        os.path.join(shared_data_folder, 'mock_model.caffemodel'),
        tensor_shape, requested_outputs)

    # Specify preferred backend; earlier entries have higher preference
    preferred_backends = [ann.BackendId('CpuAcc'), ann.BackendId('CpuRef')]

    input_binding_info = parser.GetNetworkInputBindingInfo(input_name)

    options = ann.CreationOptions()
    runtime = ann.IRuntime(options)

    opt_network, messages = ann.Optimize(network, preferred_backends,
                                         runtime.GetDeviceSpec(),
                                         ann.OptimizerOptions())
    assert 0 == len(messages)

    net_id, messages = runtime.LoadNetwork(opt_network)
    assert "" == messages

    # Load test image data stored in input_caffe.npy
    input_tensor_data = np.load(
        os.path.join(shared_data_folder,
                     'caffe_parser/input_caffe.npy')).astype(np.float32)
    input_tensors = ann.make_input_tensors([input_binding_info],
                                           [input_tensor_data])

    # Load output binding info and build the output tensors
    outputs_binding_info = []
    for output_name in requested_outputs:
        outputs_binding_info.append(
            parser.GetNetworkOutputBindingInfo(output_name))
    output_tensors = ann.make_output_tensors(outputs_binding_info)

    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)
    output_vectors = ann.workload_tensors_to_ndarray(output_tensors)

    # Load golden output file for result comparison.
    expected_output = np.load(
        os.path.join(shared_data_folder,
                     'caffe_parser/golden_output_caffe.npy'))

    # Check that output matches golden output to 4 decimal places (there are
    # slight rounding differences after this)
    np.testing.assert_almost_equal(output_vectors[0], expected_output, 4)
def test_tflite_parser_end_to_end(shared_data_folder):
    """Parse 'mock_model.tflite', run it on CpuAcc/CpuRef and compare the
    result against a golden output file.
    """
    parser = ann.ITfLiteParser()
    network = parser.CreateNetworkFromBinaryFile(
        os.path.join(shared_data_folder, "mock_model.tflite"))

    # Bind the first input of the last subgraph
    graph_id = parser.GetSubgraphCount() - 1
    input_names = parser.GetSubgraphInputTensorNames(graph_id)
    input_binding_info = parser.GetNetworkInputBindingInfo(graph_id, input_names[0])
    output_names = parser.GetSubgraphOutputTensorNames(graph_id)

    # Earlier backends in the list have higher preference
    preferred_backends = [ann.BackendId('CpuAcc'), ann.BackendId('CpuRef')]
    runtime = ann.IRuntime(ann.CreationOptions())

    opt_network, messages = ann.Optimize(network, preferred_backends,
                                         runtime.GetDeviceSpec(),
                                         ann.OptimizerOptions())
    assert 0 == len(messages)

    net_id, messages = runtime.LoadNetwork(opt_network)
    assert "" == messages

    # Load test image data stored in input_lite.npy
    input_tensor_data = np.load(
        os.path.join(shared_data_folder, 'tflite_parser/input_lite.npy'))
    input_tensors = ann.make_input_tensors([input_binding_info], [input_tensor_data])

    # Build one (tensor id, Tensor) pair per subgraph output
    output_tensors = []
    for output_name in output_names:
        bind_info = parser.GetNetworkOutputBindingInfo(graph_id, output_name)
        output_tensors.append((bind_info[0], ann.Tensor(bind_info[1])))

    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)

    output_vectors = [pair[1].get_memory_area() for pair in output_tensors]

    # Load golden output file for result comparison.
    expected_outputs = np.load(
        os.path.join(shared_data_folder, 'tflite_parser/golden_output_lite.npy'))

    # Check that output matches golden output
    assert (expected_outputs == output_vectors[0]).all()
def run(self):
    """Classify self.image with the TfLite model at self.model and print
    whether it depicts fire.

    Side effects: prints elapsed time and the classification to stdout.
    """
    self.start()

    # Load the image and normalise it to the model's 128x128 float input
    image = cv2.imread(self.image)
    image = cv2.resize(image, (128, 128))
    image = np.array(image, dtype=np.float32) / 255.0

    # ONNX, Caffe and TF parsers also exist.
    parser = ann.ITfLiteParser()
    network = parser.CreateNetworkFromBinaryFile(self.model)

    graph_id = 0
    input_names = parser.GetSubgraphInputTensorNames(graph_id)
    input_binding_info = parser.GetNetworkInputBindingInfo(
        graph_id, input_names[0])

    # Create a runtime object that will perform inference.
    options = ann.CreationOptions()
    runtime = ann.IRuntime(options)

    # Backend choices earlier in the list have higher preference.
    preferredBackends = [ann.BackendId('CpuAcc'), ann.BackendId('CpuRef')]
    opt_network, messages = ann.Optimize(network, preferredBackends,
                                         runtime.GetDeviceSpec(),
                                         ann.OptimizerOptions())

    # Load the optimized network into the runtime.
    net_id, _ = runtime.LoadNetwork(opt_network)

    # Create an inputTensor for inference.
    input_tensors = ann.make_input_tensors([input_binding_info], [image])

    # Get output binding information for an output layer by using the layer
    # name.
    output_names = parser.GetSubgraphOutputTensorNames(graph_id)
    # Fix: bind against graph_id instead of a hard-coded 0 so this stays
    # correct if the graph id above ever changes.
    output_binding_info = parser.GetNetworkOutputBindingInfo(
        graph_id, output_names[0])
    output_tensors = ann.make_output_tensors([output_binding_info])

    start = timer()
    # Fix: enqueue against the id returned by LoadNetwork rather than
    # assuming the network was loaded with id 0.
    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)
    end = timer()
    print('Elapsed time is ', (end - start) * 1000, 'ms')

    output, output_tensor_info = ann.from_output_tensor(
        output_tensors[0][1])
    print(f"Output tensor info: {output_tensor_info}")
    print(output)
    j = np.argmax(output)
    if j == 0:
        print("Non-Fire")
    else:
        print("Fire")
def test_deserializer_end_to_end(shared_data_folder):
    """Deserialize 'mock_model.armnn', run it on CpuAcc/CpuRef and compare
    the result against a golden output file.
    """
    parser = ann.IDeserializer()
    network = parser.CreateNetworkFromBinary(
        os.path.join(shared_data_folder, "mock_model.armnn"))

    # use 0 as a dummy value for layer_id, which is unused in the actual implementation
    layer_id = 0
    input_name = 'input_1'
    output_name = 'dense/Softmax'

    input_binding_info = parser.GetNetworkInputBindingInfo(layer_id, input_name)

    # Earlier backends in the list have higher preference
    preferred_backends = [ann.BackendId('CpuAcc'), ann.BackendId('CpuRef')]
    runtime = ann.IRuntime(ann.CreationOptions())

    opt_network, messages = ann.Optimize(network, preferred_backends,
                                         runtime.GetDeviceSpec(),
                                         ann.OptimizerOptions())
    assert 0 == len(messages)

    net_id, messages = runtime.LoadNetwork(opt_network)
    assert "" == messages

    # Load test image data stored in input_lite.npy
    input_tensor_data = np.load(
        os.path.join(shared_data_folder, 'deserializer/input_lite.npy'))
    input_tensors = ann.make_input_tensors([input_binding_info], [input_tensor_data])

    # Build the single (tensor id, Tensor) output pair
    out_bind_info = parser.GetNetworkOutputBindingInfo(layer_id, output_name)
    output_tensors = [(out_bind_info[0], ann.Tensor(out_bind_info[1]))]

    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)

    output_vectors = [pair[1].get_memory_area() for pair in output_tensors]

    # Load golden output file for result comparison.
    expected_outputs = np.load(
        os.path.join(shared_data_folder, 'deserializer/golden_output_lite.npy'))

    # Check that output matches golden output
    assert (expected_outputs == output_vectors[0]).all()
def test_make_input_tensors(get_tensor_info_input):
    """make_input_tensors wraps each uint8 ndarray in a ConstTensor whose
    info matches the supplied binding info.
    """
    input_tensor_info = get_tensor_info_input

    # One random uint8 payload per binding, sized to the tensor's element count
    input_data = [
        np.random.randint(0, 255, size=(1, info.GetNumElements())).astype(np.uint8)
        for _, info in input_tensor_info
    ]

    input_tensors = ann.make_input_tensors(input_tensor_info, input_data)
    assert len(input_tensors) == 1

    for tensor, tensor_info in zip(input_tensors, input_tensor_info):
        # Because we created ConstTensor function, we cannot check type directly.
        assert type(tensor[1]).__name__ == 'ConstTensor'
        assert str(tensor[1].GetInfo()) == str(tensor_info[1])
def test_onnx_parser_end_to_end(shared_data_folder):
    """Parse 'mock_model.onnx', run inference on CpuAcc/CpuRef and compare
    the result against a golden output file.

    Args:
        shared_data_folder: Directory containing the model and .npy fixtures.
    """
    # Fix: the original read `parser = ann.IOnnxParser = ann.IOnnxParser()`,
    # which rebound the *module attribute* ann.IOnnxParser to this instance,
    # clobbering the class for any code that constructs a parser afterwards.
    parser = ann.IOnnxParser()
    network = parser.CreateNetworkFromBinaryFile(
        os.path.join(shared_data_folder, 'mock_model.onnx'))

    # load test image data stored in input_onnx.npy
    input_binding_info = parser.GetNetworkInputBindingInfo("input")
    input_tensor_data = np.load(
        os.path.join(shared_data_folder,
                     'onnx_parser/input_onnx.npy')).astype(np.float32)

    options = ann.CreationOptions()
    runtime = ann.IRuntime(options)

    # Earlier backends in the list have higher preference
    preferred_backends = [ann.BackendId('CpuAcc'), ann.BackendId('CpuRef')]
    opt_network, messages = ann.Optimize(network, preferred_backends,
                                         runtime.GetDeviceSpec(),
                                         ann.OptimizerOptions())
    assert 0 == len(messages)

    net_id, messages = runtime.LoadNetwork(opt_network)
    assert "" == messages

    input_tensors = ann.make_input_tensors([input_binding_info],
                                           [input_tensor_data])
    output_tensors = ann.make_output_tensors(
        [parser.GetNetworkOutputBindingInfo("output")])

    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)
    output = ann.workload_tensors_to_ndarray(output_tensors)

    # Load golden output file for result comparison.
    golden_output = np.load(
        os.path.join(shared_data_folder, 'onnx_parser/golden_output_onnx.npy'))

    # Check that output matches golden output to 4 decimal places (there are
    # slight rounding differences after this)
    np.testing.assert_almost_equal(output[0], golden_output, decimal=4)
def prepare_input_tensors(audio_data, input_binding_info, mfcc_preprocessor):
    """Extract MFCC features from a block of audio, quantize them when the
    model input is not float32, and wrap them in Arm NN input tensors.

    Args:
        audio_data: The audio data to process.
        input_binding_info: The model input binding info.
        mfcc_preprocessor: The MFCC preprocessor instance.

    Returns:
        input_tensors: the prepared input tensors, ready to be consumed by
            the ArmNN NetworkExecutor.
    """
    features = mfcc_preprocessor.extract_features(audio_data)

    # Quantized models take integer input rather than raw float features
    if input_binding_info[1].GetDataType() != ann.DataType_Float32:
        features = quantize_input(features, input_binding_info)

    return ann.make_input_tensors([input_binding_info], [features])
def test_make_input_tensors_fp16(get_tensor_info_input):
    """ConstTensor creation with float16 payloads."""
    input_tensor_info = get_tensor_info_input
    input_data = []
    for _, tensor_info in input_tensor_info:
        values = np.random.randint(0, 255, size=(1, tensor_info.GetNumElements()))
        input_data.append(values.astype(np.float16))
        # Switch the binding info itself over to float16
        tensor_info.SetDataType(ann.DataType_Float16)

    input_tensors = ann.make_input_tensors(input_tensor_info, input_data)
    assert len(input_tensors) == 1

    for tensor, tensor_info in zip(input_tensors, input_tensor_info):
        # Because we created ConstTensor function, we cannot check type directly.
        assert type(tensor[1]).__name__ == 'ConstTensor'
        assert str(tensor[1].GetInfo()) == str(tensor_info[1])
        assert tensor[1].GetDataType() == ann.DataType_Float16
        assert tensor[1].GetNumElements() == 28 * 28 * 1
        # Each float16 element occupies two bytes
        assert tensor[1].GetNumBytes() == (28 * 28 * 1) * 2
# Create a runtime object that will perform inference.
options = ann.CreationOptions()
runtime = ann.IRuntime(options)

# Backend choices earlier in the list have higher preference.
preferredBackends = [ann.BackendId('CpuAcc'), ann.BackendId('CpuRef')]
opt_network, messages = ann.Optimize(network, preferredBackends,
                                     runtime.GetDeviceSpec(),
                                     ann.OptimizerOptions())

# Load the optimized network into the runtime.
net_id, _ = runtime.LoadNetwork(opt_network)
print(f"Loaded network, id={net_id}")

# Create an inputTensor for inference.
input_tensors = ann.make_input_tensors([input_binding_info], [image])

# Get output binding information for an output layer by using the layer name.
output_names = parser.GetSubgraphOutputTensorNames(graph_id)
# Fix: bind against graph_id instead of a hard-coded 0 so the binding follows
# the graph used elsewhere in this script.
output_binding_info = parser.GetNetworkOutputBindingInfo(graph_id, output_names[0])
output_tensors = ann.make_output_tensors([output_binding_info])

# Fix: enqueue against the id returned by LoadNetwork rather than assuming
# the network was loaded with id 0.
runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)
results = ann.workload_tensors_to_ndarray(output_tensors)
print(results[0])
print(output_tensors[0][1])

# Index of the highest score decides the classification.
j = np.argmax(results[0])
if j == 0:
    print("Non-Fire")
else:
    print("Fire")