def run_inference(runtime, net_id, images, labels, input_binding_info, output_binding_info):
    """Runs inference over each loaded image and prints the top-5 results.

    Args:
        runtime: Arm NN runtime
        net_id: Network ID
        images: Loaded images to run inference on
        labels: Loaded labels per class
        input_binding_info: Network input information
        output_binding_info: Network output information

    Returns:
        None
    """
    # The output tensors can be created once and reused for every image.
    out_tensors = ann.make_output_tensors([output_binding_info])
    for index, image in enumerate(images):
        # Wrap the current image as the network input.
        in_tensors = ann.make_input_tensors([input_binding_info], [image])

        print("Running inference({0}) ...".format(index))
        runtime.EnqueueWorkload(net_id, in_tensors, out_tensors)

        # Pull the raw scores out and rank classes from best to worst.
        scores = ann.workload_tensors_to_ndarray(out_tensors)[0]
        ranked = np.argsort(scores)[::-1]
        print_top_n(5, ranked, labels, scores)
def test_workload_tensors_to_ndarray(get_tensor_info_output):
    """Check that each ndarray returned by workload_tensors_to_ndarray has the
    shape and element count reported by the corresponding output TensorInfo.

    Args:
        get_tensor_info_output: Fixture supplying output binding info list.
    """
    output_binding_info = get_tensor_info_output
    output_tensors = ann.make_output_tensors(output_binding_info)

    data = ann.workload_tensors_to_ndarray(output_tensors)

    # Iterate arrays and tensors in lockstep instead of indexing with
    # range(len(...)); each output tensor is a (binding_id, tensor) pair.
    for ndarray, (_, tensor) in zip(data, output_tensors):
        assert ndarray.shape == tuple(tensor.GetShape())
        assert ndarray.size == tensor.GetNumElements()
def test_caffe_parser_end_to_end(shared_data_folder):
    """End-to-end: parse a Caffe mock model, run inference on CpuAcc/CpuRef,
    and compare the output against a golden reference.

    Args:
        shared_data_folder: Fixture path to the shared test-data directory.
    """
    # BUG FIX: the original `parser = ann.ICaffeParser = ann.ICaffeParser()`
    # rebound the ICaffeParser CLASS attribute on the ann module to an
    # instance, clobbering it for every later user of the module.
    parser = ann.ICaffeParser()

    # Load the network specifying the inputs and outputs
    input_name = "Placeholder"
    tensor_shape = {input_name: ann.TensorShape((1, 1, 28, 28))}
    requested_outputs = ["output"]

    network = parser.CreateNetworkFromBinaryFile(
        os.path.join(shared_data_folder, 'mock_model.caffemodel'),
        tensor_shape, requested_outputs)

    # Specify preferred backend
    preferred_backends = [ann.BackendId('CpuAcc'), ann.BackendId('CpuRef')]

    input_binding_info = parser.GetNetworkInputBindingInfo(input_name)

    options = ann.CreationOptions()
    runtime = ann.IRuntime(options)

    opt_network, messages = ann.Optimize(network, preferred_backends,
                                         runtime.GetDeviceSpec(),
                                         ann.OptimizerOptions())
    assert 0 == len(messages)

    net_id, messages = runtime.LoadNetwork(opt_network)
    assert "" == messages

    # Load test image data stored in input_caffe.npy
    input_tensor_data = np.load(
        os.path.join(shared_data_folder,
                     'caffe_parser/input_caffe.npy')).astype(np.float32)
    input_tensors = ann.make_input_tensors([input_binding_info],
                                           [input_tensor_data])

    # Load output binding info and make output tensors
    outputs_binding_info = []
    for output_name in requested_outputs:
        outputs_binding_info.append(
            parser.GetNetworkOutputBindingInfo(output_name))
    output_tensors = ann.make_output_tensors(outputs_binding_info)

    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)
    output_vectors = ann.workload_tensors_to_ndarray(output_tensors)

    # Load golden output file for result comparison.
    expected_output = np.load(
        os.path.join(shared_data_folder,
                     'caffe_parser/golden_output_caffe.npy'))

    # Check that output matches golden output to 4 decimal places (there are slight rounding differences after this)
    np.testing.assert_almost_equal(output_vectors[0], expected_output, 4)
def test_aarch64_inference_results(mock_model_runtime):
    """Run the mock model once and verify the output matches expected values.

    Args:
        mock_model_runtime: Fixture tuple of
            (runtime, net_id, input_tensors, output_tensors).
    """
    runtime = mock_model_runtime[0]
    net_id = mock_model_runtime[1]
    input_tensors = mock_model_runtime[2]
    output_tensors = mock_model_runtime[3]

    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)

    output_vectors = ann.workload_tensors_to_ndarray(output_tensors)

    expected_results = np.array([[4, 85, 108, 29, 8, 16, 0, 2, 5, 0]])

    # BUG FIX: the original `output.all() == expected.all()` compares two
    # scalar booleans, which passes for almost any data. Compare the
    # arrays element-wise instead.
    for output, expected in zip(output_vectors, expected_results):
        assert np.array_equal(output, expected)
def execute_network(input_tensors: list, output_tensors: list, runtime, net_id: int) -> List[np.ndarray]:
    """
    Executes inference for the loaded network.

    Args:
        input_tensors: The input frame tensor.
        output_tensors: The output tensor from output node.
        runtime: Runtime context for executing inference.
        net_id: Unique ID of the network to run.

    Returns:
        list: Inference results as a list of ndarrays.
    """
    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)
    return ann.workload_tensors_to_ndarray(output_tensors)
def test_multiple_inference_runs_yield_same_result(count, mock_model_runtime):
    """
    Test that results remain consistent among multiple runs of the same inference.

    Args:
        count: Number of times to repeat the inference.
        mock_model_runtime: Fixture tuple of
            (runtime, net_id, input_tensors, output_tensors).
    """
    runtime = mock_model_runtime[0]
    net_id = mock_model_runtime[1]
    input_tensors = mock_model_runtime[2]
    output_tensors = mock_model_runtime[3]

    expected_results = np.array([[4, 85, 108, 29, 8, 16, 0, 2, 5, 0]])

    for _ in range(count):
        runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)

        output_vectors = ann.workload_tensors_to_ndarray(output_tensors)

        # BUG FIX: `output.all() == expected.all()` only compares two
        # booleans and passes for nearly any data. Compare element-wise.
        for output, expected in zip(output_vectors, expected_results):
            assert np.array_equal(output, expected)
def test_onnx_parser_end_to_end(shared_data_folder):
    """End-to-end: parse an ONNX mock model, run inference on CpuAcc/CpuRef,
    and compare the output against a golden reference.

    Args:
        shared_data_folder: Fixture path to the shared test-data directory.
    """
    # BUG FIX: the original `parser = ann.IOnnxParser = ann.IOnnxParser()`
    # rebound the IOnnxParser CLASS attribute on the ann module to an
    # instance, clobbering it for every later user of the module.
    parser = ann.IOnnxParser()

    network = parser.CreateNetworkFromBinaryFile(
        os.path.join(shared_data_folder, 'mock_model.onnx'))

    # load test image data stored in input_onnx.npy
    input_binding_info = parser.GetNetworkInputBindingInfo("input")
    input_tensor_data = np.load(
        os.path.join(shared_data_folder,
                     'onnx_parser/input_onnx.npy')).astype(np.float32)

    options = ann.CreationOptions()
    runtime = ann.IRuntime(options)

    preferred_backends = [ann.BackendId('CpuAcc'), ann.BackendId('CpuRef')]
    opt_network, messages = ann.Optimize(network, preferred_backends,
                                         runtime.GetDeviceSpec(),
                                         ann.OptimizerOptions())
    assert 0 == len(messages)

    net_id, messages = runtime.LoadNetwork(opt_network)
    assert "" == messages

    input_tensors = ann.make_input_tensors([input_binding_info],
                                           [input_tensor_data])
    output_tensors = ann.make_output_tensors(
        [parser.GetNetworkOutputBindingInfo("output")])

    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)

    output = ann.workload_tensors_to_ndarray(output_tensors)

    # Load golden output file for result comparison.
    golden_output = np.load(
        os.path.join(shared_data_folder,
                     'onnx_parser/golden_output_onnx.npy'))

    # Check that output matches golden output to 4 decimal places (there are slight rounding differences after this)
    np.testing.assert_almost_equal(output[0], golden_output, decimal=4)
# Create a runtime object that will perform inference.
options = ann.CreationOptions()
runtime = ann.IRuntime(options)

# Backend choices earlier in the list have higher preference.
preferredBackends = [ann.BackendId('CpuAcc'), ann.BackendId('CpuRef')]
opt_network, messages = ann.Optimize(network, preferredBackends,
                                     runtime.GetDeviceSpec(),
                                     ann.OptimizerOptions())

# Load the optimized network into the runtime.
net_id, _ = runtime.LoadNetwork(opt_network)
print(f"Loaded network, id={net_id}")

# Create an inputTensor for inference.
input_tensors = ann.make_input_tensors([input_binding_info], [image])

# Get output binding information for an output layer by using the layer name.
output_names = parser.GetSubgraphOutputTensorNames(graph_id)
output_binding_info = parser.GetNetworkOutputBindingInfo(0, output_names[0])
output_tensors = ann.make_output_tensors([output_binding_info])

# BUG FIX: enqueue on the id returned by LoadNetwork instead of a
# hard-coded 0, which only works by coincidence when this is the first
# (and only) network loaded.
runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)
results = ann.workload_tensors_to_ndarray(output_tensors)
print(results[0])
print(output_tensors[0][1])

# Class 0 is "Non-Fire"; anything else reports "Fire".
j = np.argmax(results[0])
if j == 0:
    print("Non-Fire")
else:
    print("Fire")
options = ann.CreationOptions()
rt = ann.IRuntime(options)

preferredBackends = [
    ann.BackendId('CpuAcc'),
    ann.BackendId('CpuRef')]
opt_network, _ = ann.Optimize(network, preferredBackends,
                              rt.GetDeviceSpec(), ann.OptimizerOptions())
net_id, _ = rt.LoadNetwork(opt_network)

# Bind the first subgraph input to the preprocessed image data.
input_names = parser.GetSubgraphInputTensorNames(0)
input_binding_info = parser.GetNetworkInputBindingInfo(0, input_names[0])
input_tensors = ann.make_input_tensors([input_binding_info], [image_data])

output_names = parser.GetSubgraphOutputTensorNames(0)
output_binding_info = parser.GetNetworkOutputBindingInfo(0, output_names[0])
output_tensors = ann.make_output_tensors([output_binding_info])

# Time `repeat` inference runs (milliseconds per run).
repeat = 10
numpy_time = np.zeros(repeat)
for i in range(repeat):
    start_time = time.time()
    # BUG FIX: enqueue on the id returned by LoadNetwork instead of a
    # hard-coded 0, which only works when this is the first network loaded.
    rt.EnqueueWorkload(net_id, input_tensors, output_tensors)  # Run inference
    numpy_time[i] = (time.time() - start_time) * 1000

out_tensor = ann.workload_tensors_to_ndarray(output_tensors)[0][0]

print("armnn MobileNet v2 quant %-19s (%s)" %
      ("%.2f ms" % np.mean(numpy_time), "%.2f ms" % np.std(numpy_time)))
def test_add_constant_layer_to_fully_connected():
    # Builds a 2-input fully-connected network whose weights and bias are
    # supplied via AddConstantLayer (input slots 1 and 2 of the FC layer),
    # runs it on CpuRef, and checks the result against a hand-computed
    # expected output.

    # Network dimensions: two 5-element input vectors, two 3-element outputs.
    inputWidth = 1
    inputHeight = 1
    inputChannels = 5
    inputNum = 2

    outputChannels = 3
    outputNum = 2

    inputShape = (inputNum, inputChannels, inputHeight, inputWidth)
    outputShape = (outputNum, outputChannels)
    weightsShape = (inputChannels, outputChannels)
    biasShape = (outputChannels, )

    # NOTE: `input` shadows the builtin of the same name; left unchanged here.
    input = np.array([[1.0, 2.0, 3.0, 4.0, 5.0], [5.0, 4.0, 3.0, 2.0, 1.0]],
                     dtype=np.float32)

    weights = np.array(
        [[.5, 2., .5], [.5, 2., 1.], [.5, 2., 2.], [.5, 2., 3.],
         [.5, 2., 4.]],
        dtype=np.float32)

    biasValues = np.array([10, 20, 30], dtype=np.float32)

    # Expected output spelled out term-by-term: each entry is the dot product
    # of an input row with a weights column, plus the matching bias.
    expectedOutput = np.array([[
        0.5 + 1.0 + 1.5 + 2.0 + 2.5 + biasValues[0],
        2.0 + 4.0 + 6.0 + 8.0 + 10. + biasValues[1],
        0.5 + 2.0 + 6.0 + 12. + 20. + biasValues[2]
    ], [
        2.5 + 2.0 + 1.5 + 1.0 + 0.5 + biasValues[0],
        10.0 + 8.0 + 6.0 + 4.0 + 2. + biasValues[1],
        2.5 + 4.0 + 6.0 + 6. + 4. + biasValues[2]
    ]], dtype=np.float32)

    network = ann.INetwork()

    # The trailing (0, 0, True) arguments mark the tensor info as a constant
    # tensor (quantization scale/offset 0, isConstant=True).
    input_info = ann.TensorInfo(ann.TensorShape(inputShape),
                                ann.DataType_Float32, 0, 0, True)
    input_tensor = ann.ConstTensor(input_info, input)
    input_layer = network.AddInputLayer(0, "input")

    # Weights and bias enter the graph as constant layers rather than as
    # members of the FullyConnected descriptor.
    w_info = ann.TensorInfo(ann.TensorShape(weightsShape),
                            ann.DataType_Float32, 0, 0, True)
    w_tensor = ann.ConstTensor(w_info, weights)
    w_layer = network.AddConstantLayer(w_tensor, "weights")

    b_info = ann.TensorInfo(ann.TensorShape(biasShape),
                            ann.DataType_Float32, 0, 0, True)
    b_tensor = ann.ConstTensor(b_info, biasValues)
    b_layer = network.AddConstantLayer(b_tensor, "bias")

    fc_descriptor = ann.FullyConnectedDescriptor()
    fc_descriptor.m_BiasEnabled = True
    fc_descriptor.m_ConstantWeights = True
    fully_connected = network.AddFullyConnectedLayer(fc_descriptor, "fc")

    output_info = ann.TensorInfo(ann.TensorShape(outputShape),
                                 ann.DataType_Float32)
    # NOTE(review): the backing buffer is np.zeros([1, 1]) while output_info
    # describes a (2, 3) tensor — presumably pyarmnn sizes the Tensor from
    # output_info rather than the ndarray; confirm against the Tensor binding.
    output_tensor = ann.Tensor(output_info, np.zeros([1, 1],
                                                     dtype=np.float32))
    output = network.AddOutputLayer(0, "output")

    # Wire the graph: input -> FC slot 0, weights -> slot 1, bias -> slot 2,
    # FC output -> output layer.
    input_layer.GetOutputSlot(0).Connect(fully_connected.GetInputSlot(0))
    w_layer.GetOutputSlot(0).Connect(fully_connected.GetInputSlot(1))
    b_layer.GetOutputSlot(0).Connect(fully_connected.GetInputSlot(2))
    fully_connected.GetOutputSlot(0).Connect(output.GetInputSlot(0))

    # Every output slot needs its TensorInfo set before optimization.
    input_layer.GetOutputSlot(0).SetTensorInfo(input_info)
    w_layer.GetOutputSlot(0).SetTensorInfo(w_info)
    b_layer.GetOutputSlot(0).SetTensorInfo(b_info)
    fully_connected.GetOutputSlot(0).SetTensorInfo(output_info)

    preferred_backends = [ann.BackendId('CpuRef')]
    options = ann.CreationOptions()
    runtime = ann.IRuntime(options)
    opt_network, messages = ann.Optimize(network, preferred_backends,
                                         runtime.GetDeviceSpec(),
                                         ann.OptimizerOptions())
    net_id, messages = runtime.LoadNetwork(opt_network)

    # Binding-id/tensor pairs for EnqueueWorkload.
    input_tensors = [(0, input_tensor)]
    output_tensors = [(0, output_tensor)]
    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)

    output_vectors = ann.workload_tensors_to_ndarray(output_tensors)

    assert (output_vectors == expectedOutput).all()