Пример #1
0
def run_inference(runtime, net_id, images, labels, input_binding_info, output_binding_info):
    """Run the network once per image and print the top five results.

    Args:
        runtime: Arm NN runtime that executes the workload.
        net_id: Identifier of the network to run.
        images: Iterable of loaded images; one inference is run per item.
        labels: Loaded labels per class, forwarded to ``print_top_n``.
        input_binding_info: Binding information of the network input.
        output_binding_info: Binding information of the network output.

    Returns:
        None
    """
    # Output tensors are created once and reused for every inference run.
    outputs = ann.make_output_tensors([output_binding_info])

    for run_index, image in enumerate(images):
        # Fresh input tensors wrap the current image.
        inputs = ann.make_input_tensors([input_binding_info], [image])

        print("Running inference({0}) ...".format(run_index))
        runtime.EnqueueWorkload(net_id, inputs, outputs)

        # Only the first output of the network is inspected.
        scores = ann.workload_tensors_to_ndarray(outputs)[0]
        # argsort result reversed along its first axis.
        ranking = np.argsort(scores)[::-1]
        print_top_n(5, ranking, labels, scores)
Пример #2
0
def test_workload_tensors_to_ndarray(get_tensor_info_output):
    """Check that each converted array matches its tensor's shape and size.

    Args:
        get_tensor_info_output: Output binding information passed straight to
            ``ann.make_output_tensors`` (provided by the test setup).
    """
    output_tensors = ann.make_output_tensors(get_tensor_info_output)
    arrays = ann.workload_tensors_to_ndarray(output_tensors)

    for position, binding in enumerate(output_tensors):
        converted = arrays[position]
        # Element 1 of each binding is the tensor object itself.
        tensor = binding[1]
        assert converted.shape == tuple(tensor.GetShape())
        assert converted.size == tensor.GetNumElements()
Пример #3
0
def test_caffe_parser_end_to_end(shared_data_folder):
    """Parse, optimize, load and run the mock Caffe model, then compare the
    result with the stored golden output.

    Args:
        shared_data_folder: Directory containing ``mock_model.caffemodel`` and
            the ``caffe_parser`` input/golden ``.npy`` files.
    """
    # Bind only the local name. A chained assignment to ``ann.ICaffeParser``
    # would replace the class on the module with this instance and break any
    # later ``ann.ICaffeParser()`` call in the same process.
    parser = ann.ICaffeParser()

    # Load the network specifying the inputs and outputs
    input_name = "Placeholder"
    tensor_shape = {input_name: ann.TensorShape((1, 1, 28, 28))}
    requested_outputs = ["output"]

    network = parser.CreateNetworkFromBinaryFile(
        os.path.join(shared_data_folder, 'mock_model.caffemodel'),
        tensor_shape, requested_outputs)

    # Specify preferred backends
    preferred_backends = [ann.BackendId('CpuAcc'), ann.BackendId('CpuRef')]

    input_binding_info = parser.GetNetworkInputBindingInfo(input_name)

    options = ann.CreationOptions()
    runtime = ann.IRuntime(options)

    opt_network, messages = ann.Optimize(network, preferred_backends,
                                         runtime.GetDeviceSpec(),
                                         ann.OptimizerOptions())

    assert 0 == len(messages)

    net_id, messages = runtime.LoadNetwork(opt_network)

    assert "" == messages

    # Load test image data stored in input_caffe.npy
    input_tensor_data = np.load(
        os.path.join(shared_data_folder,
                     'caffe_parser/input_caffe.npy')).astype(np.float32)
    input_tensors = ann.make_input_tensors([input_binding_info],
                                           [input_tensor_data])

    # Collect the binding info of every requested output and create the
    # matching output tensors.
    outputs_binding_info = [
        parser.GetNetworkOutputBindingInfo(output_name)
        for output_name in requested_outputs
    ]
    output_tensors = ann.make_output_tensors(outputs_binding_info)

    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)

    output_vectors = ann.workload_tensors_to_ndarray(output_tensors)

    # Load golden output file for result comparison.
    expected_output = np.load(
        os.path.join(shared_data_folder,
                     'caffe_parser/golden_output_caffe.npy'))

    # Check that output matches golden output to 4 decimal places (there are slight rounding differences after this)
    np.testing.assert_almost_equal(output_vectors[0], expected_output, decimal=4)
Пример #4
0
def test_aarch64_inference_results(mock_model_runtime):
    """Run the mock model once and compare its output with the expected values.

    Args:
        mock_model_runtime: Indexable holding, in order, the runtime, the
            network id, the input tensors and the output tensors.
    """
    runtime = mock_model_runtime[0]
    net_id = mock_model_runtime[1]
    input_tensors = mock_model_runtime[2]
    output_tensors = mock_model_runtime[3]

    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)

    output_vectors = ann.workload_tensors_to_ndarray(output_tensors)

    expected_results = np.array([[4, 85, 108, 29, 8, 16, 0, 2, 5, 0]])

    for i, expected in enumerate(expected_results):
        # Compare the actual values. ``a.all() == b.all()`` only compares two
        # "are all elements non-zero" booleans and passes for almost any output.
        # The output is flattened because its exact shape is not visible here
        # (it may carry a leading batch dimension) - TODO confirm.
        np.testing.assert_array_equal(np.ravel(output_vectors[i]), expected)
Пример #5
0
def execute_network(input_tensors: list, output_tensors: list, runtime, net_id: int) -> List[np.ndarray]:
    """
    Runs a single inference on the loaded network and returns its outputs.

    Args:
        input_tensors: Input tensors handed to the workload.
        output_tensors: Output tensors the workload writes into.
        runtime: Runtime context that executes the inference.
        net_id: Unique ID of the network to run.

    Returns:
        list: The output tensors converted to ndarrays.
    """
    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)
    # Convert the tensors filled in by the workload into numpy arrays.
    return ann.workload_tensors_to_ndarray(output_tensors)
Пример #6
0
def test_multiple_inference_runs_yield_same_result(count, mock_model_runtime):
    """
    Test that results remain consistent among multiple runs of the same inference.

    Args:
        count: Number of times the inference is repeated.
        mock_model_runtime: Indexable holding, in order, the runtime, the
            network id, the input tensors and the output tensors.
    """
    runtime = mock_model_runtime[0]
    net_id = mock_model_runtime[1]
    input_tensors = mock_model_runtime[2]
    output_tensors = mock_model_runtime[3]

    expected_results = np.array([[4, 85, 108, 29, 8, 16, 0, 2, 5, 0]])

    for _ in range(count):
        runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)

        output_vectors = ann.workload_tensors_to_ndarray(output_tensors)

        for i, expected in enumerate(expected_results):
            # Compare the actual values. ``a.all() == b.all()`` only compares
            # two "are all elements non-zero" booleans, so it cannot detect a
            # changed result. The output is flattened because its exact shape
            # is not visible here (it may carry a leading batch dimension) -
            # TODO confirm.
            np.testing.assert_array_equal(np.ravel(output_vectors[i]), expected)
Пример #7
0
def test_onnx_parser_end_to_end(shared_data_folder):
    """Parse, optimize, load and run the mock ONNX model, then compare the
    result with the stored golden output.

    Args:
        shared_data_folder: Directory containing ``mock_model.onnx`` and the
            ``onnx_parser`` input/golden ``.npy`` files.
    """
    # Bind only the local name. A chained assignment to ``ann.IOnnxParser``
    # would replace the class on the module with this instance and break any
    # later ``ann.IOnnxParser()`` call in the same process.
    parser = ann.IOnnxParser()

    network = parser.CreateNetworkFromBinaryFile(
        os.path.join(shared_data_folder, 'mock_model.onnx'))

    # Load test image data stored in input_onnx.npy
    input_binding_info = parser.GetNetworkInputBindingInfo("input")
    input_tensor_data = np.load(
        os.path.join(shared_data_folder,
                     'onnx_parser/input_onnx.npy')).astype(np.float32)

    options = ann.CreationOptions()
    runtime = ann.IRuntime(options)

    # Backends earlier in the list are tried first.
    preferred_backends = [ann.BackendId('CpuAcc'), ann.BackendId('CpuRef')]
    opt_network, messages = ann.Optimize(network, preferred_backends,
                                         runtime.GetDeviceSpec(),
                                         ann.OptimizerOptions())

    assert 0 == len(messages)

    net_id, messages = runtime.LoadNetwork(opt_network)

    assert "" == messages

    input_tensors = ann.make_input_tensors([input_binding_info],
                                           [input_tensor_data])
    output_tensors = ann.make_output_tensors(
        [parser.GetNetworkOutputBindingInfo("output")])

    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)

    output = ann.workload_tensors_to_ndarray(output_tensors)

    # Load golden output file for result comparison.
    golden_output = np.load(
        os.path.join(shared_data_folder, 'onnx_parser/golden_output_onnx.npy'))

    # Check that output matches golden output to 4 decimal places (there are slight rounding differences after this)
    np.testing.assert_almost_equal(output[0], golden_output, decimal=4)
Пример #8
0
# Create a runtime object that will perform inference.
options = ann.CreationOptions()
runtime = ann.IRuntime(options)

# Backend choices earlier in the list have higher preference.
preferredBackends = [ann.BackendId('CpuAcc'), ann.BackendId('CpuRef')]
opt_network, messages = ann.Optimize(network, preferredBackends,
                                     runtime.GetDeviceSpec(),
                                     ann.OptimizerOptions())

# Load the optimized network into the runtime.
net_id, _ = runtime.LoadNetwork(opt_network)
print(f"Loaded network, id={net_id}")

# Create the input tensors for inference.
input_tensors = ann.make_input_tensors([input_binding_info], [image])

# Get output binding information for the first output tensor of the subgraph.
# The same graph_id is used for both lookups so they refer to one subgraph.
output_names = parser.GetSubgraphOutputTensorNames(graph_id)
output_binding_info = parser.GetNetworkOutputBindingInfo(graph_id, output_names[0])
output_tensors = ann.make_output_tensors([output_binding_info])

# Run the network that was just loaded: use the id returned by LoadNetwork
# instead of assuming it is 0.
runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)
results = ann.workload_tensors_to_ndarray(output_tensors)
print(results[0])
print(output_tensors[0][1])

# Index of the highest score in the first output; index 0 is reported as
# "Non-Fire", any other index as "Fire".
j = np.argmax(results[0])
if j == 0:
    print("Non-Fire")
else:
    print("Fire")
# Create the runtime, then optimize and load the network.
options = ann.CreationOptions()
rt = ann.IRuntime(options)
# Backend choices earlier in the list have higher preference.
preferredBackends = [ann.BackendId('CpuAcc'), ann.BackendId('CpuRef')]

opt_network, _ = ann.Optimize(network, preferredBackends, rt.GetDeviceSpec(), ann.OptimizerOptions())
net_id, _ = rt.LoadNetwork(opt_network)

# Bind the first input tensor of subgraph 0 to the image data.
input_names = parser.GetSubgraphInputTensorNames(0)
input_binding_info = parser.GetNetworkInputBindingInfo(0, input_names[0])
input_tensors = ann.make_input_tensors([input_binding_info], [image_data])

# Create output tensors for the first output tensor of subgraph 0.
output_names = parser.GetSubgraphOutputTensorNames(0)
output_binding_info = parser.GetNetworkOutputBindingInfo(0, output_names[0])
output_tensors = ann.make_output_tensors([output_binding_info])


# Time `repeat` inference runs; each entry of numpy_time is one run in ms.
repeat = 10
numpy_time = np.zeros(repeat)
for i in range(repeat):
    # perf_counter is the clock intended for measuring short intervals.
    start_time = time.perf_counter()

    # Run inference on the network that was just loaded (id from LoadNetwork).
    rt.EnqueueWorkload(net_id, input_tensors, output_tensors)

    elapsed_ms = (time.perf_counter() - start_time) * 1000
    numpy_time[i] = elapsed_ms


# Result of the last run, converted after timing so the conversion cost is
# not included in the measurements.
out_tensor = ann.workload_tensors_to_ndarray(output_tensors)[0][0]
# Report mean and standard deviation of the per-run times.
print("armnn MobileNet v2 quant %-19s (%s)" % ("%.2f ms" % np.mean(numpy_time), "%.2f ms" % np.std(numpy_time)))
Пример #10
0
def test_add_constant_layer_to_fully_connected():
    """Build a fully connected layer whose weights and bias come from constant
    layers, run it on the CpuRef backend and compare with hand-computed values.
    """
    in_width = 1
    in_height = 1
    in_channels = 5
    in_batches = 2

    out_channels = 3
    out_batches = 2

    input_shape = (in_batches, in_channels, in_height, in_width)
    output_shape = (out_batches, out_channels)
    weights_shape = (in_channels, out_channels)
    bias_shape = (out_channels, )

    input_data = np.array([[1.0, 2.0, 3.0, 4.0, 5.0],
                           [5.0, 4.0, 3.0, 2.0, 1.0]],
                          dtype=np.float32)

    weight_data = np.array([[.5, 2., .5],
                            [.5, 2., 1.],
                            [.5, 2., 2.],
                            [.5, 2., 3.],
                            [.5, 2., 4.]],
                           dtype=np.float32)

    bias_data = np.array([10, 20, 30], dtype=np.float32)

    # Hand-expanded "input row x weight column + bias", one list per input row.
    first_row = [
        0.5 + 1.0 + 1.5 + 2.0 + 2.5 + bias_data[0],
        2.0 + 4.0 + 6.0 + 8.0 + 10. + bias_data[1],
        0.5 + 2.0 + 6.0 + 12. + 20. + bias_data[2],
    ]
    second_row = [
        2.5 + 2.0 + 1.5 + 1.0 + 0.5 + bias_data[0],
        10.0 + 8.0 + 6.0 + 4.0 + 2. + bias_data[1],
        2.5 + 4.0 + 6.0 + 6. + 4. + bias_data[2],
    ]
    expected = np.array([first_row, second_row], dtype=np.float32)

    network = ann.INetwork()

    # Network input, fed from input_data.
    input_info = ann.TensorInfo(ann.TensorShape(input_shape),
                                ann.DataType_Float32, 0, 0, True)
    input_tensor = ann.ConstTensor(input_info, input_data)
    input_layer = network.AddInputLayer(0, "input")

    # Constant layer providing the weights.
    weights_info = ann.TensorInfo(ann.TensorShape(weights_shape),
                                  ann.DataType_Float32, 0, 0, True)
    weights_tensor = ann.ConstTensor(weights_info, weight_data)
    weights_layer = network.AddConstantLayer(weights_tensor, "weights")

    # Constant layer providing the bias.
    bias_info = ann.TensorInfo(ann.TensorShape(bias_shape),
                               ann.DataType_Float32, 0, 0, True)
    bias_tensor = ann.ConstTensor(bias_info, bias_data)
    bias_layer = network.AddConstantLayer(bias_tensor, "bias")

    descriptor = ann.FullyConnectedDescriptor()
    descriptor.m_BiasEnabled = True
    descriptor.m_ConstantWeights = True
    fc_layer = network.AddFullyConnectedLayer(descriptor, "fc")

    result_info = ann.TensorInfo(ann.TensorShape(output_shape),
                                 ann.DataType_Float32)
    result_tensor = ann.Tensor(result_info, np.zeros([1, 1], dtype=np.float32))
    output_layer = network.AddOutputLayer(0, "output")

    # Wiring: input -> slot 0, weights -> slot 1, bias -> slot 2, fc -> output.
    input_layer.GetOutputSlot(0).Connect(fc_layer.GetInputSlot(0))
    weights_layer.GetOutputSlot(0).Connect(fc_layer.GetInputSlot(1))
    bias_layer.GetOutputSlot(0).Connect(fc_layer.GetInputSlot(2))
    fc_layer.GetOutputSlot(0).Connect(output_layer.GetInputSlot(0))

    # Describe the tensor produced on each output slot.
    input_layer.GetOutputSlot(0).SetTensorInfo(input_info)
    weights_layer.GetOutputSlot(0).SetTensorInfo(weights_info)
    bias_layer.GetOutputSlot(0).SetTensorInfo(bias_info)
    fc_layer.GetOutputSlot(0).SetTensorInfo(result_info)

    backends = [ann.BackendId('CpuRef')]
    options = ann.CreationOptions()
    runtime = ann.IRuntime(options)
    opt_network, _ = ann.Optimize(network, backends,
                                  runtime.GetDeviceSpec(),
                                  ann.OptimizerOptions())
    net_id, _ = runtime.LoadNetwork(opt_network)

    # Both the input and the output are bound to layer binding id 0.
    input_tensors = [(0, input_tensor)]
    output_tensors = [(0, result_tensor)]
    runtime.EnqueueWorkload(net_id, input_tensors, output_tensors)

    produced = ann.workload_tensors_to_ndarray(output_tensors)

    assert (produced == expected).all()