    def test_inference_client_generated_request_binary(self):
        inputs = []
        outputs = []
        inputs.append(httpclient.InferInput('INPUT0', [1, 16], "INT32"))
        inputs.append(httpclient.InferInput('INPUT1', [1, 16], "INT32"))

        # Initialize the data
        input_data = np.array(self.input_data_, dtype=np.int32)
        input_data = np.expand_dims(input_data, axis=0)
        inputs[0].set_data_from_numpy(input_data, binary_data=True)
        inputs[1].set_data_from_numpy(input_data, binary_data=False)

        outputs.append(
            httpclient.InferRequestedOutput('OUTPUT0', binary_data=False))
        outputs.append(
            httpclient.InferRequestedOutput('OUTPUT1', binary_data=False))
        request_body, header_length = httpclient.InferenceServerClient.generate_request_body(
            inputs, outputs=outputs)

        headers = {
            'Content-Type':
                'application/vnd.sagemaker-triton.binary+json;json-header-size={}'
                .format(header_length)
        }
        r = requests.post(self.url_, data=request_body, headers=headers)
        r.raise_for_status()

        self.assertEqual(
            self.expected_result_, r.json(),
            "Expected response body: {}; got: {}".format(
                self.expected_result_, r.json()))

    def test_inference_client_generated_response_binary(self):
        inputs = []
        outputs = []
        inputs.append(httpclient.InferInput('INPUT0', [1, 16], "INT32"))
        inputs.append(httpclient.InferInput('INPUT1', [1, 16], "INT32"))

        # Initialize the data
        input_data = np.array(self.input_data_, dtype=np.int32)
        input_data = np.expand_dims(input_data, axis=0)
        inputs[0].set_data_from_numpy(input_data, binary_data=False)
        inputs[1].set_data_from_numpy(input_data, binary_data=False)

        outputs.append(
            httpclient.InferRequestedOutput('OUTPUT0', binary_data=True))
        outputs.append(
            httpclient.InferRequestedOutput('OUTPUT1', binary_data=False))
        request_body, _ = httpclient.InferenceServerClient.generate_request_body(
            inputs, outputs=outputs)

        headers = {'Content-Type': 'application/json'}
        r = requests.post(self.url_, data=request_body, headers=headers)
        r.raise_for_status()

        header_length_prefix = "application/vnd.sagemaker-triton.binary+json;json-header-size="
        header_length_str = r.headers['Content-Type'][len(header_length_prefix):]
        result = httpclient.InferenceServerClient.parse_response_body(
            r.content, header_length=int(header_length_str))

        output0_data = result.as_numpy('OUTPUT0')
        output1_data = result.as_numpy('OUTPUT1')
        for i in range(16):
            self.assertEqual(output0_data[0][i], self.expected_output0_data_[i])
            self.assertEqual(output1_data[0][i], self.expected_output1_data_[i])
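
Outside of SageMaker, the same client-generated body can be posted to Triton's standard v2 endpoint; the JSON header size then travels in the Inference-Header-Content-Length header instead of a Content-Type parameter. A minimal sketch, assuming a local server and the same two-input "simple" model:

import numpy as np
import requests
import tritonclient.http as httpclient

inputs = [httpclient.InferInput('INPUT0', [1, 16], "INT32"),
          httpclient.InferInput('INPUT1', [1, 16], "INT32")]
input_data = np.arange(16, dtype=np.int32).reshape(1, 16)
inputs[0].set_data_from_numpy(input_data, binary_data=True)
inputs[1].set_data_from_numpy(input_data, binary_data=True)
request_body, header_length = \
    httpclient.InferenceServerClient.generate_request_body(inputs)

# Triton's native HTTP endpoint reads the JSON header size from this header.
headers = {'Inference-Header-Content-Length': str(header_length)}
r = requests.post("http://localhost:8000/v2/models/simple/infer",
                  data=request_body, headers=headers)
r.raise_for_status()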
Example #3
def test_infer_no_outputs(model_name,
                          input0_data,
                          input1_data,
                          headers=None,
                          request_compression_algorithm=None,
                          response_compression_algorithm=None):
    inputs = []
    inputs.append(httpclient.InferInput('INPUT0', [1, 16], "INT32"))
    inputs.append(httpclient.InferInput('INPUT1', [1, 16], "INT32"))

    # Initialize the data
    inputs[0].set_data_from_numpy(input0_data, binary_data=False)
    inputs[1].set_data_from_numpy(input1_data, binary_data=True)

    query_params = {'test_1': 1, 'test_2': 2}
    results = triton_client.infer(
        model_name,
        inputs,
        outputs=None,
        query_params=query_params,
        headers=headers,
        request_compression_algorithm=request_compression_algorithm,
        response_compression_algorithm=response_compression_algorithm)

    return results
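
test_infer_no_outputs relies on a module-level triton_client that the snippet does not define; a minimal sketch of the setup it assumes (the URL is an assumption):

import tritonclient.http as httpclient

# Module-level client used by the helper above; localhost:8000 is Triton's
# default HTTP port.
triton_client = httpclient.InferenceServerClient(url="localhost:8000")
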
    def test_malformed_binary_header_large_number(self):
        inputs = []
        outputs = []
        inputs.append(httpclient.InferInput('INPUT0', [1, 16], "INT32"))
        inputs.append(httpclient.InferInput('INPUT1', [1, 16], "INT32"))

        # Initialize the data
        input_data = np.array(self.input_data_, dtype=np.int32)
        input_data = np.expand_dims(input_data, axis=0)
        inputs[0].set_data_from_numpy(input_data, binary_data=True)
        inputs[1].set_data_from_numpy(input_data, binary_data=False)

        outputs.append(
            httpclient.InferRequestedOutput('OUTPUT0', binary_data=False))
        outputs.append(
            httpclient.InferRequestedOutput('OUTPUT1', binary_data=False))
        # The actual header length is discarded; the request below
        # deliberately claims a bogus json-header-size.
        request_body, _ = httpclient.InferenceServerClient.generate_request_body(
            inputs, outputs=outputs)

        headers = {
            'Content-Type':
                'application/vnd.sagemaker-triton.binary+json;json-header-size=12345'
        }
        r = requests.post(self.url_, data=request_body, headers=headers)
        self.assertEqual(
            400, r.status_code,
            "Expected error code {} returned for the request; got: {}".format(
                400, r.status_code))

    def test_predict(self):
        inputs = []
        outputs = []
        inputs.append(httpclient.InferInput('INPUT0', [1, 16], "INT32"))
        inputs.append(httpclient.InferInput('INPUT1', [1, 16], "INT32"))

        # Initialize the data
        input_data = np.array(self.input_data_, dtype=np.int32)
        input_data = np.expand_dims(input_data, axis=0)
        inputs[0].set_data_from_numpy(input_data, binary_data=False)
        inputs[1].set_data_from_numpy(input_data, binary_data=False)

        outputs.append(
            httpclient.InferRequestedOutput('OUTPUT0', binary_data=False))
        outputs.append(
            httpclient.InferRequestedOutput('OUTPUT1', binary_data=False))
        request_body, _ = httpclient.InferenceServerClient.generate_request_body(
            inputs, outputs=outputs)

        headers = {'Content-Type': 'application/json'}
        r = requests.post(self.url_, data=request_body, headers=headers)
        r.raise_for_status()

        result = httpclient.InferenceServerClient.parse_response_body(
            r.content)

        output0_data = result.as_numpy('OUTPUT0')
        output1_data = result.as_numpy('OUTPUT1')
        for i in range(16):
            self.assertEqual(output0_data[0][i], self.expected_output0_data_[i])
            self.assertEqual(output1_data[0][i], self.expected_output1_data_[i])
Example #6
    def test_ensemble_io(self):
        model_name = "ensemble_io"
        with self._shm_leak_detector.Probe() as shm_probe:
            with httpclient.InferenceServerClient("localhost:8000") as client:
                input0 = np.random.random([1000]).astype(np.float32)
                for model_1_in_gpu in [True, False]:
                    for model_2_in_gpu in [True, False]:
                        for model_3_in_gpu in [True, False]:
                            gpu_output = np.asarray(
                                [model_1_in_gpu, model_2_in_gpu, model_3_in_gpu],
                                dtype=bool)
                            inputs = [
                                httpclient.InferInput(
                                    "INPUT0", input0.shape,
                                    np_to_triton_dtype(input0.dtype)),
                                httpclient.InferInput(
                                    "GPU_OUTPUT", gpu_output.shape,
                                    np_to_triton_dtype(gpu_output.dtype))
                            ]
                            inputs[0].set_data_from_numpy(input0)
                            inputs[1].set_data_from_numpy(gpu_output)
                            result = client.infer(model_name, inputs)
                            output0 = result.as_numpy('OUTPUT0')
                            self.assertIsNotNone(output0)
                            self.assertTrue(np.all(output0 == input0))
Example #7
    def test_no_update(self):
        # Test implicit state without updating any state
        triton_client = tritonhttpclient.InferenceServerClient(
            "localhost:8000")
        inputs = []
        inputs.append(tritonhttpclient.InferInput('INPUT', [1], 'INT32'))
        inputs.append(tritonhttpclient.InferInput('TEST_CASE', [1], 'INT32'))
        inputs[0].set_data_from_numpy(np.asarray([1], dtype=np.int32))
        inputs[1].set_data_from_numpy(np.asarray([1], dtype=np.int32))
        correlation_id = 3

        # Make sure the state is never updated.
        result_start = triton_client.infer(model_name="no_state_update",
                                           inputs=inputs,
                                           sequence_id=correlation_id,
                                           sequence_start=True)
        self.assertEqual(result_start.as_numpy('OUTPUT')[0], 1)
        for _ in range(10):
            result = triton_client.infer(model_name="no_state_update",
                                         inputs=inputs,
                                         sequence_id=correlation_id)
            self.assertEqual(result.as_numpy('OUTPUT')[0], 1)

        # Check the response of the final, sequence-ending request rather
        # than re-checking the last loop iteration's result.
        result_end = triton_client.infer(model_name="no_state_update",
                                         inputs=inputs,
                                         sequence_id=correlation_id,
                                         sequence_end=True)
        self.assertEqual(result_end.as_numpy('OUTPUT')[0], 1)
Example #8
    def predict(self, features: Dict) -> Dict:
        if not self.triton_client:
            self.triton_client = httpclient.InferenceServerClient(
                url=self.predictor_host, verbose=True)

        unique_ids = np.zeros([1, 1], dtype=np.int32)
        segment_ids = features["segment_ids"].reshape(1, 128)
        input_ids = features["input_ids"].reshape(1, 128)
        input_mask = features["input_mask"].reshape(1, 128)

        inputs = [
            httpclient.InferInput('unique_ids', [1, 1], "INT32"),
            httpclient.InferInput('segment_ids', [1, 128], "INT32"),
            httpclient.InferInput('input_ids', [1, 128], "INT32"),
            httpclient.InferInput('input_mask', [1, 128], "INT32")
        ]
        inputs[0].set_data_from_numpy(unique_ids)
        inputs[1].set_data_from_numpy(segment_ids)
        inputs[2].set_data_from_numpy(input_ids)
        inputs[3].set_data_from_numpy(input_mask)

        outputs = [
            httpclient.InferRequestedOutput('start_logits', binary_data=False),
            httpclient.InferRequestedOutput('end_logits', binary_data=False)
        ]
        result = self.triton_client.infer(self.model_name,
                                          inputs,
                                          outputs=outputs)
        return result.get_response()
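
Because both outputs are requested with binary_data=False, get_response() returns the parsed v2 JSON response with the tensor data inline. A hedged sketch of consuming it; the predictor instance is hypothetical:

response = predictor.predict(features)
start = next(o for o in response['outputs'] if o['name'] == 'start_logits')
start_logits = np.array(start['data']).reshape(start['shape'])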
Example #9
def run_infer(model_name,
              model_version,
              numerical_features,
              categorical_features,
              headers=None):
    inputs = []
    outputs = []
    num_type = "FP16" if numerical_features.dtype == np.float16 else "FP32"
    inputs.append(
        http_client.InferInput('input__0', numerical_features.shape, num_type))
    inputs.append(
        http_client.InferInput('input__1', categorical_features.shape,
                               "INT64"))

    # Initialize the data
    inputs[0].set_data_from_numpy(numerical_features, binary_data=True)
    inputs[1].set_data_from_numpy(categorical_features, binary_data=False)

    outputs.append(
        http_client.InferRequestedOutput('output__0', binary_data=True))
    results = triton_client.infer(
        model_name,
        inputs,
        model_version=str(model_version) if model_version != -1 else '',
        outputs=outputs,
        headers=headers)
    return results

    def _basic_inference(self,
                         shm_ip0_handle,
                         shm_ip1_handle,
                         shm_op0_handle,
                         shm_op1_handle,
                         error_msg,
                         big_shm_name="",
                         big_shm_size=64):
        input0_data = np.arange(start=0, stop=16, dtype=np.int32)
        input1_data = np.ones(shape=16, dtype=np.int32)
        inputs = []
        outputs = []
        if _protocol == "http":
            triton_client = httpclient.InferenceServerClient(_url, verbose=True)
            inputs.append(httpclient.InferInput("INPUT0", [1, 16], "INT32"))
            inputs.append(httpclient.InferInput("INPUT1", [1, 16], "INT32"))
            outputs.append(
                httpclient.InferRequestedOutput('OUTPUT0', binary_data=True))
            outputs.append(
                httpclient.InferRequestedOutput('OUTPUT1', binary_data=False))
        else:
            triton_client = grpcclient.InferenceServerClient(_url, verbose=True)
            inputs.append(grpcclient.InferInput("INPUT0", [1, 16], "INT32"))
            inputs.append(grpcclient.InferInput("INPUT1", [1, 16], "INT32"))
            outputs.append(grpcclient.InferRequestedOutput('OUTPUT0'))
            outputs.append(grpcclient.InferRequestedOutput('OUTPUT1'))

        inputs[0].set_shared_memory("input0_data", 64)

        # type(...) == np.array is always False (np.array is a function), so
        # check for an ndarray; also feed INPUT1 its own data, and only pass
        # binary_data for the HTTP client, which is the only one that
        # accepts it.
        if isinstance(shm_ip1_handle, np.ndarray):
            if _protocol == "http":
                inputs[1].set_data_from_numpy(input1_data, binary_data=True)
            else:
                inputs[1].set_data_from_numpy(input1_data)
        elif big_shm_name != "":
            inputs[1].set_shared_memory(big_shm_name, big_shm_size)
        else:
            inputs[1].set_shared_memory("input1_data", 64)

        outputs[0].set_shared_memory("output0_data", 64)
        outputs[1].set_shared_memory("output1_data", 64)

        try:
            results = triton_client.infer("simple",
                                          inputs,
                                          model_version="",
                                          outputs=outputs)
            output = results.get_output('OUTPUT0')
            if _protocol == "http":
                output_datatype = output['datatype']
                output_shape = output['shape']
            else:
                output_datatype = output.datatype
                output_shape = output.shape
            output_dtype = utils.triton_to_np_dtype(output_datatype)
            output_data = shm.get_contents_as_numpy(shm_op0_handle,
                                                    output_dtype, output_shape)
            self.assertTrue(
                (output_data[0] == (input0_data + input1_data)).all(),
                "Model output does not match expected output")
        except Exception as ex:
            error_msg.append(str(ex))
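
_basic_inference assumes that system shared-memory regions named "input0_data", "input1_data", "output0_data", and "output1_data" were created and registered beforehand. A minimal sketch of that setup for one input region; the shared-memory key "/input0_data" is an assumption:

import numpy as np
import tritonclient.http as httpclient
import tritonclient.utils.shared_memory as shm

triton_client = httpclient.InferenceServerClient("localhost:8000")

# Create a 64-byte system shared-memory region, copy the tensor into it,
# and register it with the server under the name the test references.
input0_data = np.arange(start=0, stop=16, dtype=np.int32)
shm_ip0_handle = shm.create_shared_memory_region("input0_data",
                                                 "/input0_data", 64)
shm.set_shared_memory_region(shm_ip0_handle, [input0_data])
triton_client.register_system_shared_memory("input0_data", "/input0_data", 64)

# Teardown mirrors the setup:
#   triton_client.unregister_system_shared_memory("input0_data")
#   shm.destroy_shared_memory_region(shm_ip0_handle)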
Example #11
    def _optional_input_infer(self, model_name, has_input0, has_input1):
        with httpclient.InferenceServerClient("localhost:8000") as client:
            shape = (1, )
            if has_input0:
                input0_numpy = np.random.randint(0,
                                                 100,
                                                 size=shape,
                                                 dtype=np.int32)
            else:
                # INPUT0 is not sent; mirror the model's default value (5)
                # locally so the expected outputs can still be computed.
                input0_numpy = np.array([5], dtype=np.int32)

            if has_input1:
                input1_numpy = np.random.randint(0,
                                                 100,
                                                 size=shape,
                                                 dtype=np.int32)
            else:
                # INPUT1 is not sent; mirror the model's default value (5)
                # locally so the expected outputs can still be computed.
                input1_numpy = np.array([5], dtype=np.int32)

            inputs = []
            if has_input0:
                inputs.append(
                    httpclient.InferInput(
                        "INPUT0", shape,
                        np_to_triton_dtype(input0_numpy.dtype)))
                inputs[-1].set_data_from_numpy(input0_numpy)

            if has_input1:
                inputs.append(
                    httpclient.InferInput(
                        "INPUT1", shape,
                        np_to_triton_dtype(input1_numpy.dtype)))
                inputs[-1].set_data_from_numpy(input1_numpy)

            result = client.infer(model_name, inputs)
            output0 = result.as_numpy('OUTPUT0')
            self.assertIsNotNone(output0, "OUTPUT0 was not found.")

            output1 = result.as_numpy('OUTPUT1')
            self.assertIsNotNone(output1, "OUTPUT1 was not found.")

            expected_output0 = input0_numpy + input1_numpy
            expected_output1 = input0_numpy - input1_numpy
            np.testing.assert_equal(output0, expected_output0,
                                    "OUTPUT0 doesn't match expected OUTPUT0")
            np.testing.assert_equal(output1, expected_output1,
                                    "OUTPUT1 doesn't match expected OUTPUT1")
Example #12
    def test_ensemble(self):
        model_name = "ensemble"
        shape = [16]
        with self._shm_leak_detector.Probe() as shm_probe:
            with httpclient.InferenceServerClient("localhost:8000") as client:
                input_data_0 = np.random.random(shape).astype(np.float32)
                input_data_1 = np.random.random(shape).astype(np.float32)
                inputs = [
                    httpclient.InferInput(
                        "INPUT0", input_data_0.shape,
                        np_to_triton_dtype(input_data_0.dtype)),
                    httpclient.InferInput(
                        "INPUT1", input_data_1.shape,
                        np_to_triton_dtype(input_data_1.dtype))
                ]
                inputs[0].set_data_from_numpy(input_data_0)
                inputs[1].set_data_from_numpy(input_data_1)
                result = client.infer(model_name, inputs)
                output0 = result.as_numpy('OUTPUT0')
                output1 = result.as_numpy('OUTPUT1')
                self.assertIsNotNone(output0)
                self.assertIsNotNone(output1)

                self.assertTrue(np.allclose(output0, 2 * input_data_0))
                self.assertTrue(np.allclose(output1, 2 * input_data_1))

        model_name = "ensemble_gpu"
        with self._shm_leak_detector.Probe() as shm_probe:
            with httpclient.InferenceServerClient("localhost:8000") as client:
                input_data_0 = np.random.random(shape).astype(np.float32)
                input_data_1 = np.random.random(shape).astype(np.float32)
                inputs = [
                    httpclient.InferInput(
                        "INPUT0", input_data_0.shape,
                        np_to_triton_dtype(input_data_0.dtype)),
                    httpclient.InferInput(
                        "INPUT1", input_data_1.shape,
                        np_to_triton_dtype(input_data_1.dtype))
                ]
                inputs[0].set_data_from_numpy(input_data_0)
                inputs[1].set_data_from_numpy(input_data_1)
                result = client.infer(model_name, inputs)
                output0 = result.as_numpy('OUTPUT0')
                output1 = result.as_numpy('OUTPUT1')
                self.assertIsNotNone(output0)
                self.assertIsNotNone(output1)

                self.assertTrue(np.allclose(output0, 2 * input_data_0))
                self.assertTrue(np.allclose(output1, 2 * input_data_1))
Example #13
    def test_infer(self):
        try:
            triton_client = httpclient.InferenceServerClient(
                url="localhost:8000")
        except Exception as e:
            print("channel creation failed: " + str(e))
            sys.exit(1)

        model_name = "libtorch_int32_int32_int32"

        inputs = []
        outputs = []
        inputs.append(httpclient.InferInput('INPUT__0', [1, 16], "INT32"))
        inputs.append(httpclient.InferInput('INPUT__1', [1, 16], "INT32"))

        # Create the data for the two input tensors. Initialize the first
        # to unique integers and the second to all negative ones.
        input0_data = np.arange(start=0, stop=16, dtype=np.int32)
        input0_data = np.expand_dims(input0_data, axis=0)
        input1_data = np.full(shape=(1, 16), fill_value=-1, dtype=np.int32)

        # Initialize the data
        inputs[0].set_data_from_numpy(input0_data, binary_data=True)
        inputs[1].set_data_from_numpy(input1_data, binary_data=True)

        outputs.append(
            httpclient.InferRequestedOutput('OUTPUT__0', binary_data=True))
        outputs.append(
            httpclient.InferRequestedOutput('OUTPUT__1', binary_data=True))

        results = triton_client.infer(model_name, inputs, outputs=outputs)

        output0_data = results.as_numpy('OUTPUT__0')
        output1_data = results.as_numpy('OUTPUT__1')

        # Validate the results by comparing with precomputed values.
        for i in range(16):
            print(
                str(input0_data[0][i]) + " - " + str(input1_data[0][i]) +
                " = " + str(output0_data[0][i]))
            print(
                str(input0_data[0][i]) + " + " + str(input1_data[0][i]) +
                " = " + str(output1_data[0][i]))
            if (input0_data[0][i] - input1_data[0][i]) != output0_data[0][i]:
                print("sync infer error: incorrect difference")
                sys.exit(1)
            if (input0_data[0][i] + input1_data[0][i]) != output1_data[0][i]:
                print("sync infer error: incorrect sum")
                sys.exit(1)
Example #14
    def predict(self, deployment_name, df):
        single_input_np = None
        if isinstance(df, np.ndarray):
            single_input_np = df

        inputs = []
        if single_input_np is not None:
            model_metadata = self.triton_client.get_model_metadata(
                deployment_name)
            raise MlflowException("Unnamed input is not currently supported")
        else:
            if isinstance(df, pd.DataFrame):
                model_metadata = self.triton_client.get_model_metadata(
                    deployment_name)
                input_dtype = {}
                for model_input in model_metadata["inputs"]:
                    input_dtype[model_input["name"]] = triton_to_np_dtype(
                        model_input["datatype"])
                # Sanity check
                if len(df.columns) != 1:
                    raise MlflowException(
                        "Expected the pandas DataFrame to have exactly one column")
                col = df.columns[0]
                for row in df.index:
                    val = df[col][row]
                    # Form a numpy array of the data type the model expects
                    if not isinstance(val, np.ndarray):
                        val = np.array(val, dtype=input_dtype[row])
                    inputs.append(
                        tritonhttpclient.InferInput(
                            row, val.shape, np_to_triton_dtype(val.dtype)))
                    inputs[-1].set_data_from_numpy(val)
            else:
                for key, val in df.items():
                    inputs.append(
                        tritonhttpclient.InferInput(
                            key, val.shape, np_to_triton_dtype(val.dtype)))
                    inputs[-1].set_data_from_numpy(val)

        try:
            resp = self.triton_client.infer(model_name=deployment_name,
                                            inputs=inputs)
            res = {}
            for output in resp.get_response()['outputs']:
                res[output['name']] = resp.as_numpy(output['name'])
            return {"outputs": res}
        except InferenceServerException as ex:
            raise MlflowException(str(ex))
Example #15
def requestGenerator(batched_image_data, input_name, output_name, dtype, FLAGS):

    # Set the input data
    inputs = []
    if FLAGS.protocol.lower() == "grpc":
        inputs.append(
            grpcclient.InferInput(input_name, batched_image_data.shape, dtype))
        inputs[0].set_data_from_numpy(batched_image_data)
    else:
        inputs.append(
            httpclient.InferInput(input_name, batched_image_data.shape, dtype))
        inputs[0].set_data_from_numpy(batched_image_data, binary_data=True)

    outputs = []
    if FLAGS.protocol.lower() == "grpc":
        outputs.append(
            grpcclient.InferRequestedOutput(output_name,
                                            class_count=FLAGS.classes))
    else:
        outputs.append(
            httpclient.InferRequestedOutput(output_name,
                                            binary_data=True,
                                            class_count=FLAGS.classes))

    yield inputs, outputs, FLAGS.model_name, FLAGS.model_version
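
A hedged sketch of how such a generator is typically consumed; the FLAGS.url attribute and the client construction are assumptions:

for inputs, outputs, model_name, model_version in requestGenerator(
        batched_image_data, input_name, output_name, dtype, FLAGS):
    if FLAGS.protocol.lower() == "grpc":
        client = grpcclient.InferenceServerClient(FLAGS.url)
    else:
        client = httpclient.InferenceServerClient(FLAGS.url)
    results = client.infer(model_name,
                           inputs,
                           model_version=model_version,
                           outputs=outputs)
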
    def test_infer_pymodel_error(self):
        model_name = "wrong_model"
        shape = [2, 2]

        with self._shm_leak_detector.Probe() as shm_probe:
            with httpclient.InferenceServerClient("localhost:8000") as client:
                input_data = (16384 * np.random.randn(*shape)).astype(
                    np.uint32)
                inputs = [
                    httpclient.InferInput("IN", input_data.shape,
                                          np_to_triton_dtype(input_data.dtype))
                ]
                inputs[0].set_data_from_numpy(input_data)
                try:
                    client.infer(model_name, inputs)
                except InferenceServerException as e:
                    print(e.message())
                    self.assertTrue(
                        e.message().startswith(
                            "Failed to process the request(s) for model instance"
                        ), "Exception message is not correct")
                else:
                    self.fail(
                        "Wrong exception raised or did not raise an exception")
Example #17
    def run(self, output_names, input_feed, run_options=None):
        inputs = []
        for key, val in input_feed.items():
            val = np.expand_dims(val, axis=0)
            input_tensor = tritonhttpclient.InferInput(key, val.shape,
                                                       self.dtype_mapping[key])
            input_tensor.set_data_from_numpy(val)
            inputs.append(input_tensor)

        outputs = []

        for output_name in output_names:
            output = tritonhttpclient.InferRequestedOutput(output_name)
            outputs.append(output)

        res = self.client.async_infer(self.model_name,
                                      inputs,
                                      request_id=str(self.request_count),
                                      outputs=outputs)
        res = res.get_result()
        results = []
        for output_name in output_names:
            results.append(res.as_numpy(output_name))

        return results
Example #18
def TestIdentityInference(np_array, binary_data):
    model_name = "savedmodel_zero_1_object"
    inputs = []
    outputs = []

    inputs.append(httpclient.InferInput('INPUT0', np_array.shape, "BYTES"))
    inputs[0].set_data_from_numpy(np_array, binary_data=binary_data)

    outputs.append(
        httpclient.InferRequestedOutput('OUTPUT0', binary_data=binary_data))

    results = triton_client.infer(model_name=model_name,
                                  inputs=inputs,
                                  outputs=outputs)
    if np_array.dtype == np.object_:
        if binary_data:
            if not np.array_equal(np_array,
                                  np.char.decode(results.as_numpy('OUTPUT0'))):
                print(results.as_numpy('OUTPUT0'))
                sys.exit(1)
        else:
            if not np.array_equal(np_array, results.as_numpy('OUTPUT0')):
                print(results.as_numpy('OUTPUT0'))
                sys.exit(1)
    else:
        encoded_results = np.char.encode(
            results.as_numpy('OUTPUT0').astype(str))
        if not np.array_equal(np_array, encoded_results):
            print(encoded_results)
            sys.exit(1)
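
A hedged usage sketch: BYTES identity tensors can be fed either as numpy object arrays of already-encoded strings or as fixed-width byte strings; the shapes and values are assumptions:

object_array = np.array([str(i).encode("utf-8") for i in range(16)],
                        dtype=np.object_).reshape(1, 16)
TestIdentityInference(object_array, binary_data=True)

bytes_array = np.char.encode(np.arange(16).astype(str)).reshape(1, 16)
TestIdentityInference(bytes_array, binary_data=False)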
Example #19
def sync_send(triton_client, result_list, values, batch_size, sequence_id,
              model_name, model_version):

    count = 1
    for value in values:
        # Create the tensor for INPUT
        value_data = np.full(shape=[batch_size, 1],
                             fill_value=value,
                             dtype=np.int32)
        inputs = []
        inputs.append(httpclient.InferInput('INPUT', value_data.shape,
                                            "INT32"))
        # Initialize the data
        inputs[0].set_data_from_numpy(value_data)
        outputs = []
        outputs.append(httpclient.InferRequestedOutput('OUTPUT'))
        # Issue the synchronous sequence inference.
        result = triton_client.infer(model_name=model_name,
                                     inputs=inputs,
                                     outputs=outputs,
                                     sequence_id=sequence_id,
                                     sequence_start=(count == 1),
                                     sequence_end=(count == len(values)))
        result_list.append(result.as_numpy('OUTPUT'))
        count = count + 1
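
A hedged usage sketch for the sequence helper above; the model name, sequence id, and values are assumptions:

triton_client = httpclient.InferenceServerClient("localhost:8000")
result_list = []
sync_send(triton_client,
          result_list,
          values=[0, 1, 2, 3, 4, 5, 6, 7],
          batch_size=1,
          sequence_id=1000,
          model_name="simple_sequence",
          model_version="")
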
  def generate_rest_request_from_dictionary(self, row_dict):
    triton_request_inputs = []
    for key, value in row_dict.items():
      t = clients.utils.get_type(value, self._default_float_type,
                                 self._default_int_type)
      if t == np.object_:
        value = clients.utils.map_multi_dimensional_list(
            value, lambda s: s.encode("utf-8"))
      numpy_value = np.array(value, dtype=t)
      triton_request_input = triton_httpclient.InferInput(
          key, list(numpy_value.shape), triton_utils.np_to_triton_dtype(t))
      triton_request_input.set_data_from_numpy(
          numpy_value, binary_data=True)  # binary_data=True by default
      triton_request_inputs.append(triton_request_input)
    # https://github.com/triton-inference-server/client/blob/530bcac5f1574aa2222930076200544eb274245c/src/python/library/tritonclient/http/__init__.py#L81
    # Returns tuple - request and request len to pass in Infer-Header-Content-Length header
    (request, json_size) = triton_httpclient._get_inference_request(
        inputs=triton_request_inputs,
        request_id="",
        outputs=None,
        sequence_id=0,
        sequence_start=0,
        sequence_end=0,
        priority=0,
        timeout=None)

    headers = {}
    if json_size:
      headers["Inference-Header-Content-Length"] = str(json_size)
    return (request, headers)
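
A hedged sketch of posting the generated request; the wrapper instance, host, and model name are assumptions. For binary inputs, the Inference-Header-Content-Length header set above is what lets Triton split the JSON header from the trailing tensor bytes:

import requests

request, headers = wrapper.generate_rest_request_from_dictionary(
    {"input_1": [[1.0, 2.0, 3.0]]})
r = requests.post("http://localhost:8000/v2/models/my_model/infer",
                  data=request, headers=headers)
r.raise_for_status()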
Example #21
    def test_incorrect_execute_return(self):
        model_name = 'execute_return_error'
        shape = [1, 1]
        with httpclient.InferenceServerClient("localhost:8000") as client:
            input_data = (5 * np.random.randn(*shape)).astype(np.float32)
            inputs = [
                httpclient.InferInput("INPUT", input_data.shape,
                                      np_to_triton_dtype(input_data.dtype))
            ]
            inputs[0].set_data_from_numpy(input_data)

            # The first request to this model will return None.
            with self.assertRaises(InferenceServerException) as e:
                client.infer(model_name, inputs)

            self.assertTrue(
                str(e.exception).startswith(
                    "Failed to process the request(s) for model instance "
                    "'execute_return_error_0', message: Expected a list in the "
                    "execute return"), "Exception message is not correct.")

            # The second inference request will return a list of None object
            # instead of Python InferenceResponse objects.
            with self.assertRaises(InferenceServerException) as e:
                client.infer(model_name, inputs)

            self.assertTrue(
                str(e.exception).startswith(
                    "Failed to process the request(s) for model instance "
                    "'execute_return_error_0', message: Expected an "
                    "'InferenceResponse' object in the execute function return"
                    " list"), "Exception message is not correct.")
Example #22
    def test_wrong_implicit_state_name(self):
        triton_client = tritonhttpclient.InferenceServerClient(
            "localhost:8000")
        inputs = []
        inputs.append(tritonhttpclient.InferInput('INPUT', [1], 'INT32'))
        inputs.append(tritonhttpclient.InferInput('TEST_CASE', [1], 'INT32'))
        inputs[0].set_data_from_numpy(
            np.random.randint(5, size=[1], dtype=np.int32))
        inputs[1].set_data_from_numpy(np.asarray([0], dtype=np.int32))

        with self.assertRaises(InferenceServerException) as e:
            triton_client.infer(model_name="wrong_internal_state",
                                inputs=inputs,
                                sequence_id=2,
                                sequence_start=True)

        self.assertEqual(str(e.exception),
                         "state 'undefined_state' is not a valid state name.")

    def test_predict_specified_model(self):
        inputs = []
        outputs = []
        inputs.append(httpclient.InferInput('INPUT0', [1, 16], "INT32"))
        inputs.append(httpclient.InferInput('INPUT1', [1, 16], "INT32"))

        # Initialize the data
        input_data = np.array(self.input_data_, dtype=np.int32)
        input_data = np.expand_dims(input_data, axis=0)
        inputs[0].set_data_from_numpy(input_data, binary_data=False)
        inputs[1].set_data_from_numpy(input_data, binary_data=False)

        outputs.append(
            httpclient.InferRequestedOutput('OUTPUT0', binary_data=False))
        outputs.append(
            httpclient.InferRequestedOutput('OUTPUT1', binary_data=False))
        request_body, _ = httpclient.InferenceServerClient.generate_request_body(
            inputs, outputs=outputs)

        headers = {
            'Content-Type':
                'application/json',
            "X-Vertex-Ai-Triton-Redirect":
                "v2/models/{}/infer".format(self.model_)
        }
        r = requests.post(self.url_, data=request_body, headers=headers)
        r.raise_for_status()

        result = httpclient.InferenceServerClient.parse_response_body(
            r.content)

        output0_data = result.as_numpy('OUTPUT0')
        output1_data = result.as_numpy('OUTPUT1')
        if self.model_ == "addsub":
            expected_output0_data = [x * 2 for x in self.input_data_]
            expected_output1_data = [0 for x in self.input_data_]
        else:
            expected_output0_data = [0 for x in self.input_data_]
            expected_output1_data = [x * 2 for x in self.input_data_]
        for i in range(16):
            self.assertEqual(output0_data[0][i], expected_output0_data[i])
            self.assertEqual(output1_data[0][i], expected_output1_data[i])
Example #24
    def test_no_implicit_state(self):
        triton_client = tritonhttpclient.InferenceServerClient(
            "localhost:8000")
        inputs = []
        inputs.append(tritonhttpclient.InferInput('INPUT', [1], 'INT32'))
        inputs.append(tritonhttpclient.InferInput('TEST_CASE', [1], 'INT32'))
        inputs[0].set_data_from_numpy(
            np.random.randint(5, size=[1], dtype=np.int32))
        inputs[1].set_data_from_numpy(np.asarray([0], dtype=np.int32))

        with self.assertRaises(InferenceServerException) as e:
            triton_client.infer(model_name="no_implicit_state",
                                inputs=inputs,
                                sequence_id=1,
                                sequence_start=True)

        self.assertEqual(
            str(e.exception),
            "unable to add state 'undefined_state'. State configuration is missing for model 'no_implicit_state'."
        )
Example #25
    def _infer_helper(self, model_name, shape, data_type):
        with httpclient.InferenceServerClient("localhost:8000") as client:
            input_data_0 = np.array(np.random.randn(*shape), dtype=data_type)
            inputs = [
                httpclient.InferInput("INPUT0", shape,
                                      np_to_triton_dtype(input_data_0.dtype))
            ]
            inputs[0].set_data_from_numpy(input_data_0)
            result = client.infer(model_name, inputs)
            output0 = result.as_numpy('OUTPUT0')
            self.assertTrue(np.all(input_data_0 == output0))
Example #26
    def test_http(self):
        triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
        inputs = []
        inputs.append(tritonhttpclient.InferInput('INPUT', [1], "UINT8"))
        inputs[0].set_data_from_numpy(np.arange(1, dtype=np.uint8))

        try:
            triton_client.infer(model_name="query", inputs=inputs)
            self.fail("expected an error with query information")
        except InferenceServerException as ex:
            self.assertIn("OUTPUT0 CPU 0", ex.message())
            self.assertIn("OUTPUT1 CPU 0", ex.message())
Example #27
    def send_identity_request(self, client, model_name):
        inputs = []
        inputs.append(httpclient.InferInput('INPUT0', [1, 16], "FP32"))
        input0_data = np.arange(start=0, stop=16, dtype=np.float32)
        input0_data = np.expand_dims(input0_data, axis=0)
        inputs[0].set_data_from_numpy(input0_data)
        result = client.infer(
            model_name=model_name,
            inputs=inputs,
            outputs=[httpclient.InferRequestedOutput('OUTPUT0')])
        output_numpy = result.as_numpy('OUTPUT0')
        self.assertTrue(np.all(input0_data == output_numpy))

def test_infer(model_name, input0_data, input1_data, headers=None):
    inputs = []
    outputs = []
    inputs.append(httpclient.InferInput('INPUT0', [1, 16], "INT32"))
    inputs.append(httpclient.InferInput('INPUT1', [1, 16], "INT32"))

    # Initialize the data
    inputs[0].set_data_from_numpy(input0_data, binary_data=False)
    inputs[1].set_data_from_numpy(input1_data, binary_data=True)

    outputs.append(httpclient.InferRequestedOutput('OUTPUT0', binary_data=True))
    outputs.append(httpclient.InferRequestedOutput('OUTPUT1',
                                                   binary_data=False))
    query_params = {'test_1': 1, 'test_2': 2}
    results = triton_client.infer(model_name,
                                  inputs,
                                  outputs=outputs,
                                  query_params=query_params,
                                  headers=headers)

    return results
Example #29
def oneflow_infer(data):
    triton_client = httpclient.InferenceServerClient(url='127.0.0.1:8000')
    inputs = []
    inputs.append(httpclient.InferInput('INPUT_0', data.shape, "INT64"))
    inputs[0].set_data_from_numpy(data, binary_data=True)
    outputs = []
    outputs.append(
        httpclient.InferRequestedOutput('OUTPUT_0',
                                        binary_data=True,
                                        class_count=1))
    results = triton_client.infer("embedding", inputs=inputs, outputs=outputs)
    output_data = results.as_numpy('OUTPUT_0')
    return output_data
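
Because the output is requested with class_count=1, the server returns the top-1 result as a BYTES classification string of the form "value:index(:label)" rather than raw scores. A hedged decoding sketch; the input shape is an assumption:

top1 = oneflow_infer(np.array([[42]], dtype=np.int64))
value, index = top1.flatten()[0].decode("utf-8").split(":")[:2]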
Example #30
    def test_bool(self):
        model_name = 'identity_bool'
        with httpclient.InferenceServerClient("localhost:8000") as client:
            input_data = np.array([[True, False, True]], dtype=bool)
            inputs = [
                httpclient.InferInput("INPUT0", input_data.shape,
                                      np_to_triton_dtype(input_data.dtype))
            ]
            inputs[0].set_data_from_numpy(input_data)
            result = client.infer(model_name, inputs)
            output0 = result.as_numpy('OUTPUT0')
            self.assertIsNotNone(output0)
            self.assertTrue(np.all(output0 == input_data))