def requestGenerator(input_name, output_name, c, h, w, format, dtype, FLAGS,
                     input_filenames):
    request = grpc_service_v2_pb2.ModelInferRequest()
    request.model_name = FLAGS.model_name
    request.model_version = FLAGS.model_version

    filenames = []
    if os.path.isdir(FLAGS.image_filename):
        filenames = [
            os.path.join(FLAGS.image_filename, f)
            for f in os.listdir(FLAGS.image_filename)
            if os.path.isfile(os.path.join(FLAGS.image_filename, f))
        ]
    else:
        filenames = [
            FLAGS.image_filename,
        ]

    filenames.sort()

    output = grpc_service_v2_pb2.ModelInferRequest(
    ).InferRequestedOutputTensor()
    output.name = output_name
    output.parameters['classification'].int64_param = FLAGS.classes
    request.outputs.extend([output])

    input = grpc_service_v2_pb2.ModelInferRequest().InferInputTensor()
    input.name = input_name
    input.datatype = dtype
    if format == mc.ModelInput.FORMAT_NHWC:
        input.shape.extend([FLAGS.batch_size, h, w, c])
    else:
        input.shape.extend([FLAGS.batch_size, c, h, w])

    # Preprocess the images into input data according to model requirements
    image_data = []
    for filename in filenames:
        img = Image.open(filename)
        image_data.append(
            preprocess(img, format, dtype, c, h, w, FLAGS.scaling))

    # Build one request of FLAGS.batch_size images.
    input_bytes = None
    for idx in range(FLAGS.batch_size):
        # Wrap around if the requested batch size exceeds the number of
        # provided images.
        img_idx = idx % len(filenames)
        input_filenames.append(filenames[img_idx])
        if input_bytes is None:
            input_bytes = image_data[img_idx].tobytes()
        else:
            input_bytes += image_data[img_idx].tobytes()

    input_contents = grpc_service_v2_pb2.InferTensorContents()
    input_contents.raw_contents = input_bytes
    input.contents.CopyFrom(input_contents)
    request.inputs.extend([input])
    yield request
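# A hedged usage sketch (not part of the original snippet): driving the
# generator above and sending each yielded request. `grpc_stub` is built the
# same way as in the other fragments in this file; `postprocess_results` is a
# hypothetical helper for decoding the response.
channel = grpc.insecure_channel(FLAGS.url)
grpc_stub = grpc_service_v2_pb2_grpc.GRPCInferenceServiceStub(channel)

input_filenames = []
for request in requestGenerator(input_name, output_name, c, h, w, format,
                                dtype, FLAGS, input_filenames):
    response = grpc_stub.ModelInfer(request)
    postprocess_results(response, input_filenames)  # hypothetical helper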
def _get_inference_request(model_name,
                           inputs,
                           model_version,
                           request_id,
                           outputs,
                           sequence_id=None,
                           sequence_start=None,
                           sequence_end=None):
    request = grpc_service_v2_pb2.ModelInferRequest()
    request.model_name = model_name
    request.model_version = model_version
    if request_id is not None:
        request.id = request_id
    for infer_input in inputs:
        request.inputs.extend([infer_input._get_tensor()])
    for infer_output in outputs:
        request.outputs.extend([infer_output._get_tensor()])
    if sequence_id:
        param = request.parameters['sequence_id']
        param.int64_param = sequence_id
    if sequence_start:
        param = request.parameters['sequence_start']
        param.bool_param = sequence_start
    if sequence_end:
        param = request.parameters['sequence_end']
        param.bool_param = sequence_end
    return request
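# A minimal sketch (an assumption, not from the original source) of calling
# the builder above for a request in the middle of a sequence. `infer_inputs`
# and `infer_outputs` are assumed lists of the InferInput/InferOutput wrapper
# objects whose _get_tensor() methods are used above; the model name is a
# placeholder. Passing sequence_start=False and sequence_end=False leaves
# those parameters unset, marking the request as mid-sequence.
request = _get_inference_request(model_name="my_sequence_model",
                                 inputs=infer_inputs,
                                 model_version="",
                                 request_id="seq-42-req-3",
                                 outputs=infer_outputs,
                                 sequence_id=42,
                                 sequence_start=False,
                                 sequence_end=False)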
def __init__(self, name, shape=None, datatype=None):
    self._input = grpc_service_v2_pb2.ModelInferRequest().InferInputTensor()
    self._input.name = name
    if shape:
        self._input.ClearField('shape')
        self._input.shape.extend(shape)
    if datatype:
        self._input.datatype = datatype
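# The request builders in this file call infer_input._get_tensor(); a
# plausible accessor for the class above (an assumption, since the rest of
# the class body is not shown here) simply exposes the underlying protobuf:
def _get_tensor(self):
    return self._input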
def _get_inference_request(self, inputs, outputs, model_name, model_version,
                           request_id, parameters):
    """Creates and initializes an inference request.

    Parameters
    ----------
    inputs : list
        A list of InferInput objects, each describing the data for an
        input tensor required by the model.
    outputs : list
        A list of InferOutput objects, each describing how the output
        data must be returned. Only the output tensors present in the
        list will be requested from the server.
    model_name : str
        The name of the model to run inference on.
    model_version : str
        The version of the model to run inference on. The default value
        is an empty string, which means the server will choose a version
        based on the model and internal policy.
    request_id : str
        Optional identifier for the request. If specified, it will be
        returned in the response. The default value is None, which means
        no request_id will be used.
    parameters : dict
        Optional inference parameters described as key-value pairs.

    Returns
    -------
    ModelInferRequest
        The protobuf message holding the inference request.

    Raises
    ------
    InferenceServerException
        If the server fails to issue the inference.
    """
    request = grpc_service_v2_pb2.ModelInferRequest()
    request.model_name = model_name
    request.model_version = model_version
    if request_id is not None:
        request.id = request_id
    for infer_input in inputs:
        request.inputs.extend([infer_input._get_tensor()])
    for infer_output in outputs:
        request.outputs.extend([infer_output._get_tensor()])
    if parameters:
        for param_key in parameters:
            _set_parameter(request, key=param_key, value=parameters[param_key])
    return request
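# _set_parameter is called above but not shown. A hedged sketch of what such
# a helper could look like, dispatching on the Python type of the value (an
# assumption; the real helper may support more types). Note the bool check
# must come before the int check, since bool is a subclass of int in Python.
def _set_parameter(request, key, value):
    param = request.parameters[key]
    if isinstance(value, bool):
        param.bool_param = value
    elif isinstance(value, int):
        param.int64_param = value
    elif isinstance(value, str):
        param.string_param = value
    else:
        raise InferenceServerException(
            "unsupported parameter type for key '{}'".format(key))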
def requestGenerator(input_name, output_name, c, h, w, format, dtype, FLAGS):
    request = grpc_service_v2_pb2.ModelInferRequest()
    request.model_name = FLAGS.model_name
    request.model_version = FLAGS.model_version

    output = grpc_service_v2_pb2.ModelInferRequest(
    ).InferRequestedOutputTensor()
    output.name = output_name
    request.outputs.extend([output])

    input = grpc_service_v2_pb2.ModelInferRequest().InferInputTensor()
    input.name = input_name
    input.datatype = dtype
    if format == mc.ModelInput.FORMAT_NHWC:
        input.shape.extend([FLAGS.batch_size, h, w, c])
    else:
        input.shape.extend([FLAGS.batch_size, c, h, w])

    # Preprocess the image into input data according to model requirements.
    image_data = None
    with Image.open(FLAGS.image_filename) as img:
        image_data = preprocess(img, format, dtype, c, h, w, FLAGS.scaling)

    # Build one request containing FLAGS.batch_size copies of the image.
    input_bytes = None
    for idx in range(FLAGS.batch_size):
        if input_bytes is None:
            input_bytes = image_data.tobytes()
        else:
            input_bytes += image_data.tobytes()

    input_contents = grpc_service_v2_pb2.InferTensorContents()
    input_contents.raw_contents = input_bytes
    input.contents.CopyFrom(input_contents)
    request.inputs.extend([input])
    yield request
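# Why concatenating tobytes() works for raw_contents: the raw bytes of a
# batch are just the row-major bytes of each sample laid back to back. A
# small self-contained check (illustrative only, not part of the client):
import numpy as np

sample = np.arange(6, dtype=np.float32).reshape(2, 3)
assert sample.tobytes() + sample.tobytes() == \
    np.stack([sample, sample]).tobytes()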
def _get_inference_request(self, inputs, outputs, model_name, model_version,
                           request_id, sequence_id):
    """Creates and initializes an inference request.

    Parameters
    ----------
    inputs : list
        A list of InferInput objects, each describing the data for an
        input tensor required by the model.
    outputs : list
        A list of InferOutput objects, each describing how the output
        data must be returned. Only the output tensors present in the
        list will be requested from the server.
    model_name : str
        The name of the model to run inference on.
    model_version : str
        The version of the model to run inference on. The default value
        is an empty string, which means the server will choose a version
        based on the model and internal policy.
    request_id : str
        Optional identifier for the request. If specified, it will be
        returned in the response. The default value is None, which means
        no request_id will be used.
    sequence_id : int
        The sequence ID of the inference request. Default is 0, which
        indicates that the request is not part of a sequence. The
        sequence ID is used to indicate that two or more inference
        requests are in the same sequence.
    """
    self._request = grpc_service_v2_pb2.ModelInferRequest()
    self._request.model_name = model_name
    self._request.model_version = model_version
    if request_id is not None:
        self._request.id = request_id
    if sequence_id is not None:
        self._request.sequence_id = sequence_id
    for infer_input in inputs:
        self._request.inputs.extend([infer_input._get_tensor()])
    for infer_output in outputs:
        self._request.outputs.extend([infer_output._get_tensor()])
response = grpc_stub.ServerMetadata(request)
print("server metadata:\n{}".format(response))

request = grpc_service_v2_pb2.ModelMetadataRequest(name=model_name,
                                                   version=model_version)
response = grpc_stub.ModelMetadata(request)
print("model metadata:\n{}".format(response))

# Configuration
request = grpc_service_v2_pb2.ModelConfigRequest(name=model_name,
                                                 version=model_version)
response = grpc_stub.ModelConfig(request)
print("model config:\n{}".format(response))

# Infer
request = grpc_service_v2_pb2.ModelInferRequest()
request.model_name = model_name
request.model_version = model_version
request.id = model_name + "-id-0"

# Input data
img = cv2.imread('triton/sample_data/maskimage.jpg')
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
target_shape = (360, 360)
height, width, _ = img.shape
resized_img = cv2.resize(img, target_shape)
image_np = resized_img / 255.0
image_exp = np.expand_dims(image_np, axis=0)
image_transposed = image_exp.transpose((0, 3, 1, 2))
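# The fragment above stops after preprocessing. A hedged continuation
# (assumed, not from the original source) attaching the NCHW tensor as an
# input and issuing the call; the input name "data" and the FP32 datatype
# are placeholders that depend on the actual model configuration.
input = grpc_service_v2_pb2.ModelInferRequest().InferInputTensor()
input.name = "data"  # placeholder input name
input.datatype = "FP32"  # placeholder datatype
input.shape.extend(image_transposed.shape)
input_contents = grpc_service_v2_pb2.InferTensorContents()
input_contents.raw_contents = np.float32(image_transposed).tobytes()
input.contents.CopyFrom(input_contents)
request.inputs.extend([input])

response = grpc_stub.ModelInfer(request)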
FLAGS = parser.parse_args()

# We use a simple model that takes 2 input tensors of 16 integers
# each and returns 2 output tensors of 16 integers each. One
# output tensor is the element-wise sum of the inputs and one
# output is the element-wise difference.
model_name = "simple"
model_version = ""
batch_size = 1

# Create gRPC stub for communicating with the server
channel = grpc.insecure_channel(FLAGS.url)
grpc_stub = grpc_service_v2_pb2_grpc.GRPCInferenceServiceStub(channel)

# Generate the request
request = grpc_service_v2_pb2.ModelInferRequest()
request.model_name = model_name
request.model_version = model_version

# Input data
input0_data = [i for i in range(16)]
input1_data = [1 for i in range(16)]

# Populate the inputs in inference request
input0 = grpc_service_v2_pb2.ModelInferRequest().InferInputTensor()
input0.name = "INPUT0"
input0.datatype = "INT32"
input0.shape.extend([1, 16])
input0.contents.int_contents[:] = input0_data

input1 = grpc_service_v2_pb2.ModelInferRequest().InferInputTensor()
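# The snippet is truncated after creating input1. A hedged sketch of how such
# a fragment typically continues; the output names OUTPUT0/OUTPUT1 follow the
# model description in the comment above but should be treated as assumptions.
input1.name = "INPUT1"
input1.datatype = "INT32"
input1.shape.extend([1, 16])
input1.contents.int_contents[:] = input1_data
request.inputs.extend([input0, input1])

# Request both outputs and run inference.
output0 = grpc_service_v2_pb2.ModelInferRequest().InferRequestedOutputTensor()
output0.name = "OUTPUT0"
output1 = grpc_service_v2_pb2.ModelInferRequest().InferRequestedOutputTensor()
output1.name = "OUTPUT1"
request.outputs.extend([output0, output1])

response = grpc_stub.ModelInfer(request)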
def __init__(self, name):
    self._output = grpc_service_v2_pb2.ModelInferRequest(
    ).InferRequestedOutputTensor()
    self._output.name = name
def requestGenerator(input_name, output_name, c, h, w, format, dtype, FLAGS,
                     result_filenames):
    request = grpc_service_v2_pb2.ModelInferRequest()
    request.model_name = FLAGS.model_name
    request.model_version = FLAGS.model_version

    filenames = []
    if os.path.isdir(FLAGS.image_filename):
        filenames = [
            os.path.join(FLAGS.image_filename, f)
            for f in os.listdir(FLAGS.image_filename)
            if os.path.isfile(os.path.join(FLAGS.image_filename, f))
        ]
    else:
        filenames = [
            FLAGS.image_filename,
        ]

    filenames.sort()

    output = grpc_service_v2_pb2.ModelInferRequest(
    ).InferRequestedOutputTensor()
    output.name = output_name
    output.parameters['classification'].int64_param = FLAGS.classes
    request.outputs.extend([output])

    input = grpc_service_v2_pb2.ModelInferRequest().InferInputTensor()
    input.name = input_name
    input.datatype = dtype
    if format == mc.ModelInput.FORMAT_NHWC:
        input.shape.extend([FLAGS.batch_size, h, w, c])
    else:
        input.shape.extend([FLAGS.batch_size, c, h, w])

    # Preprocess the images into input data according to model requirements
    image_data = []
    for filename in filenames:
        img = Image.open(filename)
        image_data.append(
            preprocess(img, format, dtype, c, h, w, FLAGS.scaling))

    # Send requests of FLAGS.batch_size images. If the number of
    # images isn't an exact multiple of FLAGS.batch_size then just
    # start over with the first images until the batch is filled.
    image_idx = 0
    last_request = False
    while not last_request:
        input_bytes = None
        input_filenames = []
        request.ClearField("inputs")
        for idx in range(FLAGS.batch_size):
            input_filenames.append(filenames[image_idx])
            if input_bytes is None:
                input_bytes = image_data[image_idx].tobytes()
            else:
                input_bytes += image_data[image_idx].tobytes()
            image_idx = (image_idx + 1) % len(image_data)
            if image_idx == 0:
                last_request = True

        input_contents = grpc_service_v2_pb2.InferTensorContents()
        input_contents.raw_contents = input_bytes
        input.contents.CopyFrom(input_contents)
        request.inputs.extend([input])
        result_filenames.append(input_filenames)
        yield request
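# A hedged usage sketch for the multi-batch variant above: result_filenames
# collects, per yielded request, the filenames that filled that batch, so
# responses[i] can be matched back to result_filenames[i]. `grpc_stub` and
# the model metadata variables are assumptions carried over from the other
# snippets in this file.
result_filenames = []
responses = []
for request in requestGenerator(input_name, output_name, c, h, w, format,
                                dtype, FLAGS, result_filenames):
    responses.append(grpc_stub.ModelInfer(request))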
response = grpc_stub.ServerMetadata(request)
print("server metadata:\n{}".format(response))

request = grpc_service_v2_pb2.ModelMetadataRequest(name=model_name,
                                                   version=model_version)
response = grpc_stub.ModelMetadata(request)
print("model metadata:\n{}".format(response))

# Configuration
request = grpc_service_v2_pb2.ModelConfigRequest(name=model_name,
                                                 version=model_version)
response = grpc_stub.ModelConfig(request)
print("model config:\n{}".format(response))

# Infer
request = grpc_service_v2_pb2.ModelInferRequest()
request.model_name = model_name
request.model_version = model_version
request.id = model_name + "-id-0"

# Input data
img = cv2.imread('triton/sample_data/maskimage.jpg')
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
height = img.shape[0]
width = img.shape[1]
target_shape = (260, 260)
resized_img = cv2.resize(img, target_shape)
image_np = resized_img / 255.0
image_exp = np.expand_dims(image_np, axis=0)
processed_data = np.float32(image_exp)