                    required=False,
                    default='NONE',
                    help='Type of scaling to apply to image pixels. Default is NONE.')
parser.add_argument('-u',
                    '--url',
                    type=str,
                    required=False,
                    default='localhost:8001',
                    help='Inference server URL. Default is localhost:8001.')
parser.add_argument('image_filename', type=str, help='Input image.')
FLAGS = parser.parse_args()

# Create gRPC client for communicating with the server
try:
    triton_client = grpcclient.InferenceServerClient(FLAGS.url)
except Exception as e:
    print("context creation failed: " + str(e))
    sys.exit(1)

# Make sure the model matches our requirements, and get some
# properties of the model that we need for preprocessing
try:
    model_meta = triton_client.get_model_metadata(
        model_name=FLAGS.model_name)
except InferenceServerException as e:
    print("failed to retrieve the metadata: " + str(e))
    sys.exit(1)

try:
    model_config = triton_client.get_model_config(
        model_name=FLAGS.model_name)
except InferenceServerException as e:
    print("failed to retrieve the config: " + str(e))
    sys.exit(1)
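# The excerpt above stops after fetching the model metadata and config. A
# minimal sketch of how those objects might be inspected before
# preprocessing follows; the helper name parse_model and the
# single-input/single-output assumption are illustrative, not part of the
# original example.
def parse_model(model_metadata, model_config):
    """Return the properties needed to build a preprocessed request."""
    if len(model_metadata.inputs) != 1:
        raise Exception("expecting 1 input, got {}".format(
            len(model_metadata.inputs)))

    input_metadata = model_metadata.inputs[0]
    output_metadata = model_metadata.outputs[0]

    # max_batch_size > 0 means the model supports batching; 0 means each
    # request must match the full input shape exactly.
    max_batch_size = model_config.config.max_batch_size

    return (max_batch_size, input_metadata.name, output_metadata.name,
            input_metadata.datatype, input_metadata.shape)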
# Will use two sequences and send them asynchronously. Note that the
# sequence IDs must be non-zero because zero is reserved for
# non-sequence requests.
sequence_id0 = 1000 + FLAGS.offset * 2
sequence_id1 = 1001 + FLAGS.offset * 2

result0_list = []
result1_list = []

user_data = UserData()

# It is advisable to create the client object within a with..as clause
# when sending streaming requests. This ensures the client is closed
# when the block exits.
with grpcclient.InferenceServerClient(FLAGS.url) as triton_client:
    try:
        # Establish stream
        triton_client.start_stream(callback=partial(callback, user_data))

        # Now send the inference sequences...
        async_stream_send(triton_client, [0] + values, batch_size,
                          sequence_id0, model_name, model_version)
        async_stream_send(triton_client, [100] + [-1 * val for val in values],
                          batch_size, sequence_id1, model_name, model_version)
    except InferenceServerException as error:
        print(error)
        sys.exit(1)

    # Retrieve results...
    recv_count = 0
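# The excerpt assumes a UserData/callback pair for collecting streamed
# responses. A minimal sketch consistent with the calls above (the queue
# layout is illustrative):
import queue
from functools import partial


class UserData:

    def __init__(self):
        # Completed responses (or errors) are pushed here by the callback.
        self._completed_requests = queue.Queue()


def callback(user_data, result, error):
    # Invoked by the client once per response on the stream; partial() binds
    # user_data as the first argument.
    if error:
        user_data._completed_requests.put(error)
    else:
        user_data._completed_requests.put(result)


# After recv_count = 0, results would typically be drained with
# user_data._completed_requests.get() until both sequences are complete.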
action="store_true", required=False, default=False, help='Enable verbose output') parser.add_argument( '-u', '--url', type=str, required=False, default='localhost:8001', help='Inference server URL. Default is localhost:8001.') FLAGS = parser.parse_args() try: triton_client = grpcclient.InferenceServerClient(url=FLAGS.url, verbose=FLAGS.verbose) except Exception as e: print("channel creation failed: " + str(e)) sys.exit(1) # To make sure no shared memory regions are registered with the # server. triton_client.unregister_system_shared_memory() triton_client.unregister_cuda_shared_memory() # We use a simple model that takes 2 input tensors of 16 integers # each and returns 2 output tensors of 16 integers each. One # output tensor is the element-wise sum of the inputs and one # output is the element-wise difference. model_name = "simple" model_version = ""
parser.add_argument('-v',
                    '--verbose',
                    action="store_true",
                    required=False,
                    default=False,
                    help='Enable verbose output')
parser.add_argument('-u',
                    '--url',
                    type=str,
                    required=False,
                    default='localhost:8001',
                    help='Inference server URL. Default is localhost:8001.')
FLAGS = parser.parse_args()

try:
    TRTISClient = grpcclient.InferenceServerClient(FLAGS.url)
except Exception as e:
    print("context creation failed: " + str(e))
    sys.exit(1)

model_name = 'simple'

# Health
if TRTISClient.is_server_live():
    print("PASS: is_server_live")

if TRTISClient.is_server_ready():
    print("PASS: is_server_ready")

if TRTISClient.is_model_ready(model_name):
    print("PASS: is_model_ready")
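# As an illustrative follow-up (not part of the excerpt), the same client can
# also report server and model metadata; the printed fields are one choice of
# what a caller might inspect.
server_metadata = TRTISClient.get_server_metadata()
print("server: {} {}".format(server_metadata.name, server_metadata.version))

model_metadata = TRTISClient.get_model_metadata(model_name)
print("model inputs: {}".format([inp.name for inp in model_metadata.inputs]))
print("model outputs: {}".format([out.name for out in model_metadata.outputs]))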