def predict(self, request_data, request_timeout=10):
    self.logger.info('Sending request to tfserving model')
    self.logger.info('Host: {}'.format(self.host))
    self.logger.info('Model name: {}'.format(self.model_name))
    self.logger.info('Model version: {}'.format(self.model_version))

    # Create gRPC client and request
    t = time.time()
    channel = grpc.insecure_channel(self.host)
    self.logger.debug(
        'Establishing insecure channel took: {}'.format(time.time() - t))

    t = time.time()
    stub = PredictionServiceStub(channel)
    self.logger.debug('Creating stub took: {}'.format(time.time() - t))

    t = time.time()
    request = PredictRequest()
    self.logger.debug(
        'Creating request object took: {}'.format(time.time() - t))

    request.model_spec.name = self.model_name
    if self.model_version > 0:
        request.model_spec.version.value = self.model_version

    # The 'map<string, TensorProto> inputs = 2;' field in predict.proto
    # corresponds to the inputs parameter we send to the signature definition
    # of our model. Here we fill in the inputs. These protos are copied from
    # the tensorflow serving apis:
    # https://github.com/tensorflow/serving/tree/r1.7/tensorflow_serving/apis
    t = time.time()
    for d in request_data:
        tensor_proto = make_tensor_proto(d['data'], d['in_tensor_dtype'])
        request.inputs[d['in_tensor_name']].CopyFrom(tensor_proto)
    self.logger.debug('Making tensor protos took: {}'.format(time.time() - t))

    try:
        # At the bottom of predict.proto, PredictResponse is defined. This
        # message has one field: 'map<string, TensorProto> outputs = 1;'.
        # The keys in this map are the output node names of our model
        # (e.g. 'add').
        t = time.time()
        predict_response = stub.Predict(request, timeout=request_timeout)
        self.logger.debug(
            'Actual request took: {} seconds'.format(time.time() - t))

        predict_response_dict = predict_response_to_dict(predict_response)
        keys = [k for k in predict_response_dict]
        self.logger.info('Got predict_response with keys: {}'.format(keys))

        return predict_response_dict
    except RpcError as e:
        self.logger.error(e)
        self.logger.error('Prediction failed!')

    return {}
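# For context, calling this first variant might look like the sketch below.
# The ProdClient name, the 'mnist' model and the 'inputs' tensor name are
# taken from the docstring example in the typed variant further down; treat
# the exact names as assumptions, not guarantees of this code.
import numpy as np

client = ProdClient(host='localhost:9000', model_name='mnist', model_version=1)
response = client.predict(
    request_data=[{
        'data': np.asarray([image]),    # `image` is an array loaded elsewhere
        'in_tensor_dtype': 'DT_UINT8',  # dtype key passed to make_tensor_proto
        'in_tensor_name': 'inputs',     # input node name in the model signature
    }],
    request_timeout=10,
)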
def predict(self, request_data, request_timeout=10, max_message_size=4):
    self.logger.info('Sending request to tfserving model')
    self.logger.info('Host: {}'.format(self.host))
    self.logger.info('Model name: {}'.format(self.model_name))
    self.logger.info('Model version: {}'.format(self.model_version))

    # Create gRPC client and request
    t = time.time()
    # Changed to allow big images
    options = [('grpc.max_message_length', max_message_size * 1024 * 1024),
               ('grpc.max_send_message_length', max_message_size * 1024 * 1024),
               ('grpc.max_receive_message_length', max_message_size * 1024 * 1024)]
    channel = grpc.insecure_channel(self.host, options=options)
    self.logger.debug(
        'Establishing insecure channel took: {}'.format(time.time() - t))

    t = time.time()
    stub = PredictionServiceStub(channel)
    self.logger.debug('Creating stub took: {}'.format(time.time() - t))

    t = time.time()
    request = PredictRequest()
    self.logger.debug(
        'Creating request object took: {}'.format(time.time() - t))

    request.model_spec.name = self.model_name
    if self.model_version > 0:
        request.model_spec.version.value = self.model_version

    t = time.time()
    for d in request_data:
        tensor_proto = make_tensor_proto(d['data'], d['in_tensor_dtype'])
        request.inputs[d['in_tensor_name']].CopyFrom(tensor_proto)
    self.logger.debug('Making tensor protos took: {}'.format(time.time() - t))

    try:
        t = time.time()
        predict_response = stub.Predict(request, timeout=request_timeout)
        self.logger.debug(
            'Actual request took: {} seconds'.format(time.time() - t))

        predict_response_dict = predict_response_to_dict(predict_response)
        keys = [k for k in predict_response_dict]
        self.logger.info('Got predict_response with keys: {}'.format(keys))

        return predict_response_dict
    except RpcError as e:
        self.logger.error(e)
        self.logger.error('Prediction failed!')

    return {}
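# This variant lifts gRPC's default 4 MB message cap so large images fit in a
# single request; max_message_size is interpreted in MB. A hedged usage sketch
# (the 100 MB figure is purely illustrative):
response = client.predict(request_data, request_timeout=30, max_message_size=100)
# Note: 'grpc.max_message_length' is the older option name; the separate send
# and receive limits are what current gRPC releases honor, which is presumably
# why all three are set above.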
def predict(self, request_data, request_timeout=10):
    self.logger.info('Sending request to tfserving model')
    self.logger.info('Host: {}'.format(self.host))
    self.logger.info('Model name: {}'.format(self.model_name))
    self.logger.info('Model version: {}'.format(self.model_version))

    t = time.time()
    request = PredictRequest()
    self.logger.debug(
        'Creating request object took: {}'.format(time.time() - t))

    request.model_spec.name = self.model_name
    if self.model_version > 0:
        request.model_spec.version.value = self.model_version

    t = time.time()
    for d in request_data:
        tensor_proto = make_tensor_proto(d['data'], d['in_tensor_dtype'])
        request.inputs[d['in_tensor_name']].CopyFrom(tensor_proto)
    self.logger.debug('Making tensor protos took: {}'.format(time.time() - t))

    try:
        t = time.time()
        # Reuse the stub stored on self instead of building a new channel
        # and stub on every call
        predict_response = self.stub.Predict(request, timeout=request_timeout)
        self.logger.debug(
            'Actual request took: {} seconds'.format(time.time() - t))

        predict_response_dict = predict_response_to_dict(predict_response)
        keys = [k for k in predict_response_dict]
        self.logger.info('Got predict_response with keys: {}'.format(keys))

        return predict_response_dict
    except RpcError as e:
        self.logger.error(e)
        self.logger.error('Prediction failed!')
        # Surface the error to the caller rather than an empty dict
        return {'error': str(e)}
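# The variant above assumes the channel and stub were created once and stored
# on self. A minimal sketch of how that constructor might look (hypothetical;
# the original __init__ is not shown in this section):
import logging

class ProdClient:
    def __init__(self, host, model_name, model_version):
        self.host = host
        self.model_name = model_name
        self.model_version = model_version
        self.logger = logging.getLogger(__name__)
        # Build the channel and stub once and reuse them across predict()
        # calls, avoiding the per-request connection setup cost
        self.channel = grpc.insecure_channel(self.host)
        self.stub = PredictionServiceStub(self.channel)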
def predict(self, request_data: List[Dict[str, Any]], request_timeout: int = 10):
    """Get a model prediction on request data.

    Args:
        request_data: A list with one dict per input graph node of the model,
            each containing the following required fields:
                'data': The data to get predictions on
                'in_tensor_dtype': The datatype of the input; legal types are
                    the keys of dtype_to_number, defined in util.py
                'in_tensor_name': The name of the model's input graph node,
                    often 'inputs'
        request_timeout: Request timeout in seconds

    Returns:
        An empty dict on error; otherwise a dict mapping output graph node
        names to their predicted values.

    Example:
        >>> prod_client = ProdClient(host='localhost:9000', model_name='mnist', model_version=1)
        >>> prod_client.predict(
        ...     request_data=[{
        ...         'data': np.asarray([image]),
        ...         'in_tensor_dtype': 'DT_UINT8',
        ...         'in_tensor_name': 'inputs',
        ...     }]
        ... )
    """
    self.logger.info('Sending request to tfserving model')
    self.logger.info('Host: {}'.format(self.host))
    self.logger.info('Model name: {}'.format(self.model_name))
    self.logger.info('Model version: {}'.format(self.model_version))

    # Create gRPC client and request; the context manager closes the channel
    # when the request is done
    t = time.time()
    with grpc.insecure_channel(self.host, options=self.options) as channel:
        self.logger.debug(
            'Establishing insecure channel took: {}'.format(time.time() - t))

        t = time.time()
        stub = PredictionServiceStub(channel)
        self.logger.debug('Creating stub took: {}'.format(time.time() - t))

        t = time.time()
        request = PredictRequest()
        self.logger.debug(
            'Creating request object took: {}'.format(time.time() - t))

        request.model_spec.name = self.model_name
        if self.model_version > 0:
            request.model_spec.version.value = self.model_version

        t = time.time()
        for d in request_data:
            tensor_proto = make_tensor_proto(d['data'], d['in_tensor_dtype'])
            request.inputs[d['in_tensor_name']].CopyFrom(tensor_proto)
        self.logger.debug(
            'Making tensor protos took: {}'.format(time.time() - t))

        try:
            t = time.time()
            predict_response = stub.Predict(request, timeout=request_timeout)
            self.logger.debug(
                'Actual request took: {} seconds'.format(time.time() - t))

            predict_response_dict = predict_response_to_dict(predict_response)
            keys = [k for k in predict_response_dict]
            self.logger.info(
                'Got predict_response with keys: {}'.format(keys))

            return predict_response_dict
        except RpcError as e:
            self.logger.error(e)
            self.logger.error('Prediction failed!')

    return {}
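# predict_response_to_dict is used by every variant but not shown here. One
# plausible implementation, assuming TensorFlow is available (tf.make_ndarray
# converts a TensorProto into a numpy array); the project's real helper may
# decode the protos differently:
import tensorflow as tf

def predict_response_to_dict(predict_response):
    # PredictResponse.outputs is a map<string, TensorProto>; convert each
    # TensorProto to a numpy array keyed by its output node name
    return {name: tf.make_ndarray(proto)
            for name, proto in predict_response.outputs.items()}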
def predict(self, request_data, request_timeout=10):
    # self: the client instance
    # request_data: the input to the model
    # request_timeout: time in seconds after which the request is aborted

    # Start by logging request information
    self.logger.info('Sending request to tfserving model')
    self.logger.info('Host: {}'.format(self.host))
    self.logger.info('Model name: {}'.format(self.model_name))
    self.logger.info('Model version: {}'.format(self.model_version))

    # Create gRPC client and request
    t = time.time()
    channel = grpc.insecure_channel(self.host)
    self.logger.debug(
        'Establishing insecure channel took: {}'.format(time.time() - t))

    # Time each operation along the way
    t = time.time()
    stub = PredictionServiceStub(channel)
    self.logger.debug('Creating stub took: {}'.format(time.time() - t))

    t = time.time()
    request = PredictRequest()
    self.logger.debug(
        'Creating request object took: {}'.format(time.time() - t))

    # Set the model name
    request.model_spec.name = self.model_name
    # Optionally pin a specific version of the model
    if self.model_version > 0:
        request.model_spec.version.value = self.model_version

    t = time.time()
    # Put the inputs into the request
    for d in request_data:
        tensor_proto = make_tensor_proto(d['data'], d['in_tensor_dtype'])
        request.inputs[d['in_tensor_name']].CopyFrom(tensor_proto)
    self.logger.debug(
        'Making tensor protos took: {}'.format(time.time() - t))

    # Now we are ready to run the prediction
    try:
        t = time.time()
        predict_response = stub.Predict(request, timeout=request_timeout)
        self.logger.debug(
            'Actual request took: {} seconds'.format(time.time() - t))

        predict_response_dict = predict_response_to_dict(predict_response)
        keys = [k for k in predict_response_dict]
        self.logger.info('Got predict_response with keys: {}'.format(keys))

        return predict_response_dict
    except RpcError as e:
        self.logger.error(e)
        self.logger.error('Prediction failed!')

    return {}
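# All five variants lean on the same module-level imports. A sketch of what
# they would typically be; the exact proto module paths depend on how the
# tensorflow serving stubs were generated or vendored, and the helper module
# path is an assumption:
import time

import grpc
from grpc import RpcError

from tensorflow_serving.apis.predict_pb2 import PredictRequest
from tensorflow_serving.apis.prediction_service_pb2_grpc import PredictionServiceStub

from predict_client.util import make_tensor_proto, predict_response_to_dict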