import numpy as np
from tensorflow import make_tensor_proto
from tensorflow_serving.apis.predict_pb2 import PredictRequest


def format_grpc_request(model_name, model_version, model_signatures, signature_name, inputs_to_predict):
    """
    Formats a gRPC request for sending to a TF Serving model server.

    :param model_name: (str) name of the model in the TF Serving model server to predict with
    :param model_version: (int) version of the model in the TF Serving model server to predict with
    :param model_signatures: (dict) model signature definitions, keyed by signature name
    :param signature_name: (str) name of the signature to use
    :param inputs_to_predict: (list(dict)) list of inputs, where each input is a dict mapping
        from input name to input object, named according to the model_spec
    :return: gRPC request object, or False on failure
    """
    try:
        tf_request = PredictRequest()
        tf_request.model_spec.name = model_name
        tf_request.model_spec.version.value = int(model_version)
        tf_request.model_spec.signature_name = signature_name
        model_spec_inputs = model_signatures[signature_name]["input"]
        grpc_inputs = {k: [] for k in model_spec_inputs.keys()}
        for model_input in inputs_to_predict:
            for k in model_spec_inputs.keys():
                grpc_inputs[k].append(model_input[k])
        for k, v in model_spec_inputs.items():
            tf_input = np.array(grpc_inputs[k])
            # make sure input dims match the model_spec
            assert tf_input.shape[1:] == tuple(v['shape'][1:])
            tensor_proto = make_tensor_proto(tf_input, shape=tf_input.shape)
            tf_request.inputs[k].CopyFrom(tensor_proto)
        return tf_request
    except Exception as e:
        print(e)
        return False
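# Usage sketch for format_grpc_request. The model name, signature layout, and
# shapes below are hypothetical; the signatures dict mirrors the keys the
# function indexes ("input", then per-input "shape" with a leading batch dim).
signatures = {
    "serving_default": {
        "input": {
            "image": {"shape": [-1, 28, 28, 1]},
        }
    }
}
batch = [{"image": np.zeros((28, 28, 1), dtype=np.float32)}]
request = format_grpc_request(
    model_name="my_model",               # hypothetical model name
    model_version=1,
    model_signatures=signatures,
    signature_name="serving_default",
    inputs_to_predict=batch,
)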
def predict(url, model, tensor):
    """Send a single Predict call to the server at `url` for `model`,
    parsing `tensor` from its string representation."""
    host, port = url.split(':')
    stub = _create_prediction_service_stub(host, port)
    # str -> np array
    np_array = np.array(eval(tensor))
    # -> tf tensor proto
    request_tensor = make_tensor_proto(np_array)
    # make a call
    request = PredictRequest(
        model_spec=ModelSpec(name=model),
        inputs={'input': request_tensor}
    )
    response = stub.Predict(request)
    # print results
    click.echo('results')
    for key, val_tf_tensor in response.outputs.items():
        nd_array = make_np_array(val_tf_tensor)
        click.echo('  {}: {}'.format(key, nd_array))
def predict(self, idImage: str) -> dict:
    """Open a gRPC connection to TF Serving and run prediction.

    Args:
        idImage (str): id of the input image

    Returns:
        dict: formatted dictionary ready to be sent as JSON
    """
    # call pre_process
    input_tensor = self.pre_process(idImage)
    max_message_size = 256 * 128 * 128 * 10 * 10  # max data sent over the gRPC channel
    channel = grpc.insecure_channel(
        settings.TENSORFLOW_SERVING_ADDRESS + ':' + settings.TENSORFLOW_SERVING_PORT,
        options=[('grpc.max_message_length', max_message_size),
                 ('grpc.max_send_message_length', max_message_size),
                 ('grpc.max_receive_message_length', max_message_size)])
    version = Int64Value(value=1)  # hardcoded version
    model_spec = ModelSpec(version=version, name=self.get_model_name(),
                           signature_name='serving_default')
    grpc_request = PredictRequest(model_spec=model_spec)
    grpc_request.inputs[self.get_input_name()].CopyFrom(input_tensor)
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
    result = stub.Predict(grpc_request, 10)  # 10-second timeout
    # call post_process
    formatted_result = self.post_process(result)
    return formatted_result
def _make_request(self, request: Request) -> PredictRequest:
    pred_request = PredictRequest()
    pred_request.model_spec.name = self.model_name
    pred_request.model_spec.signature_name = self.signature_name
    for k, v in request.query.items():
        # copy each query item onto the outgoing PredictRequest
        pred_request.inputs[k].CopyFrom(tf.make_tensor_proto(v))
    return pred_request
def main(words, words_len):
    request = PredictRequest()
    request.model_spec.name = "estimator_ner"
    request.model_spec.signature_name = "serving_default"
    request.inputs["words"].CopyFrom(tf.make_tensor_proto(words))
    request.inputs["words_len"].CopyFrom(tf.make_tensor_proto(words_len))
    feature = stub.Predict(request, 5.0)
    return feature
def make_request(self, input_batch) -> PredictRequest:
    input_batch = np.stack(input_batch)
    request = PredictRequest()
    request.model_spec.name = self.model_info.architecture
    request.model_spec.signature_name = self.signature_name
    for input_ in self.model_info.inputs:
        tensor_proto = tf.make_tensor_proto(input_batch, shape=input_batch.shape)
        request.inputs[input_.name].CopyFrom(tensor_proto)
    return request
import numpy as np
import tensorflow as tf
from tensorflow_serving.apis.predict_pb2 import PredictRequest


def create_request(num, images):
    images = np.asarray(images).astype(np.float32).flatten()
    image_proto = tf.make_tensor_proto(values=images, shape=[num, 28, 28, 1],
                                       dtype=tf.float32)
    request = PredictRequest()
    request.model_spec.name = 'doodle'
    request.model_spec.signature_name = 'serving_default'
    request.inputs['image'].CopyFrom(image_proto)
    return request
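# A minimal end-to-end sketch using create_request. The server address,
# output tensor name ('scores'), and timeout are assumptions for illustration.
import grpc
from tensorflow_serving.apis import prediction_service_pb2_grpc

channel = grpc.insecure_channel("localhost:8500")
stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
blank = np.zeros((1, 28, 28, 1), dtype=np.float32)    # one blank drawing
response = stub.Predict(create_request(1, blank), timeout=10.0)
scores = tf.make_ndarray(response.outputs["scores"])  # hypothetical output name
print(scores.argmax(axis=-1))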
def __init__(self, model_name="VehicleDetector", min_confidence=0.8):
    self.min_confidence = min_confidence
    self.request = PredictRequest()
    self.request.model_spec.name = model_name
    self.request.model_spec.signature_name = "serving_default"
    self.channel = grpc.insecure_channel('localhost:8500')
    self.predict_service = prediction_service_pb2_grpc.PredictionServiceStub(self.channel)
    self.sub_start_x = None
def prepare_request(self, input_data):
    request = PredictRequest()
    request.model_spec.name = self.name
    request.model_spec.signature_name = self.signature_name
    request.inputs[self.input_name].CopyFrom(
        tf.make_tensor_proto(
            tf.convert_to_tensor(input_data, dtype=tf.string)))
    return request
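# prepare_request builds a tf.string input, which suggests the model consumes
# raw bytes (e.g. encoded images or serialized tf.Example protos). A sketch of
# the proto it produces, with placeholder bytes:
import tensorflow as tf

payload = [b"..."]  # placeholder; any bytes-like payload works
proto = tf.make_tensor_proto(tf.convert_to_tensor(payload, dtype=tf.string))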
def predict(self, request_data, request_timeout=10):
    self.logger.info('Sending request to tfserving model')
    self.logger.info('Host: {}'.format(self.host))
    self.logger.info('Model name: {}'.format(self.model_name))
    self.logger.info('Model version: {}'.format(self.model_version))

    # Create gRPC client and request
    t = time.time()
    channel = grpc.insecure_channel(self.host)
    self.logger.debug('Establishing insecure channel took: {}'.format(time.time() - t))
    t = time.time()
    stub = PredictionServiceStub(channel)
    self.logger.debug('Creating stub took: {}'.format(time.time() - t))
    t = time.time()
    request = PredictRequest()
    self.logger.debug('Creating request object took: {}'.format(time.time() - t))
    request.model_spec.name = self.model_name
    request.model_spec.signature_name = 'predict_images'
    if self.model_version > 0:
        request.model_spec.version.value = self.model_version
    request.inputs['images'].CopyFrom(
        tf.contrib.util.make_tensor_proto(request_data, shape=[1, request_data.size]))

    try:
        t = time.time()
        predict_response = stub.Predict(request, timeout=request_timeout)
        self.logger.debug('Actual request took: {} seconds'.format(time.time() - t))
        predict_response_dict = predict_response_to_dict(predict_response)
        keys = list(predict_response_dict)
        self.logger.info('Got predict_response with keys: {}'.format(keys))
        return predict_response_dict
    except RpcError as e:
        self.logger.error(e)
        self.logger.error('Prediction failed!')
        return {}
def doTest(host, port):
    from tensorflow_serving.apis.predict_pb2 import PredictRequest
    from tensorflow_serving.apis.prediction_service_pb2_grpc import PredictionServiceStub
    from grpc import insecure_channel, StatusCode
    from tensorflow.contrib.util import make_tensor_proto, make_ndarray
    from tensorflow import float32

    target = "%s:%s" % (host, port)
    print("Sending prediction request to", target, "\n")
    channel = insecure_channel(target)
    stub = PredictionServiceStub(channel)
    request = PredictRequest()
    request.model_spec.name = "campaign"
    request.model_spec.signature_name = ""
    request.inputs["hour"].CopyFrom(make_tensor_proto(6, shape=[1], dtype=float32))
    request.inputs["week"].CopyFrom(make_tensor_proto(5, shape=[1], dtype=float32))
    request.inputs["sid"].CopyFrom(make_tensor_proto("47320", shape=[1]))
    request.inputs["sspid"].CopyFrom(make_tensor_proto("3", shape=[1]))
    request.inputs["country"].CopyFrom(make_tensor_proto("DK", shape=[1]))
    request.inputs["os"].CopyFrom(make_tensor_proto("6", shape=[1]))
    request.inputs["domain"].CopyFrom(make_tensor_proto("video9.in", shape=[1]))
    request.inputs["isp"].CopyFrom(make_tensor_proto("Tele Danmark", shape=[1]))
    request.inputs["browser"].CopyFrom(make_tensor_proto("4", shape=[1]))
    request.inputs["type"].CopyFrom(make_tensor_proto("site", shape=[1]))
    request.inputs["lat"].CopyFrom(make_tensor_proto(35000, shape=[1], dtype=float32))
    request.inputs["lon"].CopyFrom(make_tensor_proto(105000, shape=[1], dtype=float32))
    request.inputs["connectiontype"].CopyFrom(make_tensor_proto("2", shape=[1]))
    request.inputs["devicetype"].CopyFrom(make_tensor_proto("1", shape=[1]))
    request.inputs["donottrack"].CopyFrom(make_tensor_proto("0", shape=[1]))
    request.inputs["userid"].CopyFrom(make_tensor_proto("984273063", shape=[1]))
    request.inputs["ua"].CopyFrom(make_tensor_proto("Mozilla/5.0 (Linux; U; Android 5.1.1; en-US; Redmi Note 3 Build/LMY47V) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 UCBrowser/11.0.8.855 U3/0.8.0 Mobile Safari/534.30", shape=[1]))

    (result, status) = stub.Predict.with_call(request)
    if status.code() != StatusCode.OK:
        print("call failed", status)
        return
    predictions = make_ndarray(result.outputs["classes"])
    if predictions.size == 0:
        print("no prediction replied")
        return
    cidIndex = predictions[0]
    print("Server predict with index", cidIndex)
def predict(self, request_data, request_timeout=10):
    # pylint: disable=E1101
    request = PredictRequest()
    request.model_spec.name = self.model_name
    request.model_spec.version.value = self.model_version
    for d in request_data:
        tensor_proto = make_tensor_proto(d['data'], d['in_tensor_dtype'])
        request.inputs[d['in_tensor_name']].CopyFrom(tensor_proto)
    response = self._retry_grpc(request, request_timeout)
    response_dict = grpc_response_to_dict(response)
    self.logger.info('Got PredictResponse with keys: %s.', list(response_dict))
    return response_dict
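# The request_data contract above appears to be a list of dicts, one per model
# input. A hypothetical payload (names and dtype form are assumptions; match
# whatever the local make_tensor_proto wrapper accepts):
import numpy as np

request_data = [{
    'in_tensor_name': 'image',                       # model input name (assumed)
    'in_tensor_dtype': 'DT_FLOAT',                   # dtype token (assumed)
    'data': np.zeros((1, 128, 128, 1), np.float32),
}]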
def call(self, request: Request) -> Any:
    from micro_toolkit.data_process.text_sequence_padding import TextSequencePadding
    tsp = TextSequencePadding("<pad>")
    data = {
        "words": tsp.fit(request.query),
        "words_len": [
            len(list(filter(lambda x: x != 0.0, text)))
            for text in request.query
        ],
    }
    predict_request = PredictRequest()
    predict_request.inputs["words"].CopyFrom(
        tf.make_tensor_proto(data["words"], dtype=tf.string))
    predict_request.inputs["words_len"].CopyFrom(
        tf.make_tensor_proto(data["words_len"], dtype=tf.int32))
    return [predict_request], {}
def test_predict(self):
    """
    python -m unittest tests.sandbox_test.TestSandbox.test_predict
    """
    _initialize_worker("localhost:8500")
    for i in range(10):
        np_array = np.random.rand(1, 20)
        request_tensor = make_tensor_proto(np_array)
        # make a call
        request = PredictRequest(
            model_spec=ModelSpec(name='gs-mp-227'),
            inputs={'input': request_tensor}
        )
        response = _worker_stub_singleton.Predict(request)
        logger.info("+")
    assert True
def classify(self, img):
    """
    Classify a given image using Model Server.

    :param img: np.ndarray, image to classify
    """
    batch = np.array([img.astype(np.float32) / 255])
    request = PredictRequest()
    tensor_proto = tf.make_tensor_proto(batch)
    request.inputs[INPUT_NAME].CopyFrom(tensor_proto)
    request.model_spec.name = self.model_name
    request.model_spec.signature_name = GRAPH_SIGNATURE_NAME
    response = self.stub.Predict(request, self.timeout)
    tensor_proto = response.outputs[OUTPUT_NAME]
    classes = tensor_util.MakeNdarray(tensor_proto)
    assert classes.shape == (1,)
    return int(classes[0])
def get_prediction(
    self,
    model_name: str,
    model_version: int,
    inputs: np.ndarray,
    input_layer_name: str,
    output_layer_name: str,
    input_shape: Tuple[int],
    output_shape: Tuple[int] = None
) -> np.ndarray:
    """Get predictions from a TensorFlow Serving server for the specified
    model, version and input.

    Args:
        model_name (str): Model name
        model_version (int): Version of the model
        inputs (np.ndarray): Input as a NumPy array, in the shape expected by
            the model. This may require an extra axis for the number of
            instances of the input, e.g. (1, 224, 224, 3)
        input_layer_name (str): Input layer name in the model
        output_layer_name (str): Output layer name in the model
        input_shape (Tuple[int]): Shape of the input. Depending on the model,
            an extra first axis may be required which encodes the number of
            instances of the input, e.g. (1, 224, 224, 3)
        output_shape (Tuple[int]): Shape of the model output, where typically
            the first axis is the number of instances of the input provided

    Returns:
        np.ndarray: Predictions from the model
    """
    request = PredictRequest()
    request.model_spec.name = model_name
    request.model_spec.version.value = model_version
    request.model_spec.signature_name = "serving_default"
    request.inputs[input_layer_name].CopyFrom(
        tf.make_tensor_proto(inputs.astype(np.float32), shape=input_shape))
    result = self.stub.Predict(request)
    return np.array(result.outputs[output_layer_name].float_val).reshape(output_shape)
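# get_prediction flattens result.outputs[...].float_val and reshapes it; a
# standalone sketch of that decode step for any float output (the function
# name and keyword handling here are illustrative, not from the source):
import numpy as np

def decode_float_output(response, output_layer_name, output_shape=None):
    """Turn a PredictResponse float output into an ndarray of output_shape."""
    flat = np.array(response.outputs[output_layer_name].float_val)
    return flat.reshape(output_shape) if output_shape is not None else flat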
def test_model_over_grpc(self):
    # feature vectors (random)
    features = np.random.rand(2, 20)
    model_name = 'test_model'
    # request
    request = PredictRequest(
        model_spec=ModelSpec(name=model_name, signature_name='predict'),
        inputs={'features': make_tensor_proto(features)})
    # call stub (run server first)
    stub = create_prediction_service_stub('localhost', 50051)
    try:
        response = stub.Predict(request)
        result_tensor = response.outputs.get('result')
        result_vector = make_np_array(result_tensor)
        print(result_vector)
    except Exception as ex:
        print(ex)
    assert result_vector.ndim == 1   # dims
    assert len(result_vector) == 2   # number of records
def _tensorflow_predict_grpc(self, vects: Dict[str, "np.ndarray"],
                             dtype=None) -> Dict[str, "np.ndarray"]:
    request = PredictRequest()
    request.model_spec.name = self.tf_model_name
    for key, vect in vects.items():
        request.inputs[key].CopyFrom(
            tf.compat.v1.make_tensor_proto(vect, dtype=dtype))
    if not self.grpc_stub:
        self.grpc_stub = connect_tf_serving_grpc(
            self.tf_model_name,
            self.service_settings.tf_serving.host,
            self.service_settings.tf_serving.port,
        )
    r = self.grpc_stub.Predict(request, 1)
    return {
        output_key: np.array(
            r.outputs[output_key].ListFields()[-1][1],
            dtype=self.output_dtypes.get(output_key),
        ).reshape((vect.shape[0],) + self.output_shapes[output_key])
        for output_key in self.output_tensor_mapping
    }
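# The ListFields()[-1][1] indexing above grabs the last populated field of the
# TensorProto (its typed value list). A sketch of a more explicit decode using
# tf.make_ndarray, which derives dtype and shape from the proto itself:
import tensorflow as tf

def decode_outputs(response):
    """Convert every output TensorProto in a PredictResponse to an ndarray."""
    return {key: tf.make_ndarray(proto) for key, proto in response.outputs.items()}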
def main():
    model, signature, batch_file_path, sentence, target = parse_args()
    feat_dict = {"sentences": [], "targets": []}
    if batch_file_path is not None:
        with open(batch_file_path, "r") as batch_file:
            fieldnames = ["target", "sentence"]
            csvreader = DictReader(batch_file, fieldnames=fieldnames)
            for row in csvreader:
                feat_dict["targets"].append(row["target"].strip())
                feat_dict["sentences"].append(row["sentence"].strip())
    else:
        feat_dict["targets"].append(target)
        feat_dict["sentences"].append(sentence)

    l_ctxts, trgs, r_ctxts = FeatureProvider.partition_sentences(
        sentences=feat_dict["sentences"],
        targets=feat_dict["targets"],
        offsets=FeatureProvider.get_target_offset_array(feat_dict),
    )
    l_enc = [
        FeatureProvider.tf_encode_tokens(tokens)
        for tokens in FeatureProvider.tokenize_phrases(l_ctxts)
    ]
    trg_enc = [
        FeatureProvider.tf_encode_tokens(tokens)
        for tokens in FeatureProvider.tokenize_phrases(trgs)
    ]
    r_enc = [
        FeatureProvider.tf_encode_tokens(tokens)
        for tokens in FeatureProvider.tokenize_phrases(r_ctxts)
    ]

    tf_examples = []
    for left, target, right in zip(l_enc, trg_enc, r_enc):
        features = Features(
            feature={
                "left": Feature(bytes_list=BytesList(value=left)),
                "target": Feature(bytes_list=BytesList(value=target)),
                "right": Feature(bytes_list=BytesList(value=right)),
            }
        )
        tf_example = Example(features=features)
        tf_examples.append(tf_example.SerializeToString())

    tensor_proto = make_tensor_proto(
        tf_examples, dtype=tf_string, shape=[len(tf_examples)]
    )

    channel = insecure_channel("127.0.0.1:8500")
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

    # CLASSIFICATION
    classification_req = ClassificationRequest()
    inputs = Input(example_list=ExampleList(examples=[tf_example]))
    classification_req.input.CopyFrom(inputs)  # pylint: disable=E1101
    classification_req.model_spec.name = "lg"  # pylint: disable=E1101
    classification = stub.Classify(classification_req, 60.0)
    print(classification)

    # PREDICTION
    prediction_req = PredictRequest()
    prediction_req.inputs["instances"].CopyFrom(tensor_proto)  # pylint: disable=E1101
    prediction_req.model_spec.signature_name = signature  # pylint: disable=E1101
    prediction_req.model_spec.name = model  # pylint: disable=E1101
    prediction = stub.Predict(prediction_req, 60.0)
    print(prediction)
def predict(self, request_data, request_timeout=10):
    self.logger.info('Sending request to tfserving model')
    self.logger.info('Host: {}'.format(self.host))
    self.logger.info('Model name: {}'.format(self.model_name))
    self.logger.info('Model version: {}'.format(self.model_version))

    image = Image.open(request_data)
    image = image.resize((224, 224), Image.NEAREST)
    image = np.asarray(image).reshape((1, 224, 224, 3))

    # Create gRPC client and request
    t = time.time()
    channel = grpc.insecure_channel(self.host)
    self.logger.debug('Establishing insecure channel took: {}'.format(time.time() - t))
    t = time.time()
    stub = PredictionServiceStub(channel)
    self.logger.debug('Creating stub took: {}'.format(time.time() - t))
    t = time.time()
    request = PredictRequest()
    self.logger.debug('Creating request object took: {}'.format(time.time() - t))
    request.model_spec.name = self.model_name
    request.model_spec.signature_name = 'predict_images'
    if self.model_version > 0:
        request.model_spec.version.value = self.model_version

    self.logger.debug('Image shape: {}'.format(image.shape))
    request.inputs['images'].CopyFrom(
        tf.contrib.util.make_tensor_proto(image.astype(dtype=np.float32),
                                          shape=[1, 224, 224, 3]))
    try:
        t = time.time()
        predict_response = stub.Predict(request, timeout=request_timeout)
        self.logger.debug('Actual request took: {} seconds'.format(time.time() - t))
        predict_response_dict = predict_response_to_dict(predict_response)
        keys = list(predict_response_dict)
        self.logger.info('Got predict_response with keys: {}'.format(keys))
        return predict_response_dict
    except RpcError as e:
        self.logger.error(e)
        self.logger.error('Prediction failed!')
        return {}
args = vars(ap.parse_args())

input_name = "input_1"
output_name = "dense_1"

# Process input image
img_path = args["image"]
img = load_img(img_path, target_size=(224, 224))
img = img_to_array(img)
img = np.expand_dims(img, axis=0)
img = imagenet_utils.preprocess_input(img)

# Create new gRPC request
request = PredictRequest()
request.model_spec.name = "flowers17"
request.model_spec.signature_name = "serving_default"
request.inputs[input_name].CopyFrom(tf.make_tensor_proto(img))

# Send request to server
channel = grpc.insecure_channel("localhost:8500")
predict_service = prediction_service_pb2_grpc.PredictionServiceStub(channel)
response = predict_service.Predict(request, timeout=10.0)

res = response.outputs[output_name].float_val
print("[INFO] Raw Prediction Labels: {}".format(res))
prediction = LABELS[np.argmax(res)]
print("[INFO] Predicted Label: {}".format(prediction))
def call(self, request: Request) -> Any:
    predict_request = PredictRequest()
    predict_request.inputs["embedding_input"].CopyFrom(
        tf.make_tensor_proto(request.query, dtype=tf.float32))
    return [predict_request], {}
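# A generic helper in the same spirit as the wrappers above: build a
# PredictRequest from a dict of numpy arrays. The function name and parameters
# are illustrative, not taken from any snippet in this collection.
import numpy as np
import tensorflow as tf
from tensorflow_serving.apis.predict_pb2 import PredictRequest

def build_predict_request(model_name, signature_name, tensors):
    """tensors: dict mapping input name -> np.ndarray."""
    request = PredictRequest()
    request.model_spec.name = model_name
    request.model_spec.signature_name = signature_name
    for name, array in tensors.items():
        request.inputs[name].CopyFrom(tf.make_tensor_proto(array))
    return request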