def post(self, model, version=None):
    """Proxy a JSON prediction request to the TensorFlow Serving stub.

    The request body must be a JSON object whose ``request_key`` entry (read
    from the application settings) holds a non-empty list of instances. Each
    instance is a mapping of input column name to value; the columns of the
    first instance define the tensors that are sent.

    Args:
        model: Name of the served model.
        version: Optional model version. When ``None`` the version field of
            the request is left unset.

    Writes ``{"predictions": [...]}`` with one dict per instance, or answers
    with HTTP 400 when the payload is malformed.
    """
    request_key = self.settings['request_key']
    request_data = tornado.escape.json_decode(self.request.body)
    instances = request_data.get(request_key)

    # send_error() expects an HTTP status code as its first argument and does
    # not abort the handler, so every rejection must return explicitly.
    if not instances:
        self.send_error(
            400,
            reason='Request json object have to use the key: %s' % request_key)
        return
    # Type check first: calling len() on a non-sequence would raise.
    if not isinstance(instances, (list, tuple)):
        self.send_error(
            400, reason='Request instances object have to use be a list')
        return

    instances = decode_b64_if_needed(instances)
    input_columns = instances[0].keys()

    request = predict_pb2.PredictRequest()
    request.model_spec.name = model
    if version is not None:
        # model_spec.version is a wrapper message; only its .value is
        # assignable.
        request.model_spec.version.value = int(version)

    for input_column in input_columns:
        values = [instance[input_column] for instance in instances]
        request.inputs[input_column].CopyFrom(
            tf.make_tensor_proto(values, shape=[len(values)]))

    stub = self.settings['stub']
    result = yield fwrap(
        stub.Predict.future(request, self.settings['rpc_timeout']))

    # Turn the column-oriented outputs into one dict per instance.
    output_keys = list(result.outputs.keys())
    columns = [
        tf.make_ndarray(result.outputs[output_key]).tolist()
        for output_key in output_keys
    ]
    predictions = [dict(zip(output_keys, row)) for row in zip(*columns)]
    self.write(dict(predictions=predictions))
def mnist_client():
    """Send one MNIST image to the served ``mnist`` model and log the result.

    Configuration is read from the environment: ``TF_MODEL_SERVER_HOST``,
    ``TF_MODEL_SERVER_PORT``, ``TF_DATA_DIR``, ``TF_MNIST_IMAGE_PATH`` and
    ``TF_MNIST_TEST_IMAGE_NUMBER``. By default a random digit image
    ``data/<0-9>.png`` is used, and "TEST PASSED!!!" is logged when the
    predicted class equals that digit.
    """
    num_img = randint(0, 9)
    server_host = os.getenv("TF_MODEL_SERVER_HOST", "127.0.0.1")
    server_port = int(os.getenv("TF_MODEL_SERVER_PORT", 9000))
    data_dir = os.getenv("TF_DATA_DIR", "/tmp/data/")
    image_path = os.getenv("TF_MNIST_IMAGE_PATH",
                           "data/" + str(num_img) + ".png")
    test_image_number = int(os.getenv("TF_MNIST_TEST_IMAGE_NUMBER", -1))

    # NOTE(review): os.getenv() is given a default above, so image_path is
    # never None and the two dataset branches below are currently unreachable.
    if image_path is not None:
        raw_image = Image.open(image_path)
        int_image = numpy.array(raw_image)
        image = numpy.reshape(int_image, 784).astype(numpy.float32)
    elif test_image_number > -1:
        test_data_set = input_data.read_data_sets(data_dir, one_hot=True).test
        image = test_data_set.images[test_image_number]
    else:
        test_data_set = input_data.read_data_sets(data_dir, one_hot=True).test
        image = random.choice(test_data_set.images)

    channel = implementations.insecure_channel(server_host, server_port)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    request = predict_pb2.PredictRequest()
    request.model_spec.name = "mnist"
    request.model_spec.signature_name = "serving_default"
    request.inputs['x'].CopyFrom(
        tf.contrib.util.make_tensor_proto(image, shape=[1, 28, 28]))

    result = stub.Predict(request, 10.0)  # 10 secs timeout
    predicted = result.outputs["classes"].int_val[0]

    logging.info(MNIST.display(image, threshold=0))
    logging.info("Your model says the above number is... %d!", predicted)
    if num_img == predicted:
        logging.info("TEST PASSED!!!")
def run(image, model, host='localhost', port=8500,
        signature_name='serving_default'):
    """Classify one image through a TensorFlow Serving gRPC endpoint.

    Args:
        image: Path (or file object) accepted by ``Image.open``.
        model: Name of the served model.
        host: Serving host name.
        port: Serving gRPC port.
        signature_name: Signature of the model to invoke.

    Prints the round-trip time, then the arg-max index and maximum value of
    the ``predictions/Softmax:0`` output.
    """
    target = '{host}:{port}'.format(host=host, port=port)
    stub = prediction_service_pb2_grpc.PredictionServiceStub(
        grpc.insecure_channel(target))

    pixels = np.array(Image.open(image)).astype(tf.keras.backend.floatx())

    started = time.time()
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model
    request.model_spec.signature_name = signature_name
    # The request is sent as a batch of one image with 3 channels.
    height, width = pixels.shape[0], pixels.shape[1]
    request.inputs['input_image'].CopyFrom(
        make_tensor_proto(pixels, shape=[1, height, width, 3]))
    result = stub.Predict(request, 10.0)
    elapsed = time.time() - started

    print('time elapased: {}'.format(elapsed))

    # Rebuild the ndarray from the flat float_val list and the declared shape.
    softmax_proto = result.outputs["predictions/Softmax:0"]
    dims = tf.TensorShape(softmax_proto.tensor_shape).as_list()
    scores = np.array(softmax_proto.float_val).reshape(dims)
    print(np.argmax(scores), np.max(scores))
def __format_request(self, model_name, features):
    """Build the PredictRequest for the basic, coherence or prompt-relevant
    score model.

    Args:
        model_name: Name of the model the request is addressed to; must be
            one of "bsp", "csp" or "psp".
        features: Mapping with the BERT-encoded document features. Each of
            the six entries read here must expose a ``.shape`` attribute.

    Returns:
        The populated ``predict_pb2.PredictRequest``.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    if model_name not in ("bsp", "csp", "psp"):
        raise ValueError(
            "model_name need to be chosen from bsp, csp and psp!")

    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name

    input_names = (
        "doc_encodes",
        "article_set",
        "domain1_score",
        "article_id",
        "doc_sent_num",
        "prompt_encodes",
    )
    for input_name in input_names:
        tensor = features[input_name]
        request.inputs[input_name].CopyFrom(
            tf.contrib.util.make_tensor_proto(tensor, shape=tensor.shape))
    return request
def predictImage():
    """Classify an uploaded image with the served ``inception`` model.

    Expects a POST request carrying the image under the ``file`` form field.

    Returns:
        A JSON string. On success it holds ``class_index``, ``confidence``
        and ``result`` (all as strings); on a rejected request it holds a
        single ``error`` message.
    """
    # Local import keeps this block self-contained. json.dumps replaces the
    # previous str(dict), which produced a Python repr rather than JSON.
    import json

    def _error(message):
        return json.dumps({"error": message})

    if request.method != 'POST':
        return _error("Method not allowed")
    if 'file' not in request.files:
        return _error("No file part ")
    file = request.files['file']
    if file.filename == '':
        return _error("No selected file")

    image = PIL.Image.open(file)
    processed_image = process_image(image)

    channel = implementations.insecure_channel('localhost', 9000)
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    _request = predict_pb2.PredictRequest()
    _request.model_spec.name = 'inception'
    _request.inputs['input_image'].CopyFrom(
        tf.contrib.util.make_tensor_proto(processed_image,
                                          shape=[1, 299, 299, 3]))
    result = stub.Predict(_request, 60.0)

    result_array = tensor_util.MakeNdarray(result.outputs['dense/Softmax:0'])
    argmax = np.argmax(result_array, axis=-1)

    response = {
        'class_index': str(argmax[0]),
        'confidence': str(result_array[0][argmax][0]),
        'result': str(original_labels[argmax][0]),
    }
    return json.dumps(response)
def run(inputs):
    """Send one utterance's features to a randomly chosen serving backend.

    Args:
        inputs: Mapping with the entries ``'mfcc'``, ``'spec'`` and ``'mel'``.
            They are sent with shapes [1, 334, 60], [1, 334, 569] and
            [1, 334, 90] respectively (the leading 1 is the batch size).

    Returns:
        Dict with ``'pred_spec'`` reshaped to (1, 334, 569) and ``'ppgs'``
        reshaped to (1, 334, 61).
    """
    started = time.time()

    # Fetch the inputs.
    mfcc, spec, mel = inputs['mfcc'], inputs['spec'], inputs['mel']

    # Pick a backend and open a channel to it.
    index = choice(indexs)
    print('index:', index)
    backend = tf_serving_server[index]
    host, port = backend['server'].split(':')
    channel = implementations.insecure_channel(host, int(port))
    # The beta channel wraps the real gRPC channel in ``_channel``.
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel._channel)

    request = predict_pb2.PredictRequest()
    request.model_spec.name = backend['name']
    request.model_spec.signature_name = 'prediction_pipeline'
    feature_specs = (
        ('x_mfccs:0', mfcc, 60),
        ('y_spec:0', spec, 569),
        ('y_mel:0', mel, 90),
    )
    for input_name, value, width in feature_specs:
        request.inputs[input_name].CopyFrom(
            make_tensor_proto(value, shape=[1, 334, width], dtype='float'))

    result = stub.Predict(request, 60.0)

    pred_spec = np.array(
        result.outputs['pred_spec:0'].float_val).reshape(1, 334, 569)
    ppgs = np.array(result.outputs['ppgs:0'].float_val).reshape(1, 334, 61)

    print('VC_serving Time:', time.time() - started)
    return {'pred_spec': pred_spec, 'ppgs': ppgs}
def Apply(self):
    """Run the ``prnet_main`` model on the cropped image and store the
    resulting key points and vertices on the instance.

    When ``self.valid_input`` is falsy, ``self.key_points`` and
    ``self.vertices`` are both set to the string ``"None"`` and no request
    is made.
    """
    if not self.valid_input:
        self.key_points = "None"
        self.vertices = "None"
        return

    # Add a leading batch axis and send the image as float32.
    batch = self.cropped_image[np.newaxis, :, :, :].astype(np.float32)

    internal_request = predict_pb2.PredictRequest()
    internal_request.model_spec.name = 'prnet_main'
    internal_request.model_spec.signature_name = 'predict_images'
    internal_request.inputs['input'].CopyFrom(
        tf.contrib.util.make_tensor_proto(batch, shape=batch.shape))
    internal_result = self.istub.Predict(internal_request, 10.0)

    raw_pos = np.squeeze(
        tensor_util.MakeNdarray(internal_result.outputs['output']))
    self.cropped_pos = raw_pos * PRNet.MaxPos

    # NOTE(review): cropped_vertices is likely a view of self.cropped_pos, so
    # the in-place write below may also modify it - confirm this is intended.
    cropped_vertices = np.reshape(self.cropped_pos, [-1, 3]).T
    depth = cropped_vertices[2, :].copy() / self.tform_params[0, 0]
    cropped_vertices[2, :] = 1

    # Undo the crop transform on x/y and re-attach the rescaled depth row.
    restored = np.dot(np.linalg.inv(self.tform_params), cropped_vertices)
    restored = np.vstack((restored[:2, :], depth))
    pos = np.reshape(restored.T,
                     [PRNet.resolution_op, PRNet.resolution_op, 3])

    self.key_points = pos[self.uv_kpt_ind[1, :], self.uv_kpt_ind[0, :], :]
    flat_vertices = np.reshape(pos, [self.resolution_op**2, -1])
    self.vertices = flat_vertices[self.face_ind, :]
def main(_):
    """Classify every file listed in ``config.test_csv`` with the served
    ``predict_defect`` model and print the predicted label per file.

    The server address is read from ``FLAGS.server`` ("host:port"). The
    elapsed wall-clock time is printed at the end.
    """
    host, port = FLAGS.server.split(':')
    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    config = Config()
    with open(config.test_csv, "r") as val_listfile:
        # Each entry is (file name, 0), as the original list was built.
        val_list = [(line.strip("\n"), 0) for line in val_listfile]

    print('Time Started')
    # time.clock() was removed in Python 3.8 and measured CPU time on Unix,
    # which does not include time spent waiting on the RPC.
    start = time.time()

    helper = getModelHelper(config)
    data = helper.load_and_preprocess_test_data(val_list)

    for i, dataPoint in enumerate(data):
        request = predict_pb2.PredictRequest()
        request.model_spec.name = 'predict_defect'
        request.model_spec.signature_name = 'predict_defect'
        request.inputs['image'].CopyFrom(
            tf.contrib.util.make_tensor_proto(dataPoint,
                                              shape=np.shape(dataPoint)))
        result = stub.Predict(request, 100.0)  # 100 secs timeout

        resultD = json.loads(MessageToJson(result))
        floatVal = resultD['outputs']['label']['floatVal']
        index = np.argmax(floatVal)
        print("File %s --> Prediction %s" % (val_list[i][0], LBLS[index]))

    print('Time Taken for requests: ', time.time() - start)
    return
def do_inference(hostport, work_dir, concurrency, num_tests):
    """Run every test image through the served ``cyclegan`` model.

    Images are read from ``./datasets/iphone2foodIns/testA`` and sent one at
    a time.

    Args:
        hostport: Host:port address of the PredictionService.
        work_dir: Not used by this function.
        concurrency: Passed to ``_ResultCounter`` only.
        num_tests: Passed to ``_ResultCounter`` only.

    Returns:
        A list with one array of shape (1, 256, 256, 3) per input file, built
        from the ``scores`` output.
    """
    pattern = './datasets/{}/*.*'.format('iphone2foodIns' + '/testA')
    sample_files = glob(pattern)

    host, port = hostport.split(':')
    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    # Constructed as in the original; it is never consulted afterwards.
    result_counter = _ResultCounter(num_tests, concurrency)

    # One request object is reused; only its 'images' input changes per file.
    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'cyclegan'
    request.model_spec.signature_name = 'predict_images'

    preds = []
    for sample_file in sample_files:
        image = np.array([load_test_data(sample_file, 256)]).astype(np.float32)
        request.inputs['images'].CopyFrom(
            tf.contrib.util.make_tensor_proto(image, shape=image.shape))
        response = stub.Predict(request, 5.0)  # 5 seconds
        flat_scores = np.asarray(response.outputs['scores'].float_val[:])
        preds.append(np.reshape(flat_scores, (1, 256, 256, 3)))
    return preds
def do_inference(hostport, dir_extra_info):
    """Send four hard-coded sample rows to the served ``Pharmap`` model and
    pretty-print the five best predictions for each row.

    Args:
        hostport: "host:port" address of the PredictionService.
        dir_extra_info: Location handed to ``util.load_extra_info``, which
            yields the index-to-label mapping and the feature scaler.
    """
    idx2label, scaler = util.load_extra_info(dir_extra_info)

    stub = prediction_service_pb2_grpc.PredictionServiceStub(
        grpc.insecure_channel(hostport))

    request = predict_pb2.PredictRequest()
    request.model_spec.name = "Pharmap"

    # Examples of raw inputs.
    sample_ptilosarcus = [
        9.62174267, 109, 1.83314993, 3.38175554, 33.7950944, 20.76361192
    ]  # Ptilosarcus gurneyi
    sample_heteropolypus = [
        4.655654, 660, 2.946845, 0.520219, 34.254101, 42.145364
    ]  # Heteropolypus ritteri
    sample_heteropolypus_alt = [
        4.208168562365363, 914, 3.2018021623, 0.5074285316, 34.414465499,
        43.407423898496646
    ]  # Heteropolypus ritteri

    raw_rows = [
        sample_heteropolypus,
        sample_ptilosarcus,
        sample_heteropolypus,
        sample_heteropolypus_alt,
    ]
    # The scaler must be the same one that was used during training.
    scaled_rows = scaler.transform(raw_rows)
    request.inputs['input'].CopyFrom(
        tf.make_tensor_proto(scaled_rows, dtype=np.float32))

    # 60 is the timeout in seconds.
    result = stub.Predict(request, 60.0)
    scores = tensor_util.MakeNdarray(result.outputs['scores'])
    print(scores.shape)
    print(scores)

    row_count = scores.shape[0]
    for n in range(row_count):
        print(" ****** ")
        best = get_n_best_pred_for_one_item(scores[n, :], 5, idx2label)
        pprint.pprint(best)
def predict_list(self, image_list):
    """Send every image to the ``clreceipt`` model asynchronously and collect
    the decoded labels.

    Args:
        image_list: Sequence of image arrays; ``image.shape[1]`` is sent as
            the ``width`` input.

    Returns:
        Dict mapping the image index to its label string, which is the
        concatenation of ``'@' + _get_string(...)`` for ``output1`` through
        ``output3``. Indices whose request failed are absent.
    """
    result = {}
    # Indices whose RPC or decoding failed. Without this the wait loop below
    # never terminated after a single failure.
    failed = set()

    for i, image in enumerate(image_list):
        request = predict_pb2.PredictRequest()
        request.model_spec.name = 'clreceipt'
        request.model_spec.signature_name = (
            tf.saved_model.signature_constants
            .DEFAULT_SERVING_SIGNATURE_DEF_KEY)
        request.inputs['images'].CopyFrom(
            tf.contrib.util.make_tensor_proto(image, shape=image.shape))
        request.inputs['width'].CopyFrom(
            tf.contrib.util.make_tensor_proto(image.shape[1], shape=[1]))
        result_future = self.stub.Predict.future(request, 300.0)  # 300 secs

        # ``i=i`` binds the current index; a plain closure would see the
        # loop's last value.
        def _callback(result_future0, i=i):
            exception = result_future0.exception()
            if exception:
                print(exception)
                failed.add(i)
                return
            try:
                sys.stdout.write(str(i))
                sys.stdout.flush()
                outputs = result_future0.result().outputs
                labels = ''
                for j in range(1, 4):
                    response = numpy.array(
                        outputs['output' + str(j)].int64_val)
                    labels += '@' + _get_string(response)
                result[i] = labels
            except Exception as err:  # callback boundary: never leave i pending
                print(err)
                failed.add(i)

        print('push ' + str(i))
        result_future.add_done_callback(_callback)

    # Poll until every request has either produced a result or failed.
    while len(result) + len(failed) < len(image_list):
        sleep(0.3)
        print('wait')
    return result
def main(_):
    """Send every image under ``FLAGS.data_dir`` to the served model ``m`` and
    print the index of the highest prediction together with the request time.

    Images are read as BGR, resized to ``FLAGS.width`` x ``FLAGS.height`` and
    sent in channel-first layout. Files that OpenCV cannot decode are skipped.
    """
    files = [
        os.path.join(dirpath, name)
        for dirpath, _dirnames, filenames in os.walk(FLAGS.data_dir)
        for name in filenames
    ]

    # One channel and stub serve all requests.
    channel = grpc.insecure_channel(FLAGS.server + ":9000")
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

    for f in files:
        image = cv2.imread(f, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread returns None instead of raising for non-image files.
            print("Skipping unreadable image: %s" % f)
            continue

        # cv2.resize takes dsize as (width, height).
        image = cv2.resize(image, (FLAGS.width, FLAGS.height),
                           interpolation=cv2.INTER_CUBIC)
        # HWC -> CHW. If the model input layout is not BGR, convert to RGB
        # here.
        image = image.transpose((2, 0, 1))
        c, h, w = image.shape

        request = predict_pb2.PredictRequest()
        request.model_spec.name = 'm'
        request.model_spec.signature_name = 'predict_images'
        request.inputs['input'].CopyFrom(
            tf.contrib.util.make_tensor_proto(image, shape=[1, c, h, w]))

        start = time.time()
        result = stub.Predict(request, 10.0,
                              metadata=(('x-auth-token',
                                         FLAGS.token), ))  # 10 seconds

        # Example accuracy check; the method depends on the model and the
        # test dataset. The last index holding the maximum is reported, as
        # before.
        val = result.outputs['InceptionV4/Logits/Predictions'].float_val
        target = max(val)
        index = max(i for i, score in enumerate(val) if score == target)
        print(index)

        dur1 = time.time() - start
        print("Get Result time: %.6f" % dur1)
def sendRequest(url):
    """POST a serialized PredictRequest for the ``inception`` model to ``url``
    and print the returned classes with their scores.

    Args:
        url: HTTP endpoint that accepts ``application/proto`` bodies.

    On a non-200 or non-proto reply, the ``X-Ddy-Error-Message`` header (if
    any) and the raw response body are printed instead.
    """
    request = predict_pb2.PredictRequest()
    response = predict_pb2.PredictResponse()
    request.model_spec.name = 'inception'
    request.model_spec.signature_name = 'predict_images'
    request.inputs['images'].CopyFrom(
        tf.make_tensor_proto(getImageData(), shape=[1]))
    data = request.SerializeToString()

    data_type = "application/proto"
    # Add the AppCode here.
    token = "YOUR_CODE"
    headers = {
        # Set the content type.
        'content-type': data_type,
        # Replace with your token.
        'Authorization': "AppCode " + token
    }
    res = requests.post(url, data, headers=headers)

    # .get() keeps a missing header from raising KeyError and hiding the
    # actual server reply.
    if (res.status_code == 200
            and res.headers.get('Content-Type') == data_type):
        response.ParseFromString(res.content)
        classes = response.outputs["classes"].string_val
        scores = response.outputs["scores"].float_val
        for label, score in zip(classes, scores):
            print("{} score: {}".format(label, score))
    else:
        # Report the error message and the raw body.
        print(res.headers.get('X-Ddy-Error-Message'))
        print(res.content)
def _predict_grpc_custom_data(self, request):
    """Serve a prediction whose payload travels in ``customData``.

    The user supplies a ready-made ``PredictRequest`` packed into the
    incoming message's ``customData``. This supports models whose input is a
    map of tensors (``PredictRequest.inputs: map<string, TensorProto>``)
    rather than a single tensor. The raw ``PredictResponse`` is packed into
    the reply the same way, so the caller handles the output map itself.

    Args:
        request: Incoming message exposing a ``customData`` Any field.

    Returns:
        A ``prediction_pb2.SeldonMessage`` whose ``customData`` wraps the
        prediction response.
    """
    # Input: unpack the user-prepared request.
    tf_request = predict_pb2.PredictRequest()
    request.customData.Unpack(tf_request)

    # Prediction.
    tf_response = self._handle_grpc_prediction(tf_request)

    # Output: wrap the raw response.
    packed_response = Any()
    packed_response.Pack(tf_response)
    return prediction_pb2.SeldonMessage(customData=packed_response)
def do_inference(hostport, features, model_name):
    """Send one serialized example to ``model_name`` and return its output.

    Args:
        hostport: "host:port" address of the PredictionService.
        features: Mapping holding either ``'so'`` (turned into a sequence
            example) or ``'mu_sigma'`` (turned into a dense example).
        model_name: Name of the served model.

    Returns:
        The first non-empty value list of the ``output`` tensor, checked in
        the order ``int64_val``, ``int_val``; otherwise ``float_val``.
    """
    example = (make_seq_example(features['so']) if 'so' in features else
               make_dense_example(features['mu_sigma']))

    host, port = hostport.split(':')
    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    request.model_spec.signature_name = 'output'
    serialized = example.SerializeToString()
    request.inputs['examples'].CopyFrom(
        tf.contrib.util.make_tensor_proto(serialized, shape=[1]))

    output = stub.Predict(request, 100.0).outputs['output']  # 100 s timeout
    for values in (output.int64_val, output.int_val):
        if len(values) > 0:
            return values
    return output.float_val
def __init__(self, host="localhost", port=8500, model_name="flower",
             model_signature="flower_signature", input_name="input_image",
             output_name="emb_pred"):
    """Open an insecure gRPC channel and prepare a reusable PredictRequest.

    Args:
        host: Serving host name.
        port: Serving gRPC port.
        model_name: Name written into the request's model spec.
        model_signature: Signature name written into the request's model
            spec.
        input_name: Stored on the instance as ``input_name``.
        output_name: Stored on the instance as ``output_name``.
    """
    self.host, self.port = host, port

    target = "{}:{}".format(self.host, self.port)
    self.channel = grpc.insecure_channel(target)
    self.stub = prediction_service_pb2_grpc.PredictionServiceStub(
        self.channel)

    self.input_name, self.output_name = input_name, output_name

    prepared = predict_pb2.PredictRequest()
    prepared.model_spec.name = model_name
    prepared.model_spec.signature_name = model_signature
    self.request = prepared
def PostProcess(self):
    """Package this stage's results into a PredictRequest for the next stage.

    When ``self.can_output`` is truthy, ``frames``, ``temporal_rois``,
    ``norm_rois`` and ``actor_boxes`` are pickled; otherwise each input
    carries the string ``"None"``.

    Returns:
        A ``predict_pb2.PredictRequest`` with the four ``*_output`` inputs.
    """
    if self.can_output:
        payloads = (
            ('frames_output', pickle.dumps(self.frames)),
            ('temporal_rois_output', pickle.dumps(self.temporal_rois)),
            ('norm_rois_output', pickle.dumps(self.norm_rois)),
            ('actor_boxes_output', pickle.dumps(self.actor_boxes)),
        )
    else:
        payloads = (
            ('frames_output', "None"),
            ('temporal_rois_output', "None"),
            ('norm_rois_output', "None"),
            ('actor_boxes_output', "None"),
        )

    next_request = predict_pb2.PredictRequest()
    for input_name, payload in payloads:
        next_request.inputs[input_name].CopyFrom(
            tf.make_tensor_proto(payload))
    return next_request
def infer(self, images):
    """Run the served model on ``images`` and return its outputs as arrays.

    Args:
        images: Value convertible by ``tf.make_tensor_proto``; sent as the
            ``inputs`` tensor.

    Returns:
        Dict mapping each output name to a numpy array. According to the
        original notes the keys are:
            num_detections: number of detections
            detection_scores: 2d array of confidence, [image_idx, bbx_idx]
            detection_classes: 2d array, [image_idx, bbx_idx]
            detection_boxes: 3d array, [image_idx, bbx_idx] is
                [ymin, xmin, ymax, xmax]
        ``detection_classes``, when present, is cast to int64.
    """
    # The model name must match the one the serving instance was started
    # with.
    request = predict_pb2.PredictRequest()
    request.model_spec.name = self.model_name
    request.inputs['inputs'].CopyFrom(tf.make_tensor_proto(images))

    response = self.stub.Predict(request, 10.0)  # 10 secs timeout

    # Convert every TensorProto to a numpy array.
    parsed_results = {
        name: tf.make_ndarray(proto)
        for name, proto in response.outputs.items()
    }

    # Fix the output result type.
    if 'detection_classes' in parsed_results:
        classes = parsed_results['detection_classes']
        parsed_results['detection_classes'] = classes.astype(np.int64)
    return parsed_results
def Apply(self):
    """Translate the source tensors through the served ``transformer`` model
    and store the final result on the instance.

    Reads the two ``source_tensors`` entries from ``self.feed_dict``, sends
    them to the ``predict_output`` signature, then passes the inputs and the
    returned ``tgt_txt`` to ``self.model.infer``. Sets ``self.inputs``,
    ``self.outputs`` and ``self.final_result``.
    """

    def _source_value(position):
        # Look up the fed value for the data layer's source tensor.
        tensors = self.model.get_data_layer().input_tensors["source_tensors"]
        return self.feed_dict[tensors[position]]

    src_text = _source_value(0)
    src_text_length = _source_value(1).astype(np.int32)

    internal_request = predict_pb2.PredictRequest()
    internal_request.model_spec.name = 'transformer'
    internal_request.model_spec.signature_name = 'predict_output'
    for input_name, value in (('src_text', src_text),
                              ('src_text_length', src_text_length)):
        internal_request.inputs[input_name].CopyFrom(
            tf.contrib.util.make_tensor_proto(value, shape=list(value.shape)))

    internal_result = self.istub.Predict(internal_request, 10.0)  # 10 seconds
    tgt_txt = tensor_util.MakeNdarray(internal_result.outputs['tgt_txt'])

    self.inputs = {"source_tensors": [src_text, src_text_length]}
    self.outputs = [tgt_txt]
    inferred = self.model.infer(self.inputs, self.outputs)
    self.final_result = inferred[1][0]
def do_inference(hostport, work_dir, batch_size, run_num, thread_id):
    """Issue ``run_num`` timed prediction requests against the ``mnist`` model.

    Batches are drawn from the *train* split of the MNIST data in
    ``work_dir``. The per-request durations are summed locally; nothing is
    returned or printed.

    Args:
        hostport: "host:port" address of the PredictionService.
        work_dir: Directory passed to ``mnist_input_data.read_data_sets``.
        batch_size: Number of images per request.
        run_num: Number of requests to send.
        thread_id: Not used by this function.
    """
    total_duration = 0.0
    data_set = mnist_input_data.read_data_sets(work_dir).train

    stub = prediction_service_pb2_grpc.PredictionServiceStub(
        grpc.insecure_channel(hostport))

    for _ in range(run_num):
        began = time.time()

        request = predict_pb2.PredictRequest()
        request.model_spec.name = 'mnist'
        request.model_spec.signature_name = 'predict_images'

        images, _labels = data_set.next_batch(batch_size)
        request.inputs['images'].CopyFrom(
            tf.contrib.util.make_tensor_proto(images, shape=images.shape))
        stub.Predict(request, 10.0)

        # The measured span covers request construction and the RPC.
        total_duration += time.time() - began
def create_prediction_request(signature_def, signature_key, payload):
    """Build a PredictRequest for model ``"model"`` from a payload dict.

    Args:
        signature_def: Nested mapping; ``signature_def[signature_key]
            ["inputs"][column]`` must provide ``"dtype"`` and
            ``"tensorShape"["dim"]`` (a list of entries with ``"size"``).
        signature_key: Signature to address; also written into the request.
        payload: Mapping of input column name to value.

    Returns:
        The populated ``predict_pb2.PredictRequest``.

    Raises:
        UserException: If a value cannot be converted to a tensor of the
            signature's dtype. The expected shape is included in the message.
    """
    prediction_request = predict_pb2.PredictRequest()
    prediction_request.model_spec.name = "model"
    prediction_request.model_spec.signature_name = signature_key

    for column_name, value in payload.items():
        column_spec = signature_def[signature_key]["inputs"][column_name]
        # The expected shape is only used for the error message below.
        shape = [int(dim["size"]) for dim in column_spec["tensorShape"]["dim"]]
        sig_type = column_spec["dtype"]

        try:
            tensor_proto = tf.compat.v1.make_tensor_proto(
                value, dtype=DTYPE_TO_TF_TYPE[sig_type])
            prediction_request.inputs[column_name].CopyFrom(tensor_proto)
        except Exception as e:
            raise UserException(
                'key "{}"'.format(column_name),
                "expected shape {}".format(shape),
                str(e),
            ) from e

    return prediction_request
def Apply(self, feed_dict):
    """Send the source tensors to the served ``transformer`` model.

    Args:
        feed_dict: Mapping keyed by the data layer's ``source_tensors``
            placeholders; the first entry is the source text, the second its
            length (cast to int32 before sending).

    Returns:
        Tuple ``(inputs, outputs)`` where ``inputs`` is
        ``{"source_tensors": [src_text, src_text_length]}`` and ``outputs``
        is a one-element list holding the returned ``tgt_txt`` array.
    """

    def _source_value(position):
        # Look up the fed value for the data layer's source tensor.
        tensors = self.model.get_data_layer().input_tensors["source_tensors"]
        return feed_dict[tensors[position]]

    src_text = _source_value(0)
    src_text_length = _source_value(1).astype(np.int32)

    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'transformer'
    request.model_spec.signature_name = 'predict_output'
    for input_name, value in (('src_text', src_text),
                              ('src_text_length', src_text_length)):
        request.inputs[input_name].CopyFrom(
            tf.contrib.util.make_tensor_proto(value, shape=list(value.shape)))

    response = self.stub.Predict(request, 5.0)  # 5 seconds
    tgt_txt = tensor_util.MakeNdarray(response.outputs['tgt_txt'])

    return {"source_tensors": [src_text, src_text_length]}, [tgt_txt]
def send_request(stub, model_name, batch_tokens, timeout=5.0):
    """Sends a translation request.

    Args:
        stub: The prediction service stub.
        model_name: The model to request.
        batch_tokens: A batch of token lists; it is padded with
            ``pad_batch`` before being sent.
        timeout: Timeout after this many seconds.

    Returns:
        A future.
    """
    padded_tokens, lengths, max_length = pad_batch(batch_tokens)
    batch_size = len(lengths)

    tokens_proto = tf.make_tensor_proto(
        padded_tokens, dtype=tf.string, shape=(batch_size, max_length))
    lengths_proto = tf.make_tensor_proto(
        lengths, dtype=tf.int32, shape=(batch_size, ))

    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    request.inputs["tokens"].CopyFrom(tokens_proto)
    request.inputs["length"].CopyFrom(lengths_proto)
    return stub.Predict.future(request, timeout)
def send_request(stub, inputs, run_num, batch_size, client_id):
    """Send the same request ``run_num`` times and print the timings.

    Args:
        stub: The prediction service stub.
        inputs: Array sent as the ``inputs`` tensor; must expose ``.shape``.
        run_num: Number of times the request is sent.
        batch_size: Only used in the printed summary.
        client_id: Only used to tag the printed lines.

    The model name comes from the module-level ``model_name``. When
    ``run_num`` is 0 the average is reported as ``nan`` instead of raising
    ``ZeroDivisionError``.
    """
    durationSum = 0.0

    # The request is built once and reused for every run.
    request = predict_pb2.PredictRequest()
    request.model_spec.name = model_name
    request.model_spec.signature_name = 'serving_default'
    request.inputs['inputs'].CopyFrom(
        tf.contrib.util.make_tensor_proto(inputs, shape=inputs.shape))

    for i in range(run_num):
        start = time.time()
        stub.Predict(request, 10.0)
        duration = time.time() - start
        durationSum += duration
        print("[client %d] for run %d, duration = %f" %
              (client_id, i, duration))

    average = durationSum / run_num if run_num else float('nan')
    print("[client %d] average duration for batch size of %d = %f" %
          (client_id, batch_size, average))
def main():
    """Send ``FLAGS.image`` to the served ``FLAGS.model`` and print the total
    execution time in milliseconds.

    The image is converted to float32, resized to ``FLAGS.size`` x
    ``FLAGS.size`` and sent as a batch of one under the ``input_1`` input.
    If the image cannot be read, a message is printed and no request is made.
    """
    start = time.time()

    # Create the request for the model server.
    request = predict_pb2.PredictRequest()
    request.model_spec.name = FLAGS.model
    request.model_spec.signature_name = 'serving_default'

    # Create the prediction service stub.
    channel = grpc.insecure_channel("{}:{}".format(FLAGS.host, FLAGS.port))
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

    # cv2.imread returns None on failure, so check before calling .astype().
    image = cv2.imread(FLAGS.image)
    if image is None:
        print('Invalid image provided: {}'.format(FLAGS.image))
        return
    image = image.astype(numpy.float32)
    image = cv2.resize(image, (int(FLAGS.size), int(FLAGS.size)))

    tensor = tf.make_tensor_proto(image, shape=[1] + list(image.shape))
    request.inputs['input_1'].CopyFrom(tensor)

    stub.Predict(request, RPC_TIMEOUT)
    print('Execution time: {} ms'.format((time.time() - start) * 1000))
def predict_batch(self, batch):
    """Run one batch through the ``clreceipt`` model and decode ``output1``.

    Args:
        batch: Array of images; ``batch.shape[0]`` is the batch size and
            ``batch.shape[2]`` is sent as the width of every image.

    Returns:
        List with one decoded string per row of ``output1``.

    Raises:
        Exception: Whatever the RPC raises. It is printed and re-raised;
            previously it was swallowed and a ``NameError`` followed.
    """
    batchsize = batch.shape[0]
    lens = np.full(batchsize, batch.shape[2], dtype=np.int32)

    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'clreceipt'
    request.model_spec.signature_name = (
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY)
    request.inputs['images'].CopyFrom(
        tf.contrib.util.make_tensor_proto(batch, shape=batch.shape))
    request.inputs['width'].CopyFrom(
        tf.contrib.util.make_tensor_proto(lens, shape=[batchsize]))

    try:
        result = self.stub.Predict(request, 300.0)  # 300 secs timeout
    except Exception as e:
        print(e)
        raise

    responses = []
    for j in range(1, 4):
        proto = result.outputs['output' + str(j)]
        labels = np.array(proto.int64_val)
        # int64_val is flat. Restore the shape declared by the response so
        # the per-row indexing below works.
        # NOTE(review): assumes the outputs are 2-D (batch, sequence) -
        # confirm against the exported signature.
        dims = [dim.size for dim in proto.tensor_shape.dim]
        if dims:
            labels = labels.reshape(dims)
        responses.append(
            [_get_string(labels[i, :]) for i in range(len(labels))])
    return responses[0]
def main(_):
    """Send the preprocessed ``FLAGS.image`` to the served ``food`` model and
    print the raw response.

    The server address is read from ``FLAGS.server`` ("host:port").
    """
    host, port = FLAGS.server.split(':')
    stub = prediction_service_pb2.beta_create_PredictionService_stub(
        implementations.insecure_channel(host, int(port)))

    # The handle itself is not read; the image is loaded and preprocessed by
    # load_preprocess_img. See prediction_service.proto for the gRPC
    # request/response details.
    with open(FLAGS.image, 'rb') as f:
        img = load_preprocess_img(FLAGS.image)

        request = predict_pb2.PredictRequest()
        request.model_spec.name = 'food'
        request.model_spec.signature_name = 'predict'
        print('keras client side requesting\n')

        image_tensor = tf.contrib.util.make_tensor_proto(img)
        request.inputs['images'].CopyFrom(image_tensor)

        result = stub.Predict(request, 10.0)  # 10 secs timeout
        print(result)
def predict_toxicity(self, text, model_version=1):
    """Tokenize ``text`` and request a prediction from ``toxic_model``.

    Non-word characters are replaced by spaces and the text is lower-cased.
    Words missing from ``self.tokenizer.word_index`` are reported on stdout
    and dropped. The id sequence is padded to ``self.maxlen``.

    Args:
        text: Raw input text.
        model_version: Version of the served model to address.

    Returns:
        The response returned by ``self.stub.Predict``.
    """
    cleaned = re.sub(r'\W+', ' ', text)

    token_ids = []
    for word in cleaned.lower().split():
        if word not in self.tokenizer.word_index:
            print("Not including word: {}".format(word))
            continue
        token_ids.append(self.tokenizer.word_index[word])

    padded = pad_sequences(np.array([token_ids]), maxlen=self.maxlen)

    # Set up the request.
    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'toxic_model'
    request.model_spec.signature_name = 'predict'
    request.model_spec.version.value = model_version
    request.inputs['inputs'].CopyFrom(
        tf.contrib.util.make_tensor_proto(
            padded, dtype='float32', shape=[1, padded.size]))

    return self.stub.Predict(request, 20.0)
def main(_):
    """Send ``FLAGS.image`` to the served ``faster_rcnn`` model and print the
    response together with the elapsed time.

    The image is converted to RGB, so grayscale and RGBA files are accepted
    as well, and sent as a uint8 batch of one with shape
    (1, height, width, 3).
    """
    host, port = FLAGS.server.split(':')
    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    start = time.time()

    # convert('RGB') guarantees three channels; for RGB input the pixel
    # values match the previous getdata()/reshape result.
    with Image.open(FLAGS.image) as raw_image:
        img = np.asarray(raw_image.convert('RGB'), dtype=np.uint8)

    # See prediction_service.proto for gRPC request/response details.
    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'faster_rcnn'
    request.model_spec.signature_name = 'serving_default'
    request.inputs['inputs'].CopyFrom(
        tf.contrib.util.make_tensor_proto(np.expand_dims(img, 0)))

    result = stub.Predict(request, 1200.0)  # 1200 secs timeout
    end = time.time()

    print("The time required to do inference is {:0.2f}".format(end - start))
    print(result)
def do_inference(hostport, work_dir, req_x):
    """Send one sample to the served ``mnist_softmax`` model.

    Args:
        hostport: "host:port" address of the PredictionService.
        work_dir: Not used by this function.
        req_x: A single input sample; it is wrapped into a float32 batch of
            one.

    Returns:
        Dict of the form ``{'tensor': {...}}``. On success the inner dict
        holds ``error_code`` 0 and ``data`` (the ``res_y`` values as a list).
        On failure it holds ``error_code`` 1 and ``error`` (the exception
        text). Previously the failure path returned the bare exception
        object and discarded the error code.
    """
    req_x = np.array([req_x], dtype=np.float32)

    host, port = hostport.split(':')
    channel = implementations.insecure_channel(host, int(port))
    stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

    request = predict_pb2.PredictRequest()
    request.model_spec.name = 'mnist_softmax'
    request.model_spec.signature_name = 'predict_x'
    request.inputs['req_x'].CopyFrom(tf.contrib.util.make_tensor_proto(req_x))

    result_future = stub.Predict.future(request, 5.0)  # 5 seconds
    # exception() blocks until the call finishes.
    exception = result_future.exception()

    response_data = {'tensor': {}}
    if exception:
        response_data['tensor']['error_code'] = 1
        response_data['tensor']['error'] = str(exception)
    else:
        response_data['tensor']['error_code'] = 0
        response_data['tensor']['data'] = np.array(
            result_future.result().outputs['res_y'].float_val).tolist()
    return response_data