def test_lightgbm_kserve():
    service_name = "isvc-lightgbm"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        lightgbm=V1beta1LightGBMSpec(
            storage_uri="gs://kfserving-examples/models/lightgbm/iris",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "256Mi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KSERVE_TEST_NAMESPACE
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
    res = predict(service_name, "./data/iris_input_v3.json")
    assert res["predictions"][0][0] > 0.5
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)

def test_pmml_runtime_kserve():
    service_name = 'isvc-pmml-runtime'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(
                name="pmml",
            ),
            storage_uri='gs://kfserving-examples/models/pmml',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '256Mi'},
                limits={'cpu': '100m', 'memory': '256Mi'}
            )
        )
    )

    isvc = V1beta1InferenceService(api_version=constants.KSERVE_V1BETA1,
                                   kind=constants.KSERVE_KIND,
                                   metadata=client.V1ObjectMeta(
                                       name=service_name, namespace=KSERVE_TEST_NAMESPACE),
                                   spec=V1beta1InferenceServiceSpec(predictor=predictor))

    kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
    res = predict(service_name, './data/pmml_input.json')
    assert res["predictions"] == [{'Species': 'setosa',
                                   'Probability_setosa': 1.0,
                                   'Probability_versicolor': 0.0,
                                   'Probability_virginica': 0.0,
                                   'Node_Id': '2'}]
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)

def test_sklearn_v2_kserve():
    service_name = "isvc-sklearn-v2"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        sklearn=V1beta1SKLearnSpec(
            storage_uri="gs://seldon-models/sklearn/mms/lr_model",
            protocol_version="v2",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "512Mi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KSERVE_TEST_NAMESPACE
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
    res = predict(service_name, "./data/iris_input_v2.json", protocol_version="v2")
    assert res["outputs"][0]["data"] == [1, 1]
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)

def test_sklearn_runtime_kserve():
    service_name = "isvc-sklearn-runtime"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(
                name="sklearn",
            ),
            storage_uri="gs://kfserving-examples/models/sklearn/1.0/model",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "256Mi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KSERVE_TEST_NAMESPACE
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
    res = predict(service_name, "./data/iris_input.json")
    assert res["predictions"] == [1, 1]
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)

def test_tabular_explainer():
    service_name = 'isvc-explainer-tabular'
    predictor = V1beta1PredictorSpec(
        sklearn=V1beta1SKLearnSpec(
            storage_uri='gs://kfserving-examples/models/sklearn/1.0/income/model',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '1Gi'},
                limits={'cpu': '100m', 'memory': '1Gi'})))
    explainer = V1beta1ExplainerSpec(
        min_replicas=1,
        alibi=V1beta1AlibiExplainerSpec(
            name='kserve-container',
            type='AnchorTabular',
            storage_uri='gs://kfserving-examples/models/sklearn/1.0/income/explainer-py37-0.6.2',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '1Gi'},
                limits={'cpu': '100m', 'memory': '1Gi'})))

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor, explainer=explainer))

    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE,
                                      timeout_seconds=720)
    except RuntimeError as e:
        logging.info(kserve_client.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KSERVE_TEST_NAMESPACE,
            "services", service_name + "-predictor-default"))
        pods = kserve_client.core_api.list_namespaced_pod(
            KSERVE_TEST_NAMESPACE,
            label_selector='serving.kserve.io/inferenceservice={}'.format(service_name))
        for pod in pods.items:
            logging.info(pod)
        raise e

    res = predict(service_name, './data/income_input.json')
    assert res["predictions"] == [0]

    precision = explain(service_name, './data/income_input.json')
    assert precision > 0.9

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)

def test_batcher_custom_port():
    service_name = 'isvc-sklearn-batcher-custom'
    predictor = V1beta1PredictorSpec(
        batcher=V1beta1Batcher(
            max_batch_size=32,
            max_latency=5000,
        ),
        min_replicas=1,
        sklearn=V1beta1SKLearnSpec(
            args=["--http_port=5000"],
            storage_uri="gs://kfserving-examples/models/sklearn/1.0/model",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "256Mi"},
            ),
            ports=[V1ContainerPort(container_port=5000, protocol='TCP')]),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
    except RuntimeError as e:
        print(kserve_client.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KSERVE_TEST_NAMESPACE,
            "services", service_name + "-predictor-default"))
        pods = kserve_client.core_api.list_namespaced_pod(
            KSERVE_TEST_NAMESPACE,
            label_selector='serving.kserve.io/inferenceservice={}'.format(service_name))
        for pod in pods.items:
            print(pod)
        raise e

    with futures.ThreadPoolExecutor(max_workers=4) as executor:
        future_res = [
            # Bind item as a default argument so each submitted call keeps its own
            # payload instead of all closures sharing the last loop value.
            executor.submit(lambda item=item: predict_str(service_name, json.dumps(item)))
            for item in json_array
        ]
    results = [f.result()["batchId"] for f in future_res]
    assert all(x == results[0] for x in results)

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)

def create_inference_service(metadata, predictor_spec):
    """Build and return a V1beta1InferenceService object."""
    return V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=metadata,
        spec=V1beta1InferenceServiceSpec(predictor=predictor_spec),
    )

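# A minimal usage sketch for the create_inference_service() helper above.
# The function name, service name, and predictor spec below are illustrative
# assumptions, not part of the original suite; the storage URI mirrors the
# sklearn tests in this file.
def example_create_inference_service_usage():
    metadata = client.V1ObjectMeta(
        name="isvc-sklearn-example", namespace=KSERVE_TEST_NAMESPACE
    )
    predictor_spec = V1beta1PredictorSpec(
        min_replicas=1,
        sklearn=V1beta1SKLearnSpec(
            storage_uri="gs://kfserving-examples/models/sklearn/1.0/model",
        ),
    )
    # Build the InferenceService object and submit it with the KServe client.
    isvc = create_inference_service(metadata, predictor_spec)
    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready("isvc-sklearn-example", namespace=KSERVE_TEST_NAMESPACE)
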
def generate_inferenceservice(): tf_spec = V1beta1TFServingSpec( storage_uri='gs://kfserving-samples/models/tensorflow/flowers') predictor_spec = V1beta1PredictorSpec(tensorflow=tf_spec) isvc = V1beta1InferenceService( api_version='serving.kserve.io/v1beta1', kind='InferenceService', metadata=client.V1ObjectMeta(name='flower-sample'), spec=V1beta1InferenceServiceSpec(predictor=predictor_spec)) return isvc
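# A hedged sketch of how the InferenceService returned by generate_inferenceservice()
# could be applied and cleaned up. The function name below is an illustrative
# assumption; passing a namespace at create time is used because the generated
# metadata does not set one.
def example_apply_generated_inferenceservice():
    isvc = generate_inferenceservice()
    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc, namespace=KSERVE_TEST_NAMESPACE)
    kserve_client.wait_isvc_ready('flower-sample', namespace=KSERVE_TEST_NAMESPACE)
    kserve_client.delete('flower-sample', KSERVE_TEST_NAMESPACE)
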
def test_torchserve_grpc():
    service_name = "mnist-grpc"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri="gs://kfserving-examples/models/torchserve/image_classifier/v1",
            ports=[
                V1ContainerPort(container_port=7070, name="h2c", protocol="TCP")
            ],
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "1Gi"},
                limits={"cpu": "1", "memory": "1Gi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)

    with open("./data/torchserve_input.json", 'rb') as f:
        data = f.read()
    input_data = {'data': data}
    stub = grpc_stub(service_name, KSERVE_TEST_NAMESPACE)
    response = stub.Predictions(
        inference_pb2.PredictionsRequest(model_name='mnist', input=input_data))
    prediction = response.prediction.decode('utf-8')
    json_output = json.loads(prediction)
    print(json_output)
    assert json_output["predictions"][0][0] == 2
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)

def test_transformer():
    service_name = 'raw-transformer'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri='gs://kfserving-examples/models/torchserve/image_classifier/v1',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '1Gi'},
                limits={'cpu': '1', 'memory': '1Gi'}
            )
        ),
    )
    transformer = V1beta1TransformerSpec(
        min_replicas=1,
        containers=[V1Container(
            image='809251082950.dkr.ecr.us-west-2.amazonaws.com/kserve/image-transformer:'
                  + os.environ.get("PULL_BASE_SHA"),
            name='kserve-container',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '1Gi'},
                limits={'cpu': '100m', 'memory': '1Gi'}),
            args=["--model_name", "mnist"],
            env=[V1EnvVar(name="STORAGE_URI",
                          value="gs://kfserving-examples/models/torchserve/image_classifier/v1")])]
    )

    annotations = dict()
    annotations['serving.kserve.io/deploymentMode'] = 'RawDeployment'
    annotations['kubernetes.io/ingress.class'] = 'istio'
    isvc = V1beta1InferenceService(api_version=constants.KSERVE_V1BETA1,
                                   kind=constants.KSERVE_KIND,
                                   metadata=client.V1ObjectMeta(
                                       name=service_name, namespace=KSERVE_TEST_NAMESPACE,
                                       annotations=annotations),
                                   spec=V1beta1InferenceServiceSpec(predictor=predictor,
                                                                    transformer=transformer))

    kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
    except RuntimeError as e:
        print(kserve_client.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KSERVE_TEST_NAMESPACE,
            "services", service_name + "-predictor-default"))
        raise e

    res = predict(service_name, "./data/transformer.json", model_name="mnist")
    assert res.get("predictions")[0] == 2
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)

def test_batcher():
    service_name = 'isvc-pytorch-batcher'
    predictor = V1beta1PredictorSpec(
        batcher=V1beta1Batcher(
            max_batch_size=32,
            max_latency=5000,
        ),
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri="gs://kfserving-examples/models/torchserve/image_classifier/v1",
            resources=V1ResourceRequirements(
                requests={'cpu': '1', 'memory': '4Gi'},
                limits={'cpu': '1', 'memory': '4Gi'}
            )
        )
    )

    isvc = V1beta1InferenceService(api_version=constants.KSERVE_V1BETA1,
                                   kind=constants.KSERVE_KIND,
                                   metadata=client.V1ObjectMeta(
                                       name=service_name, namespace=KSERVE_TEST_NAMESPACE
                                   ),
                                   spec=V1beta1InferenceServiceSpec(predictor=predictor))

    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
    except RuntimeError as e:
        print(kserve_client.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KSERVE_TEST_NAMESPACE,
            "services", service_name + "-predictor-default"))
        pods = kserve_client.core_api.list_namespaced_pod(
            KSERVE_TEST_NAMESPACE,
            label_selector='serving.kserve.io/inferenceservice={}'.format(service_name))
        for pod in pods.items:
            print(pod)
        raise e

    with futures.ThreadPoolExecutor(max_workers=4) as executor:
        future_res = [
            executor.submit(lambda: predict(service_name, './data/torchserve_batch_input.json'))
            for _ in range(4)
        ]
    results = [f.result()["batchId"] for f in future_res]
    assert all(x == results[0] for x in results)

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)

def test_paddle_runtime():
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(name="paddle"),
            storage_uri="https://zhouti-mcp-edge.cdn.bcebos.com/resnet50.tar.gz",
            resources=V1ResourceRequirements(
                requests={"cpu": "200m", "memory": "4Gi"},
                limits={"cpu": "200m", "memory": "4Gi"},
            )))

    service_name = 'isvc-paddle-runtime'
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=V1ObjectMeta(name=service_name, namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor))

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE,
                                      timeout_seconds=720)
    except RuntimeError as e:
        pods = kserve_client.core_api.list_namespaced_pod(
            KSERVE_TEST_NAMESPACE,
            label_selector='serving.kserve.io/inferenceservice={}'.format(service_name))
        for pod in pods.items:
            logging.info(pod)
        raise e

    res = predict(service_name, './data/jay.json')
    assert np.argmax(res["predictions"][0]) == 17
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)

def test_pytorch():
    service_name = 'isvc-pytorch'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
            model_class_name="Net",
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '2Gi'},
                limits={'cpu': '100m', 'memory': '2Gi'})))

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor))

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
    except RuntimeError as e:
        print(kserve_client.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KSERVE_TEST_NAMESPACE,
            "services", service_name + "-predictor-default"))
        pods = kserve_client.core_api.list_namespaced_pod(
            KSERVE_TEST_NAMESPACE,
            label_selector='serving.kserve.io/inferenceservice={}'.format(service_name))
        for pod in pods.items:
            print(pod)
        raise e

    res = predict(service_name, './data/cifar_input.json')
    assert np.argmax(res["predictions"]) == 3
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)

def test_xgboost_v2_runtime_kserve():
    service_name = "isvc-xgboost-v2-runtime"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(name="xgboost"),
            runtime="kserve-mlserver",
            storage_uri="gs://kfserving-samples/models/xgboost/iris",
            protocol_version="v2",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "1024Mi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)

    res = predict(service_name, "./data/iris_input_v2.json", protocol_version="v2")
    assert res["outputs"][0]["data"] == [1.0, 1.0]

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)

def test_lightgbm_v2_runtime_kserve():
    service_name = "isvc-lightgbm-v2-runtime"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(
                name="lightgbm",
            ),
            runtime="kserve-mlserver",
            storage_uri="gs://kfserving-examples/models/lightgbm/v2/iris",
            protocol_version="v2",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "1", "memory": "1Gi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KSERVE_TEST_NAMESPACE
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)

    res = predict(service_name, "./data/iris_input_v2.json", protocol_version="v2")
    assert res["outputs"][0]["data"] == [
        8.796664107010673e-06,
        0.9992300031041593,
        0.0007612002317336916,
        4.974786820804187e-06,
        0.9999919650711493,
        3.0601420299625077e-06,
    ]

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)

def test_raw_deployment_kserve():
    service_name = "raw-sklearn"
    annotations = dict()
    annotations['serving.kserve.io/deploymentMode'] = 'RawDeployment'
    annotations['kubernetes.io/ingress.class'] = 'istio'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        sklearn=V1beta1SKLearnSpec(
            storage_uri="gs://kfserving-examples/models/sklearn/1.0/model",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "256Mi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name,
            namespace=KSERVE_TEST_NAMESPACE,
            annotations=annotations,
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
    res = predict(service_name, "./data/iris_input.json")
    assert res["predictions"] == [1, 1]
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)

def test_torchserve_runtime_kserve():
    service_name = "mnist-runtime"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(name="pytorch"),
            storage_uri="gs://kfserving-examples/models/torchserve/image_classifier/v1",
            protocol_version="v1",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "4Gi"},
                limits={"cpu": "1", "memory": "4Gi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
    res = predict(service_name, "./data/torchserve_input.json", model_name="mnist")
    assert res.get("predictions")[0] == 2
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)

def test_kserve_logger():
    msg_dumper = 'message-dumper'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        containers=[V1Container(
            name="kserve-container",
            image='gcr.io/knative-releases/knative.dev/eventing-contrib/cmd/event_display')]
    )
    isvc = V1beta1InferenceService(api_version=constants.KSERVE_V1BETA1,
                                   kind=constants.KSERVE_KIND,
                                   metadata=client.V1ObjectMeta(
                                       name=msg_dumper, namespace=KSERVE_TEST_NAMESPACE),
                                   spec=V1beta1InferenceServiceSpec(predictor=predictor))

    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(msg_dumper, namespace=KSERVE_TEST_NAMESPACE)

    service_name = 'isvc-logger'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        logger=V1beta1LoggerSpec(
            mode="all",
            url="http://message-dumper." + KSERVE_TEST_NAMESPACE + ".svc.cluster.local"
        ),
        sklearn=V1beta1SKLearnSpec(
            storage_uri='gs://kfserving-examples/models/sklearn/1.0/model',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '256Mi'},
                limits={'cpu': '100m', 'memory': '256Mi'}
            )
        )
    )
    isvc = V1beta1InferenceService(api_version=constants.KSERVE_V1BETA1,
                                   kind=constants.KSERVE_KIND,
                                   metadata=client.V1ObjectMeta(
                                       name=service_name, namespace=KSERVE_TEST_NAMESPACE),
                                   spec=V1beta1InferenceServiceSpec(predictor=predictor))

    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
    except RuntimeError:
        pods = kserve_client.core_api.list_namespaced_pod(
            KSERVE_TEST_NAMESPACE,
            label_selector='serving.kserve.io/inferenceservice={}'.format(service_name))
        for pod in pods.items:
            print(pod)

    res = predict(service_name, './data/iris_input.json')
    assert res["predictions"] == [1, 1]

    pods = kserve_client.core_api.list_namespaced_pod(
        KSERVE_TEST_NAMESPACE,
        label_selector='serving.kserve.io/inferenceservice={}'.format(msg_dumper))
    time.sleep(5)
    log = ''
    for pod in pods.items:
        log += kserve_client.core_api.read_namespaced_pod_log(
            name=pod.metadata.name,
            namespace=pod.metadata.namespace,
            container="kserve-container")
    print(log)
    assert "org.kubeflow.serving.inference.request" in log
    assert "org.kubeflow.serving.inference.response" in log

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
    kserve_client.delete(msg_dumper, KSERVE_TEST_NAMESPACE)

def test_mms_sklearn_kserve(protocol_version: str, storage_uri: str):
    # Define an inference service
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        sklearn=V1beta1SKLearnSpec(
            protocol_version=protocol_version,
            resources=client.V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "512Mi"},
                limits={"cpu": "100m", "memory": "1024Mi"},
            ),
        ),
    )

    service_name = f"isvc-sklearn-mms-{protocol_version}"
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    # Create an instance of inference service with isvc
    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)

    cluster_ip = get_cluster_ip()

    model_names = [
        f"model1-sklearn-{protocol_version}",
        f"model2-sklearn-{protocol_version}",
    ]

    for model_name in model_names:
        model_spec = V1alpha1ModelSpec(
            storage_uri=storage_uri,
            memory="128Mi",
            framework="sklearn",
        )

        model = V1alpha1TrainedModel(
            api_version=constants.KSERVE_V1ALPHA1,
            kind=constants.KSERVE_KIND_TRAINEDMODEL,
            metadata=client.V1ObjectMeta(name=model_name,
                                         namespace=KSERVE_TEST_NAMESPACE),
            spec=V1alpha1TrainedModelSpec(inference_service=service_name,
                                          model=model_spec),
        )

        # Create instances of trained models using model1 and model2
        kserve_client.create_trained_model(model, KSERVE_TEST_NAMESPACE)

        kserve_client.wait_model_ready(
            service_name,
            model_name,
            isvc_namespace=KSERVE_TEST_NAMESPACE,
            isvc_version=constants.KSERVE_V1BETA1_VERSION,
            protocol_version=protocol_version,
            cluster_ip=cluster_ip,
        )

    input_json = "./data/iris_input.json"
    if protocol_version == "v2":
        input_json = "./data/iris_input_v2.json"

    responses = [
        predict(
            service_name,
            input_json,
            model_name=model_name,
            protocol_version=protocol_version,
        )
        for model_name in model_names
    ]

    if protocol_version == "v1":
        assert responses[0]["predictions"] == [1, 1]
        assert responses[1]["predictions"] == [1, 1]
    elif protocol_version == "v2":
        assert responses[0]["outputs"][0]["data"] == [1, 1]
        assert responses[1]["outputs"][0]["data"] == [1, 1]

    # Clean up inference service and trained models
    for model_name in model_names:
        kserve_client.delete_trained_model(model_name, KSERVE_TEST_NAMESPACE)
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)

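# test_mms_sklearn_kserve() takes protocol_version and storage_uri as arguments.
# A minimal sketch of how it might be driven with pytest parametrization; the
# wrapper name and the URI-to-protocol pairings below are assumptions (the URIs
# mirror the single-model sklearn tests in this file).
import pytest


@pytest.mark.parametrize(
    "protocol_version,storage_uri",
    [
        ("v1", "gs://kfserving-examples/models/sklearn/1.0/model"),
        ("v2", "gs://seldon-models/sklearn/mms/lr_model"),
    ],
)
def test_mms_sklearn_kserve_parametrized(protocol_version, storage_uri):
    # Delegate to the multi-model serving test above for each protocol/model pair.
    test_mms_sklearn_kserve(protocol_version, storage_uri)
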
def test_canary_rollout_runtime():
    service_name = 'isvc-canary-runtime'
    default_endpoint_spec = V1beta1InferenceServiceSpec(
        predictor=V1beta1PredictorSpec(
            min_replicas=1,
            model=V1beta1ModelSpec(
                model_format=V1beta1ModelFormat(name="tensorflow"),
                storage_uri='gs://kfserving-samples/models/tensorflow/flowers',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))))

    isvc = V1beta1InferenceService(api_version=constants.KSERVE_V1BETA1,
                                   kind=constants.KSERVE_KIND,
                                   metadata=client.V1ObjectMeta(
                                       name=service_name, namespace=KSERVE_TEST_NAMESPACE),
                                   spec=default_endpoint_spec)
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)

    # define canary endpoint spec, and then rollout 10% traffic to the canary version
    canary_endpoint_spec = V1beta1InferenceServiceSpec(
        predictor=V1beta1PredictorSpec(
            canary_traffic_percent=10,
            model=V1beta1ModelSpec(
                model_format=V1beta1ModelFormat(name="tensorflow"),
                storage_uri='gs://kfserving-samples/models/tensorflow/flowers-2',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'}))))

    isvc = V1beta1InferenceService(api_version=constants.KSERVE_V1BETA1,
                                   kind=constants.KSERVE_KIND,
                                   metadata=client.V1ObjectMeta(
                                       name=service_name, namespace=KSERVE_TEST_NAMESPACE),
                                   spec=canary_endpoint_spec)

    kserve_client.patch(service_name, isvc, namespace=KSERVE_TEST_NAMESPACE)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)

    canary_isvc = kserve_client.get(service_name, namespace=KSERVE_TEST_NAMESPACE)
    for traffic in canary_isvc['status']['components']['predictor']['traffic']:
        if traffic['latestRevision']:
            assert traffic['percent'] == 10

    # Delete the InferenceService
    kserve_client.delete(service_name, namespace=KSERVE_TEST_NAMESPACE)

def test_aix_tabular_explainer():
    service_name = 'aix-explainer'
    predictor = V1beta1PredictorSpec(containers=[
        V1Container(
            name="predictor",
            image='aipipeline/rf-predictor:0.4.0',
            command=["python", "-m", "rfserver", "--model_name", "aix-explainer"],
            resources=V1ResourceRequirements(
                requests={'cpu': '500m', 'memory': '1Gi'},
                limits={'cpu': '500m', 'memory': '1Gi'}))
    ])
    explainer = V1beta1ExplainerSpec(
        min_replicas=1,
        aix=V1beta1AIXExplainerSpec(
            name='explainer',
            type='LimeImages',
            resources=V1ResourceRequirements(
                requests={'cpu': '500m', 'memory': '1Gi'},
                limits={'cpu': '500m', 'memory': '1Gi'})))

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor, explainer=explainer))

    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE,
                                      timeout_seconds=720)
    except RuntimeError as e:
        logging.info(kserve_client.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KSERVE_TEST_NAMESPACE,
            "services", service_name + "-predictor-default"))
        pods = kserve_client.core_api.list_namespaced_pod(
            KSERVE_TEST_NAMESPACE,
            label_selector='serving.kserve.io/inferenceservice={}'.format(service_name))
        for pod in pods.items:
            logging.info(pod)
        raise e

    res = predict(service_name, './data/mnist_input.json')
    assert res["predictions"] == [[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]]

    mask = explain_aix(service_name, './data/mnist_input.json')
    percent_in_mask = np.count_nonzero(mask) / np.size(np.array(mask))
    assert percent_in_mask > 0.6

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)

def test_triton_runtime():
    service_name = 'isvc-triton-runtime'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(name="pytorch"),
            runtime="kserve-tritonserver",
            storage_uri='gs://kfserving-examples/models/torchscript',
            ports=[
                V1ContainerPort(name="h2c", protocol="TCP", container_port=9000)
            ]))
    transformer = V1beta1TransformerSpec(
        min_replicas=1,
        containers=[
            V1Container(
                image='809251082950.dkr.ecr.us-west-2.amazonaws.com/kserve/image-transformer:'
                      + os.environ.get("PULL_BASE_SHA"),
                name='kserve-container',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '1Gi'},
                    limits={'cpu': '100m', 'memory': '1Gi'}),
                args=["--model_name", "cifar10", "--protocol", "grpc-v2"])
        ])

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor, transformer=transformer))

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
    except RuntimeError as e:
        print(kserve_client.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KSERVE_TEST_NAMESPACE,
            "services", service_name + "-predictor-default"))
        deployments = kserve_client.app_api.list_namespaced_deployment(
            KSERVE_TEST_NAMESPACE,
            label_selector='serving.kserve.io/inferenceservice={}'.format(service_name))
        for deployment in deployments.items:
            print(deployment)
        raise e

    res = predict(service_name, "./data/image.json", model_name='cifar10')
    assert np.argmax(res.get("predictions")[0]) == 5
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)