def predict_str(service_name, input_json, protocol_version="v1",
                version=constants.KSERVE_V1BETA1_VERSION, model_name=None):
    kfs_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    isvc = kfs_client.get(
        service_name,
        namespace=KSERVE_TEST_NAMESPACE,
        version=version,
    )
    # temporary sleep until this is fixed https://github.com/kserve/kserve/issues/604
    time.sleep(10)
    cluster_ip = get_cluster_ip()
    host = urlparse(isvc["status"]["url"]).netloc
    headers = {"Host": host}

    if model_name is None:
        model_name = service_name

    url = f"http://{cluster_ip}/v1/models/{model_name}:predict"
    if protocol_version == "v2":
        url = f"http://{cluster_ip}/v2/models/{model_name}/infer"

    logging.info("Sending Header = %s", headers)
    logging.info("Sending url = %s", url)
    logging.info("Sending request data: %s", input_json)
    response = requests.post(url, input_json, headers=headers)
    logging.info("Got response code %s, content %s",
                 response.status_code, response.content)
    preds = json.loads(response.content.decode("utf-8"))
    return preds
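# A hedged usage sketch for predict_str (the service name and payloads below
# are illustrative, not taken from the repo's test data files): the v1 path
# posts to /v1/models/<name>:predict, while protocol_version="v2" switches
# the URL to /v2/models/<name>/infer with an Open Inference Protocol body.
def _example_predict_str_usage():
    v1_payload = json.dumps({"instances": [[6.8, 2.8, 4.8, 1.4]]})
    v1_preds = predict_str("isvc-sklearn", v1_payload)

    v2_payload = json.dumps({
        "inputs": [{
            "name": "input-0",
            "shape": [1, 4],
            "datatype": "FP32",
            "data": [[6.8, 2.8, 4.8, 1.4]],
        }]
    })
    v2_preds = predict_str("isvc-sklearn", v2_payload, protocol_version="v2")
    return v1_preds, v2_preds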
def explain_response(service_name, input_json):
    kfs_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    isvc = kfs_client.get(
        service_name,
        namespace=KSERVE_TEST_NAMESPACE,
        version=constants.KSERVE_V1BETA1_VERSION,
    )
    # temporary sleep until this is fixed https://github.com/kserve/kserve/issues/604
    time.sleep(10)
    cluster_ip = get_cluster_ip()
    host = urlparse(isvc["status"]["url"]).netloc
    url = "http://{}/v1/models/{}:explain".format(cluster_ip, service_name)
    headers = {"Host": host}
    with open(input_json) as json_file:
        data = json.load(json_file)
        logging.info("Sending request data: %s", json.dumps(data))
        try:
            response = requests.post(url, json.dumps(data), headers=headers)
            logging.info(
                "Got response code %s, content %s",
                response.status_code,
                response.content,
            )
            json_response = json.loads(response.content.decode("utf-8"))
        except (RuntimeError, json.decoder.JSONDecodeError) as e:
            logging.info("Explain error -------")
            logging.info(
                kfs_client.api_instance.get_namespaced_custom_object(
                    "serving.knative.dev",
                    "v1",
                    KSERVE_TEST_NAMESPACE,
                    "services",
                    service_name + "-explainer",
                ))
            pods = kfs_client.core_api.list_namespaced_pod(
                KSERVE_TEST_NAMESPACE,
                label_selector="serving.kserve.io/inferenceservice={}".format(
                    service_name),
            )
            for pod in pods.items:
                logging.info(pod)
                logging.info("%s\t%s\t%s" % (pod.metadata.name,
                                             pod.status.phase,
                                             pod.status.pod_ip))
                api_response = kfs_client.core_api.read_namespaced_pod_log(
                    pod.metadata.name,
                    KSERVE_TEST_NAMESPACE,
                    container="kserve-container",
                )
                logging.info(api_response)
            raise e
    return json_response
def test_set_credentials_gcp():
    """Test GCP credentials creation."""
    kserve_client = KServeClient()
    sa_name = constants.DEFAULT_SA_NAME
    kserve_client.set_credentials(
        storage_type='gcs',
        namespace=KSERVE_TEST_NAMESPACE,
        credentials_file='./credentials/gcp_credentials.json',
        sa_name=sa_name)
    created_sa = get_created_sa(sa_name)
    created_secret_name = created_sa.secrets[0].name
    created_secret = get_created_secret(created_secret_name)
    assert created_secret.data[
        constants.GCS_CREDS_FILE_DEFAULT_NAME] == gcp_testing_creds
def test_lightgbm_kserve():
    service_name = "isvc-lightgbm"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        lightgbm=V1beta1LightGBMSpec(
            storage_uri="gs://kfserving-examples/models/lightgbm/iris",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "256Mi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KSERVE_TEST_NAMESPACE
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)

    res = predict(service_name, "./data/iris_input_v3.json")
    assert res["predictions"][0][0] > 0.5

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
def test_sklearn_v2_kserve():
    service_name = "isvc-sklearn-v2"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        sklearn=V1beta1SKLearnSpec(
            storage_uri="gs://seldon-models/sklearn/mms/lr_model",
            protocol_version="v2",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "512Mi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KSERVE_TEST_NAMESPACE
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)

    res = predict(service_name, "./data/iris_input_v2.json",
                  protocol_version="v2")
    assert res["outputs"][0]["data"] == [1, 1]

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
def test_pmml_runtime_kserve():
    service_name = 'isvc-pmml-runtime'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(name="pmml"),
            storage_uri='gs://kfserving-examples/models/pmml',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '256Mi'},
                limits={'cpu': '100m', 'memory': '256Mi'}
            )
        )
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor))

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)

    res = predict(service_name, './data/pmml_input.json')
    assert res["predictions"] == [{'Species': 'setosa',
                                   'Probability_setosa': 1.0,
                                   'Probability_versicolor': 0.0,
                                   'Probability_virginica': 0.0,
                                   'Node_Id': '2'}]

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
def test_sklearn_runtime_kserve():
    service_name = "isvc-sklearn-runtime"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(name="sklearn"),
            storage_uri="gs://kfserving-examples/models/sklearn/1.0/model",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "256Mi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KSERVE_TEST_NAMESPACE
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)

    res = predict(service_name, "./data/iris_input.json")
    assert res["predictions"] == [1, 1]

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
def test_azure_credentials():
    """Test Azure credentials creation."""
    kserve_client = KServeClient()
    sa_name = constants.DEFAULT_SA_NAME
    kserve_client.set_credentials(
        storage_type='Azure',
        namespace=KSERVE_TEST_NAMESPACE,
        credentials_file='./credentials/azure_credentials.json',
        sa_name=sa_name)
    created_sa = get_created_sa(sa_name)
    created_secret_name = created_sa.secrets[0].name
    created_secret = get_created_secret(created_secret_name)
    assert created_secret.data['AZ_CLIENT_ID'] == 'dXNlcgo='
    assert created_secret.data['AZ_CLIENT_SECRET'] == 'cGFzc3dvcmQ='
    assert created_secret.data[
        'AZ_SUBSCRIPTION_ID'] == 'MzMzMzMzMzMtMzMzMy0zMzMzLTMzMzMtMzMzMzMz'
    assert created_secret.data['AZ_TENANT_ID'] == 'MTIzNAo='
def test_torchserve_grpc():
    service_name = "mnist-grpc"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri="gs://kfserving-examples/models/torchserve/image_classifier/v1",
            ports=[
                V1ContainerPort(container_port=7070, name="h2c",
                                protocol="TCP")
            ],
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "1Gi"},
                limits={"cpu": "1", "memory": "1Gi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)

    with open("./data/torchserve_input.json", 'rb') as f:
        data = f.read()
    input_data = {'data': data}

    stub = grpc_stub(service_name, KSERVE_TEST_NAMESPACE)
    response = stub.Predictions(
        inference_pb2.PredictionsRequest(model_name='mnist', input=input_data))

    prediction = response.prediction.decode('utf-8')
    json_output = json.loads(prediction)
    print(json_output)
    assert json_output["predictions"][0][0] == 2

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
def test_set_credentials_s3():
    """Test S3 credentials creation."""
    kfserving = KServeClient()
    credentials_file = './credentials/aws_credentials'

    # Test the service account creation case.
    sa_name = constants.DEFAULT_SA_NAME
    if check_sa_exists(sa_name):
        delete_sa(sa_name)

    kfserving.set_credentials(storage_type='s3',
                              namespace=KSERVE_TEST_NAMESPACE,
                              credentials_file=credentials_file,
                              s3_profile='default',
                              s3_endpoint='s3.us-west-2.amazonaws.com',
                              s3_region='us-west-2',
                              s3_use_https='1',
                              s3_verify_ssl='0')

    sa_body = get_created_sa(sa_name)
    created_secret_name = sa_body.secrets[0].name
    created_secret = get_created_secret(created_secret_name)

    config = configparser.ConfigParser()
    config.read([expanduser(credentials_file)])
    s3_access_key_id = config.get('default', 'aws_access_key_id')
    s3_secret_access_key = config.get('default', 'aws_secret_access_key')

    assert created_secret.data[
        constants.S3_ACCESS_KEY_ID_DEFAULT_NAME] == s3_access_key_id
    assert created_secret.data[
        constants.S3_SECRET_ACCESS_KEY_DEFAULT_NAME] == s3_secret_access_key
    assert created_secret.metadata.annotations[
        constants.KSERVE_GROUP + '/s3-endpoint'] == 's3.us-west-2.amazonaws.com'
    assert created_secret.metadata.annotations[
        constants.KSERVE_GROUP + '/s3-region'] == 'us-west-2'
    assert created_secret.metadata.annotations[
        constants.KSERVE_GROUP + '/s3-usehttps'] == '1'
    assert created_secret.metadata.annotations[
        constants.KSERVE_GROUP + '/s3-verifyssl'] == '0'
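# Note (hedged): V1Secret.data values come back base64-encoded from the
# Kubernetes API, so the equality asserts above can only hold if the
# checked-in ./credentials/aws_credentials file already stores base64
# strings. Assumed layout of that file (values illustrative, not from the
# actual test fixture):
#
#   [default]
#   aws_access_key_id = QVdTX0FDQ0VTU19LRVlfSUQ=
#   aws_secret_access_key = QVdTX1NFQ1JFVF9BQ0NFU1NfS0VZ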
def test_transformer():
    service_name = 'raw-transformer'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri='gs://kfserving-examples/models/torchserve/image_classifier/v1',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '1Gi'},
                limits={'cpu': '1', 'memory': '1Gi'}
            )
        ),
    )
    transformer = V1beta1TransformerSpec(
        min_replicas=1,
        containers=[V1Container(
            image='809251082950.dkr.ecr.us-west-2.amazonaws.com/kserve/image-transformer:'
                  + os.environ.get("PULL_BASE_SHA"),
            name='kserve-container',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '1Gi'},
                limits={'cpu': '100m', 'memory': '1Gi'}),
            args=["--model_name", "mnist"],
            env=[V1EnvVar(
                name="STORAGE_URI",
                value="gs://kfserving-examples/models/torchserve/image_classifier/v1")])]
    )

    annotations = {
        'serving.kserve.io/deploymentMode': 'RawDeployment',
        'kubernetes.io/ingress.class': 'istio',
    }
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name,
            namespace=KSERVE_TEST_NAMESPACE,
            annotations=annotations),
        spec=V1beta1InferenceServiceSpec(predictor=predictor,
                                         transformer=transformer))

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(
            service_name, namespace=KSERVE_TEST_NAMESPACE)
    except RuntimeError as e:
        print(kserve_client.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KSERVE_TEST_NAMESPACE,
            "services", service_name + "-predictor-default"))
        raise e

    res = predict(service_name, "./data/transformer.json", model_name="mnist")
    assert res.get("predictions")[0] == 2

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
def test_paddle_runtime():
    service_name = 'isvc-paddle-runtime'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(name="paddle"),
            storage_uri="https://zhouti-mcp-edge.cdn.bcebos.com/resnet50.tar.gz",
            resources=V1ResourceRequirements(
                requests={"cpu": "200m", "memory": "4Gi"},
                limits={"cpu": "200m", "memory": "4Gi"},
            )))

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=V1ObjectMeta(name=service_name,
                              namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor))

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name,
                                      namespace=KSERVE_TEST_NAMESPACE,
                                      timeout_seconds=720)
    except RuntimeError as e:
        pods = kserve_client.core_api.list_namespaced_pod(
            KSERVE_TEST_NAMESPACE,
            label_selector='serving.kserve.io/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            logging.info(pod)
        raise e

    res = predict(service_name, './data/jay.json')
    assert np.argmax(res["predictions"][0]) == 17

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
def test_pytorch():
    service_name = 'isvc-pytorch'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
            model_class_name="Net",
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '2Gi'},
                limits={'cpu': '100m', 'memory': '2Gi'})))

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor))

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name,
                                      namespace=KSERVE_TEST_NAMESPACE)
    except RuntimeError as e:
        print(kserve_client.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KSERVE_TEST_NAMESPACE,
            "services", service_name + "-predictor-default"))
        pods = kserve_client.core_api.list_namespaced_pod(
            KSERVE_TEST_NAMESPACE,
            label_selector='serving.kserve.io/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            print(pod)
        raise e

    res = predict(service_name, './data/cifar_input.json')
    assert np.argmax(res["predictions"]) == 3

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
def test_xgboost_v2_runtime_kserve():
    service_name = "isvc-xgboost-v2-runtime"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(name="xgboost"),
            runtime="kserve-mlserver",
            storage_uri="gs://kfserving-samples/models/xgboost/iris",
            protocol_version="v2",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "1024Mi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)

    res = predict(service_name, "./data/iris_input_v2.json",
                  protocol_version="v2")
    assert res["outputs"][0]["data"] == [1.0, 1.0]

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
def test_lightgbm_v2_runtime_kserve():
    service_name = "isvc-lightgbm-v2-runtime"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(name="lightgbm"),
            runtime="kserve-mlserver",
            storage_uri="gs://kfserving-examples/models/lightgbm/v2/iris",
            protocol_version="v2",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "1", "memory": "1Gi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KSERVE_TEST_NAMESPACE
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)

    res = predict(service_name, "./data/iris_input_v2.json",
                  protocol_version="v2")
    assert res["outputs"][0]["data"] == [
        8.796664107010673e-06,
        0.9992300031041593,
        0.0007612002317336916,
        4.974786820804187e-06,
        0.9999919650711493,
        3.0601420299625077e-06,
    ]

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
def test_raw_deployment_kserve():
    service_name = "raw-sklearn"
    annotations = {
        'serving.kserve.io/deploymentMode': 'RawDeployment',
        'kubernetes.io/ingress.class': 'istio',
    }
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        sklearn=V1beta1SKLearnSpec(
            storage_uri="gs://kfserving-examples/models/sklearn/1.0/model",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "256Mi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name,
            namespace=KSERVE_TEST_NAMESPACE,
            annotations=annotations,
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)

    res = predict(service_name, "./data/iris_input.json")
    assert res["predictions"] == [1, 1]

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
def test_torchserve_runtime_kserve():
    service_name = "mnist-runtime"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(name="pytorch"),
            storage_uri="gs://kfserving-examples/models/torchserve/image_classifier/v1",
            protocol_version="v1",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "4Gi"},
                limits={"cpu": "1", "memory": "4Gi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)

    res = predict(service_name, "./data/torchserve_input.json",
                  model_name="mnist")
    assert res.get("predictions")[0] == 2

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
import os
import time

from kubernetes import client
from kubernetes.client import V1ResourceRequirements
from kubernetes.client import V1Container

from kserve import KServeClient
from kserve import constants
from kserve import V1beta1PredictorSpec
from kserve import V1beta1SKLearnSpec
from kserve import V1beta1InferenceServiceSpec
from kserve import V1beta1InferenceService
from kserve import V1beta1LoggerSpec

from ..common.utils import predict
from ..common.utils import KSERVE_TEST_NAMESPACE

kserve_client = KServeClient(
    config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))


def test_kserve_logger():
    msg_dumper = 'message-dumper'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        containers=[V1Container(
            name="kserve-container",
            image='gcr.io/knative-releases/knative.dev/eventing-contrib/cmd/event_display')]
    )
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=msg_dumper,
            namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor))
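# The logger test fragment above ends after building the message-dumper
# InferenceService. A hedged sketch of the predictor such a test typically
# deploys next, using the V1beta1LoggerSpec imported above; the sklearn
# storage_uri and the logger mode are assumptions, not confirmed by this
# fragment:
def _sketch_logger_predictor(msg_dumper):
    return V1beta1PredictorSpec(
        min_replicas=1,
        # Send a copy of every request/response to the message-dumper service.
        logger=V1beta1LoggerSpec(
            mode="all",  # assumed; "request" or "response" are the other modes
            url="http://{}.{}.svc.cluster.local".format(
                msg_dumper, KSERVE_TEST_NAMESPACE),
        ),
        sklearn=V1beta1SKLearnSpec(
            storage_uri="gs://kfserving-examples/models/sklearn/1.0/model",  # assumed
        ),
    )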
def test_mms_sklearn_kserve(protocol_version: str, storage_uri: str):
    # Define an inference service
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        sklearn=V1beta1SKLearnSpec(
            protocol_version=protocol_version,
            resources=client.V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "512Mi"},
                limits={"cpu": "100m", "memory": "1024Mi"},
            ),
        ),
    )

    service_name = f"isvc-sklearn-mms-{protocol_version}"
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    # Create an instance of inference service with isvc
    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)

    cluster_ip = get_cluster_ip()

    model_names = [
        f"model1-sklearn-{protocol_version}",
        f"model2-sklearn-{protocol_version}",
    ]
    for model_name in model_names:
        model_spec = V1alpha1ModelSpec(
            storage_uri=storage_uri,
            memory="128Mi",
            framework="sklearn",
        )
        model = V1alpha1TrainedModel(
            api_version=constants.KSERVE_V1ALPHA1,
            kind=constants.KSERVE_KIND_TRAINEDMODEL,
            metadata=client.V1ObjectMeta(name=model_name,
                                         namespace=KSERVE_TEST_NAMESPACE),
            spec=V1alpha1TrainedModelSpec(inference_service=service_name,
                                          model=model_spec),
        )

        # Create instances of trained models using model1 and model2
        kserve_client.create_trained_model(model, KSERVE_TEST_NAMESPACE)
        kserve_client.wait_model_ready(
            service_name,
            model_name,
            isvc_namespace=KSERVE_TEST_NAMESPACE,
            isvc_version=constants.KSERVE_V1BETA1_VERSION,
            protocol_version=protocol_version,
            cluster_ip=cluster_ip,
        )

    input_json = "./data/iris_input.json"
    if protocol_version == "v2":
        input_json = "./data/iris_input_v2.json"

    responses = [
        predict(
            service_name,
            input_json,
            model_name=model_name,
            protocol_version=protocol_version,
        )
        for model_name in model_names
    ]

    if protocol_version == "v1":
        assert responses[0]["predictions"] == [1, 1]
        assert responses[1]["predictions"] == [1, 1]
    elif protocol_version == "v2":
        assert responses[0]["outputs"][0]["data"] == [1, 1]
        assert responses[1]["outputs"][0]["data"] == [1, 1]

    # Clean up inference service and trained models
    for model_name in model_names:
        kserve_client.delete_trained_model(model_name, KSERVE_TEST_NAMESPACE)
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
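# test_mms_sklearn_kserve takes protocol_version and storage_uri as arguments,
# so a parametrize decorator presumably drives it but is not part of this
# fragment. A hedged sketch of what that driver could look like (the storage
# URIs below are illustrative, borrowed from the other sklearn tests here):
#
#   @pytest.mark.parametrize(
#       "protocol_version,storage_uri",
#       [
#           ("v1", "gs://kfserving-examples/models/sklearn/1.0/model"),
#           ("v2", "gs://seldon-models/sklearn/mms/lr_model"),
#       ],
#   )
#   def test_mms_sklearn_kserve(protocol_version, storage_uri): ...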
def test_triton_runtime():
    service_name = 'isvc-triton-runtime'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(name="pytorch"),
            runtime="kserve-tritonserver",
            storage_uri='gs://kfserving-examples/models/torchscript',
            ports=[
                V1ContainerPort(name="h2c", protocol="TCP",
                                container_port=9000)
            ]))
    transformer = V1beta1TransformerSpec(
        min_replicas=1,
        containers=[
            V1Container(
                image='809251082950.dkr.ecr.us-west-2.amazonaws.com/kserve/image-transformer:'
                      + os.environ.get("PULL_BASE_SHA"),
                name='kserve-container',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '1Gi'},
                    limits={'cpu': '100m', 'memory': '1Gi'}),
                args=["--model_name", "cifar10", "--protocol", "grpc-v2"])
        ])

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor,
                                         transformer=transformer))

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name,
                                      namespace=KSERVE_TEST_NAMESPACE)
    except RuntimeError as e:
        print(kserve_client.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KSERVE_TEST_NAMESPACE,
            "services", service_name + "-predictor-default"))
        deployments = kserve_client.app_api.list_namespaced_deployment(
            KSERVE_TEST_NAMESPACE,
            label_selector='serving.kserve.io/inferenceservice={}'.format(
                service_name))
        for deployment in deployments.items:
            print(deployment)
        raise e

    res = predict(service_name, "./data/image.json", model_name='cifar10')
    assert np.argmax(res.get("predictions")[0]) == 5

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from unittest.mock import patch

from kubernetes import client

from kserve import V1beta1PredictorSpec
from kserve import V1beta1TFServingSpec
from kserve import V1beta1InferenceServiceSpec
from kserve import V1beta1InferenceService
from kserve import KServeClient

kserve_client = KServeClient(config_file='./kserve/test/kubeconfig')

mocked_unit_result = \
    '''
{
    "api_version": "serving.kserve.io/v1beta1",
    "kind": "InferenceService",
    "metadata": {
        "name": "flower-sample",
        "namespace": "kubeflow"
    },
    "spec": {
        "predictor": {
            "tensorflow": {
                "storage_uri": "gs://kfserving-samples/models/tensorflow/flowers"
            }
        }
    }
}
'''
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from unittest.mock import patch

from kubernetes import client

from kserve import V1beta1PredictorSpec
from kserve import V1beta1TFServingSpec
from kserve import V1beta1InferenceServiceSpec
from kserve import V1beta1InferenceService
from kserve import KServeClient

kserve_client = KServeClient()

mocked_unit_result = \
    '''
{
    "api_version": "serving.kserve.io/v1beta1",
    "kind": "InferenceService",
    "metadata": {
        "name": "flower-sample",
        "namespace": "kubeflow"
    },
    "spec": {
        "predictor": {
            "tensorflow": {
                "storage_uri": "gs://kfserving-samples/models/tensorflow/flowers"
            }
        }
    }
}
'''
def perform_action(action,
                   model_name,
                   model_uri,
                   canary_traffic_percent,
                   namespace,
                   framework,
                   custom_model_spec,
                   service_account,
                   inferenceservice_yaml,
                   request_timeout,
                   autoscaling_target=0,
                   enable_istio_sidecar=True,
                   watch_timeout=300,
                   min_replicas=0,
                   max_replicas=0):
    """
    Perform the specified action. If the action is not 'delete' and
    `inferenceservice_yaml` was provided, the dict representation of the YAML
    will be sent directly to the Kubernetes API. Otherwise, a
    V1beta1InferenceService object will be built using the provided input and
    then sent for creation/update.
    :return InferenceService JSON output
    """
    kserve_client = KServeClient()

    if inferenceservice_yaml:
        # Overwrite the name and namespace if they were provided
        if namespace:
            inferenceservice_yaml['metadata']['namespace'] = namespace

        if model_name:
            inferenceservice_yaml['metadata']['name'] = model_name
        else:
            model_name = inferenceservice_yaml['metadata']['name']

        isvc = inferenceservice_yaml

    elif action != 'delete':
        # Create annotations
        annotations = {}
        if int(autoscaling_target) != 0:
            annotations["autoscaling.knative.dev/target"] = str(
                autoscaling_target)
        if not enable_istio_sidecar:
            annotations["sidecar.istio.io/inject"] = 'false'
        if not annotations:
            annotations = None
        metadata = client.V1ObjectMeta(name=model_name,
                                       namespace=namespace,
                                       annotations=annotations)

        # If a custom model container spec was provided, build the
        # V1Container object using it.
        containers = []
        if custom_model_spec:
            containers = [create_custom_container_spec(custom_model_spec)]

        # Build the V1beta1PredictorSpec.
        predictor_spec = create_predictor_spec(framework, model_uri,
                                               canary_traffic_percent,
                                               service_account, min_replicas,
                                               max_replicas, containers,
                                               request_timeout)

        isvc = create_inference_service(metadata, predictor_spec)

    if action == "create":
        submit_api_request(kserve_client, 'create', model_name, isvc,
                           namespace, watch=True,
                           timeout_seconds=watch_timeout)
    elif action == "update":
        submit_api_request(kserve_client, 'update', model_name, isvc,
                           namespace, watch=True,
                           timeout_seconds=watch_timeout)
    elif action == "apply":
        try:
            submit_api_request(kserve_client, 'create', model_name, isvc,
                               namespace, watch=True,
                               timeout_seconds=watch_timeout)
        except Exception:
            submit_api_request(kserve_client, 'update', model_name, isvc,
                               namespace, watch=True,
                               timeout_seconds=watch_timeout)
    elif action == "delete":
        kserve_client.delete(model_name, namespace=namespace)
    else:
        # `raise` on a plain string is a TypeError in Python 3; raise a
        # proper exception instead.
        raise RuntimeError("Error: No matching action: " + action)

    model_status = kserve_client.get(model_name, namespace=namespace)
    return model_status
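# A hedged usage sketch for perform_action, e.g. when invoked from a pipeline
# component; every argument value below is illustrative, not taken from the
# original source:
def _example_perform_action():
    return perform_action(
        action="apply",                # try create, fall back to update
        model_name="sklearn-iris",
        model_uri="gs://kfserving-examples/models/sklearn/1.0/model",
        canary_traffic_percent=100,
        namespace="kserve-test",
        framework="sklearn",
        custom_model_spec={},          # no custom container
        service_account="",
        inferenceservice_yaml=None,    # build the isvc from the args above
        request_timeout=60,
        watch_timeout=300,
    )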