Example #1
def test_lightgbm_kserve():
    service_name = "isvc-lightgbm"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        lightgbm=V1beta1LightGBMSpec(
            storage_uri="gs://kfserving-examples/models/lightgbm/iris",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "256Mi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KSERVE_TEST_NAMESPACE
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)

    res = predict(service_name, "./data/iris_input_v3.json")
    assert res["predictions"][0][0] > 0.5
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #2
def test_pmml_runtime_kserve():
    service_name = 'isvc-pmml-runtime'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(
                name="pmml",
            ),
            storage_uri='gs://kfserving-examples/models/pmml',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '256Mi'},
                limits={'cpu': '100m', 'memory': '256Mi'}
            )
        )
    )

    isvc = V1beta1InferenceService(api_version=constants.KSERVE_V1BETA1,
                                   kind=constants.KSERVE_KIND,
                                   metadata=client.V1ObjectMeta(
                                        name=service_name, namespace=KSERVE_TEST_NAMESPACE),
                                   spec=V1beta1InferenceServiceSpec(predictor=predictor))

    kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
    res = predict(service_name, './data/pmml_input.json')
    assert (res["predictions"] == [{'Species': 'setosa',
                                    'Probability_setosa': 1.0,
                                    'Probability_versicolor': 0.0,
                                    'Probability_virginica': 0.0,
                                    'Node_Id': '2'}])
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #3
def test_sklearn_v2_kserve():
    service_name = "isvc-sklearn-v2"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        sklearn=V1beta1SKLearnSpec(
            storage_uri="gs://seldon-models/sklearn/mms/lr_model",
            protocol_version="v2",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "512Mi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KSERVE_TEST_NAMESPACE
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)

    res = predict(service_name, "./data/iris_input_v2.json", protocol_version="v2")
    assert res["outputs"][0]["data"] == [1, 1]

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #4
def test_sklearn_runtime_kserve():
    service_name = "isvc-sklearn-runtime"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(
                name="sklearn",
            ),
            storage_uri="gs://kfserving-examples/models/sklearn/1.0/model",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "256Mi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KSERVE_TEST_NAMESPACE
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
    res = predict(service_name, "./data/iris_input.json")
    assert res["predictions"] == [1, 1]
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #5
def test_tabular_explainer():
    service_name = 'isvc-explainer-tabular'
    predictor = V1beta1PredictorSpec(
        sklearn=V1beta1SKLearnSpec(
            storage_uri='gs://kfserving-examples/models/sklearn/1.0/income/model',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '1Gi'},
                limits={'cpu': '100m', 'memory': '1Gi'},
            )))
    explainer = V1beta1ExplainerSpec(
        min_replicas=1,
        alibi=V1beta1AlibiExplainerSpec(
            name='kserve-container',
            type='AnchorTabular',
            storage_uri='gs://kfserving-examples/models/sklearn/1.0/income/explainer-py37-0.6.2',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '1Gi'},
                limits={'cpu': '100m', 'memory': '1Gi'},
            )))

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor,
                                         explainer=explainer))

    kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name,
                                      namespace=KSERVE_TEST_NAMESPACE,
                                      timeout_seconds=720)
    except RuntimeError as e:
        logging.info(
            kserve_client.api_instance.get_namespaced_custom_object(
                "serving.knative.dev", "v1", KSERVE_TEST_NAMESPACE, "services",
                service_name + "-predictor-default"))
        pods = kserve_client.core_api.list_namespaced_pod(
            KSERVE_TEST_NAMESPACE,
            label_selector='serving.kserve.io/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            logging.info(pod)
        raise e

    res = predict(service_name, './data/income_input.json')
    assert res["predictions"] == [0]
    precision = explain(service_name, './data/income_input.json')
    assert precision > 0.9
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #6
def test_batcher_custom_port():
    service_name = 'isvc-sklearn-batcher-custom'

    predictor = V1beta1PredictorSpec(
        batcher=V1beta1Batcher(
            max_batch_size=32,
            max_latency=5000,
        ),
        min_replicas=1,
        sklearn=V1beta1SKLearnSpec(
            args=["--http_port=5000"],
            storage_uri="gs://kfserving-examples/models/sklearn/1.0/model",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "256Mi"},
            ),
            ports=[V1ContainerPort(container_port=5000, protocol='TCP')],
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )
    kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name,
                                      namespace=KSERVE_TEST_NAMESPACE)
    except RuntimeError as e:
        print(
            kserve_client.api_instance.get_namespaced_custom_object(
                "serving.knative.dev", "v1", KSERVE_TEST_NAMESPACE, "services",
                service_name + "-predictor-default"))
        pods = kserve_client.core_api.list_namespaced_pod(
            KSERVE_TEST_NAMESPACE,
            label_selector='serving.kserve.io/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            print(pod)
        raise e
    # NOTE: json_array (the list of request payloads) is assumed to be defined
    # elsewhere in the test module, e.g. loaded from a batch input file.
    with futures.ThreadPoolExecutor(max_workers=4) as executor:
        future_res = [
            # Pass arguments to submit directly so each task binds its own
            # payload, avoiding the late-binding pitfall of a bare lambda.
            executor.submit(predict_str, service_name, json.dumps(item))
            for item in json_array
        ]
    results = [f.result()["batchId"] for f in future_res]
    assert all(x == results[0] for x in results)
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #7
def create_inference_service(metadata, predictor_spec):
    """
    Build and return V1beta1InferenceService object.
    """
    return V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=metadata,
        spec=V1beta1InferenceServiceSpec(predictor=predictor_spec),
    )
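A minimal usage sketch for this helper, reusing the client/constants conventions from the surrounding examples; the service name here is hypothetical, and the storage URI is borrowed from the sklearn examples in this listing:

metadata = client.V1ObjectMeta(
    name="isvc-helper-demo",  # hypothetical name, not from the original tests
    namespace=KSERVE_TEST_NAMESPACE,
)
predictor_spec = V1beta1PredictorSpec(
    min_replicas=1,
    sklearn=V1beta1SKLearnSpec(
        # Storage URI borrowed from the sklearn examples above.
        storage_uri="gs://kfserving-examples/models/sklearn/1.0/model",
    ),
)
isvc = create_inference_service(metadata, predictor_spec)
kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
kserve_client.create(isvc)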
Example #8
def generate_inferenceservice():
    tf_spec = V1beta1TFServingSpec(
        storage_uri='gs://kfserving-samples/models/tensorflow/flowers')
    predictor_spec = V1beta1PredictorSpec(tensorflow=tf_spec)

    isvc = V1beta1InferenceService(
        api_version='serving.kserve.io/v1beta1',
        kind='InferenceService',
        metadata=client.V1ObjectMeta(name='flower-sample'),
        spec=V1beta1InferenceServiceSpec(predictor=predictor_spec))
    return isvc
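generate_inferenceservice only builds the object; below is a hedged sketch of actually submitting it, following the KServeClient pattern used throughout these examples (since the metadata above sets no namespace, the service lands in the client's default namespace):

isvc = generate_inferenceservice()
kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
kserve_client.create(isvc)
# Wait for the service to become ready before sending prediction requests.
kserve_client.wait_isvc_ready('flower-sample')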
Example #9
def test_torchserve_grpc():
    service_name = "mnist-grpc"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri="gs://kfserving-examples/models/torchserve/image_classifier/v1",
            ports=[
                V1ContainerPort(container_port=7070, name="h2c", protocol="TCP")
            ],
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "1Gi"},
                limits={"cpu": "1", "memory": "1Gi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name,
                                  namespace=KSERVE_TEST_NAMESPACE)

    with open("./data/torchserve_input.json", 'rb') as f:
        data = f.read()

    input_data = {'data': data}
    stub = grpc_stub(service_name, KSERVE_TEST_NAMESPACE)
    response = stub.Predictions(
        inference_pb2.PredictionsRequest(model_name='mnist', input=input_data))

    prediction = response.prediction.decode('utf-8')
    json_output = json.loads(prediction)
    print(json_output)
    assert json_output["predictions"][0][0] == 2
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #10
def test_transformer():
    service_name = 'raw-transformer'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri='gs://kfserving-examples/models/torchserve/image_classifier/v1',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '1Gi'},
                limits={'cpu': '1', 'memory': '1Gi'}
            )
        ),
    )
    transformer = V1beta1TransformerSpec(
        min_replicas=1,
        containers=[V1Container(
            # PULL_BASE_SHA is expected to be set by the CI environment that
            # builds and pushes the transformer image.
            image='809251082950.dkr.ecr.us-west-2.amazonaws.com/kserve/image-transformer:'
                  + os.environ.get("PULL_BASE_SHA"),
            name='kserve-container',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '1Gi'},
                limits={'cpu': '100m', 'memory': '1Gi'}),
            args=["--model_name", "mnist"],
            env=[V1EnvVar(name="STORAGE_URI",
                          value="gs://kfserving-examples/models/torchserve/image_classifier/v1")])]
    )

    annotations = {
        'serving.kserve.io/deploymentMode': 'RawDeployment',
        'kubernetes.io/ingress.class': 'istio',
    }
    isvc = V1beta1InferenceService(api_version=constants.KSERVE_V1BETA1,
                                   kind=constants.KSERVE_KIND,
                                   metadata=client.V1ObjectMeta(
                                       name=service_name, namespace=KSERVE_TEST_NAMESPACE, annotations=annotations),
                                   spec=V1beta1InferenceServiceSpec(predictor=predictor, transformer=transformer))

    kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(
            service_name, namespace=KSERVE_TEST_NAMESPACE)
    except RuntimeError as e:
        print(kserve_client.api_instance.get_namespaced_custom_object("serving.knative.dev", "v1",
                                                                      KSERVE_TEST_NAMESPACE,
                                                                      "services", service_name + "-predictor-default"))
        raise e

    res = predict(service_name, "./data/transformer.json", model_name="mnist")
    assert res.get("predictions")[0] == 2
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #11
def test_batcher():
    service_name = 'isvc-pytorch-batcher'
    predictor = V1beta1PredictorSpec(
        batcher=V1beta1Batcher(
            max_batch_size=32,
            max_latency=5000,
        ),
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri="gs://kfserving-examples/models/torchserve/image_classifier/v1",
            resources=V1ResourceRequirements(
                requests={'cpu': '1', 'memory': '4Gi'},
                limits={'cpu': '1', 'memory': '4Gi'}
            )
        )
    )

    isvc = V1beta1InferenceService(api_version=constants.KSERVE_V1BETA1,
                                   kind=constants.KSERVE_KIND,
                                   metadata=client.V1ObjectMeta(
                                       name=service_name,
                                       namespace=KSERVE_TEST_NAMESPACE
                                   ),
                                   spec=V1beta1InferenceServiceSpec(predictor=predictor))
    kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
    except RuntimeError as e:
        print(kserve_client.api_instance.get_namespaced_custom_object(
            "serving.knative.dev", "v1", KSERVE_TEST_NAMESPACE,
            "services", service_name + "-predictor-default"))
        pods = kserve_client.core_api.list_namespaced_pod(
            KSERVE_TEST_NAMESPACE,
            label_selector='serving.kserve.io/inferenceservice={}'.format(service_name))
        for pod in pods.items:
            print(pod)
        raise e
    with futures.ThreadPoolExecutor(max_workers=4) as executor:
        future_res = [
            executor.submit(predict, service_name, './data/torchserve_batch_input.json')
            for _ in range(4)
        ]
    results = [f.result()["batchId"] for f in future_res]
    assert all(x == results[0] for x in results)
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #12
def test_paddle_runtime():
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(name="paddle"),
            storage_uri="https://zhouti-mcp-edge.cdn.bcebos.com/resnet50.tar.gz",
            resources=V1ResourceRequirements(
                requests={"cpu": "200m", "memory": "4Gi"},
                limits={"cpu": "200m", "memory": "4Gi"},
            )))

    service_name = 'isvc-paddle-runtime'
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=V1ObjectMeta(name=service_name,
                              namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor))

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name,
                                      namespace=KSERVE_TEST_NAMESPACE,
                                      timeout_seconds=720)
    except RuntimeError as e:
        pods = kserve_client.core_api.list_namespaced_pod(
            KSERVE_TEST_NAMESPACE,
            label_selector='serving.kserve.io/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            logging.info(pod)
        raise e

    res = predict(service_name, './data/jay.json')
    assert np.argmax(res["predictions"][0]) == 17

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #13
def test_pytorch():
    service_name = 'isvc-pytorch'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
            model_class_name="Net",
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '2Gi'},
                limits={'cpu': '100m', 'memory': '2Gi'},
            )))

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor))

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name,
                                      namespace=KSERVE_TEST_NAMESPACE)
    except RuntimeError as e:
        print(
            kserve_client.api_instance.get_namespaced_custom_object(
                "serving.knative.dev", "v1", KSERVE_TEST_NAMESPACE, "services",
                service_name + "-predictor-default"))
        pods = kserve_client.core_api.list_namespaced_pod(
            KSERVE_TEST_NAMESPACE,
            label_selector='serving.kserve.io/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            print(pod)
        raise e
    res = predict(service_name, './data/cifar_input.json')
    assert np.argmax(res["predictions"]) == 3
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #14
def test_xgboost_v2_runtime_kserve():
    service_name = "isvc-xgboost-v2-runtime"

    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(name="xgboost"),
            runtime="kserve-mlserver",
            storage_uri="gs://kfserving-samples/models/xgboost/iris",
            protocol_version="v2",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "1024Mi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name,
                                  namespace=KSERVE_TEST_NAMESPACE)

    res = predict(service_name,
                  "./data/iris_input_v2.json",
                  protocol_version="v2")
    assert res["outputs"][0]["data"] == [1.0, 1.0]

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #15
def test_lightgbm_v2_runtime_kserve():
    service_name = "isvc-lightgbm-v2-runtime"

    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(
                name="lightgbm",
            ),
            runtime="kserve-mlserver",
            storage_uri="gs://kfserving-examples/models/lightgbm/v2/iris",
            protocol_version="v2",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "1", "memory": "1Gi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KSERVE_TEST_NAMESPACE
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)

    res = predict(service_name, "./data/iris_input_v2.json", protocol_version="v2")
    assert res["outputs"][0]["data"] == [
        8.796664107010673e-06,
        0.9992300031041593,
        0.0007612002317336916,
        4.974786820804187e-06,
        0.9999919650711493,
        3.0601420299625077e-06]

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #16
def test_raw_deployment_kserve():
    service_name = "raw-sklearn"
    annotations = {
        'serving.kserve.io/deploymentMode': 'RawDeployment',
        'kubernetes.io/ingress.class': 'istio',
    }

    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        sklearn=V1beta1SKLearnSpec(
            storage_uri="gs://kfserving-examples/models/sklearn/1.0/model",
            resources=V1ResourceRequirements(
                requests={
                    "cpu": "100m",
                    "memory": "256Mi"
                },
                limits={
                    "cpu": "100m",
                    "memory": "256Mi"
                },
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name,
            namespace=KSERVE_TEST_NAMESPACE,
            annotations=annotations,
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name,
                                  namespace=KSERVE_TEST_NAMESPACE)
    res = predict(service_name, "./data/iris_input.json")
    assert res["predictions"] == [1, 1]
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #17
def test_torchserve_runtime_kserve():
    service_name = "mnist-runtime"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(name="pytorch"),
            storage_uri="gs://kfserving-examples/models/torchserve/image_classifier/v1",
            protocol_version="v1",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "4Gi"},
                limits={"cpu": "1", "memory": "4Gi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name,
                                  namespace=KSERVE_TEST_NAMESPACE)

    res = predict(service_name,
                  "./data/torchserve_input.json",
                  model_name="mnist")
    assert res.get("predictions")[0] == 2
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #18
def test_kserve_logger():
    msg_dumper = 'message-dumper'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        containers=[V1Container(name="kserve-container",
                                image='gcr.io/knative-releases/knative.dev/eventing-contrib/cmd/event_display')]
    )

    isvc = V1beta1InferenceService(api_version=constants.KSERVE_V1BETA1,
                                   kind=constants.KSERVE_KIND,
                                   metadata=client.V1ObjectMeta(
                                        name=msg_dumper, namespace=KSERVE_TEST_NAMESPACE),
                                   spec=V1beta1InferenceServiceSpec(predictor=predictor))

    kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(msg_dumper, namespace=KSERVE_TEST_NAMESPACE)

    service_name = 'isvc-logger'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        logger=V1beta1LoggerSpec(
            mode="all",
            url="http://message-dumper."+KSERVE_TEST_NAMESPACE+".svc.cluster.local"
        ),
        sklearn=V1beta1SKLearnSpec(
            storage_uri='gs://kfserving-examples/models/sklearn/1.0/model',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '256Mi'},
                limits={'cpu': '100m', 'memory': '256Mi'}
            )
        )
    )

    isvc = V1beta1InferenceService(api_version=constants.KSERVE_V1BETA1,
                                   kind=constants.KSERVE_KIND,
                                   metadata=client.V1ObjectMeta(
                                       name=service_name, namespace=KSERVE_TEST_NAMESPACE),
                                   spec=V1beta1InferenceServiceSpec(predictor=predictor))

    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
    except RuntimeError:
        pods = kserve_client.core_api.list_namespaced_pod(
            KSERVE_TEST_NAMESPACE,
            label_selector='serving.kserve.io/inferenceservice={}'.format(service_name))
        for pod in pods.items:
            print(pod)
        raise

    res = predict(service_name, './data/iris_input.json')
    assert res["predictions"] == [1, 1]
    pods = kserve_client.core_api.list_namespaced_pod(
        KSERVE_TEST_NAMESPACE,
        label_selector='serving.kserve.io/inferenceservice={}'.format(msg_dumper))
    time.sleep(5)
    log = ''
    for pod in pods.items:
        log += kserve_client.core_api.read_namespaced_pod_log(
            name=pod.metadata.name,
            namespace=pod.metadata.namespace,
            container="kserve-container")
        print(log)
    assert "org.kubeflow.serving.inference.request" in log
    assert "org.kubeflow.serving.inference.response" in log
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
    kserve_client.delete(msg_dumper, KSERVE_TEST_NAMESPACE)
Example #19
def test_mms_sklearn_kserve(protocol_version: str, storage_uri: str):
    # Define an inference service
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        sklearn=V1beta1SKLearnSpec(
            protocol_version=protocol_version,
            resources=client.V1ResourceRequirements(
                requests={
                    "cpu": "100m",
                    "memory": "512Mi"
                },
                limits={
                    "cpu": "100m",
                    "memory": "1024Mi"
                },
            ),
        ),
    )

    service_name = f"isvc-sklearn-mms-{protocol_version}"
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    # Create an instance of inference service with isvc
    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name,
                                  namespace=KSERVE_TEST_NAMESPACE)

    cluster_ip = get_cluster_ip()

    model_names = [
        f"model1-sklearn-{protocol_version}",
        f"model2-sklearn-{protocol_version}",
    ]

    for model_name in model_names:
        model_spec = V1alpha1ModelSpec(
            storage_uri=storage_uri,
            memory="128Mi",
            framework="sklearn",
        )

        model = V1alpha1TrainedModel(
            api_version=constants.KSERVE_V1ALPHA1,
            kind=constants.KSERVE_KIND_TRAINEDMODEL,
            metadata=client.V1ObjectMeta(name=model_name,
                                         namespace=KSERVE_TEST_NAMESPACE),
            spec=V1alpha1TrainedModelSpec(inference_service=service_name,
                                          model=model_spec),
        )

        # Create instances of trained models using model1 and model2
        kserve_client.create_trained_model(model, KSERVE_TEST_NAMESPACE)

        kserve_client.wait_model_ready(
            service_name,
            model_name,
            isvc_namespace=KSERVE_TEST_NAMESPACE,
            isvc_version=constants.KSERVE_V1BETA1_VERSION,
            protocol_version=protocol_version,
            cluster_ip=cluster_ip,
        )

    input_json = "./data/iris_input.json"
    if protocol_version == "v2":
        input_json = "./data/iris_input_v2.json"

    responses = [
        predict(
            service_name,
            input_json,
            model_name=model_name,
            protocol_version=protocol_version,
        ) for model_name in model_names
    ]

    if protocol_version == "v1":
        assert responses[0]["predictions"] == [1, 1]
        assert responses[1]["predictions"] == [1, 1]
    elif protocol_version == "v2":
        assert responses[0]["outputs"][0]["data"] == [1, 1]
        assert responses[1]["outputs"][0]["data"] == [1, 1]

    # Clean up inference service and trained models
    for model_name in model_names:
        kserve_client.delete_trained_model(model_name, KSERVE_TEST_NAMESPACE)
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #20
def test_canary_rollout_runtime():
    service_name = 'isvc-canary-runtime'
    default_endpoint_spec = V1beta1InferenceServiceSpec(
        predictor=V1beta1PredictorSpec(
            min_replicas=1,
            model=V1beta1ModelSpec(
                model_format=V1beta1ModelFormat(name="tensorflow"),
                storage_uri='gs://kfserving-samples/models/tensorflow/flowers',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'},
                ))))

    isvc = V1beta1InferenceService(api_version=constants.KSERVE_V1BETA1,
                                   kind=constants.KSERVE_KIND,
                                   metadata=client.V1ObjectMeta(
                                       name=service_name,
                                       namespace=KSERVE_TEST_NAMESPACE),
                                   spec=default_endpoint_spec)

    kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name,
                                  namespace=KSERVE_TEST_NAMESPACE)

    # define canary endpoint spec, and then rollout 10% traffic to the canary version
    canary_endpoint_spec = V1beta1InferenceServiceSpec(
        predictor=V1beta1PredictorSpec(
            canary_traffic_percent=10,
            model=V1beta1ModelSpec(
                model_format=V1beta1ModelFormat(name="tensorflow"),
                storage_uri='gs://kfserving-samples/models/tensorflow/flowers-2',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '256Mi'},
                    limits={'cpu': '100m', 'memory': '256Mi'},
                ))))
    isvc = V1beta1InferenceService(api_version=constants.KSERVE_V1BETA1,
                                   kind=constants.KSERVE_KIND,
                                   metadata=client.V1ObjectMeta(
                                       name=service_name,
                                       namespace=KSERVE_TEST_NAMESPACE),
                                   spec=canary_endpoint_spec)

    kserve_client.patch(service_name, isvc, namespace=KSERVE_TEST_NAMESPACE)
    kserve_client.wait_isvc_ready(service_name,
                                  namespace=KSERVE_TEST_NAMESPACE)

    canary_isvc = kserve_client.get(service_name,
                                    namespace=KSERVE_TEST_NAMESPACE)
    for traffic in canary_isvc['status']['components']['predictor']['traffic']:
        if traffic['latestRevision']:
            assert traffic['percent'] == 10

    # Delete the InferenceService
    kserve_client.delete(service_name, namespace=KSERVE_TEST_NAMESPACE)
Example #21
def test_tabular_explainer():
    service_name = 'aix-explainer'
    predictor = V1beta1PredictorSpec(containers=[
        V1Container(
            name="predictor",
            image='aipipeline/rf-predictor:0.4.0',
            command=[
                "python", "-m", "rfserver", "--model_name", "aix-explainer"
            ],
            resources=V1ResourceRequirements(
                requests={'cpu': '500m', 'memory': '1Gi'},
                limits={'cpu': '500m', 'memory': '1Gi'},
            ))
    ])
    explainer = V1beta1ExplainerSpec(
        min_replicas=1,
        aix=V1beta1AIXExplainerSpec(
            name='explainer',
            type='LimeImages',
            resources=V1ResourceRequirements(
                requests={'cpu': '500m', 'memory': '1Gi'},
                limits={'cpu': '500m', 'memory': '1Gi'},
            )))

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor,
                                         explainer=explainer))

    kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name,
                                      namespace=KSERVE_TEST_NAMESPACE,
                                      timeout_seconds=720)
    except RuntimeError as e:
        logging.info(
            kserve_client.api_instance.get_namespaced_custom_object(
                "serving.knative.dev", "v1", KSERVE_TEST_NAMESPACE, "services",
                service_name + "-predictor-default"))
        pods = kserve_client.core_api.list_namespaced_pod(
            KSERVE_TEST_NAMESPACE,
            label_selector='serving.kserve.io/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            logging.info(pod)
        raise e

    res = predict(service_name, './data/mnist_input.json')
    assert res["predictions"] == [[
        0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
    ]]

    mask = explain_aix(service_name, './data/mnist_input.json')
    percent_in_mask = np.count_nonzero(mask) / np.size(np.array(mask))
    assert percent_in_mask > 0.6
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #22
def test_triton_runtime():
    service_name = 'isvc-triton-runtime'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(name="pytorch"),
            runtime="kserve-tritonserver",
            storage_uri='gs://kfserving-examples/models/torchscript',
            ports=[
                V1ContainerPort(name="h2c", protocol="TCP", container_port=9000)
            ]))

    transformer = V1beta1TransformerSpec(
        min_replicas=1,
        containers=[
            V1Container(
                # PULL_BASE_SHA is expected to be set by the CI environment that
                # builds and pushes the transformer image.
                image='809251082950.dkr.ecr.us-west-2.amazonaws.com/kserve/image-transformer:'
                      + os.environ.get("PULL_BASE_SHA"),
                name='kserve-container',
                resources=V1ResourceRequirements(
                    requests={'cpu': '100m', 'memory': '1Gi'},
                    limits={'cpu': '100m', 'memory': '1Gi'}),
                args=["--model_name", "cifar10", "--protocol", "grpc-v2"])
        ])
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor,
                                         transformer=transformer))

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name,
                                      namespace=KSERVE_TEST_NAMESPACE)
    except RuntimeError as e:
        print(
            kserve_client.api_instance.get_namespaced_custom_object(
                "serving.knative.dev", "v1", KSERVE_TEST_NAMESPACE, "services",
                service_name + "-predictor-default"))
        deployments = kserve_client.app_api.list_namespaced_deployment(
            KSERVE_TEST_NAMESPACE,
            label_selector='serving.kserve.io/inferenceservice={}'.format(service_name))
        for deployment in deployments.items:
            print(deployment)
        raise e
    res = predict(service_name, "./data/image.json", model_name='cifar10')
    assert np.argmax(res.get("predictions")[0]) == 5
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)