Example #1
def predict_str(service_name, input_json, protocol_version="v1",
                version=constants.KSERVE_V1BETA1_VERSION, model_name=None):
    kfs_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    isvc = kfs_client.get(
        service_name,
        namespace=KSERVE_TEST_NAMESPACE,
        version=version,
    )
    # temporary sleep until this is fixed https://github.com/kserve/kserve/issues/604
    time.sleep(10)
    cluster_ip = get_cluster_ip()
    host = urlparse(isvc["status"]["url"]).netloc
    headers = {"Host": host}

    if model_name is None:
        model_name = service_name

    url = f"http://{cluster_ip}/v1/models/{model_name}:predict"
    if protocol_version == "v2":
        url = f"http://{cluster_ip}/v2/models/{model_name}/infer"

    logging.info("Sending Header = %s", headers)
    logging.info("Sending url = %s", url)
    logging.info("Sending request data: %s", input_json)
    response = requests.post(url, input_json, headers=headers)
    logging.info("Got response code %s, content %s", response.status_code, response.content)
    preds = json.loads(response.content.decode("utf-8"))
    return preds
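A minimal usage sketch of this helper (the service name and payload below are hypothetical; the V1 protocol wraps input rows in an "instances" list):

payload = json.dumps({"instances": [[6.8, 2.8, 4.8, 1.4]]})
preds = predict_str("isvc-sklearn", payload)
assert "predictions" in preds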
Example #2
def explain_response(service_name, input_json):
    kfs_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    isvc = kfs_client.get(
        service_name,
        namespace=KSERVE_TEST_NAMESPACE,
        version=constants.KSERVE_V1BETA1_VERSION,
    )
    # temporary sleep until this is fixed https://github.com/kserve/kserve/issues/604
    time.sleep(10)
    cluster_ip = get_cluster_ip()
    host = urlparse(isvc["status"]["url"]).netloc
    url = "http://{}/v1/models/{}:explain".format(cluster_ip, service_name)
    headers = {"Host": host}
    with open(input_json) as json_file:
        data = json.load(json_file)
        logging.info("Sending request data: %s", json.dumps(data))
        try:
            response = requests.post(url, json.dumps(data), headers=headers)
            logging.info(
                "Got response code %s, content %s",
                response.status_code,
                response.content,
            )
            json_response = json.loads(response.content.decode("utf-8"))
        except (RuntimeError, json.decoder.JSONDecodeError) as e:
            logging.info("Explain error -------")
            logging.info(
                kfs_client.api_instance.get_namespaced_custom_object(
                    "serving.knative.dev",
                    "v1",
                    KSERVE_TEST_NAMESPACE,
                    "services",
                    service_name + "-explainer",
                ))
            pods = kfs_client.core_api.list_namespaced_pod(
                KSERVE_TEST_NAMESPACE,
                label_selector="serving.kserve.io/inferenceservice={}".format(
                    service_name),
            )
            for pod in pods.items:
                logging.info(pod)
                logging.info(
                    "%s\t%s\t%s" %
                    (pod.metadata.name, pod.status.phase, pod.status.pod_ip))
                api_response = kfs_client.core_api.read_namespaced_pod_log(
                    pod.metadata.name,
                    KSERVE_TEST_NAMESPACE,
                    container="kserve-container",
                )
                logging.info(api_response)
            raise e
        return json_response
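A similarly hedged usage sketch (the service name and input file are illustrative; the response shape depends on the configured explainer):

result = explain_response("isvc-explainer", "./data/explain_input.json")
logging.info("Explanation: %s", result)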
Example #3
def test_set_credentials_gcp():
    '''Test GCP credential creation.'''
    kserve_client = KServeClient()
    sa_name = constants.DEFAULT_SA_NAME
    kserve_client.set_credentials(
        storage_type='gcs',
        namespace=KSERVE_TEST_NAMESPACE,
        credentials_file='./credentials/gcp_credentials.json',
        sa_name=sa_name)
    created_sa = get_created_sa(sa_name)
    created_secret_name = created_sa.secrets[0].name
    created_secret = get_created_secret(created_secret_name)
    assert created_secret.data[
        constants.GCS_CREDS_FILE_DEFAULT_NAME] == gcp_testing_creds
Example #4
def test_lightgbm_kserve():
    service_name = "isvc-lightgbm"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        lightgbm=V1beta1LightGBMSpec(
            storage_uri="gs://kfserving-examples/models/lightgbm/iris",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "256Mi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KSERVE_TEST_NAMESPACE
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)

    res = predict(service_name, "./data/iris_input_v3.json")
    assert res["predictions"][0][0] > 0.5
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #5
def test_sklearn_v2_kserve():
    service_name = "isvc-sklearn-v2"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        sklearn=V1beta1SKLearnSpec(
            storage_uri="gs://seldon-models/sklearn/mms/lr_model",
            protocol_version="v2",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "512Mi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KSERVE_TEST_NAMESPACE
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)

    res = predict(service_name, "./data/iris_input_v2.json", protocol_version="v2")
    assert res["outputs"][0]["data"] == [1, 1]

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #6
def test_pmml_runtime_kserve():
    service_name = 'isvc-pmml-runtime'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(
                name="pmml",
            ),
            storage_uri='gs://kfserving-examples/models/pmml',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '256Mi'},
                limits={'cpu': '100m', 'memory': '256Mi'}
            )
        )
    )

    isvc = V1beta1InferenceService(api_version=constants.KSERVE_V1BETA1,
                                   kind=constants.KSERVE_KIND,
                                   metadata=client.V1ObjectMeta(
                                        name=service_name, namespace=KSERVE_TEST_NAMESPACE),
                                   spec=V1beta1InferenceServiceSpec(predictor=predictor))

    kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
    res = predict(service_name, './data/pmml_input.json')
    assert (res["predictions"] == [{'Species': 'setosa',
                                    'Probability_setosa': 1.0,
                                    'Probability_versicolor': 0.0,
                                    'Probability_virginica': 0.0,
                                    'Node_Id': '2'}])
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #7
def test_sklearn_runtime_kserve():
    service_name = "isvc-sklearn-runtime"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(
                name="sklearn",
            ),
            storage_uri="gs://kfserving-examples/models/sklearn/1.0/model",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "256Mi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KSERVE_TEST_NAMESPACE
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)
    res = predict(service_name, "./data/iris_input.json")
    assert res["predictions"] == [1, 1]
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #8
def test_azure_credentials():
    '''Test Azure credential creation.'''
    kserve_client = KServeClient()
    sa_name = constants.DEFAULT_SA_NAME
    kserve_client.set_credentials(
        storage_type='Azure',
        namespace=KSERVE_TEST_NAMESPACE,
        credentials_file='./credentials/azure_credentials.json',
        sa_name=sa_name)
    created_sa = get_created_sa(sa_name)
    created_secret_name = created_sa.secrets[0].name
    created_secret = get_created_secret(created_secret_name)
    assert created_secret.data['AZ_CLIENT_ID'] == 'dXNlcgo='
    assert created_secret.data['AZ_CLIENT_SECRET'] == 'cGFzc3dvcmQ='
    assert created_secret.data[
        'AZ_SUBSCRIPTION_ID'] == 'MzMzMzMzMzMtMzMzMy0zMzMzLTMzMzMtMzMzMzMz'
    assert created_secret.data['AZ_TENANT_ID'] == 'MTIzNAo='
Example #9
def test_torchserve_grpc():
    service_name = "mnist-grpc"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri=
            "gs://kfserving-examples/models/torchserve/image_classifier/v1",
            ports=[
                V1ContainerPort(container_port=7070,
                                name="h2c",
                                protocol="TCP")
            ],
            resources=V1ResourceRequirements(
                requests={
                    "cpu": "100m",
                    "memory": "1Gi"
                },
                limits={
                    "cpu": "1",
                    "memory": "1Gi"
                },
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name,
                                  namespace=KSERVE_TEST_NAMESPACE)

    with open("./data/torchserve_input.json", 'rb') as f:
        data = f.read()

    input_data = {'data': data}
    stub = grpc_stub(service_name, KSERVE_TEST_NAMESPACE)
    response = stub.Predictions(
        inference_pb2.PredictionsRequest(model_name='mnist', input=input_data))

    prediction = response.prediction.decode('utf-8')
    json_output = json.loads(prediction)
    print(json_output)
    assert (json_output["predictions"][0][0] == 2)
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #10
def test_set_credentials_s3():
    """Test S3 credentials creating."""
    kfserving = KServeClient()
    credentials_file = './credentials/aws_credentials'

    # Test creating service account case.
    sa_name = constants.DEFAULT_SA_NAME
    if check_sa_exists(sa_name):
        delete_sa(sa_name)

    kfserving.set_credentials(storage_type='s3',
                              namespace=KSERVE_TEST_NAMESPACE,
                              credentials_file=credentials_file,
                              s3_profile='default',
                              s3_endpoint='s3.us-west-2.amazonaws.com',
                              s3_region='us-west-2',
                              s3_use_https='1',
                              s3_verify_ssl='0')

    sa_body = get_created_sa(sa_name)
    created_secret_name = sa_body.secrets[0].name
    created_secret = get_created_secret(created_secret_name)

    config = configparser.ConfigParser()
    config.read([expanduser(credentials_file)])
    s3_access_key_id = config.get('default', 'aws_access_key_id')
    s3_secret_access_key = config.get('default', 'aws_secret_access_key')

    assert created_secret.data[
        constants.S3_ACCESS_KEY_ID_DEFAULT_NAME] == s3_access_key_id
    assert created_secret.data[
        constants.S3_SECRET_ACCESS_KEY_DEFAULT_NAME] == s3_secret_access_key
    assert created_secret.metadata.annotations[
        constants.KSERVE_GROUP +
        '/s3-endpoint'] == 's3.us-west-2.amazonaws.com'
    assert created_secret.metadata.annotations[constants.KSERVE_GROUP +
                                               '/s3-region'] == 'us-west-2'
    assert created_secret.metadata.annotations[constants.KSERVE_GROUP +
                                               '/s3-usehttps'] == '1'
    assert created_secret.metadata.annotations[constants.KSERVE_GROUP +
                                               '/s3-verifyssl'] == '0'
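Because the test reads the credentials back with configparser, the file is expected to be an AWS-style INI profile. A small hedged sketch that generates such a file (the path and key names mirror what the test reads; the values are placeholders):

import configparser

def write_fake_aws_credentials(path='./credentials/aws_credentials'):
    # Write a 'default' profile containing the two keys the test reads back.
    config = configparser.ConfigParser()
    config['default'] = {
        'aws_access_key_id': 'FAKEACCESSKEYID',
        'aws_secret_access_key': 'fake-secret-access-key',
    }
    with open(path, 'w') as f:
        config.write(f)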
Example #11
def test_transformer():
    service_name = 'raw-transformer'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri='gs://kfserving-examples/models/torchserve/image_classifier/v1',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '1Gi'},
                limits={'cpu': '1', 'memory': '1Gi'}
            )
        ),
    )
    transformer = V1beta1TransformerSpec(
        min_replicas=1,
        containers=[V1Container(
            image='809251082950.dkr.ecr.us-west-2.amazonaws.com/kserve/image-transformer:'
                  + os.environ.get("PULL_BASE_SHA"),
            name='kserve-container',
            resources=V1ResourceRequirements(
                requests={'cpu': '100m', 'memory': '1Gi'},
                limits={'cpu': '100m', 'memory': '1Gi'}),
            args=["--model_name", "mnist"],
            env=[V1EnvVar(name="STORAGE_URI", value="gs://kfserving-examples/models/torchserve/image_classifier/v1")])]
    )

    annotations = dict()
    annotations['serving.kserve.io/deploymentMode'] = 'RawDeployment'
    annotations['kubernetes.io/ingress.class'] = 'istio'
    isvc = V1beta1InferenceService(api_version=constants.KSERVE_V1BETA1,
                                   kind=constants.KSERVE_KIND,
                                   metadata=client.V1ObjectMeta(
                                       name=service_name, namespace=KSERVE_TEST_NAMESPACE, annotations=annotations),
                                   spec=V1beta1InferenceServiceSpec(predictor=predictor, transformer=transformer))

    kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(
            service_name, namespace=KSERVE_TEST_NAMESPACE)
    except RuntimeError as e:
        print(kserve_client.api_instance.get_namespaced_custom_object("serving.knative.dev", "v1",
                                                                      KSERVE_TEST_NAMESPACE,
                                                                      "services", service_name + "-predictor-default"))
        raise e

    res = predict(service_name, "./data/transformer.json", model_name="mnist")
    assert(res.get("predictions")[0] == 2)
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #12
def test_paddle_runtime():
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(name="paddle", ),
            storage_uri=
            "https://zhouti-mcp-edge.cdn.bcebos.com/resnet50.tar.gz",
            resources=V1ResourceRequirements(
                requests={
                    "cpu": "200m",
                    "memory": "4Gi"
                },
                limits={
                    "cpu": "200m",
                    "memory": "4Gi"
                },
            )))

    service_name = 'isvc-paddle-runtime'
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=V1ObjectMeta(name=service_name,
                              namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor))

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name,
                                      namespace=KSERVE_TEST_NAMESPACE,
                                      timeout_seconds=720)
    except RuntimeError as e:
        pods = kserve_client.core_api.list_namespaced_pod(
            KSERVE_TEST_NAMESPACE,
            label_selector='serving.kserve.io/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            logging.info(pod)
        raise e

    res = predict(service_name, './data/jay.json')
    assert np.argmax(res["predictions"][0]) == 17

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #13
def test_pytorch():
    service_name = 'isvc-pytorch'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        pytorch=V1beta1TorchServeSpec(
            storage_uri='gs://kfserving-samples/models/pytorch/cifar10',
            model_class_name="Net",
            resources=V1ResourceRequirements(requests={
                'cpu': '100m',
                'memory': '2Gi'
            },
                                             limits={
                                                 'cpu': '100m',
                                                 'memory': '2Gi'
                                             })))

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor))

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name,
                                      namespace=KSERVE_TEST_NAMESPACE)
    except RuntimeError as e:
        print(
            kserve_client.api_instance.get_namespaced_custom_object(
                "serving.knative.dev", "v1", KSERVE_TEST_NAMESPACE, "services",
                service_name + "-predictor-default"))
        pods = kserve_client.core_api.list_namespaced_pod(
            KSERVE_TEST_NAMESPACE,
            label_selector='serving.kserve.io/inferenceservice={}'.format(
                service_name))
        for pod in pods.items:
            print(pod)
        raise e
    res = predict(service_name, './data/cifar_input.json')
    assert (np.argmax(res["predictions"]) == 3)
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #14
def test_xgboost_v2_runtime_kserve():
    service_name = "isvc-xgboost-v2-runtime"

    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(name="xgboost", ),
            runtime="kserve-mlserver",
            storage_uri="gs://kfserving-samples/models/xgboost/iris",
            protocol_version="v2",
            resources=V1ResourceRequirements(
                requests={
                    "cpu": "100m",
                    "memory": "256Mi"
                },
                limits={
                    "cpu": "100m",
                    "memory": "1024Mi"
                },
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name,
                                  namespace=KSERVE_TEST_NAMESPACE)

    res = predict(service_name,
                  "./data/iris_input_v2.json",
                  protocol_version="v2")
    assert res["outputs"][0]["data"] == [1.0, 1.0]

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #15
def test_lightgbm_v2_runtime_kserve():
    service_name = "isvc-lightgbm-v2-runtime"

    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(
                name="lightgbm",
            ),
            runtime="kserve-mlserver",
            storage_uri="gs://kfserving-examples/models/lightgbm/v2/iris",
            protocol_version="v2",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "1", "memory": "1Gi"},
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name, namespace=KSERVE_TEST_NAMESPACE
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name, namespace=KSERVE_TEST_NAMESPACE)

    res = predict(service_name, "./data/iris_input_v2.json", protocol_version="v2")
    assert res["outputs"][0]["data"] == [
        8.796664107010673e-06,
        0.9992300031041593,
        0.0007612002317336916,
        4.974786820804187e-06,
        0.9999919650711493,
        3.0601420299625077e-06]

    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #16
def test_raw_deployment_kserve():
    service_name = "raw-sklearn"
    annotations = dict()
    annotations['serving.kserve.io/deploymentMode'] = 'RawDeployment'
    annotations['kubernetes.io/ingress.class'] = 'istio'

    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        sklearn=V1beta1SKLearnSpec(
            storage_uri="gs://kfserving-examples/models/sklearn/1.0/model",
            resources=V1ResourceRequirements(
                requests={
                    "cpu": "100m",
                    "memory": "256Mi"
                },
                limits={
                    "cpu": "100m",
                    "memory": "256Mi"
                },
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(
            name=service_name,
            namespace=KSERVE_TEST_NAMESPACE,
            annotations=annotations,
        ),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name,
                                  namespace=KSERVE_TEST_NAMESPACE)
    res = predict(service_name, "./data/iris_input.json")
    assert res["predictions"] == [1, 1]
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #17
def test_torchserve_runtime_kserve():
    service_name = "mnist-runtime"
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(name="pytorch", ),
            storage_uri=
            "gs://kfserving-examples/models/torchserve/image_classifier/v1",
            protocol_version="v1",
            resources=V1ResourceRequirements(
                requests={
                    "cpu": "100m",
                    "memory": "4Gi"
                },
                limits={
                    "cpu": "1",
                    "memory": "4Gi"
                },
            ),
        ),
    )

    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name,
                                  namespace=KSERVE_TEST_NAMESPACE)

    res = predict(service_name,
                  "./data/torchserve_input.json",
                  model_name="mnist")
    assert (res.get("predictions")[0] == 2)
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #18
from kubernetes import client

from kserve import KServeClient
from kserve import constants
from kserve import V1beta1PredictorSpec
from kserve import V1beta1SKLearnSpec
from kserve import V1beta1InferenceServiceSpec
from kserve import V1beta1InferenceService
from kserve import V1beta1LoggerSpec
from kubernetes.client import V1ResourceRequirements
from kubernetes.client import V1Container
from ..common.utils import predict
from ..common.utils import KSERVE_TEST_NAMESPACE
import os
import time

kserve_client = KServeClient(config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))


def test_kserve_logger():
    msg_dumper = 'message-dumper'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        containers=[V1Container(name="kserve-container",
                                image='gcr.io/knative-releases/knative.dev/eventing-contrib/cmd/event_display')]
    )

    isvc = V1beta1InferenceService(api_version=constants.KSERVE_V1BETA1,
                                   kind=constants.KSERVE_KIND,
                                   metadata=client.V1ObjectMeta(
                                        name=msg_dumper, namespace=KSERVE_TEST_NAMESPACE),
                                   spec=V1beta1InferenceServiceSpec(predictor=predictor))
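    # Hedged sketch of a possible continuation (not part of the original excerpt):
    # a second predictor whose logger points at the message-dumper service above.
    # Field names assume the SDK signature V1beta1LoggerSpec(mode=..., url=...).
    logger_url = "http://{}.{}.svc.cluster.local".format(msg_dumper, KSERVE_TEST_NAMESPACE)
    predictor_with_logger = V1beta1PredictorSpec(
        min_replicas=1,
        logger=V1beta1LoggerSpec(mode="all", url=logger_url),
        sklearn=V1beta1SKLearnSpec(
            storage_uri="gs://kfserving-examples/models/sklearn/1.0/model",
            resources=V1ResourceRequirements(
                requests={"cpu": "100m", "memory": "256Mi"},
                limits={"cpu": "100m", "memory": "256Mi"})))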
Example #19
def test_mms_sklearn_kserve(protocol_version: str, storage_uri: str):
    # Define an inference service
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        sklearn=V1beta1SKLearnSpec(
            protocol_version=protocol_version,
            resources=client.V1ResourceRequirements(
                requests={
                    "cpu": "100m",
                    "memory": "512Mi"
                },
                limits={
                    "cpu": "100m",
                    "memory": "1024Mi"
                },
            ),
        ),
    )

    service_name = f"isvc-sklearn-mms-{protocol_version}"
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor),
    )

    # Create an instance of inference service with isvc
    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    kserve_client.wait_isvc_ready(service_name,
                                  namespace=KSERVE_TEST_NAMESPACE)

    cluster_ip = get_cluster_ip()

    model_names = [
        f"model1-sklearn-{protocol_version}",
        f"model2-sklearn-{protocol_version}",
    ]

    for model_name in model_names:
        model_spec = V1alpha1ModelSpec(
            storage_uri=storage_uri,
            memory="128Mi",
            framework="sklearn",
        )

        model = V1alpha1TrainedModel(
            api_version=constants.KSERVE_V1ALPHA1,
            kind=constants.KSERVE_KIND_TRAINEDMODEL,
            metadata=client.V1ObjectMeta(name=model_name,
                                         namespace=KSERVE_TEST_NAMESPACE),
            spec=V1alpha1TrainedModelSpec(inference_service=service_name,
                                          model=model_spec),
        )

        # Create instances of trained models using model1 and model2
        kserve_client.create_trained_model(model, KSERVE_TEST_NAMESPACE)

        kserve_client.wait_model_ready(
            service_name,
            model_name,
            isvc_namespace=KSERVE_TEST_NAMESPACE,
            isvc_version=constants.KSERVE_V1BETA1_VERSION,
            protocol_version=protocol_version,
            cluster_ip=cluster_ip,
        )

    input_json = "./data/iris_input.json"
    if protocol_version == "v2":
        input_json = "./data/iris_input_v2.json"

    responses = [
        predict(
            service_name,
            input_json,
            model_name=model_name,
            protocol_version=protocol_version,
        ) for model_name in model_names
    ]

    if protocol_version == "v1":
        assert responses[0]["predictions"] == [1, 1]
        assert responses[1]["predictions"] == [1, 1]
    elif protocol_version == "v2":
        assert responses[0]["outputs"][0]["data"] == [1, 1]
        assert responses[1]["outputs"][0]["data"] == [1, 1]

    # Clean up inference service and trained models
    for model_name in model_names:
        kserve_client.delete_trained_model(model_name, KSERVE_TEST_NAMESPACE)
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #20
def test_triton_runtime():
    service_name = 'isvc-triton-runtime'
    predictor = V1beta1PredictorSpec(
        min_replicas=1,
        model=V1beta1ModelSpec(
            model_format=V1beta1ModelFormat(name="pytorch", ),
            runtime="kserve-tritonserver",
            storage_uri='gs://kfserving-examples/models/torchscript',
            ports=[
                V1ContainerPort(name="h2c",
                                protocol="TCP",
                                container_port=9000)
            ]))

    transformer = V1beta1TransformerSpec(
        min_replicas=1,
        containers=[
            V1Container(
                image=
                '809251082950.dkr.ecr.us-west-2.amazonaws.com/kserve/image-transformer:'
                + os.environ.get("PULL_BASE_SHA"),
                name='kserve-container',
                resources=V1ResourceRequirements(requests={
                    'cpu': '100m',
                    'memory': '1Gi'
                },
                                                 limits={
                                                     'cpu': '100m',
                                                     'memory': '1Gi'
                                                 }),
                args=["--model_name", "cifar10", "--protocol", "grpc-v2"])
        ])
    isvc = V1beta1InferenceService(
        api_version=constants.KSERVE_V1BETA1,
        kind=constants.KSERVE_KIND,
        metadata=client.V1ObjectMeta(name=service_name,
                                     namespace=KSERVE_TEST_NAMESPACE),
        spec=V1beta1InferenceServiceSpec(predictor=predictor,
                                         transformer=transformer))

    kserve_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    kserve_client.create(isvc)
    try:
        kserve_client.wait_isvc_ready(service_name,
                                      namespace=KSERVE_TEST_NAMESPACE)
    except RuntimeError as e:
        print(
            kserve_client.api_instance.get_namespaced_custom_object(
                "serving.knative.dev", "v1", KSERVE_TEST_NAMESPACE, "services",
                service_name + "-predictor-default"))
        deployments = kserve_client.app_api.list_namespaced_deployment(
            KSERVE_TEST_NAMESPACE,
            label_selector='serving.kserve.io/inferenceservice={}'.format(service_name))
        for deployment in deployments.items:
            print(deployment)
        raise e
    res = predict(service_name, "./data/image.json", model_name='cifar10')
    assert (np.argmax(res.get("predictions")[0]) == 5)
    kserve_client.delete(service_name, KSERVE_TEST_NAMESPACE)
Example #21
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from unittest.mock import patch

from kubernetes import client

from kserve import V1beta1PredictorSpec
from kserve import V1beta1TFServingSpec
from kserve import V1beta1InferenceServiceSpec
from kserve import V1beta1InferenceService
from kserve import KServeClient

kserve_client = KServeClient(config_file='./kserve/test/kubeconfig')

mocked_unit_result = \
    '''
{
    "api_version": "serving.kserve.io/v1beta1",
    "kind": "InferenceService",
    "metadata": {
        "name": "flower-sample",
        "namespace": "kubeflow"
    },
    "spec": {
        "predictor": {
            "tensorflow": {
                "storage_uri": "gs://kfserving-samples/models/tensorflow/flowers"
            }
Example #22
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from unittest.mock import patch

from kubernetes import client

from kserve import V1beta1PredictorSpec
from kserve import V1beta1TFServingSpec
from kserve import V1beta1InferenceServiceSpec
from kserve import V1beta1InferenceService
from kserve import KServeClient

kserve_client = KServeClient()

mocked_unit_result = \
    '''
{
    "api_version": "serving.kserve.io/v1beta1",
    "kind": "InferenceService",
    "metadata": {
        "name": "flower-sample",
        "namespace": "kubeflow"
    },
    "spec": {
        "predictor": {
            "tensorflow": {
                "storage_uri": "gs://kfserving-samples/models/tensorflow/flowers"
            }
Example #23
def perform_action(action,
                   model_name,
                   model_uri,
                   canary_traffic_percent,
                   namespace,
                   framework,
                   custom_model_spec,
                   service_account,
                   inferenceservice_yaml,
                   request_timeout,
                   autoscaling_target=0,
                   enable_istio_sidecar=True,
                   watch_timeout=300,
                   min_replicas=0,
                   max_replicas=0):
    """
    Perform the specified action. If `inferenceservice_yaml` is provided, its dict
    representation is sent directly to the Kubernetes API; otherwise, unless the
    action is 'delete', a V1beta1InferenceService object is built from the provided
    inputs and submitted for creation/update.
    :return: InferenceService JSON output
    """
    kserve_client = KServeClient()

    if inferenceservice_yaml:
        # Overwrite name and namespace if exists
        if namespace:
            inferenceservice_yaml['metadata']['namespace'] = namespace

        if model_name:
            inferenceservice_yaml['metadata']['name'] = model_name
        else:
            model_name = inferenceservice_yaml['metadata']['name']

        isvc = inferenceservice_yaml

    elif action != 'delete':
        # Create annotations
        annotations = {}
        if int(autoscaling_target) != 0:
            annotations["autoscaling.knative.dev/target"] = str(
                autoscaling_target)
        if not enable_istio_sidecar:
            annotations["sidecar.istio.io/inject"] = 'false'
        if not annotations:
            annotations = None
        metadata = client.V1ObjectMeta(name=model_name,
                                       namespace=namespace,
                                       annotations=annotations)

        # If a custom model container spec was provided, build the V1Container
        # object using it.
        containers = []
        if custom_model_spec:
            containers = [create_custom_container_spec(custom_model_spec)]

        # Build the V1beta1PredictorSpec.
        predictor_spec = create_predictor_spec(framework, model_uri,
                                               canary_traffic_percent,
                                               service_account, min_replicas,
                                               max_replicas, containers,
                                               request_timeout)

        isvc = create_inference_service(metadata, predictor_spec)

    if action == "create":
        submit_api_request(kserve_client,
                           'create',
                           model_name,
                           isvc,
                           namespace,
                           watch=True,
                           timeout_seconds=watch_timeout)
    elif action == "update":
        submit_api_request(kserve_client,
                           'update',
                           model_name,
                           isvc,
                           namespace,
                           watch=True,
                           timeout_seconds=watch_timeout)
    elif action == "apply":
        try:
            submit_api_request(kserve_client,
                               'create',
                               model_name,
                               isvc,
                               namespace,
                               watch=True,
                               timeout_seconds=watch_timeout)
        except Exception:
            submit_api_request(kserve_client,
                               'update',
                               model_name,
                               isvc,
                               namespace,
                               watch=True,
                               timeout_seconds=watch_timeout)
    elif action == "delete":
        kserve_client.delete(model_name, namespace=namespace)
    else:
        raise RuntimeError("Error: No matching action: " + action)

    model_status = kserve_client.get(model_name, namespace=namespace)
    return model_status
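A hedged usage sketch of this component (all argument values below are illustrative, not taken from the original; it relies on the create_predictor_spec and submit_api_request helpers referenced above):

status = perform_action(
    action="apply",
    model_name="sklearn-iris",
    model_uri="gs://kfserving-examples/models/sklearn/1.0/model",
    canary_traffic_percent=100,
    namespace="kserve-test",
    framework="sklearn",
    custom_model_spec={},
    service_account="",
    inferenceservice_yaml=None,
    request_timeout=60,
)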