Example #1
0
def predict_str(service_name, input_json, protocol_version="v1",
                version=constants.KSERVE_V1BETA1_VERSION, model_name=None):
    """Send a prediction request to a deployed InferenceService.

    Looks up the service to obtain its Host header, builds the v1 or v2
    inference URL against the cluster ingress IP, POSTs ``input_json`` as the
    request body, and returns the decoded JSON response.

    :param service_name: name of the InferenceService in KSERVE_TEST_NAMESPACE
    :param input_json: payload sent verbatim as the POST body
    :param protocol_version: "v1" (default) or "v2" inference protocol
    :param version: KServe CRD API version used for the service lookup
    :param model_name: model to address in the URL; defaults to service_name
    :return: parsed JSON prediction response
    """
    ks_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    isvc = ks_client.get(
        service_name,
        namespace=KSERVE_TEST_NAMESPACE,
        version=version,
    )
    # temporary sleep until this is fixed https://github.com/kserve/kserve/issues/604
    time.sleep(10)

    target_model = service_name if model_name is None else model_name
    cluster_ip = get_cluster_ip()
    # Route through the ingress with the service's own hostname.
    headers = {"Host": urlparse(isvc["status"]["url"]).netloc}

    if protocol_version == "v2":
        url = f"http://{cluster_ip}/v2/models/{target_model}/infer"
    else:
        url = f"http://{cluster_ip}/v1/models/{target_model}:predict"

    logging.info("Sending Header = %s", headers)
    logging.info("Sending url = %s", url)
    logging.info("Sending request data: %s", input_json)
    response = requests.post(url, input_json, headers=headers)
    logging.info("Got response code %s, content %s",
                 response.status_code, response.content)
    return json.loads(response.content.decode("utf-8"))
Example #2
0
def explain_response(service_name, input_json):
    """POST the contents of the file *input_json* to the v1 :explain endpoint.

    On a request or JSON-decoding failure the function dumps debugging
    context — the Knative service object and the kserve-container logs of
    every pod belonging to the InferenceService — and then re-raises the
    original exception.

    :param service_name: name of the InferenceService in KSERVE_TEST_NAMESPACE
    :param input_json: path to a JSON file holding the request payload
    :return: parsed JSON explanation response
    :raises RuntimeError, json.decoder.JSONDecodeError: propagated after
        the diagnostic logging above.
    """
    kfs_client = KServeClient(
        config_file=os.environ.get("KUBECONFIG", "~/.kube/config"))
    isvc = kfs_client.get(
        service_name,
        namespace=KSERVE_TEST_NAMESPACE,
        version=constants.KSERVE_V1BETA1_VERSION,
    )
    # temporary sleep until this is fixed https://github.com/kserve/kserve/issues/604
    time.sleep(10)
    cluster_ip = get_cluster_ip()
    host = urlparse(isvc["status"]["url"]).netloc
    url = "http://{}/v1/models/{}:explain".format(cluster_ip, service_name)
    headers = {"Host": host}
    with open(input_json) as json_file:
        data = json.load(json_file)
        logging.info("Sending request data: %s", json.dumps(data))
        try:
            response = requests.post(url, json.dumps(data), headers=headers)
            logging.info(
                "Got response code %s, content %s",
                response.status_code,
                response.content,
            )
            json_response = json.loads(response.content.decode("utf-8"))
        except (RuntimeError, json.decoder.JSONDecodeError):
            logging.info("Explain error -------")
            # Dump the Knative service spec of the explainer for diagnosis.
            logging.info(
                kfs_client.api_instance.get_namespaced_custom_object(
                    "serving.knative.dev",
                    "v1",
                    KSERVE_TEST_NAMESPACE,
                    "services",
                    service_name + "-explainer",
                ))
            pods = kfs_client.core_api.list_namespaced_pod(
                KSERVE_TEST_NAMESPACE,
                label_selector="serving.kserve.io/inferenceservice={}".format(
                    service_name),
            )
            for pod in pods.items:
                logging.info(pod)
                # Lazy %-args instead of eager "%" formatting.
                logging.info("%s\t%s\t%s", pod.metadata.name,
                             pod.status.phase, pod.status.pod_ip)
                api_response = kfs_client.core_api.read_namespaced_pod_log(
                    pod.metadata.name,
                    KSERVE_TEST_NAMESPACE,
                    container="kserve-container",
                )
                logging.info(api_response)
            # Bare raise preserves the original traceback (vs. `raise e`).
            raise
        return json_response
Example #3
0
def perform_action(action,
                   model_name,
                   model_uri,
                   canary_traffic_percent,
                   namespace,
                   framework,
                   custom_model_spec,
                   service_account,
                   inferenceservice_yaml,
                   request_timeout,
                   autoscaling_target=0,
                   enable_istio_sidecar=True,
                   watch_timeout=300,
                   min_replicas=0,
                   max_replicas=0):
    """
    Perform the specified action. If the action is not 'delete' and `inferenceService_yaml`
    was provided, the dict representation of the YAML will be sent directly to the
    Kubernetes API. Otherwise, a V1beta1InferenceService object will be built using the
    provided input and then sent for creation/update.

    :param action: one of 'create', 'update', 'apply', 'delete'
    :raises ValueError: if `action` is not one of the recognized actions
    :return InferenceService JSON output
    """
    kserve_client = KServeClient()

    if inferenceservice_yaml:
        # Overwrite name and namespace if exists
        if namespace:
            inferenceservice_yaml['metadata']['namespace'] = namespace

        if model_name:
            inferenceservice_yaml['metadata']['name'] = model_name
        else:
            model_name = inferenceservice_yaml['metadata']['name']

        isvc = inferenceservice_yaml

    elif action != 'delete':
        # Create annotations
        annotations = {}
        if int(autoscaling_target) != 0:
            annotations["autoscaling.knative.dev/target"] = str(
                autoscaling_target)
        if not enable_istio_sidecar:
            annotations["sidecar.istio.io/inject"] = 'false'
        if not annotations:
            annotations = None
        metadata = client.V1ObjectMeta(name=model_name,
                                       namespace=namespace,
                                       annotations=annotations)

        # If a custom model container spec was provided, build the V1Container
        # object using it.
        containers = []
        if custom_model_spec:
            containers = [create_custom_container_spec(custom_model_spec)]

        # Build the V1beta1PredictorSpec.
        predictor_spec = create_predictor_spec(framework, model_uri,
                                               canary_traffic_percent,
                                               service_account, min_replicas,
                                               max_replicas, containers,
                                               request_timeout)

        isvc = create_inference_service(metadata, predictor_spec)

    def _submit(operation):
        # One place for the identical create/update submission call.
        submit_api_request(kserve_client,
                           operation,
                           model_name,
                           isvc,
                           namespace,
                           watch=True,
                           timeout_seconds=watch_timeout)

    if action == "create":
        _submit('create')
    elif action == "update":
        _submit('update')
    elif action == "apply":
        # Best-effort create, fall back to update if the resource exists.
        try:
            _submit('create')
        except Exception:
            _submit('update')
    elif action == "delete":
        kserve_client.delete(model_name, namespace=namespace)
    else:
        # BUG FIX: the original raised a plain string, which is itself a
        # TypeError in Python 3 ("exceptions must derive from BaseException").
        raise ValueError("Error: No matching action: " + action)

    model_status = kserve_client.get(model_name, namespace=namespace)
    return model_status