示例#1
0
def test_load_batch(
    printer: Callable,
    client: cx.Client,
    api: str,
    test_s3_path: str,
    load_config: Dict[str, Union[int, float]],
    deploy_timeout: int = None,
    retry_attempts: int = 0,
    api_config_name: str = "cortex.yaml",
):
    """Deploy a batch API, submit ``load_config["jobs"]`` jobs and check them.

    Args:
        printer: Callable used to emit progress and diagnostic messages.
        client: Client used to create and query the API.
        api: Name of the API directory under ``TEST_APIS_DIR``.
        test_s3_path: ``s3://<bucket>/<key>`` location passed to every job as
            ``dest_s3_dir`` and listed afterwards to count the job's objects.
        load_config: Must provide the keys ``jobs``, ``workers_per_job``,
            ``items_per_job``, ``batch_size`` and ``workload_timeout``.
        deploy_timeout: Timeout forwarded to ``endpoint_ready``.
        retry_attempts: Extra submission attempts allowed for the first job
            only; every later job is submitted exactly once.
        api_config_name: File name of the API spec inside the API directory.

    Raises:
        AssertionError: If any of the checks fails.
    """
    jobs = load_config["jobs"]
    workers_per_job = load_config["workers_per_job"]
    items_per_job = load_config["items_per_job"]
    batch_size = load_config["batch_size"]
    workload_timeout = load_config["workload_timeout"]

    # fail with a readable message instead of an AttributeError on None
    s3_match = re.match("s3://(.+?)/(.+)", test_s3_path)
    assert (
        s3_match is not None
    ), f"test_s3_path must have the form s3://<bucket>/<key> (got {test_s3_path})"
    bucket, key = s3_match.groups()
    s3 = boto3.client("s3")
    paginator = s3.get_paginator("list_objects_v2")

    api_dir = TEST_APIS_DIR / api
    with open(str(api_dir / api_config_name)) as f:
        api_specs = yaml.safe_load(f)
    assert len(api_specs) == 1

    sample_generator_path = api_dir / "sample_generator.py"
    assert (sample_generator_path.exists()
            ), "sample_generator.py must be present for the batch load test"
    sample_generator = load_generator(sample_generator_path)

    api_name = api_specs[0]["name"]
    client.create_api(api_spec=api_specs[0], project_dir=api_dir)
    api_endpoint = client.get_api(api_name)["endpoint"]

    try:
        assert endpoint_ready(
            client=client, api_name=api_name,
            timeout=deploy_timeout), f"api {api_name} not ready"

        # submit jobs
        printer(f"submitting {jobs} jobs")
        job_specs = []
        for _ in range(jobs):
            for _ in range(retry_attempts + 1):
                response = request_batch_prediction(
                    client,
                    api_name,
                    item_list=[
                        sample_generator() for _ in range(items_per_job)
                    ],
                    batch_size=batch_size,
                    workers=workers_per_job,
                    config={"dest_s3_dir": test_s3_path},
                )
                if response.status_code == HTTPStatus.OK:
                    break
                time.sleep(1)
            # retries are only required once
            retry_attempts = 0

            assert (
                response.status_code == HTTPStatus.OK
            ), f"status code: got {response.status_code}, expected {HTTPStatus.OK} ({response.text})"
            job_specs.append(response.json())

        # wait for the jobs to finish
        printer("waiting on the jobs")
        assert jobs_done(
            client, api_name, [job_spec["job_id"] for job_spec in job_specs],
            workload_timeout), f"not all jobs succeed in {workload_timeout}s"

        # assert jobs
        printer("checking the jobs' responses")
        for job_spec in job_specs:
            job_id: str = job_spec["job_id"]
            job_status = requests.get(
                f"{api_endpoint}?jobID={job_id}").json()["job_status"]

            assert (
                job_status["batches_in_queue"] == 0
            ), f"there are still batches in queue ({job_status['batches_in_queue']}) for job ID {job_id}"
            assert job_status["batch_metrics"]["succeeded"] == math.ceil(
                items_per_job / batch_size)

            num_objects = 0
            for page in paginator.paginate(Bucket=bucket,
                                           Prefix=os.path.join(key, job_id)):
                # a page without matching keys has no "Contents" entry
                num_objects += len(page.get("Contents", []))
            assert (
                num_objects == 1
            ), f"expected exactly 1 object for job ID {job_id}, found {num_objects}"

    except:
        # best effort: dump the API state, then re-raise the original failure
        try:
            api_info = client.get_api(api_name)

            # only get the last 10 job statuses
            if "batch_job_statuses" in api_info and len(
                    api_info["batch_job_statuses"]) > 10:
                api_info["batch_job_statuses"] = api_info[
                    "batch_job_statuses"][-10:]

            printer(json.dumps(api_info, indent=2))
        except Exception:
            # diagnostics must never hide the failure being reported
            pass
        raise

    finally:
        delete_apis(client, [api_name])
示例#2
0
def test_load_task(
    printer: Callable,
    client: cx.Client,
    api: str,
    load_config: Dict[str, Union[int, float]],
    deploy_timeout: int = None,
    retry_attempts: int = 0,
    poll_sleep_seconds: int = 1,
    api_config_name: str = "cortex.yaml",
):
    """Deploy a task API, submit jobs concurrently and wait for their results.

    Args:
        printer: Callable used to emit progress and diagnostic messages.
        client: Client used to create and query the API.
        api: Name of the API directory under ``TEST_APIS_DIR``.
        load_config: Must provide the keys ``jobs``, ``concurrency``,
            ``submit_timeout`` and ``workload_timeout``.
        deploy_timeout: Timeout forwarded to ``endpoint_ready``.
        retry_attempts: Number of seconds slept after the endpoint is ready
            and before the first submission.
        poll_sleep_seconds: Forwarded to ``retrieve_results_concurrently``.
        api_config_name: File name of the API spec inside the API directory.

    Raises:
        AssertionError: If the API does not become ready or the jobs cannot
            be submitted within ``submit_timeout`` seconds.
    """
    jobs = load_config["jobs"]
    concurrency = load_config["concurrency"]
    submit_timeout = load_config["submit_timeout"]
    workload_timeout = load_config["workload_timeout"]

    api_dir = TEST_APIS_DIR / api
    with open(str(api_dir / api_config_name)) as f:
        api_specs = yaml.safe_load(f)
    assert len(api_specs) == 1

    api_name = api_specs[0]["name"]
    client.create_api(api_spec=api_specs[0], project_dir=api_dir)

    request_stopper = td.Event()
    map_stopper = td.Event()
    try:
        assert endpoint_ready(
            client=client, api_name=api_name,
            timeout=deploy_timeout), f"api {api_name} not ready"

        # give the operator time to start
        time.sleep(1 * retry_attempts)

        # submit jobs
        printer(f"submitting {jobs} jobs concurrently")
        job_specs = []
        threads_futures = request_tasks_concurrently(client, api_name,
                                                     request_stopper,
                                                     concurrency, jobs,
                                                     job_specs)

        assert wait_on_event(
            request_stopper, submit_timeout
        ), f"{jobs} jobs couldn't be submitted in {submit_timeout}s"
        check_futures_healthy(threads_futures)
        wait_on_futures(threads_futures)

        printer("waiting on the jobs")
        job_ids = [job_spec.json()["job_id"] for job_spec in job_specs]
        retrieve_results_concurrently(
            client,
            api_name,
            concurrency,
            map_stopper,
            job_ids,
            poll_sleep_seconds=poll_sleep_seconds,
            timeout=workload_timeout,
        )

    except:
        # best effort: dump the API state, then re-raise the original failure
        try:
            api_info = client.get_api(api_name)

            # only get the last 10 job statuses
            if "task_job_statuses" in api_info and len(
                    api_info["task_job_statuses"]) > 10:
                api_info["task_job_statuses"] = api_info["task_job_statuses"][
                    -10:]

            printer(json.dumps(api_info, indent=2))
        except Exception:
            # diagnostics must never hide the failure being reported
            pass
        raise

    finally:
        # signal both the submitting and the retrieving workers before the API
        # goes away; setting an already-set event is a no-op
        request_stopper.set()
        map_stopper.set()
        delete_apis(client, [api_name])
示例#3
0
def test_load_async(
    printer: Callable,
    client: cx.Client,
    api: str,
    load_config: Dict[str, Union[int, float]],
    deploy_timeout: int = None,
    poll_sleep_seconds: int = 1,
    api_config_name: str = "cortex.yaml",
):
    """Deploy an async API, fire requests concurrently and validate results.

    The API spec is pinned to ``desired_replicas`` by setting both
    ``min_replicas`` and ``max_replicas`` before it is created.

    Args:
        printer: Callable used to emit progress and diagnostic messages.
        client: Client used to create and query the API.
        api: Name of the API directory under ``TEST_APIS_DIR``.
        load_config: Must provide the keys ``total_requests``,
            ``desired_replicas``, ``concurrency``, ``submit_timeout`` and
            ``workload_timeout``.
        deploy_timeout: Timeout forwarded to ``apis_ready``.
        poll_sleep_seconds: Forwarded to ``retrieve_results_concurrently``.
        api_config_name: File name of the API spec inside the API directory.

    Raises:
        AssertionError: If any of the checks fails.
    """
    total_requests = load_config["total_requests"]
    desired_replicas = load_config["desired_replicas"]
    concurrency = load_config["concurrency"]
    submit_timeout = load_config["submit_timeout"]
    workload_timeout = load_config["workload_timeout"]

    api_dir = TEST_APIS_DIR / api
    with open(str(api_dir / api_config_name)) as f:
        api_specs = yaml.safe_load(f)

    assert len(api_specs) == 1
    api_specs[0]["autoscaling"] = {
        "min_replicas": desired_replicas,
        "max_replicas": desired_replicas,
    }
    api_name = api_specs[0]["name"]
    client.create_api(api_spec=api_specs[0], project_dir=api_dir)

    request_stopper = td.Event()
    map_stopper = td.Event()
    responses: List[Dict[str, Any]] = []
    # stays None until result retrieval starts, so the summary in `finally`
    # is only printed when retrieval was actually attempted
    results = None
    try:
        printer(f"getting {desired_replicas} replicas ready")
        assert apis_ready(client=client,
                          api_names=[api_name],
                          timeout=deploy_timeout), f"api {api_name} not ready"

        with open(str(api_dir / "sample.json")) as f:
            payload = json.load(f)

        printer("start making prediction requests concurrently")
        threads_futures = make_requests_concurrently(
            client,
            api_name,
            concurrency,
            request_stopper,
            responses=responses,
            max_total_requests=total_requests,
            payload=payload,
        )
        assert wait_on_event(
            request_stopper, submit_timeout
        ), f"{total_requests} couldn't be submitted in {submit_timeout}s"
        check_futures_healthy(threads_futures)
        wait_on_futures(threads_futures)

        printer("finished making prediction requests")
        assert (
            len(responses) == total_requests
        ), "the submitted number of requests doesn't match the returned number of responses"

        job_ids = []
        for response in responses:
            response_json = response.json()
            assert "id" in response_json
            job_ids.append(response_json["id"])

        # assert the results
        printer("start retrieving the async results concurrently")
        results = []
        retrieve_results_concurrently(
            client,
            api_name,
            concurrency,
            map_stopper,
            job_ids,
            results,
            poll_sleep_seconds,
            workload_timeout,
        )
        for request_id, json_result in results:
            # validate keys are in the result json response
            assert (
                "id" in json_result
            ), f"id key was not present in result response (response: {json_result})"
            assert (
                "status" in json_result
            ), f"status key was not present in result response (response: {json_result})"
            assert (
                "result" in json_result
            ), f"result key was not present in result response (response: {json_result})"
            assert (
                "timestamp" in json_result
            ), f"timestamp key was not present in result response (response: {json_result})"

            # validate result json response has valid values
            assert (
                json_result["id"] == request_id
            ), f"result 'id' and request 'id' mismatch ({json_result['id']} != {request_id})"
            assert (
                json_result["status"] == "completed"
            ), f"async workload did not complete (response: {json_result})"
            assert json_result[
                "timestamp"] != "", "result 'timestamp' value was empty"
            assert json_result[
                "result"] != "", "result 'result' value was empty"

    except:
        # best effort: dump the API state, then re-raise the original failure
        try:
            api_info = client.get_api(api_name)
            printer(json.dumps(api_info, indent=2))
        except Exception:
            # diagnostics must never hide the failure being reported
            pass
        raise

    finally:
        if results is not None and len(results) < total_requests:
            printer(
                f"{len(results)}/{total_requests} have been successfully retrieved"
            )
        # signal both the requesting and the retrieving workers before the API
        # goes away; setting an already-set event is a no-op
        request_stopper.set()
        map_stopper.set()
        delete_apis(client, [api_name])
示例#4
0
def test_realtime_api(
    printer: Callable,
    client: cx.Client,
    api: str,
    timeout: int = None,
    api_config_name: str = "cortex.yaml",
):
    """Deploy a realtime API and check one prediction over HTTP and/or gRPC.

    When ``expectations.yaml`` exists in the API directory it is parsed with
    ``parse_expectations``. Without a ``grpc`` section, ``sample.json`` is
    posted and a 200 response is required; with a ``grpc`` section, the
    generated stub's ``Predict`` is called and its output compared to the
    expected values.

    Args:
        printer: Callable used to emit diagnostic messages.
        client: Client used to create and query the API.
        api: Name of the API directory under ``TEST_APIS_DIR``.
        timeout: Timeout forwarded to ``apis_ready``.
        api_config_name: File name of the API spec inside the API directory.

    Raises:
        AssertionError: If any of the checks fails.
    """
    api_dir = TEST_APIS_DIR / api
    with open(str(api_dir / api_config_name)) as f:
        api_specs = yaml.safe_load(f)

    expectations = None
    expectations_file = api_dir / "expectations.yaml"
    if expectations_file.exists():
        expectations = parse_expectations(str(expectations_file))

    # NOTE(review): every spec is created, but only the first one is waited
    # on, queried and deleted below — confirm this is intended
    api_name = api_specs[0]["name"]
    for api_spec in api_specs:
        client.create_api(api_spec=api_spec, project_dir=api_dir)

    try:
        assert apis_ready(client=client, api_names=[api_name],
                          timeout=timeout), f"apis {api_name} not ready"

        if not expectations or "grpc" not in expectations:
            with open(str(api_dir / "sample.json")) as f:
                payload = json.load(f)
            response = request_prediction(client, api_name, payload)

            assert (
                response.status_code == HTTPStatus.OK
            ), f"status code: got {response.status_code}, expected {HTTPStatus.OK}"

            if expectations and "response" in expectations:
                assert_response_expectations(response,
                                             expectations["response"])

        if expectations and "grpc" in expectations:
            stub, input_sample, output_values, output_type, is_output_stream = generate_grpc(
                client, api_name, api_dir, expectations["grpc"])
            if is_output_stream:
                # NOTE(review): zip() stops at the shorter side, so a stream
                # yielding fewer messages than expected is not detected here
                for response, output_val in zip(stub.Predict(input_sample),
                                                output_values):
                    assert (
                        type(response) == output_type
                    ), f"didn't receive response of type {str(output_type)}, but received {str(type(response))}"
                    assert response == output_val, f"received {response} instead of {output_val}"
            else:
                response = stub.Predict(input_sample)
                # check the response already received rather than issuing a
                # second Predict call just for the type check
                assert (
                    type(response) == output_type
                ), f"didn't receive response of type {str(output_type)}, but received {str(type(response))}"
                assert (response == output_values[0]
                        ), f"received {response} instead of {output_values[0]}"
    except:
        # best effort: dump the API state and stream logs for a few seconds,
        # then re-raise the original failure
        try:
            api_info = client.get_api(api_name)
            printer(json.dumps(api_info, indent=2))
            td.Thread(target=lambda: client.stream_api_logs(api_name),
                      daemon=True).start()
            time.sleep(5)
        except Exception:
            # diagnostics must never hide the failure being reported
            pass
        raise
    finally:
        delete_apis(client, [api_name])
示例#5
0
def retrieve_results_concurrently(
    client: cx.Client,
    api_name: str,
    concurrency: int,
    event_stopper: td.Event,
    job_ids: List[str],
    responses: Optional[List[Tuple[str, Dict[str, Any]]]] = None,
    poll_sleep_seconds: int = 1,
    timeout: Optional[int] = None,
):
    """Poll a TaskAPI/AsyncAPI endpoint from a thread pool until jobs finish.

    Each job ID is polled until the endpoint reports it as finished
    (``status == "completed"`` for AsyncAPI, ``job_status.status ==
    "status_succeeded"`` for TaskAPI) or until ``event_stopper`` is set.

    Args:
        client: Client used to look up the API kind and endpoint.
        api_name: Name of the API to poll.
        concurrency: Number of worker threads in the pool.
        event_stopper: When set, workers stop polling and record nothing. A
            worker sets it itself when it receives a non-200 response whose
            body contains ``"error"``.
        job_ids: IDs to poll; appended to the endpoint as ``?jobID=<id>`` for
            TaskAPI and as ``/<id>`` for AsyncAPI.
        responses: Optional list that receives ``(job_id, response_json)``
            tuples for finished jobs. A fresh list is used when omitted.
        poll_sleep_seconds: Pause after a non-200 response before retrying.
        timeout: Timeout passed to the executor's ``map``.

    Raises:
        ValueError: If the API is neither a TaskAPI nor an AsyncAPI.
        RuntimeError: If a worker receives a non-200 response whose body
            contains ``"error"``.
    """
    if responses is None:
        # a fresh list per call; a mutable default would leak results between
        # unrelated invocations
        responses = []

    api_info = client.get_api(api_name)
    task_kind = api_info["spec"]["kind"] == "TaskAPI"
    async_kind = api_info["spec"]["kind"] == "AsyncAPI"
    if not task_kind and not async_kind:
        raise ValueError(
            "function can only be called for TaskAPI/AsyncAPI kinds")

    executor = futures.ThreadPoolExecutor(concurrency)
    thread_local = td.local()

    def _get_session() -> requests.Session:
        # one HTTP session per worker thread
        if not hasattr(thread_local, "session"):
            thread_local.session = requests.Session()
        return thread_local.session

    def _retriever(request_id: str):
        session = _get_session()

        # the URL is fixed per job, so build it once
        if task_kind:
            url = f"{api_info['endpoint']}?jobID={request_id}"
        else:
            url = f"{api_info['endpoint']}/{request_id}"

        # NOTE(review): a 200 response that is not yet finished is re-polled
        # immediately, without sleeping — confirm this is intended
        while not event_stopper.is_set():
            result_response = session.get(url)

            if result_response.status_code != HTTPStatus.OK:
                content = result_response.content.decode("utf-8")
                if "error" in content:
                    event_stopper.set()
                    raise RuntimeError(
                        f"received {result_response.status_code} status code with the following message: {content}"
                    )
                time.sleep(poll_sleep_seconds)
                continue

            result_response_json = result_response.json()
            if (async_kind and "status" in result_response_json
                    and result_response_json["status"] == "completed"):
                break
            if (task_kind and "job_status" in result_response_json
                    and "status" in result_response_json["job_status"]
                    and result_response_json["job_status"]["status"]
                    == "status_succeeded"):
                break

        if event_stopper.is_set():
            return

        responses.append((request_id, result_response_json))

    try:
        # will throw an exception if something failed in any thread
        for _ in executor.map(_retriever, job_ids, timeout=timeout):
            pass
    finally:
        # wait=False: on a timeout the workers only stop once the caller sets
        # event_stopper, so waiting here could block forever
        executor.shutdown(wait=False)
示例#6
0
def test_load_realtime(
    printer: Callable,
    client: cx.Client,
    api: str,
    load_config: Dict[str, Union[int, float]],
    deploy_timeout: int = None,
    api_config_name: str = "cortex.yaml",
):
    """Load-test a realtime API while checking latency and status codes.

    The API spec is pinned to ``desired_replicas`` by setting both
    ``min_replicas`` and ``max_replicas``. While requests are in flight, the
    min/max/average of the collected latencies and the 4xx/5xx counters from
    ``client.get_api`` are checked once per second.

    Args:
        printer: Callable used to emit progress and diagnostic messages.
        client: Client used to create and query the API.
        api: Name of the API directory under ``TEST_APIS_DIR``.
        load_config: Must provide the keys ``total_requests``,
            ``desired_replicas``, ``concurrency``, ``min_rtt``, ``max_rtt``,
            ``avg_rtt``, ``avg_rtt_tolerance`` and ``status_code_timeout``.
        deploy_timeout: Timeout forwarded to ``apis_ready``.
        api_config_name: File name of the API spec inside the API directory.

    Raises:
        AssertionError: If any of the checks fails.
    """
    total_requests = load_config["total_requests"]
    desired_replicas = load_config["desired_replicas"]
    concurrency = load_config["concurrency"]
    min_rtt = load_config["min_rtt"]
    max_rtt = load_config["max_rtt"]
    avg_rtt = load_config["avg_rtt"]
    avg_rtt_tolerance = load_config["avg_rtt_tolerance"]
    status_code_timeout = load_config["status_code_timeout"]

    api_dir = TEST_APIS_DIR / api
    with open(str(api_dir / api_config_name)) as f:
        api_specs = yaml.safe_load(f)
    assert len(api_specs) == 1
    api_specs[0]["autoscaling"] = {
        "min_replicas": desired_replicas,
        "max_replicas": desired_replicas,
    }
    api_name = api_specs[0]["name"]
    client.create_api(api_spec=api_specs[0], project_dir=api_dir)

    # controls the flow of requests
    request_stopper = td.Event()
    latencies: List[float] = []
    try:
        printer(f"getting {desired_replicas} replicas ready")
        assert apis_ready(client=client,
                          api_names=[api_name],
                          timeout=deploy_timeout), f"api {api_name} not ready"

        # give the APIs some time to prevent getting high latency spikes in the beginning
        time.sleep(5)

        with open(str(api_dir / "sample.json")) as f:
            payload = json.load(f)

        printer("start making requests concurrently")
        threads_futures = make_requests_concurrently(
            client,
            api_name,
            concurrency,
            request_stopper,
            latencies=latencies,
            max_total_requests=total_requests,
            payload=payload,
        )

        while not request_stopper.is_set():
            # `latencies` is presumably appended to by the requesting threads;
            # compute all statistics from a single snapshot so that min, max,
            # sum and count describe the same set of samples
            observed = list(latencies)

            current_min_rtt = min(observed) if observed else min_rtt
            assert (
                current_min_rtt >= min_rtt
            ), f"min latency threshold hit; got {current_min_rtt}s, but the lowest accepted latency is {min_rtt}s"

            current_max_rtt = max(observed) if observed else max_rtt
            assert (
                current_max_rtt <= max_rtt
            ), f"max latency threshold hit; got {current_max_rtt}s, but the highest accepted latency is {max_rtt}s"

            current_avg_rtt = sum(observed) / len(
                observed) if observed else avg_rtt
            assert (
                current_avg_rtt > avg_rtt - avg_rtt_tolerance
                and current_avg_rtt < avg_rtt + avg_rtt_tolerance
            ), f"avg latency ({current_avg_rtt}s) falls outside the expected range ({avg_rtt - avg_rtt_tolerance}s - {avg_rtt + avg_rtt_tolerance})"

            api_info = client.get_api(api_name)
            network_stats = api_info["metrics"]["network_stats"]

            assert (
                network_stats["code_4xx"] == 0
            ), f"detected 4xx response codes ({network_stats['code_4xx']}) in cortex get"
            assert (
                network_stats["code_5xx"] == 0
            ), f"detected 5xx response codes ({network_stats['code_5xx']}) in cortex get"

            printer(
                f"min RTT: {current_min_rtt} | max RTT: {current_max_rtt} | avg RTT: {current_avg_rtt} | requests: {network_stats['code_2xx']} (out of {total_requests})"
            )

            # check if the requesting threads are still healthy
            # if not, they'll raise an exception
            check_futures_healthy(threads_futures)

            # don't stress the CPU too hard
            time.sleep(1)

        printer("verifying number of processed requests using the client")
        assert api_requests(
            client, api_name, total_requests, timeout=status_code_timeout
        ), f"the number of 2xx response codes for api {api_name} doesn't match the expected number {total_requests}"

    except:
        # best effort: dump the API state, then re-raise the original failure
        try:
            api_info = client.get_api(api_name)
            printer(json.dumps(api_info, indent=2))
        except Exception:
            # diagnostics must never hide the failure being reported
            pass
        raise

    finally:
        request_stopper.set()
        delete_apis(client, [api_name])
示例#7
0
def test_async_api(
    printer: Callable,
    client: cx.Client,
    api: str,
    deploy_timeout: int = None,
    poll_retries: int = 5,
    poll_sleep_seconds: int = 1,
    api_config_name: str = "cortex.yaml",
):
    """Deploy an async API, submit one request and validate its result.

    Args:
        printer: Callable used to emit diagnostic messages.
        client: Client used to create and query the API.
        api: Name of the API directory under ``TEST_APIS_DIR``.
        deploy_timeout: Timeout forwarded to ``apis_ready``.
        poll_retries: Number of additional result lookups after the first
            one; polling stops at the first 200 response.
        poll_sleep_seconds: Pause between two result lookups.
        api_config_name: File name of the API spec inside the API directory.

    Raises:
        AssertionError: If any of the checks fails.
    """
    api_dir = TEST_APIS_DIR / api
    with open(str(api_dir / api_config_name)) as f:
        api_specs = yaml.safe_load(f)

    expectations = None
    expectations_file = api_dir / "expectations.yaml"
    if expectations_file.exists():
        expectations = parse_expectations(str(expectations_file))

    assert len(api_specs) == 1

    api_name = api_specs[0]["name"]
    client.create_api(api_spec=api_specs[0], project_dir=api_dir)

    # known as soon as the workload is accepted; used by the failure
    # diagnostics below even when the result itself was never retrieved
    request_id = None
    try:
        assert apis_ready(client=client,
                          api_names=[api_name],
                          timeout=deploy_timeout), f"apis {api_name} not ready"

        with open(str(api_dir / "sample.json")) as f:
            payload = json.load(f)

        response = request_prediction(client, api_name, payload)

        assert (
            response.status_code == HTTPStatus.OK
        ), f"workload submission status code: got {response.status_code}, expected {HTTPStatus.OK}"

        response_json = response.json()
        assert "id" in response_json

        request_id = response_json["id"]

        result_response = None
        for _ in range(poll_retries + 1):
            result_response = retrieve_async_result(client=client,
                                                    api_name=api_name,
                                                    request_id=request_id)

            if result_response.status_code == HTTPStatus.OK:
                break

            time.sleep(poll_sleep_seconds)

        assert (
            result_response.status_code == HTTPStatus.OK
        ), f"result retrieval status code: got {result_response.status_code}, expected {HTTPStatus.OK}"

        result_response_json = result_response.json()

        # validate keys are in the result json response
        assert (
            "id" in result_response_json
        ), f"id key was not present in result response (response: {result_response_json})"
        assert (
            "status" in result_response_json
        ), f"status key was not present in result response (response: {result_response_json})"
        assert (
            "result" in result_response_json
        ), f"result key was not present in result response (response: {result_response_json})"
        assert (
            "timestamp" in result_response_json
        ), f"timestamp key was not present in result response (response: {result_response_json})"

        # validate result json response has valid values
        assert (
            result_response_json["id"] == request_id
        ), f"result 'id' and request 'id' mismatch ({result_response_json['id']} != {request_id})"
        assert (
            result_response_json["status"] == "completed"
        ), f"async workload did not complete (response: {result_response_json})"
        assert result_response_json[
            "timestamp"] != "", "result 'timestamp' value was empty"
        assert result_response_json[
            "result"] != "", "result 'result' value was empty"

        # assert result expectations
        if expectations and "response" in expectations:
            assert_json_expectations(result_response_json["result"],
                                     expectations["response"])

    except:
        # best effort: dump the API and job state and stream the job logs for
        # a few seconds, then re-raise the original failure
        try:
            api_info = client.get_api(api_name)
            printer(json.dumps(api_info, indent=2))

            if request_id is not None:
                job_status = client.get_job(api_name, request_id)
                printer(json.dumps(job_status, indent=2))
                td.Thread(
                    target=lambda: client.stream_job_logs(
                        api_name, request_id),
                    daemon=True,
                ).start()
                time.sleep(5)
        except Exception:
            # diagnostics must never hide the failure being reported
            pass
        raise

    finally:
        delete_apis(client, [api_name])
示例#8
0
def request_prediction(client: cx.Client, api_name: str,
                       payload: Union[List, Dict]) -> requests.Response:
    """POST ``payload`` as JSON to the endpoint of ``api_name``.

    The endpoint is read from ``client.get_api(api_name)["endpoint"]``; the
    HTTP response is returned as-is, without any status check.
    """
    endpoint = client.get_api(api_name)["endpoint"]
    return requests.post(endpoint, json=payload)
示例#9
0
def test_long_running_realtime(
    printer: Callable,
    client: cx.Client,
    api: str,
    long_running_config: Dict[str, Union[int, float]],
    deploy_timeout: int = None,
    api_config_name: str = "cortex_cpu.yaml",
    node_groups: List[str] = [],
):
    """Deploy a realtime API and send it requests back-to-back for a while.

    Requests are posted sequentially until ``time_to_run`` seconds have
    elapsed; every response must be a 200 and, when ``expectations.yaml``
    defines a ``response`` section, must satisfy it.

    Args:
        printer: Callable used to emit diagnostic messages.
        client: Client used to deploy and query the API.
        api: Name of the API directory under ``TEST_APIS_DIR``.
        long_running_config: Must provide the key ``time_to_run``.
        deploy_timeout: Timeout forwarded to ``apis_ready``.
        api_config_name: File name of the API spec inside the API directory.
        node_groups: When non-empty, written to the spec's ``node_groups``.
            The default list is only read, never mutated.

    Raises:
        AssertionError: If any of the checks fails.
    """
    api_dir = TEST_APIS_DIR / api
    with open(str(api_dir / api_config_name)) as f:
        api_specs = yaml.safe_load(f)
    assert len(api_specs) == 1

    time_to_run = long_running_config["time_to_run"]

    if len(node_groups) > 0:
        api_specs[0]["node_groups"] = node_groups

    expectations = None
    expectations_file = api_dir / "expectations.yaml"
    if expectations_file.exists():
        expectations = parse_expectations(str(expectations_file))

    api_name = api_specs[0]["name"]
    for api_spec in api_specs:
        client.deploy(api_spec=api_spec)

    try:
        assert apis_ready(client=client,
                          api_names=[api_name],
                          timeout=deploy_timeout), f"apis {api_name} not ready"

        with open(str(api_dir / "sample.json")) as f:
            payload = json.load(f)

        start_time = time.time()
        while time.time() - start_time <= time_to_run:
            response = post_request(client, api_name, payload)

            assert (
                response.status_code == HTTPStatus.OK
            ), f"status code: got {response.status_code}, expected {HTTPStatus.OK}"

            if expectations and "response" in expectations:
                assert_response_expectations(response,
                                             expectations["response"])

    except:
        # best effort: dump the API state and stream logs for a few seconds
        try:
            api_info = client.get_api(api_name)
            printer(json.dumps(api_info, indent=2))
            td.Thread(target=lambda: stream_api_logs(client, api_name),
                      daemon=True).start()
            time.sleep(5)
        except Exception:
            # a failing diagnostic must not replace the test failure; with
            # `finally: raise` the diagnostic's own exception was propagated
            pass
        raise
    finally:
        delete_apis(client, [api_name])
示例#10
0
def test_batch_api(
    printer: Callable,
    client: cx.Client,
    api: str,
    test_s3_path: str,
    deploy_timeout: int = None,
    job_timeout: int = None,
    retry_attempts: int = 0,
    api_config_name: str = "cortex.yaml",
):
    """Deploy a batch API, submit ``sample.json`` as one job and await it.

    The job is submitted with a batch size of 2 and ``test_s3_path`` as its
    ``dest_s3_dir``; submission is retried up to ``retry_attempts`` extra
    times, one second apart, until a 200 response is received. The API is
    always deleted at the end.

    Raises:
        AssertionError: If the endpoint is not ready, the submission is not
            accepted, or ``job_done`` reports failure.
    """
    api_dir = TEST_APIS_DIR / api
    spec_path = str(api_dir / api_config_name)
    with open(spec_path) as spec_file:
        api_specs = yaml.safe_load(spec_file)

    assert len(api_specs) == 1

    spec = api_specs[0]
    api_name = spec["name"]
    client.create_api(api_spec=spec, project_dir=api_dir)

    try:
        is_ready = endpoint_ready(client=client,
                                  api_name=api_name,
                                  timeout=deploy_timeout)
        assert is_ready, f"api {api_name} not ready"

        with open(str(api_dir / "sample.json")) as sample_file:
            payload = json.load(sample_file)

        # first attempt plus `retry_attempts` retries
        response = None
        attempts_left = retry_attempts + 1
        while attempts_left > 0:
            attempts_left -= 1
            response = request_batch_prediction(
                client,
                api_name,
                item_list=payload,
                batch_size=2,
                config={"dest_s3_dir": test_s3_path},
            )
            if response.status_code == HTTPStatus.OK:
                break

            time.sleep(1)

        assert (
            response.status_code == HTTPStatus.OK
        ), f"status code: got {response.status_code}, expected {HTTPStatus.OK} ({response.text})"

        job_spec = response.json()

        # block until the job reports completion (or the timeout expires)
        succeeded = job_done(
            client=client,
            api_name=job_spec["api_name"],
            job_id=job_spec["job_id"],
            timeout=job_timeout,
        )
        assert succeeded, f"job did not succeed (api_name: {api_name}, job_id: {job_spec['job_id']})"

    except:
        # diagnostics are best effort: anything going wrong here is ignored
        # and the original failure is re-raised
        try:
            printer(json.dumps(client.get_api(api_name), indent=2))

            # `job_spec` is unbound when the failure happened before the job
            # was accepted; the resulting error is swallowed below
            printer(
                json.dumps(client.get_job(api_name, job_spec["job_id"]),
                           indent=2))

            def _stream_logs():
                client.stream_job_logs(api_name, job_spec["job_id"])

            td.Thread(target=_stream_logs, daemon=True).start()
            time.sleep(5)
        except:
            pass
        raise

    finally:
        delete_apis(client, [api_name])
示例#11
0
def test_load_realtime(
    printer: Callable,
    client: cx.Client,
    api: str,
    load_config: Dict[str, Union[int, float]],
    deploy_timeout: int = None,
    api_config_name: str = "cortex_cpu.yaml",
    node_groups: List[str] = [],
):
    """Load-test a realtime API with concurrent requests.

    The API spec is pinned to ``desired_replicas`` by setting both
    ``min_replicas`` and ``max_replicas``. While requests are in flight the
    health of the requesting threads is checked once per second. After a
    failure the function sleeps 30 seconds once the API has been deleted.

    Args:
        printer: Callable used to emit progress and diagnostic messages.
        client: Client used to deploy and query the API.
        api: Name of the API directory under ``TEST_APIS_DIR``.
        load_config: Must provide the keys ``total_requests``,
            ``desired_replicas``, ``concurrency`` and ``status_code_timeout``
            (the last one is read but not used in this function).
        deploy_timeout: Timeout forwarded to ``apis_ready``.
        api_config_name: File name of the API spec inside the API directory.
        node_groups: When non-empty, written to the spec's ``node_groups``.
            The default list is only read, never mutated.

    Raises:
        AssertionError: If the API does not become ready.
    """
    total_requests = load_config["total_requests"]
    desired_replicas = load_config["desired_replicas"]
    concurrency = load_config["concurrency"]
    status_code_timeout = load_config["status_code_timeout"]

    api_dir = TEST_APIS_DIR / api
    with open(str(api_dir / api_config_name)) as f:
        api_specs = yaml.safe_load(f)
    assert len(api_specs) == 1

    api_specs[0]["autoscaling"] = {
        "min_replicas": desired_replicas,
        "max_replicas": desired_replicas,
    }
    if len(node_groups) > 0:
        api_specs[0]["node_groups"] = node_groups

    api_name = api_specs[0]["name"]
    client.deploy(api_spec=api_specs[0])

    # controls the flow of requests
    request_stopper = td.Event()
    failed = False
    try:
        printer(f"getting {desired_replicas} replicas ready")
        assert apis_ready(client=client,
                          api_names=[api_name],
                          timeout=deploy_timeout), f"api {api_name} not ready"

        # give the APIs some time to prevent getting high latency spikes in the beginning
        time.sleep(5)

        with open(str(api_dir / "sample.json")) as f:
            payload = json.load(f)

        printer("start making requests concurrently")
        threads_futures = make_requests_concurrently(
            client,
            api_name,
            concurrency,
            request_stopper,
            max_total_requests=total_requests,
            payload=payload,
        )

        while not request_stopper.is_set():
            # check if the requesting threads are still healthy
            # if not, they'll raise an exception
            check_futures_healthy(threads_futures)

            # don't stress the CPU too hard
            time.sleep(1)
    except:
        # best effort: dump the API state
        failed = True
        try:
            api_info = client.get_api(api_name)
            printer(json.dumps(api_info, indent=2))
        except Exception:
            # a failing diagnostic must not replace the test failure; with
            # `finally: raise` the diagnostic's own exception was propagated
            pass
        raise

    finally:
        request_stopper.set()
        delete_apis(client, [api_name])
        if failed:
            time.sleep(30)
示例#12
0
def test_task_api(
    printer: Callable,
    client: cx.Client,
    api: str,
    deploy_timeout: int = None,
    job_timeout: int = None,
    retry_attempts: int = 0,
    api_config_name: str = "cortex_cpu.yaml",
    node_groups: List[str] = [],
    local_operator: bool = False,
):
    """Deploy a task API, submit one job and assert that the job finishes.

    The API is always deleted at the end, whether the test passed or not.

    Args:
        printer: Callable used to emit diagnostic output.
        client: Cortex client used to deploy, inspect and delete the API.
        api: Name of the API directory under ``TEST_APIS_DIR``.
        deploy_timeout: Timeout forwarded to ``endpoint_ready``.
        job_timeout: Timeout forwarded to ``job_done``.
        retry_attempts: Number of extra submission attempts after the first.
        api_config_name: File name of the API spec inside the API directory.
        node_groups: When non-empty, stored in the spec under ``node_groups``.
            The list is only read here, never mutated.
        local_operator: When true, readiness and job status are queried
            through ``http://localhost:8888`` endpoint overrides.

    Raises:
        AssertionError: If the spec file does not hold exactly one API, the
            endpoint is not ready, the submission is not answered with
            ``HTTPStatus.OK``, or ``job_done`` reports failure.
    """
    api_dir = TEST_APIS_DIR / api
    with open(str(api_dir / api_config_name)) as f:
        api_specs = yaml.safe_load(f)

    assert len(api_specs) == 1

    if len(node_groups) > 0:
        api_specs[0]["node_groups"] = node_groups

    api_name = api_specs[0]["name"]
    client.deploy(api_spec=api_specs[0])

    # stays None until a job was submitted, so the failure handler knows
    # whether there is a job to inspect
    job_id = None
    try:
        endpoint_override = f"http://localhost:8888/tasks/{api_name}" if local_operator else None
        assert endpoint_ready(
            client=client,
            api_name=api_name,
            timeout=deploy_timeout,
            endpoint_override=endpoint_override,
        ), f"api {api_name} not ready"

        response = None
        for _ in range(retry_attempts + 1):
            response = request_task(client,
                                    api_name,
                                    local_operator=local_operator)
            if response.status_code == HTTPStatus.OK:
                break

            time.sleep(1)

        # fail with a readable message instead of an opaque JSON/KeyError
        # when every submission attempt was rejected
        assert (
            response.status_code == HTTPStatus.OK
        ), f"status code: got {response.status_code}, expected {HTTPStatus.OK}"

        job_id = response.json()["job_id"]
        endpoint_override = (
            f"http://localhost:8888/tasks/{api_name}?jobID={job_id}"
            if local_operator else None)
        assert job_done(
            client=client,
            api_name=api_name,
            job_id=job_id,
            timeout=job_timeout,
            endpoint_override=endpoint_override,
        ), f"task job did not succeed (api_name: {api_name}, job_id: {job_id})"

    except BaseException:
        # best effort: diagnostics must never replace the original failure
        try:
            api_info = client.get_api(api_name)
            printer(json.dumps(api_info, indent=2))

            if job_id is not None:
                job_status = client.get_job(api_name, job_id)
                printer(json.dumps(job_status, indent=2))
                td.Thread(target=lambda: stream_job_logs(client, api_name,
                                                         job_id),
                          daemon=True).start()
                # give the daemon thread a moment to stream some logs
                time.sleep(5)
        except Exception:
            pass
        raise

    finally:
        delete_apis(client, [api_name])
示例#13
0
def test_batch_api(
    printer: Callable,
    client: cx.Client,
    api: str,
    test_s3_path: str,
    deploy_timeout: int = None,
    job_timeout: int = None,
    retry_attempts: int = 0,
    api_config_name: str = "cortex_cpu.yaml",
    node_groups: List[str] = [],
    local_operator: bool = False,
):
    """Deploy a batch API, submit ``sample.json`` as one job and assert it finishes.

    The API is always deleted at the end, whether the test passed or not.

    Args:
        printer: Callable used to emit diagnostic output.
        client: Cortex client used to deploy, inspect and delete the API.
        api: Name of the API directory under ``TEST_APIS_DIR``.
        test_s3_path: Passed to the job as ``config["dest_s3_dir"]``.
        deploy_timeout: Timeout forwarded to ``endpoint_ready``.
        job_timeout: Timeout forwarded to ``job_done``.
        retry_attempts: Number of extra submission attempts after the first.
        api_config_name: File name of the API spec inside the API directory.
        node_groups: When non-empty, stored in the spec under ``node_groups``.
            The list is only read here, never mutated.
        local_operator: When true, readiness and job status are queried
            through ``http://localhost:8888`` endpoint overrides.

    Raises:
        AssertionError: If the spec file does not hold exactly one API, the
            endpoint is not ready, the submission is not answered with
            ``HTTPStatus.OK``, or ``job_done`` reports failure.
    """
    api_dir = TEST_APIS_DIR / api
    with open(str(api_dir / api_config_name)) as f:
        api_specs = yaml.safe_load(f)
    assert len(api_specs) == 1

    if len(node_groups) > 0:
        api_specs[0]["node_groups"] = node_groups

    api_name = api_specs[0]["name"]
    client.deploy(api_spec=api_specs[0])

    # stays None until a job was submitted, so the failure handler knows
    # whether there is a job to inspect
    job_id = None
    try:
        endpoint_override = f"http://localhost:8888/batch/{api_name}" if local_operator else None
        assert endpoint_ready(
            client=client,
            api_name=api_name,
            timeout=deploy_timeout,
            endpoint_override=endpoint_override,
        ), f"api {api_name} not ready"

        with open(str(api_dir / "sample.json")) as f:
            payload = json.load(f)

        response = None
        for _ in range(retry_attempts + 1):
            response = request_batch_prediction(
                client,
                api_name,
                item_list=payload,
                batch_size=2,
                config={"dest_s3_dir": test_s3_path},
                local_operator=local_operator,
            )
            if response.status_code == HTTPStatus.OK:
                break

            time.sleep(1)

        assert (
            response.status_code == HTTPStatus.OK
        ), f"status code: got {response.status_code}, expected {HTTPStatus.OK} ({response.text})"

        # monitor job progress
        job_id = response.json()["job_id"]
        endpoint_override = (
            f"http://localhost:8888/batch/{api_name}?jobID={job_id}"
            if local_operator else None)
        assert job_done(
            client=client,
            api_name=api_name,
            job_id=job_id,
            timeout=job_timeout,
            endpoint_override=endpoint_override,
        ), f"job did not succeed (api_name: {api_name}, job_id: {job_id})"

    except BaseException:
        # best effort: an error while collecting diagnostics is dropped so the
        # original failure is the one that reaches the caller
        try:
            api_info = client.get_api(api_name)
            printer(json.dumps(api_info, indent=2))

            if job_id is not None:
                job_status = client.get_job(api_name, job_id)
                printer(json.dumps(job_status, indent=2))

                td.Thread(
                    target=lambda: stream_job_logs(client, api_name, job_id),
                    daemon=True,
                ).start()
                # give the daemon thread a moment to stream some logs
                time.sleep(5)
        except Exception:
            pass
        raise
    finally:
        delete_apis(client, [api_name])
示例#14
0
def test_long_running_realtime(
    printer: Callable,
    client: cx.Client,
    api: str,
    long_running_config: Dict[str, Union[int, float]],
    deploy_timeout: int = None,
    api_config_name: str = "cortex.yaml",
):
    """Send sequential requests to a realtime API for a fixed duration.

    Every spec in the config file is deployed, but only the first API is
    exercised. Each response must be ``HTTPStatus.OK`` and, when an
    ``expectations.yaml`` file with a ``response`` section exists, must satisfy
    it. Afterwards the number of sent requests is checked via ``api_requests``.
    All deployed APIs are deleted at the end.

    Args:
        printer: Callable used to emit progress and diagnostic output.
        client: Cortex client used to create, inspect and delete the APIs.
        api: Name of the API directory under ``TEST_APIS_DIR``.
        long_running_config: Must provide ``time_to_run`` (compared against
            elapsed ``time.time()`` seconds) and ``status_code_timeout``
            (forwarded to ``api_requests``).
        deploy_timeout: Timeout forwarded to ``apis_ready``.
        api_config_name: File name of the API spec inside the API directory.

    Raises:
        AssertionError: If the API is not ready, a request fails, or the
            request count check does not pass.
    """
    api_dir = TEST_APIS_DIR / api
    with open(str(api_dir / api_config_name)) as f:
        api_specs = yaml.safe_load(f)

    time_to_run = long_running_config["time_to_run"]
    status_code_timeout = long_running_config["status_code_timeout"]

    expectations = None
    expectations_file = api_dir / "expectations.yaml"
    if expectations_file.exists():
        expectations = parse_expectations(str(expectations_file))

    api_name = api_specs[0]["name"]
    # collected before deploying so cleanup covers every API created below,
    # not just the one under test
    deployed_api_names = [api_spec["name"] for api_spec in api_specs]
    for api_spec in api_specs:
        client.create_api(api_spec=api_spec, project_dir=api_dir)

    try:
        assert apis_ready(client=client,
                          api_names=[api_name],
                          timeout=deploy_timeout), f"apis {api_name} not ready"

        with open(str(api_dir / "sample.json")) as f:
            payload = json.load(f)

        counter = 0
        start_time = time.time()
        while time.time() - start_time <= time_to_run:
            response = request_prediction(client, api_name, payload)

            assert (
                response.status_code == HTTPStatus.OK
            ), f"status code: got {response.status_code}, expected {HTTPStatus.OK}"

            if expectations and "response" in expectations:
                assert_response_expectations(response,
                                             expectations["response"])

            counter += 1

        printer("verifying number of processed requests using the client")
        assert api_requests(
            client, api_name, counter, timeout=status_code_timeout
        ), f"the number of 2xx response codes for api {api_name} doesn't match the expected number {counter}"

    except BaseException:
        # best effort: diagnostics must never replace the original failure
        try:
            api_info = client.get_api(api_name)
            printer(json.dumps(api_info, indent=2))
            td.Thread(target=lambda: client.stream_api_logs(api_name),
                      daemon=True).start()
            # give the daemon thread a moment to stream some logs
            time.sleep(5)
        except Exception:
            pass
        raise
    finally:
        delete_apis(client, deployed_api_names)
示例#15
0
def test_task_api(
    printer: Callable,
    client: cx.Client,
    api: str,
    deploy_timeout: int = None,
    job_timeout: int = None,
    retry_attempts: int = 0,
    api_config_name: str = "cortex.yaml",
):
    """Create a task API, submit one job and assert that the job finishes.

    The API is always deleted at the end, whether the test passed or not.

    Args:
        printer: Callable used to emit diagnostic output.
        client: Cortex client used to create, inspect and delete the API.
        api: Name of the API directory under ``TEST_APIS_DIR``.
        deploy_timeout: Timeout forwarded to ``endpoint_ready``.
        job_timeout: Timeout forwarded to ``job_done``.
        retry_attempts: Number of extra submission attempts after the first.
        api_config_name: File name of the API spec inside the API directory.

    Raises:
        AssertionError: If the spec file does not hold exactly one API, the
            endpoint is not ready, the submission is not answered with
            ``HTTPStatus.OK``, or ``job_done`` reports failure.
    """
    api_dir = TEST_APIS_DIR / api
    with open(str(api_dir / api_config_name)) as f:
        api_specs = yaml.safe_load(f)

    assert len(api_specs) == 1

    api_name = api_specs[0]["name"]
    client.create_api(api_spec=api_specs[0], project_dir=api_dir)

    # stays None until a job was submitted, so the failure handler knows
    # whether there is a job to inspect
    job_id = None
    try:
        assert endpoint_ready(
            client=client, api_name=api_name,
            timeout=deploy_timeout), f"api {api_name} not ready"

        response = None
        for _ in range(retry_attempts + 1):
            response = request_task(
                client,
                api_name,
            )
            if response.status_code == HTTPStatus.OK:
                break

            time.sleep(1)

        # fail with a readable message instead of an opaque JSON/KeyError
        # when every submission attempt was rejected
        assert (
            response.status_code == HTTPStatus.OK
        ), f"status code: got {response.status_code}, expected {HTTPStatus.OK}"

        job_id = response.json()["job_id"]

        assert job_done(
            client=client,
            api_name=api_name,
            job_id=job_id,
            timeout=job_timeout,
        ), f"task job did not succeed (api_name: {api_name}, job_id: {job_id})"

    except BaseException:
        # best effort: diagnostics must never replace the original failure
        try:
            api_info = client.get_api(api_name)
            printer(json.dumps(api_info, indent=2))

            if job_id is not None:
                job_status = client.get_job(api_name, job_id)
                printer(json.dumps(job_status, indent=2))
                td.Thread(target=lambda: client.stream_job_logs(
                    api_name, job_id),
                          daemon=True).start()
                # give the daemon thread a moment to stream some logs
                time.sleep(5)
        except Exception:
            pass
        raise

    finally:
        delete_apis(client, [api_name])
示例#16
0
文件: tests.py 项目: gtrevg/cortex
def delete_apis(client: cx.Client, api_names: List[str]):
    """Delete every API in ``api_names`` through ``client.delete_api``.

    Each name is attempted even if an earlier deletion fails, so one broken
    API does not leave the remaining ones deployed. When at least one deletion
    raised, the first such exception is re-raised after all names were tried.

    Args:
        client: Object exposing ``delete_api(name)``.
        api_names: Names of the APIs to delete, processed in order.

    Raises:
        Exception: The first error raised by ``client.delete_api``, if any.
    """
    first_error = None
    for name in api_names:
        try:
            client.delete_api(name)
        except Exception as err:
            if first_error is None:
                first_error = err

    if first_error is not None:
        raise first_error
示例#17
0
def _estimate_autoscaling_timeout(autoscaling: Dict[str, Any],
                                  max_replicas: int) -> int:
    """Estimate the time budget for scaling 1 -> max_replicas -> 1.

    Simulates the replica count step by step: every step changes the count by
    at least one replica, or by the configured up/downscale factor when that
    moves further, and costs one stabilization period.
    """
    replicas = 1  # starting number of replicas
    timeout = 0  # measured in seconds

    while replicas < max_replicas:
        upscale_ceil = math.ceil(replicas * autoscaling["max_upscale_factor"])
        replicas = min(max(upscale_ceil, replicas + 1), max_replicas)
        # periods are divided by 1000**3 - presumably nanoseconds to seconds
        timeout += int(autoscaling["upscale_stabilization_period"] /
                       (1000**3))

    while replicas > 1:
        downscale_ceil = math.ceil(replicas *
                                   autoscaling["max_downscale_factor"])
        replicas = min(downscale_ceil, replicas - 1)
        timeout += int(autoscaling["downscale_stabilization_period"] /
                       (1000**3))

    # add overhead to account for the process of downloading images or adding
    # nodes to the cluster
    return timeout * 2


def test_autoscaling(
    printer: Callable,
    client: cx.Client,
    apis: Dict[str, Any],
    autoscaling_config: Dict[str, Union[int, float]],
    deploy_timeout: int = None,
    api_config_name: str = "cortex.yaml",
):
    """Check that the primary API scales up to ``max_replicas`` and back to 1.

    The primary and dummy APIs are created with ``max_replicas`` and a ``1m``
    downscale stabilization period. Concurrent requests are sent to the
    primary API until the requested replica count reaches ``max_replicas``;
    then the requests are stopped and the test waits for the count to return
    to 1. All created APIs are deleted at the end.

    Args:
        printer: Callable used to emit progress and diagnostic output.
        client: Cortex client used to create, inspect and delete the APIs.
        apis: Must provide ``primary`` (API directory name), ``dummy`` (list
            of API directory names) and ``query_params`` (forwarded to
            ``make_requests_concurrently``).
        autoscaling_config: Must provide ``max_replicas``.
        deploy_timeout: Timeout forwarded to ``apis_ready`` and
            ``api_updated``.
        api_config_name: File name of the API spec inside each API directory.

    Raises:
        AssertionError: If the APIs are not ready, an update does not
            complete, or the whole cycle exceeds the estimated timeout.
    """
    max_replicas = autoscaling_config["max_replicas"]
    query_params = apis["query_params"]

    # increase the concurrency by 1 to ensure we get max_replicas replicas
    concurrency = max_replicas + 1

    all_apis = [apis["primary"]] + apis["dummy"]
    all_api_names = []
    for api in all_apis:
        api_dir = TEST_APIS_DIR / api
        with open(str(api_dir / api_config_name)) as f:
            api_specs = yaml.safe_load(f)
        assert len(api_specs) == 1
        api_specs[0]["autoscaling"] = {
            "max_replicas": max_replicas,
            "downscale_stabilization_period": "1m",
        }
        all_api_names.append(api_specs[0]["name"])
        client.create_api(api_spec=api_specs[0], project_dir=api_dir)

    primary_api_name = all_api_names[0]
    autoscaling = client.get_api(primary_api_name)["spec"]["autoscaling"]

    # controls the flow of requests
    request_stopper = td.Event()

    # time allowed for the whole upscale/downscale cycle
    test_timeout = _estimate_autoscaling_timeout(autoscaling, max_replicas)

    try:
        assert apis_ready(
            client=client, api_names=all_api_names,
            timeout=deploy_timeout), f"apis {all_api_names} not ready"

        threads_futures = make_requests_concurrently(client,
                                                     primary_api_name,
                                                     concurrency,
                                                     request_stopper,
                                                     query_params=query_params)

        test_start_time = time.time()

        # upscale/downscale the api
        printer(f"scaling up to {max_replicas} replicas")
        while True:
            assert api_updated(
                client, primary_api_name, timeout=deploy_timeout
            ), "api didn't scale up to the desired number of replicas in time"
            current_replicas = client.get_api(
                primary_api_name)["status"]["replica_counts"]["requested"]

            # stop the requests from being made
            if current_replicas == max_replicas and not request_stopper.is_set(
            ):
                printer("scaling back down to 1 replica")
                request_stopper.set()

            # check if the requesting threads are still healthy
            # if not, they'll raise an exception
            check_futures_healthy(threads_futures)

            # check if the test is taking too much time
            assert (
                time.time() - test_start_time < test_timeout
            ), f"autoscaling test for api {primary_api_name} did not finish in {test_timeout}s; current number of replicas is {current_replicas}/{concurrency}"

            # stop the test if it has finished
            if current_replicas == 1 and request_stopper.is_set():
                break

            # add some delay to reduce the number of gets
            time.sleep(1)

    except BaseException:
        # best effort: diagnostics must never replace the original failure
        try:
            api_info = client.get_api(primary_api_name)
            printer(json.dumps(api_info, indent=2))
        except Exception:
            pass
        raise

    finally:
        request_stopper.set()
        delete_apis(client, all_api_names)
示例#18
0
def retrieve_async_result(cliett: cx.Client, api_name: str,
                          request_id: str) -> requests.Response:
    """Fetch the result stored for ``request_id`` on the given API.

    The API's endpoint is looked up through ``cliett.get_api`` and a GET
    request is issued to ``<endpoint>/<request_id>``. The raw response is
    returned without checking its status code.
    """
    endpoint = cliett.get_api(api_name)["endpoint"]
    return requests.get(f"{endpoint}/{request_id}")
示例#19
0
def test_load_realtime(
    printer: Callable,
    client: cx.Client,
    api: str,
    load_config: Dict[str, Union[int, float]],
    deploy_timeout: int = None,
    api_config_name: str = "cortex_cpu.yaml",
    node_groups: List[str] = [],
):
    """Load-test a realtime API with a fixed number of replicas.

    The API is deployed with ``min_replicas == max_replicas``. Concurrent
    requests are sent until the request stopper is set, while the API's
    4xx/5xx counters are polled and must not grow. Finally the 2xx counter
    is verified through ``api_requests``. The API is always deleted at the end.

    Args:
        printer: Callable used to emit progress and diagnostic output.
        client: Cortex client used to deploy, inspect and delete the API.
        api: Name of the API directory under ``TEST_APIS_DIR``.
        load_config: Must provide ``total_requests``, ``desired_replicas``,
            ``concurrency`` and ``status_code_timeout``.
        deploy_timeout: Timeout forwarded to ``apis_ready``.
        api_config_name: File name of the API spec inside the API directory.
        node_groups: When non-empty, stored in the spec under ``node_groups``.
            The list is only read here, never mutated.

    Raises:
        AssertionError: If the API is not ready, 4xx/5xx responses are
            detected, or the 2xx count check does not pass.
    """

    total_requests = load_config["total_requests"]
    desired_replicas = load_config["desired_replicas"]
    concurrency = load_config["concurrency"]
    status_code_timeout = load_config["status_code_timeout"]

    api_dir = TEST_APIS_DIR / api
    with open(str(api_dir / api_config_name)) as f:
        api_specs = yaml.safe_load(f)
    assert len(api_specs) == 1

    api_specs[0]["autoscaling"] = {
        "min_replicas": desired_replicas,
        "max_replicas": desired_replicas,
    }
    if len(node_groups) > 0:
        api_specs[0]["node_groups"] = node_groups

    api_name = api_specs[0]["name"]
    client.deploy(api_spec=api_specs[0])

    # controls the flow of requests
    request_stopper = td.Event()
    failed = False
    try:
        printer(f"getting {desired_replicas} replicas ready")
        assert apis_ready(client=client,
                          api_names=[api_name],
                          timeout=deploy_timeout), f"api {api_name} not ready"

        # counters may be reported as None; treat that as zero
        network_stats = client.get_api(api_name)["metrics"]["network_stats"]
        offset_2xx = network_stats["code_2xx"] or 0
        offset_4xx = network_stats["code_4xx"] or 0
        offset_5xx = network_stats["code_5xx"] or 0

        # give the APIs some time to prevent getting high latency spikes in the beginning
        time.sleep(5)

        with open(str(api_dir / "sample.json")) as f:
            payload = json.load(f)

        printer("start making requests concurrently")
        threads_futures = make_requests_concurrently(
            client,
            api_name,
            concurrency,
            request_stopper,
            max_total_requests=total_requests,
            payload=payload,
        )

        while not request_stopper.is_set():
            api_info = client.get_api(api_name)
            network_stats = api_info["metrics"]["network_stats"]

            # same None handling as for the offsets, otherwise a counter that
            # is still None would fail with a TypeError instead of passing
            new_4xx = (network_stats["code_4xx"] or 0) - offset_4xx
            new_5xx = (network_stats["code_5xx"] or 0) - offset_5xx

            assert (
                new_4xx == 0
            ), f"detected 4xx response codes ({new_4xx}) in cortex get"
            assert (
                new_5xx == 0
            ), f"detected 5xx response codes ({new_5xx}) in cortex get"

            # check if the requesting threads are still healthy
            # if not, they'll raise an exception
            check_futures_healthy(threads_futures)

            # don't stress the CPU too hard
            time.sleep(1)

        printer(
            f"verifying number of processed requests ({total_requests}, with an offset of {offset_2xx}) using the client"
        )
        assert api_requests(
            client,
            api_name,
            total_requests + offset_2xx,
            timeout=status_code_timeout
        ), f"the number of 2xx response codes for api {api_name} doesn't match the expected number {total_requests}"

    except BaseException:
        # best effort: an error while collecting diagnostics is dropped so the
        # original failure is the one that reaches the caller
        failed = True
        try:
            api_info = client.get_api(api_name)
            printer(json.dumps(api_info, indent=2))
        except Exception:
            pass
        raise

    finally:
        request_stopper.set()
        delete_apis(client, [api_name])
        if failed:
            # NOTE(review): the reason for this pause after a failure is not
            # visible in this function - confirm before changing it
            time.sleep(30)