def test_ecs_pytorch_inference_gpu(pytorch_inference, ecs_container_instance,
                                   region, gpu_only):
    worker_instance_id, ecs_cluster_arn = ecs_container_instance
    public_ip_address = ec2_utils.get_public_ip(worker_instance_id,
                                                region=region)
    num_gpus = ec2_utils.get_instance_num_gpus(worker_instance_id,
                                               region=region)

    model_name = "pytorch-densenet"
    service_name = task_family = revision = None
    try:
        service_name, task_family, revision = ecs_utils.setup_ecs_inference_service(
            pytorch_inference,
            "pytorch",
            ecs_cluster_arn,
            model_name,
            worker_instance_id,
            num_gpus=num_gpus,
            region=region)
        inference_result = request_pytorch_inference_densenet(
            public_ip_address)
        assert inference_result, f"Failed to perform inference at IP address: {public_ip_address}"

    finally:
        ecs_utils.tear_down_ecs_inference_service(ecs_cluster_arn,
                                                  service_name, task_family,
                                                  revision)
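
# A minimal sketch (not the repo's helper) of the kind of request that
# request_pytorch_inference_densenet issues in the test above: post a sample image
# to a TorchServe/MMS-style "/predictions/<model>" route and treat a non-empty 2xx
# response as success. The port, route, and sample-image URL are assumptions here.
import requests

def request_densenet_inference_sketch(ip_address, model_name="pytorch-densenet", port=80):
    sample_image = requests.get(
        "https://s3.amazonaws.com/model-server/inputs/kitten.jpg", timeout=30).content
    response = requests.post(
        f"http://{ip_address}:{port}/predictions/{model_name}",
        data=sample_image, timeout=60)
    # Treat only a non-empty 2xx body as a successful inference result.
    return response.ok and bool(response.content)
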
def ec2_pytorch_inference(image_uri, processor, ec2_connection, region):
    repo_name, image_tag = image_uri.split("/")[-1].split(":")
    container_name = f"{repo_name}-{image_tag}-ec2"
    model_name = "pytorch-densenet"
    mms_inference_cmd = test_utils.get_mms_run_command(model_name, processor)
    docker_cmd = "nvidia-docker" if "gpu" in image_uri else "docker"

    docker_run_cmd = (f"{docker_cmd} run -itd --name {container_name}"
                      f" -p 80:8080 -p 8081:8081"
                      f" {image_uri} {mms_inference_cmd}")
    try:
        ec2_connection.run(
            f"$(aws ecr get-login --no-include-email --region {region})",
            hide=True)
        LOGGER.info(docker_run_cmd)
        ec2_connection.run(docker_run_cmd, hide=True)
        inference_result = test_utils.request_pytorch_inference_densenet(
            connection=ec2_connection)
        assert (
            inference_result
        ), f"Failed to perform pytorch inference test for image: {image_uri} on ec2"

    finally:
        ec2_connection.run(f"docker rm -f {container_name}",
                           warn=True,
                           hide=True)
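
# The "$(aws ecr get-login ...)" pattern above works only with AWS CLI v1. A hedged
# sketch of the CLI v2 equivalent, run over the same Fabric-style connection;
# deriving the registry host from the image URI is an assumption for illustration.
def ecr_login_v2_sketch(ec2_connection, image_uri, region):
    registry = image_uri.split("/")[0]  # e.g. "<account>.dkr.ecr.<region>.amazonaws.com"
    ec2_connection.run(
        f"aws ecr get-login-password --region {region}"
        f" | docker login --username AWS --password-stdin {registry}",
        hide=True)
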
def test_ecs_pytorch_inference_eia(pytorch_inference_eia,
                                   ecs_container_instance, ei_accelerator_type,
                                   region, eia_only):
    worker_instance_id, ecs_cluster_arn = ecs_container_instance
    public_ip_address = ec2_utils.get_public_ip(worker_instance_id,
                                                region=region)

    model_name = "pytorch-densenet"
    image_framework, image_framework_version = get_framework_and_version_from_tag(
        pytorch_inference_eia)
    if image_framework_version == "1.3.1":
        model_name = "pytorch-densenet-v1-3-1"
    service_name = task_family = revision = None
    try:
        service_name, task_family, revision = ecs_utils.setup_ecs_inference_service(
            pytorch_inference_eia,
            "pytorch",
            ecs_cluster_arn,
            model_name,
            worker_instance_id,
            ei_accelerator_type,
            region=region)
        inference_result = request_pytorch_inference_densenet(
            public_ip_address)
        assert inference_result, f"Failed to perform inference at IP address: {public_ip_address}"

    finally:
        ecs_utils.tear_down_ecs_inference_service(ecs_cluster_arn,
                                                  service_name, task_family,
                                                  revision)
def test_ecs_pytorch_inference_neuron(pytorch_inference_neuron,
                                      ecs_container_instance, region):
    worker_instance_id, ecs_cluster_arn = ecs_container_instance
    public_ip_address = ec2_utils.get_public_ip(worker_instance_id,
                                                region=region)
    num_neurons = ec2_utils.get_instance_num_inferentias(worker_instance_id,
                                                         region=region)

    model_name = "pytorch-resnet-neuron"
    service_name = task_family = revision = None
    try:
        service_name, task_family, revision = ecs_utils.setup_ecs_inference_service(
            pytorch_inference_neuron,
            "pytorch",
            ecs_cluster_arn,
            model_name,
            worker_instance_id,
            num_neurons=num_neurons,
            region=region)
        server_type = get_inference_server_type(pytorch_inference_neuron)
        inference_result = request_pytorch_inference_densenet(
            public_ip_address, server_type=server_type, model_name=model_name)
        assert inference_result, f"Failed to perform inference at IP address: {public_ip_address}"

    finally:
        ecs_utils.tear_down_ecs_inference_service(ecs_cluster_arn,
                                                  service_name, task_family,
                                                  revision)
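
# Sketch only: one way a helper like ecs_utils.setup_ecs_inference_service could
# translate the num_gpus (GPU test above) and ei_accelerator_type (EIA test above)
# arguments into ECS task-definition fields. "resourceRequirements" and
# "inferenceAccelerators" are real RegisterTaskDefinition fields; the surrounding
# structure and names are assumed, and the Inferentia mapping is not shown.
def accelerator_fields_sketch(num_gpus=None, ei_accelerator_type=None):
    container_fields, task_fields = {}, {}
    if num_gpus:
        # GPUs are requested per container definition.
        container_fields["resourceRequirements"] = [{"type": "GPU", "value": str(num_gpus)}]
    if ei_accelerator_type:
        # Elastic Inference accelerators are attached at the task level.
        task_fields["inferenceAccelerators"] = [
            {"deviceName": "device_1", "deviceType": ei_accelerator_type}]
    return container_fields, task_fields
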
def test_eks_pytorch_densenet_inference(pytorch_inference):
    server_type = test_utils.get_inference_server_type(pytorch_inference)
    if "eia" in pytorch_inference:
        pytest.skip("Skipping EKS Test for EIA")
    elif "neuron" in pytorch_inference:
        pytest.skip(
            "Neuron images are covered by a dedicated test, so skipping this test for Neuron")
    elif server_type == "ts":
        model = "pytorch-densenet=https://torchserve.s3.amazonaws.com/mar_files/densenet161.mar"
        server_cmd = "torchserve"
    else:
        model = "pytorch-densenet=https://dlc-samples.s3.amazonaws.com/pytorch/multi-model-server/densenet/densenet.mar"
        server_cmd = "multi-model-server"

    num_replicas = "1"

    rand_int = random.randint(4001, 6000)

    processor = "gpu" if "gpu" in pytorch_inference else "cpu"

    yaml_path = os.path.join(
        os.sep, "tmp",
        f"pytorch_single_node_{processor}_inference_{rand_int}.yaml")
    inference_service_name = selector_name = f"densenet-service-{processor}-{rand_int}"

    search_replace_dict = {
        "<MODELS>": model,
        "<NUM_REPLICAS>": num_replicas,
        "<SELECTOR_NAME>": selector_name,
        "<INFERENCE_SERVICE_NAME>": inference_service_name,
        "<DOCKER_IMAGE_BUILD_ID>": pytorch_inference,
        "<SERVER_TYPE>": server_type,
        "<SERVER_CMD>": server_cmd
    }

    if processor == "gpu":
        search_replace_dict["<NUM_GPUS>"] = "1"

    eks_utils.write_eks_yaml_file_from_template(
        eks_utils.get_single_node_inference_template_path(
            "pytorch", processor), yaml_path, search_replace_dict)

    try:
        run("kubectl apply -f {}".format(yaml_path))

        port_to_forward = random.randint(49152, 65535)

        if eks_utils.is_service_running(selector_name):
            eks_utils.eks_forward_port_between_host_and_container(
                selector_name, port_to_forward, "8080")

        assert test_utils.request_pytorch_inference_densenet(
            port=port_to_forward, server_type=server_type)
    except ValueError as excp:
        eks_utils.LOGGER.error("Service is not running: %s", excp)
    finally:
        run(f"kubectl delete deployment {selector_name}")
        run(f"kubectl delete service {selector_name}")
def test_eks_pytorch_neuron_inference(pytorch_inference, neuron_only):
    server_type = test_utils.get_inference_server_type(pytorch_inference)
    if "neuron" not in pytorch_inference:
        pytest.skip("Skipping EKS Neuron Test for EIA and Non Neuron Images")

    model = "pytorch-resnet-neuron=https://aws-dlc-sample-models.s3.amazonaws.com/pytorch/Resnet50-neuron.mar"
    server_cmd = "/usr/local/bin/entrypoint.sh -m pytorch-resnet-neuron=https://aws-dlc-sample-models.s3.amazonaws.com/pytorch/Resnet50-neuron.mar -t /home/model-server/config.properties"
    num_replicas = "1"
    rand_int = random.randint(4001, 6000)
    processor = "neuron"

    yaml_path = os.path.join(
        os.sep, "tmp",
        f"pytorch_single_node_{processor}_inference_{rand_int}.yaml")
    inference_service_name = selector_name = f"resnet-{processor}-{rand_int}"

    search_replace_dict = {
        "<NUM_REPLICAS>": num_replicas,
        "<SELECTOR_NAME>": selector_name,
        "<INFERENCE_SERVICE_NAME>": inference_service_name,
        "<DOCKER_IMAGE_BUILD_ID>": pytorch_inference,
        "<SERVER_TYPE>": server_type,
        "<SERVER_CMD>": server_cmd
    }

    search_replace_dict["<NUM_INF1S>"] = "1"

    eks_utils.write_eks_yaml_file_from_template(
        eks_utils.get_single_node_inference_template_path(
            "pytorch", processor), yaml_path, search_replace_dict)
    device_plugin_path = eks_utils.get_device_plugin_path("pytorch", processor)

    try:
        # TODO - once eksctl gets the latest neuron device plugin this can be removed
        run("kubectl delete -f {}".format(device_plugin_path))
        sleep(60)
        run("kubectl apply -f {}".format(device_plugin_path))
        sleep(10)

        run("kubectl apply -f {}".format(yaml_path))

        port_to_forward = random.randint(49152, 65535)

        if eks_utils.is_service_running(selector_name):
            eks_utils.eks_forward_port_between_host_and_container(
                selector_name, port_to_forward, "8080")

        assert test_utils.request_pytorch_inference_densenet(
            port=port_to_forward)
    except ValueError as excp:
        run("kubectl cluster-info dump")
        eks_utils.LOGGER.error("Service is not running: %s", excp)
    finally:
        run(f"kubectl delete deployment {selector_name}")
        run(f"kubectl delete service {selector_name}")
def test_eks_pytorch_neuron_inference(pytorch_inference, neuron_only):
    server_type = test_utils.get_inference_server_type(pytorch_inference)
    if "neuron" not in pytorch_inference:
        pytest.skip("Skipping EKS Neuron Test for EIA and Non Neuron Images")
    else:
        model = "pytorch-resnet-neuron=https://aws-dlc-sample-models.s3.amazonaws.com/pytorch/Resnet50-neuron.mar"
        server_cmd = "torchserve"

    num_replicas = "1"
    rand_int = random.randint(4001, 6000)
    processor = "neuron"

    yaml_path = os.path.join(
        os.sep, "tmp",
        f"pytorch_single_node_{processor}_inference_{rand_int}.yaml")
    inference_service_name = selector_name = f"resnet-{processor}-{rand_int}"

    search_replace_dict = {
        "<MODELS>": model,
        "<NUM_REPLICAS>": num_replicas,
        "<SELECTOR_NAME>": selector_name,
        "<INFERENCE_SERVICE_NAME>": inference_service_name,
        "<DOCKER_IMAGE_BUILD_ID>": pytorch_inference,
        "<SERVER_TYPE>": server_type,
        "<SERVER_CMD>": server_cmd,
    }

    search_replace_dict["<NUM_INF1S>"] = "1"

    eks_utils.write_eks_yaml_file_from_template(
        eks_utils.get_single_node_inference_template_path(
            "pytorch", processor), yaml_path, search_replace_dict)

    try:
        run("kubectl apply -f {}".format(yaml_path))

        port_to_forward = random.randint(49152, 65535)

        if eks_utils.is_service_running(selector_name):
            eks_utils.eks_forward_port_between_host_and_container(
                selector_name, port_to_forward, "8080")

        assert test_utils.request_pytorch_inference_densenet(
            port=port_to_forward,
            server_type=server_type,
            model_name="pytorch-resnet-neuron")
    finally:
        run(f"kubectl delete deployment {selector_name}")
        run(f"kubectl delete service {selector_name}")
def ec2_pytorch_inference(image_uri, processor, ec2_connection, region):
    repo_name, image_tag = image_uri.split("/")[-1].split(":")
    container_name = f"{repo_name}-{image_tag}-ec2"
    model_name = "pytorch-densenet"
    if processor == "eia":
        image_framework, image_framework_version = get_framework_and_version_from_tag(
            image_uri)
        if image_framework_version == "1.3.1":
            model_name = "pytorch-densenet-v1-3-1"
    if processor == "neuron":
        model_name = "pytorch-resnet-neuron"

    inference_cmd = test_utils.get_inference_run_command(
        image_uri, model_name, processor)
    docker_cmd = "nvidia-docker" if "gpu" in image_uri else "docker"

    if processor == "neuron":
        ec2_connection.run("sudo systemctl stop neuron-rtd"
                           )  # Stop neuron-rtd in host env for DLC to start it
        docker_run_cmd = (f"{docker_cmd} run -itd --name {container_name}"
                          f" -p 80:8080 -p 8081:8081"
                          f" --device=/dev/neuron0 --cap-add IPC_LOCK"
                          f" --env NEURON_MONITOR_CW_REGION={region}"
                          f" {image_uri} {inference_cmd}")
    else:
        docker_run_cmd = (f"{docker_cmd} run -itd --name {container_name}"
                          f" -p 80:8080 -p 8081:8081"
                          f" {image_uri} {inference_cmd}")
    try:
        ec2_connection.run(
            f"$(aws ecr get-login --no-include-email --region {region})",
            hide=True)
        LOGGER.info(docker_run_cmd)
        ec2_connection.run(docker_run_cmd, hide=True)
        server_type = get_inference_server_type(image_uri)
        inference_result = test_utils.request_pytorch_inference_densenet(
            connection=ec2_connection,
            model_name=model_name,
            server_type=server_type)
        assert (
            inference_result
        ), f"Failed to perform pytorch inference test for image: {image_uri} on ec2"

    finally:
        ec2_connection.run(f"docker rm -f {container_name}",
                           warn=True,
                           hide=True)
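
# Hedged sketch of what a helper like get_framework_and_version_from_tag could do
# for a DLC image URI such as
# "<account>.dkr.ecr.us-west-2.amazonaws.com/pytorch-inference-eia:1.3.1-cpu-py36-ubuntu16.04":
# take the leading tag component as the framework version. Treat this parser as an
# illustration of the tag layout, not the repo's helper.
def framework_and_version_from_tag_sketch(image_uri):
    repo_name, image_tag = image_uri.split("/")[-1].split(":")
    framework = "pytorch" if "pytorch" in repo_name else repo_name
    version = image_tag.split("-")[0]  # e.g. "1.3.1"
    return framework, version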