def test_ubuntu_version(image):
    """
    Verify that the ubuntu version encoded in the image tag matches the OS
    release reported inside a running container.

    :param image: ECR image URI
    """
    ctx = Context()
    container_name = get_container_name("ubuntu-version", image)

    # Collect every "ubuntuX.Y" component of the hyphen-separated tag and keep
    # the last one (mirrors the original last-match-wins loop); empty if absent.
    matches = [part.split("ubuntu")[-1] for part in image.split("-") if part.startswith("ubuntu")]
    expected_version = matches[-1] if matches else ""

    start_container(container_name, image, ctx)
    os_release = run_cmd_on_container(container_name, ctx, "cat /etc/os-release").stdout

    assert "Ubuntu" in os_release
    assert expected_version in os_release
def test_torchvision_nms_training(pytorch_training):
    """
    Confirm the internally built torchvision binary is used by resolving the
    torch.ops.torchvision.nms operator inside the container.

    :param pytorch_training: framework fixture for pytorch training
    """
    _, framework_version = get_framework_and_version_from_tag(pytorch_training)

    # The nms op lookup is only meaningful for older torch/torchvision pairs.
    if Version(framework_version) >= Version("1.10.0"):
        pytest.skip(
            "Skipping this test for PT 1.10.0 and onward, since torch.ops.torchvision.nms api is outdated."
        )
    is_pt151 = Version(framework_version) == Version("1.5.1")
    if is_pt151 and get_processor_from_image_uri(pytorch_training) == "gpu":
        pytest.skip("Skipping this test for PT 1.5.1 GPU Training DLC images")

    ctx = Context()
    container_name = get_container_name("torchvision-nms", pytorch_training)
    start_container(container_name, pytorch_training, ctx)

    # Resolving the op inside the container raises if the custom torchvision build is absent.
    nms_check = "import torch; import torchvision; print(torch.ops.torchvision.nms)"
    run_cmd_on_container(container_name, ctx, nms_check, executable="python")
def test_framework_version_cpu(image):
    """
    Check that the framework version in the image tag is the same as the one on a
    running container. This function tests CPU, EIA, and Neuron images.

    :param image: ECR image URI
    """
    if "gpu" in image:
        pytest.skip(
            "GPU images will have their framework version tested in test_framework_and_cuda_version_gpu"
        )
    image_repo_name, _ = get_repository_and_tag_from_image_uri(image)
    if re.fullmatch(r"(pr-|beta-|nightly-)?tensorflow-inference(-eia)?", image_repo_name):
        # pytest.skip takes the reason positionally; `msg=` is not part of its API.
        pytest.skip(
            "TF inference for CPU/GPU/EIA does not have core tensorflow installed"
        )
    tested_framework, tag_framework_version = get_framework_and_version_from_tag(image)
    # Framework name may include a huggingface prefix. Strip it by slicing, not
    # str.lstrip: lstrip("huggingface_") removes a *character set* and can eat
    # leading characters of the framework name itself. This also matches the
    # other framework-version tests in this file.
    if tested_framework.startswith("huggingface_"):
        tested_framework = tested_framework[len("huggingface_"):]
    # Module name is torch
    if tested_framework == "pytorch":
        tested_framework = "torch"
    ctx = Context()
    container_name = get_container_name("framework-version", image)
    start_container(container_name, image, ctx)
    output = run_cmd_on_container(
        container_name,
        ctx,
        f"import {tested_framework}; print({tested_framework}.__version__)",
        executable="python")
    if is_canary_context():
        # Canary runs tolerate build-suffixes; only require the tag version as a substring.
        assert tag_framework_version in output.stdout.strip()
    else:
        assert tag_framework_version == output.stdout.strip()
def test_tf_serving_version_cpu(tensorflow_inference):
    """
    For non-huggingface non-GPU TF inference images, check that the tag version matches
    the version of TF serving in the container.

    Huggingface includes MMS and core TF, hence the versioning scheme is based off of the
    underlying tensorflow framework version, rather than the TF serving version.

    GPU inference images will be tested along side `test_framework_and_cuda_version_gpu`
    in order to be judicious about GPU resources. This test can run directly on the host,
    and thus does not require additional resources to be spun up.

    @param tensorflow_inference: ECR image URI
    """
    # Set local variable to clarify contents of fixture
    image = tensorflow_inference

    if "gpu" in image:
        pytest.skip(
            "GPU images will have their framework version tested in test_framework_and_cuda_version_gpu"
        )
    if "neuron" in image:
        pytest.skip(
            "Neuron images will have their framework version tested in test_framework_and_neuron_sdk_version"
        )

    _, tag_framework_version = get_framework_and_version_from_tag(image)

    ctx = Context()
    container_name = get_container_name("tf-serving-version", image)
    start_container(container_name, image, ctx)

    version_output = run_cmd_on_container(
        container_name, ctx, "tensorflow_model_server --version", executable="bash"
    )
    # Allow a non-digit suffix (e.g. "-rc0") after the tag version.
    server_version_pattern = rf"TensorFlow ModelServer: {tag_framework_version}(\D+)?"
    assert re.match(server_version_pattern, version_output.stdout), (
        f"Cannot find model server version {tag_framework_version} in {version_output.stdout}"
    )

    stop_and_remove_container(container_name, ctx)
def test_sagemaker_studio_analytics_extension(training, package_name):
    """
    Verify that the given sagemaker-studio analytics package is installed and importable
    in Training DLC images whose framework version falls inside the supported window.

    :param training: training ECR image URI
    :param package_name: pip package name to check (e.g. "sagemaker-studio-analytics-extension")
    """
    framework, framework_version = test_utils.get_framework_and_version_from_tag(
        training)
    # Inclusive framework-version window in which the analytics extension is expected to ship.
    utility_package_minimum_framework_version = {
        "pytorch": "1.7",
        "tensorflow": "2.4"
    }
    utility_package_maximum_framework_version = {
        "pytorch": "1.8",
        "tensorflow": "2.6"
    }
    # Skip frameworks that never carry the extension, and versions outside [min, max].
    if framework not in utility_package_minimum_framework_version or Version(
            framework_version) < Version(
                utility_package_minimum_framework_version[framework]
            ) or Version(framework_version) > Version(
                utility_package_maximum_framework_version[framework]):
        pytest.skip(
            f"sagemaker_studio_analytics_extension is not installed in {framework} {framework_version} DLCs"
        )
    ctx = Context()
    container_name = test_utils.get_container_name(
        f"sagemaker_studio_analytics_extension-{package_name}", training)
    test_utils.start_container(container_name, training, ctx)

    # Optionally add version validation in the following steps, rather than just printing it.
    test_utils.run_cmd_on_container(container_name, ctx,
                                    f"pip list | grep -i {package_name}")
    import_package = package_name.replace("-", "_")
    # For the two listed packages only a bare import is attempted — presumably
    # because they expose no __version__ attribute; confirm against the packages.
    import_test_cmd = (f"import {import_package}" if package_name in [
        "sagemaker-studio-sparkmagic-lib", "sagemaker-studio-analytics-extension"
    ] else f"import {import_package}; print({import_package}.__version__)")
    test_utils.run_cmd_on_container(container_name,
                                    ctx,
                                    import_test_cmd,
                                    executable="python")
def test_stray_files(image):
    """
    Ensure that unnecessary build artifacts are not present in any easily visible
    or tmp directories of the image.

    :param image: ECR image URI
    """
    ctx = Context()
    container_name = get_container_name("test_tmp_dirs", image)
    start_container(container_name, image, ctx)

    # Artifact patterns that must not appear in any inspected directory
    stray_artifacts = [r"\.py"]
    # Files permitted to remain in /tmp
    allowed_tmp_files = ["hsperfdata_root"]

    def _listing(path):
        # Directory listing (including dotfiles) taken inside the container.
        return run_cmd_on_container(container_name, ctx, f"ls -A {path}")

    # /tmp: artifact-free and nothing outside the whitelist
    tmp = _listing("/tmp")
    _assert_artifact_free(tmp, stray_artifacts)
    for tmp_file in tmp.stdout.split():
        assert (
            tmp_file in allowed_tmp_files
        ), f"Found unexpected file in tmp dir: {tmp_file}. Allowed tmp files: {allowed_tmp_files}"

    # /var/tmp must always be completely empty
    var_tmp = _listing("/var/tmp")
    _assert_artifact_free(var_tmp, stray_artifacts)
    assert var_tmp.stdout.strip() == ""

    # Home and root directories must also be free of stray artifacts
    _assert_artifact_free(_listing("~"), stray_artifacts)
    _assert_artifact_free(_listing("/"), stray_artifacts)
def test_sm_profiler_pt(pytorch_training):
    """
    Stage and run the SageMaker profiler (ZCC) test suite against a PyTorch training image.

    :param pytorch_training: PyTorch training ECR image URI
    """
    processor = get_processor_from_image_uri(pytorch_training)
    if processor not in ("cpu", "gpu"):
        pytest.skip(f"Processor {processor} not supported. Skipping test.")

    _, image_framework_version = get_framework_and_version_from_tag(pytorch_training)
    if Version(image_framework_version) in SpecifierSet(">=1.12"):
        pytest.skip("sm profiler ZCC test is not supported in PT 1.12 and above")

    ctx = Context()

    # Stage the profiler tests in a per-image directory inside the CodeBuild workspace.
    # NOTE(review): assumes CODEBUILD_SRC_DIR is set — os.getenv returns None otherwise
    # and os.path.join would raise; confirm the CI environment guarantees it.
    profiler_tests_dir = os.path.join(
        os.getenv("CODEBUILD_SRC_DIR"), get_container_name("smprof", pytorch_training), "smprofiler_tests"
    )
    ctx.run(f"mkdir -p {profiler_tests_dir}", hide=True)

    # Download sagemaker-tests zip
    sm_tests_zip = "sagemaker-tests.zip"
    ctx.run(
        f"aws s3 cp {os.getenv('SMPROFILER_TESTS_BUCKET')}/{sm_tests_zip} {profiler_tests_dir}/{sm_tests_zip}",
        hide=True,
    )

    # PT test setup requirements: unzip the suite, then fetch and unpack the
    # CIFAR-10 and MNIST datasets inside the nested test directory.
    with ctx.prefix(f"cd {profiler_tests_dir}"):
        ctx.run(f"unzip {sm_tests_zip}", hide=True)
        with ctx.prefix("cd sagemaker-tests/tests/scripts/pytorch_scripts"):
            ctx.run("mkdir -p data", hide=True)
            ctx.run(
                "aws s3 cp s3://smdebug-testing/datasets/cifar-10-python.tar.gz data/cifar-10-batches-py.tar.gz",
                hide=True,
            )
            ctx.run("aws s3 cp s3://smdebug-testing/datasets/MNIST_pytorch.tar.gz data/MNIST_pytorch.tar.gz",
                    hide=True)
            with ctx.prefix("cd data"):
                ctx.run("tar -zxf MNIST_pytorch.tar.gz", hide=True)
                ctx.run("tar -zxf cifar-10-batches-py.tar.gz", hide=True)

    run_sm_profiler_tests(pytorch_training, profiler_tests_dir, "test_profiler_pytorch.py", processor)
def test_sm_profiler_tf(tensorflow_training):
    """
    Stage and run the SageMaker profiler test suite against a TensorFlow training image.

    :param tensorflow_training: TensorFlow training ECR image URI
    """
    if is_tf_version("1", tensorflow_training):
        pytest.skip("Skipping test on TF1, since there are no smprofiler config files for TF1")
    processor = get_processor_from_image_uri(tensorflow_training)
    if processor not in ("cpu", "gpu"):
        pytest.skip(f"Processor {processor} not supported. Skipping test.")

    ctx = Context()

    # Stage the profiler tests in a per-image directory inside the CodeBuild workspace.
    workspace = os.getenv("CODEBUILD_SRC_DIR")
    per_image_dir = get_container_name("smprof", tensorflow_training)
    profiler_tests_dir = os.path.join(workspace, per_image_dir, "smprofiler_tests")
    ctx.run(f"mkdir -p {profiler_tests_dir}", hide=True)

    # Download sagemaker-tests zip
    sm_tests_zip = "sagemaker-tests.zip"
    tests_bucket = os.getenv("SMPROFILER_TESTS_BUCKET")
    ctx.run(
        f"aws s3 cp {tests_bucket}/{sm_tests_zip} {profiler_tests_dir}/{sm_tests_zip}",
        hide=True
    )
    ctx.run(f"cd {profiler_tests_dir} && unzip {sm_tests_zip}", hide=True)

    run_sm_profiler_tests(tensorflow_training, profiler_tests_dir, "test_profiler_tensorflow.py", processor)
def test_pandas(image):
    """
    It's possible that in newer python versions, we may have issues with installing pandas
    due to lack of presence of the bz2 module in py3 containers. This is a sanity test to
    ensure that pandas import works properly in all containers.

    :param image: ECR image URI
    """
    ctx = Context()
    container_name = get_container_name("pandas", image)
    start_container(container_name, image, ctx)

    # Make sure we can install pandas, do not fail right away if there are pip check issues
    run_cmd_on_container(container_name, ctx, "pip install pandas", warn=True)

    # A clean import writes nothing to stdout; any output indicates a problem.
    import_result = run_cmd_on_container(container_name, ctx, "import pandas", executable="python")
    assert not import_result.stdout.strip(), (
        f"Expected no output when importing pandas, but got {import_result.stdout}"
    )

    # Simple import test to ensure we do not get a bz2 module import failure
    run_cmd_on_container(
        container_name, ctx, "import pandas; print(pandas.__version__)", executable="python"
    )
def test_python_version(image):
    """
    Check that the python version in the image tag is the same as the one on a
    running container.

    :param image: ECR image URI
    """
    ctx = Context()
    container_name = get_container_name("py-version", image)

    # Derive the expected interpreter version from the "pyX"/"pyXY" tag component;
    # the last matching component wins, matching the original loop semantics.
    py_version = ""
    for part in image.split("-"):
        if not part.startswith("py"):
            continue
        major = part[2]
        py_version = f"Python {major}.{part[3]}" if len(part) > 3 else f"Python {major}"

    start_container(container_name, image, ctx)
    output = run_cmd_on_container(container_name, ctx, "python --version")

    # Due to py2 deprecation, Python2 version gets streamed to stderr. Python installed via
    # Conda also appears to stream to stderr (in some cases).
    container_py_version = output.stdout + output.stderr

    assert py_version in container_py_version, f"Cannot find {py_version} in {container_py_version}"
def test_utility_packages_using_import(training):
    """
    Verify that utility packages are installed in the Training DLC image

    :param training: training ECR image URI
    """
    ctx = Context()
    container_name = test_utils.get_container_name(
        "utility_packages_using_import", training)
    test_utils.start_container(container_name, training, ctx)

    framework, framework_version = test_utils.get_framework_and_version_from_tag(
        training)
    utility_package_minimum_framework_version = {
        "mxnet": "1.8",
        "pytorch": "1.7",
        "tensorflow2": "2.4",
        "tensorflow1": "1.15",
    }
    # Only remap tensorflow to its major-version-specific key. The previous
    # one-line ternary rewrote *every* non-tensorflow framework to "tensorflow2",
    # which made mxnet/pytorch images compare against the wrong minimum version.
    if framework == "tensorflow":
        framework = "tensorflow1" if framework_version.startswith("1.") else "tensorflow2"

    if Version(framework_version) < Version(
            utility_package_minimum_framework_version[framework]):
        pytest.skip("Extra utility packages will be added going forward.")

    for package in UTILITY_PACKAGES_IMPORT:
        version = test_utils.run_cmd_on_container(
            container_name,
            ctx,
            f"import {package}; print({package}.__version__)",
            executable="python").stdout.strip()
        if package == "sagemaker":
            # Fixed NameError: the failure message previously referenced an
            # undefined `sm_version`; `version` is the value under test.
            assert Version(version) > Version(
                "2"
            ), f"Sagemaker version should be > 2.0. Found version {version}"
def test_oss_compliance(image):
    """
    Run oss compliance check on a container to check if license attribution files exist.
    And upload source of third party packages to S3 bucket.

    :param image: ECR image URI
    """
    THIRD_PARTY_SOURCE_CODE_BUCKET = "aws-dlinfra-licenses"
    THIRD_PARTY_SOURCE_CODE_BUCKET_PATH = "third_party_source_code"
    # Manifest file produced inside the container: one "name version url" entry per line.
    file = "THIRD_PARTY_SOURCE_CODE_URLS"
    container_name = get_container_name("oss_compliance", image)
    context = Context()
    local_repo_path = get_repository_local_path()
    start_container(container_name, image, context)

    # run compliance test to make sure license attribution files exists. testOSSCompliance is copied as part of Dockerfile
    run_cmd_on_container(container_name, context,
                         "/usr/local/bin/testOSSCompliance /root")

    # Copy the manifest out of the container; always remove the container afterwards.
    try:
        context.run(
            f"docker cp {container_name}:/root/{file} {os.path.join(local_repo_path, file)}"
        )
    finally:
        context.run(f"docker rm -f {container_name}", hide=True)

    s3_resource = boto3.resource("s3")

    with open(os.path.join(local_repo_path, file)) as source_code_file:
        for line in source_code_file:
            # Each manifest line is expected to split into exactly three fields.
            name, version, url = line.split(" ")
            file_name = f"{name}_v{version}_source_code"
            s3_object_path = f"{THIRD_PARTY_SOURCE_CODE_BUCKET_PATH}/{file_name}.tar.gz"
            local_file_path = os.path.join(local_repo_path, file_name)

            # Clone and tar the package source, retrying up to 3 times on failure.
            # NOTE(review): there is no `break` after a successful attempt, so the tar
            # step re-runs on every iteration even when the first attempt succeeds.
            for i in range(3):
                try:
                    if not os.path.isdir(local_file_path):
                        context.run(
                            f"git clone {url.rstrip()} {local_file_path}")
                    context.run(
                        f"tar -czvf {local_file_path}.tar.gz {local_file_path}"
                    )
                except Exception as e:
                    time.sleep(1)
                    if i == 2:
                        LOGGER.error(f"Unable to clone git repo. Error: {e}")
                        raise
                    continue

            # Upload only when the object is not already in the bucket:
            # .load() raises a ClientError with code 404 when the key is absent.
            try:
                if os.path.exists(f"{local_file_path}.tar.gz"):
                    LOGGER.info(f"Uploading package to s3 bucket: {line}")
                    s3_resource.Object(THIRD_PARTY_SOURCE_CODE_BUCKET,
                                       s3_object_path).load()
            except botocore.exceptions.ClientError as e:
                if e.response["Error"]["Code"] == "404":
                    try:
                        # using aws cli as using boto3 expects to upload folder by iterating through each file instead of entire folder.
                        context.run(
                            f"aws s3 cp {local_file_path}.tar.gz s3://{THIRD_PARTY_SOURCE_CODE_BUCKET}/{s3_object_path}"
                        )
                        object = s3_resource.Bucket(
                            THIRD_PARTY_SOURCE_CODE_BUCKET).Object(
                                s3_object_path)
                        object.Acl().put(ACL="public-read")
                    # NOTE(review): `ClientError` is referenced unqualified here while the
                    # outer handler uses botocore.exceptions.ClientError — presumably it is
                    # imported at the top of the file; verify, else this is a NameError.
                    except ClientError as e:
                        LOGGER.error(
                            f"Unable to upload source code to bucket {THIRD_PARTY_SOURCE_CODE_BUCKET}. Error: {e}"
                        )
                        raise
                else:
                    LOGGER.error(
                        f"Unable to check if source code is present on bucket {THIRD_PARTY_SOURCE_CODE_BUCKET}. Error: {e}"
                    )
                    raise
def _run_dependency_check_test(image, ec2_connection):
    """
    Run the OWASP dependency-check scan for ``image`` on a remote EC2 instance and fail
    if any CVE outside the allow-list is reported with CRITICAL or HIGH severity.

    :param image: ECR image URI
    :param ec2_connection: connection object used to run commands on the EC2 instance
    """
    # Record any whitelisted medium/low severity CVEs; I.E. allowed_vulnerabilities = {CVE-1000-5555, CVE-9999-9999}
    allowed_vulnerabilities = {
        # Those vulnerabilities are fixed. Current openssl version is 1.1.1g. These are false positive
        "CVE-2016-2109",
        "CVE-2016-2177",
        "CVE-2016-6303",
        "CVE-2016-2182",
        # CVE-2020-13936: vulnerability found in apache velocity package which is a dependency for dependency-check package. Hence, ignoring.
        "CVE-2020-13936",
    }
    processor = get_processor_from_image_uri(image)

    # Whitelist CVE #CVE-2021-3711 for DLCs where openssl is installed using apt-get
    framework, _ = get_framework_and_version_from_tag(image)
    # First "X.Y" found anywhere in the image URI is taken as the framework version.
    short_fw_version = re.search(r"(\d+\.\d+)", image).group(1)

    # Check that these versions have been matched on https://ubuntu.com/security/CVE-2021-3711 before adding
    allow_openssl_cve_fw_versions = {
        "tensorflow": {
            "1.15": ["cpu", "gpu", "neuron"],
            "2.3": ["cpu", "gpu"],
            "2.4": ["cpu", "gpu"],
            "2.5": ["cpu", "gpu", "neuron"],
            "2.6": ["cpu", "gpu"],
            "2.7": ["cpu", "gpu"],
        },
        "mxnet": {
            "1.8": ["neuron"],
            "1.9": ["cpu", "gpu"]
        },
        "pytorch": {
            "1.10": ["cpu"]
        },
        "huggingface_pytorch": {
            "1.8": ["cpu", "gpu"],
            "1.9": ["cpu", "gpu"]
        },
        "huggingface_tensorflow": {
            "2.4": ["cpu", "gpu"],
            "2.5": ["cpu", "gpu"]
        },
        "autogluon": {
            "0.3": ["cpu"]
        },
    }

    if processor in allow_openssl_cve_fw_versions.get(framework, {}).get(
            short_fw_version, []):
        allowed_vulnerabilities.add("CVE-2021-3711")

    container_name = f"dep_check_{processor}"
    report_addon = get_container_name("depcheck-report", image)
    dependency_check_report = f"{report_addon}.html"
    html_file = f"{container_name}:/build/dependency-check-report.html"
    test_script = os.path.join(CONTAINER_TESTS_PREFIX, "testDependencyCheck")

    # Execute test, copy results to s3
    ec2.execute_ec2_training_test(ec2_connection,
                                  image,
                                  test_script,
                                  container_name=container_name,
                                  bin_bash_entrypoint=True)
    ec2_connection.run(f"docker cp {html_file} ~/{dependency_check_report}")
    ec2_connection.run(
        f"aws s3 cp ~/{dependency_check_report} s3://dlc-dependency-check")

    # Check for any vulnerabilities not mentioned in allowed_vulnerabilities
    html_output = ec2_connection.run(f"cat ~/{dependency_check_report}",
                                     hide=True).stdout
    cves = re.findall(r">(CVE-\d+-\d+)</a>", html_output)
    vulnerabilities = set(cves) - allowed_vulnerabilities

    if vulnerabilities:
        vulnerability_severity = {}

        # Check NVD for vulnerability severity to provide this useful info in error message.
        for vulnerability in vulnerabilities:
            try:
                cve_url = f"https://services.nvd.nist.gov/rest/json/cve/1.0/{vulnerability}"

                session = requests.Session()
                session.mount(
                    "https://",
                    requests.adapters.HTTPAdapter(max_retries=Retry(
                        total=5, status_forcelist=[404, 504, 502])),
                )
                response = session.get(cve_url)

                if response.status_code == 200:
                    # Walk the NVD v1.0 JSON response defensively; unknown shape -> "UNKNOWN".
                    severity = (response.json().get("result", {}).get(
                        "CVE_Items", [{}])[0].get("impact", {}).get(
                            "baseMetricV2", {}).get("severity", "UNKNOWN"))
                    if vulnerability_severity.get(severity):
                        vulnerability_severity[severity].append(vulnerability)
                    else:
                        vulnerability_severity[severity] = [vulnerability]
            except ConnectionError:
                LOGGER.exception(
                    f"Failed to load NIST data for CVE {vulnerability}")

        # TODO: Remove this once we have whitelisted appropriate LOW/MEDIUM vulnerabilities
        if not (vulnerability_severity.get("CRITICAL")
                or vulnerability_severity.get("HIGH")):
            return

        raise DependencyCheckFailure(
            f"Unrecognized CVEs have been reported : {vulnerability_severity}. "
            f"Allowed vulnerabilities are {allowed_vulnerabilities or None}. Please see "
            f"{dependency_check_report} for more details.")
def test_framework_version_cpu(image):
    """
    Check that the framework version in the image tag is the same as the one on a running container.
    This function tests CPU, EIA images.

    :param image: ECR image URI
    """
    if "gpu" in image:
        pytest.skip(
            "GPU images will have their framework version tested in test_framework_and_cuda_version_gpu"
        )
    if "neuron" in image:
        pytest.skip(
            "Neuron images will have their framework version tested in test_framework_and_neuron_sdk_version"
        )
    image_repo_name, _ = get_repository_and_tag_from_image_uri(image)
    # Non-gpu TF inference images carry only the serving binary, not core tensorflow.
    if re.fullmatch(
            r"(pr-|beta-|nightly-)?tensorflow-inference(-eia|-graviton)?",
            image_repo_name):
        pytest.skip(
            "Non-gpu tensorflow-inference images will be tested in test_tf_serving_version_cpu."
        )
    tested_framework, tag_framework_version = get_framework_and_version_from_tag(
        image)
    # Framework name may include huggingface
    if tested_framework.startswith('huggingface_'):
        tested_framework = tested_framework[len("huggingface_"):]
    # Module name is torch
    if tested_framework == "pytorch":
        tested_framework = "torch"
    elif tested_framework == "autogluon":
        # autogluon's version-bearing importable module is autogluon.core
        tested_framework = "autogluon.core"
    ctx = Context()
    container_name = get_container_name("framework-version", image)
    start_container(container_name, image, ctx)
    output = run_cmd_on_container(
        container_name,
        ctx,
        f"import {tested_framework}; print({tested_framework}.__version__)",
        executable="python")
    if is_canary_context():
        # Canary runs only require the tag version to appear as a substring.
        assert tag_framework_version in output.stdout.strip()
    else:
        if tested_framework == "autogluon.core":
            # 0.3.2-tagged images still report 0.3.1 from autogluon.core; accept that.
            version_to_check = "0.3.1" if tag_framework_version == "0.3.2" else tag_framework_version
            assert output.stdout.strip().startswith(version_to_check)
        # Habana v1.2 binary does not follow the X.Y.Z+cpu naming convention
        elif "habana" not in image_repo_name:
            if tested_framework == "torch" and Version(
                    tag_framework_version) >= Version("1.10.0"):
                # PT >= 1.10 CPU builds must report "X.Y.Z+cpu" exactly.
                torch_version_pattern = r"{torch_version}(\+cpu)".format(
                    torch_version=tag_framework_version)
                assert re.fullmatch(
                    torch_version_pattern, output.stdout.strip()
                ), (f"torch.__version__ = {output.stdout.strip()} does not match {torch_version_pattern}\n"
                    f"Please specify framework version as X.Y.Z+cpu")
            else:
                if "neuron" in image:
                    assert tag_framework_version in output.stdout.strip()
                if all(_string in image
                       for _string in ["pytorch", "habana", "synapseai1.3.0"]):
                    # Habana Pytorch version looks like 1.10.0a0+gitb488e78 for SynapseAI1.3 PT1.10.1 images
                    # NOTE(review): this branch sits under `"habana" not in image_repo_name`
                    # yet tests for "habana" in the image URI — confirm it is reachable.
                    pt_fw_version_pattern = r"(\d+(\.\d+){1,2}(-rc\d)?)((a0\+git\w{7}))"
                    pt_fw_version_match = re.fullmatch(pt_fw_version_pattern,
                                                       output.stdout.strip())
                    # This is desired for PT1.10.1 images
                    assert pt_fw_version_match.group(1) == "1.10.0"
                else:
                    assert tag_framework_version == output.stdout.strip()
    stop_and_remove_container(container_name, ctx)
def _run_dependency_check_test(image, ec2_connection, processor):
    """
    Run the OWASP dependency-check scan for ``image`` on a remote EC2 instance and fail
    if any CVE outside the allow-list is reported with CRITICAL or HIGH severity.

    :param image: ECR image URI
    :param ec2_connection: connection object used to run commands on the EC2 instance
    :param processor: processor string (e.g. "cpu"/"gpu") used to name the scan container
    :raises DependencyCheckFailure: when unrecognized CRITICAL/HIGH CVEs are found
    """
    # Record any whitelisted medium/low severity CVEs; I.E. allowed_vulnerabilities = {CVE-1000-5555, CVE-9999-9999}
    allowed_vulnerabilities = {
        # Those vulnerabilities are fixed. Current openssl version is 1.1.1g. These are false positive
        "CVE-2016-2109",
        "CVE-2016-2177",
        "CVE-2016-6303",
        "CVE-2016-2182",
        # CVE-2020-13936: vulnerability found in apache velocity package which is a dependency for dependency-check package. Hence, ignoring.
        "CVE-2020-13936",
    }

    container_name = f"dep_check_{processor}"
    report_addon = get_container_name("depcheck-report", image)
    dependency_check_report = f"{report_addon}.html"
    html_file = f"{container_name}:/build/dependency-check-report.html"
    test_script = os.path.join(CONTAINER_TESTS_PREFIX, "testDependencyCheck")

    # Execute test, copy results to s3
    ec2.execute_ec2_training_test(ec2_connection, image, test_script, container_name=container_name)
    ec2_connection.run(f"docker cp {html_file} ~/{dependency_check_report}")
    ec2_connection.run(f"aws s3 cp ~/{dependency_check_report} s3://dlc-dependency-check")

    # Check for any vulnerabilities not mentioned in allowed_vulnerabilities
    html_output = ec2_connection.run(f"cat ~/{dependency_check_report}", hide=True).stdout
    cves = re.findall(r">(CVE-\d+-\d+)</a>", html_output)
    vulnerabilities = set(cves) - allowed_vulnerabilities

    if vulnerabilities:
        vulnerability_severity = {}

        # Check NVD for vulnerability severity to provide this useful info in error message.
        for vulnerability in vulnerabilities:
            try:
                cve_url = f"https://services.nvd.nist.gov/rest/json/cve/1.0/{vulnerability}"

                session = requests.Session()
                session.mount(
                    "https://",
                    requests.adapters.HTTPAdapter(max_retries=Retry(total=5, status_forcelist=[404, 504, 502])),
                )
                response = session.get(cve_url)

                if response.status_code == 200:
                    # Walk the NVD v1.0 JSON response defensively; unknown shape -> "UNKNOWN".
                    severity = (
                        response.json()
                        .get("result", {})
                        .get("CVE_Items", [{}])[0]
                        .get("impact", {})
                        .get("baseMetricV2", {})
                        .get("severity", "UNKNOWN")
                    )
                    # Tally inside the 200-status branch: previously this ran after the
                    # try/except and referenced `severity`, which is unbound whenever the
                    # request failed or returned non-200, raising UnboundLocalError. This
                    # now matches the sibling two-argument variant of this helper.
                    if vulnerability_severity.get(severity):
                        vulnerability_severity[severity].append(vulnerability)
                    else:
                        vulnerability_severity[severity] = [vulnerability]
            except ConnectionError:
                LOGGER.exception(f"Failed to load NIST data for CVE {vulnerability}")

        # TODO: Remove this once we have whitelisted appropriate LOW/MEDIUM vulnerabilities
        if not (vulnerability_severity.get("CRITICAL") or vulnerability_severity.get("HIGH")):
            return

        raise DependencyCheckFailure(
            f"Unrecognized CVEs have been reported : {vulnerability_severity}. "
            f"Allowed vulnerabilities are {allowed_vulnerabilities or None}. Please see "
            f"{dependency_check_report} for more details."
        )
def test_eks_mxnet_dgl_single_node_training(mxnet_training, py3_only):
    """
    Function to create a pod using kubectl and given container image, and run
    DGL training with MXNet backend
    Args:
        :param mxnet_training: the ECR URI
    """
    # TODO: remove/update this when DGL supports MXNet 1.9
    _, framework_version = get_framework_and_version_from_tag(mxnet_training)
    if Version(framework_version) >= Version('1.9.0'):
        pytest.skip("Skipping DGL tests as DGL does not yet support MXNet 1.9")

    training_result = False
    # Randomize pod/yaml names so concurrent test runs do not collide.
    rand_int = random.randint(4001, 6000)

    yaml_path = os.path.join(
        os.sep, "tmp", f"mxnet_single_node_training_dgl_{rand_int}.yaml")
    pod_name = f"mxnet-single-node-training-dgl-{rand_int}"

    ctx = Context()

    # Run container to determine dgl version
    container_name = get_container_name("dgl-mx", mxnet_training)
    ctx.run(f"docker run --name {container_name} -itd {mxnet_training}")

    dgl_version = ctx.run(
        f"docker exec --user root {container_name} python -c 'import dgl; print(dgl.__version__)'"
    ).stdout.strip()
    # Map e.g. "0.6.1" to the matching "0.6.x" branch of the dgl repository.
    dgl_major_minor = re.search(r'(^\d+.\d+).', dgl_version).group(1)
    dgl_branch = f"{dgl_major_minor}.x"

    args = (
        f"git clone -b {dgl_branch} https://github.com/dmlc/dgl.git && "
        f"cd /dgl/examples/mxnet/gcn/ && DGLBACKEND=mxnet python train.py --dataset cora"
    )

    # TODO: Change hardcoded value to read a mapping from the EKS cluster instance.
    cpu_limit = 72
    # NOTE(review): true division yields a float, so the rendered limit is "36.0";
    # confirm the yaml template accepts a fractional CPU quantity.
    cpu_limit = str(int(cpu_limit) / 2)

    # --gpu 0 selects device 0; --gpu -1 forces CPU training.
    if "gpu" in mxnet_training:
        args = args + " --gpu 0"
    else:
        args = args + " --gpu -1"

    search_replace_dict = {
        "<POD_NAME>": pod_name,
        "<CONTAINER_NAME>": mxnet_training,
        "<ARGS>": args,
        "<CPU_LIMIT>": cpu_limit,
    }

    eks_utils.write_eks_yaml_file_from_template(
        eks_utils.SINGLE_NODE_TRAINING_TEMPLATE_PATH, yaml_path,
        search_replace_dict)

    # Create the pod, wait for training to finish, and always clean the pod up.
    try:
        run("kubectl create -f {}".format(yaml_path))

        if eks_utils.is_eks_training_complete(pod_name):
            dgl_out = run("kubectl logs {}".format(pod_name)).stdout
            # "Test accuracy" in the logs marks a successful training run.
            if "Test accuracy" in dgl_out:
                training_result = True
            else:
                eks_utils.LOGGER.info("**** training output ****")
                eks_utils.LOGGER.debug(dgl_out)

        assert training_result, f"Training failed"
    finally:
        run("kubectl delete pods {}".format(pod_name))
def test_framework_version_cpu(image):
    """
    Check that the framework version in the image tag is the same as the one on a running container.
    This function tests CPU, EIA images.

    :param image: ECR image URI
    """
    if "gpu" in image:
        pytest.skip(
            "GPU images will have their framework version tested in test_framework_and_cuda_version_gpu"
        )
    if "neuron" in image:
        pytest.skip(
            "Neuron images will have their framework version tested in test_framework_and_neuron_sdk_version"
        )
    image_repo_name, _ = get_repository_and_tag_from_image_uri(image)
    # TF inference images carry only the serving binary, not core tensorflow.
    # NOTE(review): pytest.skip's parameter is the positional reason; the msg=
    # keyword is deprecated/removed in modern pytest — confirm the pinned version.
    if re.fullmatch(
            r"(pr-|beta-|nightly-)?tensorflow-inference(-eia|-graviton)?",
            image_repo_name):
        pytest.skip(
            msg=
            "TF inference for CPU/GPU/EIA does not have core tensorflow installed"
        )
    tested_framework, tag_framework_version = get_framework_and_version_from_tag(
        image)
    # Framework name may include huggingface
    if tested_framework.startswith('huggingface_'):
        tested_framework = tested_framework[len("huggingface_"):]
    # Module name is torch
    if tested_framework == "pytorch":
        tested_framework = "torch"
    elif tested_framework == "autogluon":
        # autogluon's version-bearing importable module is autogluon.core
        tested_framework = "autogluon.core"
    ctx = Context()
    container_name = get_container_name("framework-version", image)
    start_container(container_name, image, ctx)
    output = run_cmd_on_container(
        container_name,
        ctx,
        f"import {tested_framework}; print({tested_framework}.__version__)",
        executable="python")
    if is_canary_context():
        # Canary runs only require the tag version to appear as a substring.
        assert tag_framework_version in output.stdout.strip()
    else:
        if tested_framework == "autogluon.core":
            assert output.stdout.strip().startswith(tag_framework_version)
        # Habana v1.2 binary does not follow the X.Y.Z+cpu naming convention
        elif "habana" not in image_repo_name:
            if tested_framework == "torch" and Version(
                    tag_framework_version) >= Version("1.10.0"):
                # PT >= 1.10 CPU builds must report "X.Y.Z+cpu" exactly.
                torch_version_pattern = r"{torch_version}(\+cpu)".format(
                    torch_version=tag_framework_version)
                assert re.fullmatch(
                    torch_version_pattern, output.stdout.strip()
                ), (f"torch.__version__ = {output.stdout.strip()} does not match {torch_version_pattern}\n"
                    f"Please specify framework version as X.Y.Z+cpu")
            else:
                # NOTE(review): neuron images were skipped at the top of this test,
                # so this branch looks unreachable — confirm before relying on it.
                if "neuron" in image:
                    assert tag_framework_version in output.stdout.strip()
                else:
                    assert tag_framework_version == output.stdout.strip()
    stop_and_remove_container(container_name, ctx)