def _run_instance_role_disabled(image_uri, ec2_client, ec2_instance, ec2_connection):
    """
    Verify that the DLC telemetry call does NOT tag the EC2 instance when access to
    the instance metadata service (and hence instance-role credentials) is blocked.

    :param image_uri: <str> ECR image URI under test
    :param ec2_client: boto3 EC2 client used to read/remove instance tags
    :param ec2_instance: <tuple> fixture output; first element is the EC2 instance ID
    :param ec2_connection: connection object used to run shell commands on the instance
    """
    expected_tag_key = "aws-dlc-autogenerated-tag-do-not-delete"
    ec2_instance_id, _ = ec2_instance
    account_id = test_utils.get_account_id_from_image_uri(image_uri)
    image_region = test_utils.get_region_from_image_uri(image_uri)
    repo_name, _ = test_utils.get_repository_and_tag_from_image_uri(image_uri)
    framework, _ = test_utils.get_framework_and_version_from_tag(image_uri)
    job_type = test_utils.get_job_type_from_image(image_uri)
    processor = test_utils.get_processor_from_image_uri(image_uri)

    container_name = f"{repo_name}-telemetry_bad_instance_role-ec2"
    docker_cmd = "nvidia-docker" if processor == "gpu" else "docker"

    test_utils.login_to_ecr_registry(ec2_connection, account_id, image_region)
    ec2_connection.run(f"{docker_cmd} pull -q {image_uri}")

    # Start from a clean slate: remove the telemetry tag if a previous test left it behind
    preexisting_ec2_instance_tags = ec2_utils.get_ec2_instance_tags(ec2_instance_id, ec2_client=ec2_client)
    if expected_tag_key in preexisting_ec2_instance_tags:
        ec2_client.remove_tags(Resources=[ec2_instance_id], Tags=[{"Key": expected_tag_key}])

    # Disable access to EC2 instance metadata so the container cannot obtain role credentials
    ec2_connection.run("sudo route add -host 169.254.169.254 reject")

    if "tensorflow" in framework and job_type == "inference":
        # TF inference images are exercised via the serving command rather than a
        # framework import; give the container a few seconds to run its entrypoint
        model_name = "saved_model_half_plus_two"
        model_base_path = test_utils.get_tensorflow_model_base_path(image_uri)
        env_vars_list = test_utils.get_tensorflow_inference_environment_variables(model_name, model_base_path)
        env_vars = " ".join([f"-e {entry['name']}={entry['value']}" for entry in env_vars_list])
        inference_command = get_tensorflow_inference_command_tf27_above(image_uri, model_name)
        ec2_connection.run(f"{docker_cmd} run {env_vars} --name {container_name} -id {image_uri} {inference_command}")
        time.sleep(5)
    else:
        # All other images: importing the framework package inside the container is
        # what triggers the telemetry code path
        framework_to_import = framework.replace("huggingface_", "")
        framework_to_import = "torch" if framework_to_import == "pytorch" else framework_to_import
        ec2_connection.run(f"{docker_cmd} run --name {container_name} -id {image_uri} bash")
        output = ec2_connection.run(
            f"{docker_cmd} exec -i {container_name} python -c 'import {framework_to_import}; import time; time.sleep(5)'",
            warn=True,
        )
        # The import itself must still succeed even without credentials
        assert output.ok, f"'import {framework_to_import}' fails when credentials not configured"

    ec2_instance_tags = ec2_utils.get_ec2_instance_tags(ec2_instance_id, ec2_client=ec2_client)
    assert expected_tag_key not in ec2_instance_tags, (
        f"{expected_tag_key} was applied as an instance tag. "
        "EC2 create_tags went through even though it should not have"
    )
def test_dlc_standard_labels(image, region):
    """
    Verify that the expected com.amazonaws.ml.engines.* metadata labels are present on
    the image manifest (and, for EC2-scoped images, that no such labels exist yet).

    :param image: <str> ECR image URI
    :param region: <str> AWS region hosting the image
    """
    customer_type_label_prefix = "ec2" if test_utils.is_ec2_image(image) else "sagemaker"

    framework, fw_version = test_utils.get_framework_and_version_from_tag(image)
    # Label values use '-' in place of '_' and '.' separators
    framework = framework.replace('_', '-')
    fw_version = fw_version.replace('.', '-')
    device_type = test_utils.get_processor_from_image_uri(image)
    if device_type == "gpu":
        cuda_version = test_utils.get_cuda_version_from_tag(image)
        device_type = f"{device_type}.{cuda_version}"
    python_version = test_utils.get_python_version_from_image_uri(image)
    job_type = test_utils.get_job_type_from_image(image)
    transformers_version = test_utils.get_transformers_version_from_image_uri(image).replace('.', '-')
    os_version = test_utils.get_os_version_from_image_uri(image).replace('.', '-')

    # TODO: Add x86 env variable to check explicitly for x86, instead of assuming that everything not graviton is x86
    arch_type = "graviton" if test_utils.is_graviton_architecture() else "x86"

    contributor = test_utils.get_contributor_from_image_uri(image)

    expected_labels = [
        f"com.amazonaws.ml.engines.{customer_type_label_prefix}.dlc.framework.{framework}.{fw_version}",
        f"com.amazonaws.ml.engines.{customer_type_label_prefix}.dlc.device.{device_type}",
        f"com.amazonaws.ml.engines.{customer_type_label_prefix}.dlc.python.{python_version}",
        f"com.amazonaws.ml.engines.{customer_type_label_prefix}.dlc.job.{job_type}",
        f"com.amazonaws.ml.engines.{customer_type_label_prefix}.dlc.arch.{arch_type}",
        f"com.amazonaws.ml.engines.{customer_type_label_prefix}.dlc.os.{os_version}",
    ]

    # Optional labels, expected only when the corresponding attribute applies to the image
    if contributor:
        expected_labels.append(
            f"com.amazonaws.ml.engines.{customer_type_label_prefix}.dlc.contributor.{contributor}"
        )
    if transformers_version:
        expected_labels.append(
            f"com.amazonaws.ml.engines.{customer_type_label_prefix}.dlc.lib.transformers.{transformers_version}"
        )

    actual_labels = test_utils.get_labels_from_ecr_image(image, region)

    missing_labels = [label for label in expected_labels if label not in actual_labels]

    # TODO: Remove this when ec2 labels are added. For now, ensure they are not added.
    if customer_type_label_prefix == "ec2":
        assert set(missing_labels) == set(expected_labels), \
            f"EC2 labels are not supported yet, and should not be added to containers. " \
            f"{set(expected_labels) - set(missing_labels)} should not be present."
    else:
        assert not missing_labels, \
            f"Labels {missing_labels} are expected in image {image}, but cannot be found. " \
            f"All labels on image: {actual_labels}"
def generate_unique_values_for_fixtures(metafunc_obj, images_to_parametrize, values_to_generate_for_fixture):
    """
    Parametrize fixtures so each test gets uniquely named resources.

    For every entry in values_to_generate_for_fixture that maps a fixture used by the
    test function to a fixture needing parametrization, produce one (image, unique_name)
    pair per image URI.

    :param metafunc_obj: pytest metafunc object
    :param images_to_parametrize: <list> list of image URIs which are used in a test
    :param values_to_generate_for_fixture: <dict> Mapping of "Fixture used" -> "Fixture to be parametrized"
    :return: <dict> Mapping of "Fixture to be parametrized" -> "Unique values for fixture to be parametrized"
    """
    job_type_map = {"training": "tr", "inference": "inf"}
    framework_name_map = {
        "tensorflow": "tf",
        "mxnet": "mx",
        "pytorch": "pt",
        "huggingface_pytorch": "hf-pt",
        "huggingface_tensorflow": "hf-tf",
        "huggingface_pytorch_trcomp": "hf-pt-trc",
        "huggingface_tensorflow_trcomp": "hf-tf-trc",
        "autogluon": "ag",
    }

    parametrized_fixtures = {}
    if not images_to_parametrize:
        return parametrized_fixtures

    allowed_processors = ("gpu", "cpu", "eia", "neuron", "hpu")

    for used_fixture, fixture_to_parametrize in values_to_generate_for_fixture.items():
        if used_fixture not in metafunc_obj.fixturenames:
            continue
        unique_values = []
        for idx, image_uri in enumerate(images_to_parametrize):
            # Tag fixtures with EC2 instance types if env variable is present;
            # only the first processor substring found in the URI is considered
            instance_suffix = ""
            matched_processor = next((p for p in allowed_processors if p in image_uri), None)
            if matched_processor is not None:
                env_var = (
                    f"EC2_{matched_processor.upper()}_GRAVITON_INSTANCE_TYPE"
                    if "graviton" in image_uri
                    else f"EC2_{matched_processor.upper()}_INSTANCE_TYPE"
                )
                configured_type = os.getenv(env_var)
                if configured_type:
                    instance_suffix = f"-{configured_type.replace('.', '-')}"

            sanitized_tag = image_uri.split(":")[-1].replace(".", "-")
            framework, _ = get_framework_and_version_from_tag(image_uri)
            job_type = get_job_type_from_image(image_uri)
            unique_name = (
                f"{metafunc_obj.function.__name__}-{framework_name_map.get(framework)}-"
                f"{job_type_map.get(job_type)}-{sanitized_tag}-"
                f"{os.getenv('CODEBUILD_RESOLVED_SOURCE_VERSION')}-{idx}{instance_suffix}"
            )
            unique_values.append((image_uri, unique_name))
        parametrized_fixtures[fixture_to_parametrize] = unique_values

    return parametrized_fixtures
def _run_tag_success(image_uri, ec2_client, ec2_instance, ec2_connection):
    """
    Verify that the DLC telemetry call successfully applies the autogenerated tag to
    the EC2 instance when instance-role credentials are available.

    :param image_uri: <str> ECR image URI under test
    :param ec2_client: boto3 EC2 client used to read/remove instance tags
    :param ec2_instance: <tuple> fixture output; first element is the EC2 instance ID
    :param ec2_connection: connection object used to run shell commands on the instance
    """
    expected_tag_key = "aws-dlc-autogenerated-tag-do-not-delete"
    ec2_instance_id, _ = ec2_instance
    account_id = test_utils.get_account_id_from_image_uri(image_uri)
    image_region = test_utils.get_region_from_image_uri(image_uri)
    repo_name, _ = test_utils.get_repository_and_tag_from_image_uri(image_uri)
    framework, _ = test_utils.get_framework_and_version_from_tag(image_uri)
    job_type = test_utils.get_job_type_from_image(image_uri)
    processor = test_utils.get_processor_from_image_uri(image_uri)

    container_name = f"{repo_name}-telemetry_tag_instance_success-ec2"
    docker_cmd = "nvidia-docker" if processor == "gpu" else "docker"

    test_utils.login_to_ecr_registry(ec2_connection, account_id, image_region)
    ec2_connection.run(f"{docker_cmd} pull -q {image_uri}")

    # Start from a clean slate: remove the telemetry tag if a previous test left it behind
    preexisting_ec2_instance_tags = ec2_utils.get_ec2_instance_tags(ec2_instance_id, ec2_client=ec2_client)
    if expected_tag_key in preexisting_ec2_instance_tags:
        ec2_client.remove_tags(Resources=[ec2_instance_id], Tags=[{"Key": expected_tag_key}])

    if framework == "tensorflow" and job_type == "inference":
        # TF inference images are exercised via their default serving entrypoint;
        # give the container a few seconds to run it
        env_vars_list = ecs_utils.get_ecs_tensorflow_environment_variables(processor, "saved_model_half_plus_two")
        env_vars = " ".join([f"-e {entry['name']}={entry['value']}" for entry in env_vars_list])
        ec2_connection.run(f"{docker_cmd} run {env_vars} --name {container_name} -id {image_uri}")
        time.sleep(5)
    else:
        framework_to_import = framework.replace("huggingface_", "")
        framework_to_import = "torch" if framework_to_import == "pytorch" else framework_to_import
        ec2_connection.run(f"{docker_cmd} run --name {container_name} -id {image_uri} bash")
        # Importing the framework triggers the telemetry code path. Best-effort
        # (warn=True): the success criterion is the instance tag asserted below.
        ec2_connection.run(
            f"{docker_cmd} exec -i {container_name} python -c 'import {framework_to_import}; import time; time.sleep(5)'",
            warn=True,
        )

    ec2_instance_tags = ec2_utils.get_ec2_instance_tags(ec2_instance_id, ec2_client=ec2_client)
    assert expected_tag_key in ec2_instance_tags, f"{expected_tag_key} was not applied as an instance tag"
def test_dlc_major_version_dockerfiles(image):
    """
    Test to make sure semantic versioning scheme in Dockerfiles is correct

    For the framework/job_type/Major.Minor version of the image under test, collects
    every Dockerfile.<processor> on disk, reads each one's dlc_major_version label and
    PYTHON_VERSION arg, and asserts the major versions for the matching python version
    form the contiguous sequence 1..N (with two hard-coded exceptions below).

    :param image: <str> ECR image URI
    """
    # Repository root: everything before the first "<sep>test<sep>" in the CWD
    dlc_dir = os.getcwd().split(f"{os.sep}test{os.sep}")[0]
    job_type = test_utils.get_job_type_from_image(image)
    framework, fw_version = test_utils.get_framework_and_version_from_tag(image)
    processor = test_utils.get_processor_from_image_uri(image)

    # Assign a string of numbers associated with python version in tag. Python major version is not sufficient to
    # define DLC major version
    python_major_minor_version = re.search(r"-py(\d{2,})", image).group(1)

    root_dir = os.path.join(dlc_dir, framework, job_type, "docker")

    # Skip older FW versions that did not use this versioning scheme
    references = {
        "tensorflow2": "2.2.0",
        "tensorflow1": "1.16.0",
        "mxnet": "1.7.0",
        "pytorch": "1.5.0"
    }
    # TF1 and TF2 adopted the scheme at different framework versions, so they are
    # looked up under distinct keys
    if test_utils.is_tf_version("1", image):
        reference_fw = "tensorflow1"
    elif test_utils.is_tf_version("2", image):
        reference_fw = "tensorflow2"
    else:
        reference_fw = framework
    if processor != "eia" and (
            reference_fw in references and Version(fw_version) < Version(references[reference_fw])):
        pytest.skip(
            f"Not enforcing new versioning scheme on old image {image}. "
            f"Started enforcing version scheme on the following: {references}")

    # Find all Dockerfile.<processor> for this framework/job_type's Major.Minor version
    dockerfiles = []
    fw_version_major_minor = re.match(r"(\d+\.\d+)", fw_version).group(1)
    for root, dirnames, filenames in os.walk(root_dir):
        for filename in filenames:
            if filename == f"Dockerfile.{processor}":
                dockerfile_path = os.path.join(root_dir, root, filename)
                # Exclude example Dockerfiles and restrict to this Major.Minor version's directory
                if "example" not in dockerfile_path and f"{os.sep}{fw_version_major_minor}" in dockerfile_path:
                    dockerfiles.append(dockerfile_path)

    # For the collected dockerfiles above, note the DLC major versions in each Dockerfile if python version matches
    # the current image under test
    versions = {}
    dlc_label_regex = re.compile(r'LABEL dlc_major_version="(\d+)"')
    python_version_regex = re.compile(r"ARG PYTHON_VERSION=(\d+\.\d+)")
    for dockerfile in dockerfiles:
        with open(dockerfile, "r") as df:
            dlc_version = None
            python_version = None
            for line in df:
                major_version_match = dlc_label_regex.match(line)
                python_version_match = python_version_regex.match(line)
                if major_version_match:
                    dlc_version = int(major_version_match.group(1))
                elif python_version_match:
                    # Normalize "3.7" -> "37" to compare against the tag-derived value
                    python_version = python_version_match.group(1).replace(".", "")

        # Raise errors if dlc major version label and python version arg are not found in Dockerfile
        if not dlc_version:
            raise DLCMajorVersionLabelNotFound(
                f"Cannot find dlc_major_version label in {dockerfile}")
        if not python_version:
            raise DLCPythonVersionNotFound(
                f"Cannot find PYTHON_VERSION arg in {dockerfile}")
        if python_version == python_major_minor_version:
            versions[dockerfile] = dlc_version

    # Expect one Dockerfile per major version, numbered contiguously from 1
    expected_versions = list(range(1, len(dockerfiles) + 1))
    actual_versions = sorted(versions.values())

    # Test case explicitly for TF2.3 gpu, since v1.0 is banned
    if (framework, fw_version_major_minor, processor, python_major_minor_version, job_type) == (
            "tensorflow",
            "2.3",
            "gpu",
            "37",
            "training",
    ):
        # Shift expectations to 2..N+1 since v1 must not exist
        expected_versions = [v + 1 for v in expected_versions]
        assert 1 not in actual_versions, (
            f"DLC v1.0 is deprecated in TF2.3 gpu containers, but found major version 1 "
            f"in one of the Dockerfiles. Please inspect {versions}")

    # Test case explicitly for PyTorch 1.6.0 training gpu, since v2.0 is banned
    if (framework, fw_version_major_minor, processor, python_major_minor_version, job_type) == (
            "pytorch",
            "1.6",
            "gpu",
            "36",
            "training",
    ):
        # Expected sequence becomes 1, 3, 4, ... (v2 skipped, v1 retained)
        expected_versions = [v + 1 for v in expected_versions]
        expected_versions[0] = 1
        assert 2 not in actual_versions, (
            f"DLC v2.0 is deprecated in PyTorch 1.6.0 gpu containers, but found major version 2 "
            f"in one of the Dockerfiles. Please inspect {versions}")

    # Note: If, for example, we find 3 dockerfiles with the same framework major/minor version, same processor,
    # and same python major/minor version, we will expect DLC major versions 1, 2, and 3. If an exception needs to be
    # made to this rule, please see the above handling of TF2.3 as an example.
    assert actual_versions == expected_versions, (
        f"Found DLC major versions {actual_versions} but expected {expected_versions} for "
        f"{framework} {job_type} {processor}. Full version info: {versions}. Py version: {python_major_minor_version}"
    )