def fetch_dlc_images_for_test_jobs(images, use_latest_additional_tag=False):
    """
    Use the JobParameters.image_run_test_types values to pass on image ECR URLs to each test type.

    An image is considered only when its test promotion is enabled and it either
    built successfully, or was not built while builds are disabled (a pre-existing
    image). Every considered image is added to the "sanity" list.

    :param images: list of docker image objects
    :param use_latest_additional_tag: bool, when True and the image has additional
        tags, "<repository>:<last additional tag>" is tested instead of the image's
        ecr_url
    :return: dictionary mapping each test type to a de-duplicated list of image
        URLs, kept in first-seen order
    """
    DLC_IMAGES = {"sagemaker": [], "ecs": [], "eks": [], "ec2": [], "sanity": []}

    build_disabled = not is_build_enabled()

    for docker_image in images:
        if not docker_image.is_test_promotion_enabled:
            continue

        use_preexisting_images = (
            build_disabled and docker_image.build_status == constants.NOT_BUILT
        )
        if not (
            docker_image.build_status == constants.SUCCESS or use_preexisting_images
        ):
            continue

        ecr_url_to_test = docker_image.ecr_url
        if use_latest_additional_tag and len(docker_image.additional_tags) > 0:
            ecr_url_to_test = (
                f"{docker_image.repository}:{docker_image.additional_tags[-1]}"
            )

        # Run sanity tests on all the images built
        DLC_IMAGES["sanity"].append(ecr_url_to_test)

        image_job_type = docker_image.info.get("image_type")
        image_device_type = docker_image.info.get("device_type")
        image_python_version = docker_image.info.get("python_version")
        image_tag = f"{image_job_type}_{image_device_type}_{image_python_version}"

        image_run_test_types = JobParameters.image_run_test_types
        # Each entry is (key to look up, whether an "all" value expands to ALL_TESTS):
        #   - key "all": values can be (all, ecs, eks, ec2, sagemaker)
        #   - key training/inference: values can be (ecs, eks, ec2, sagemaker);
        #     "all" is not expanded for this key
        #   - key image_tag (<image_type>_<device_type>_<python_version>):
        #     "all" is expanded
        lookups = (
            (constants.ALL, True),
            (image_job_type, False),
            (image_tag, True),
        )
        for key, expand_all in lookups:
            if key not in image_run_test_types:
                continue
            run_tests = image_run_test_types.get(key)
            if expand_all and constants.ALL in run_tests:
                run_tests = constants.ALL_TESTS
            for test in run_tests:
                DLC_IMAGES[test].append(ecr_url_to_test)

    # De-duplicate while keeping first-seen order, so the result is deterministic
    # across runs (a set would yield an arbitrary order).
    return {
        test_type: list(dict.fromkeys(test_images))
        for test_type, test_images in DLC_IMAGES.items()
    }
def build_setup(framework, device_types=None, image_types=None, py_versions=None):
    """
    Setup the appropriate environment variables depending on whether this is a PR build
    or a dev build

    For every selected (device type, image type, python version) combination the
    variable "<FRAMEWORK>_<DEVICE_TYPE>_<IMAGE_TYPE>_<PY_VERSION>" (upper-cased) is
    set to "true" in os.environ, unless this is a PR build with builds disabled.

    In a PR build the three selections passed by the caller are replaced by the
    values returned from pr_build_setup.

    Parameters:
        framework: str
        device_types: [str], constants.ALL, or None (None is treated like constants.ALL)
        image_types: [str], constants.ALL, or None (None is treated like constants.ALL)
        py_versions: [str], constants.ALL, or None (None is treated like constants.ALL)

    Returns:
        None
    """
    # Start from everything that is supported; narrowed down below.
    to_build = {
        "device_types": constants.DEVICE_TYPES,
        "image_types": constants.IMAGE_TYPES,
        "py_versions": constants.PYTHON_VERSIONS,
    }
    build_context = os.environ.get("BUILD_CONTEXT")
    enable_build = is_build_enabled()

    if build_context == "PR":
        pr_number = os.getenv("CODEBUILD_SOURCE_VERSION")
        LOGGER.info(f"pr number: {pr_number}")
        if pr_number is not None:
            # Only the last "/"-separated component is the number.
            pr_number = int(pr_number.split("/")[-1])
        device_types, image_types, py_versions = pr_build_setup(pr_number, framework)

    requested = {
        "device_types": device_types,
        "image_types": image_types,
        "py_versions": py_versions,
    }
    for key, values in requested.items():
        # None (the parameter default) means "no restriction", like constants.ALL;
        # calling set(None) would raise TypeError.
        if values is None or values == constants.ALL:
            continue
        to_build[key] = to_build[key].intersection(set(values))

    # In a PR build with builds disabled, no variables are exported.
    if build_context == "PR" and not enable_build:
        return

    framework_prefix = framework.upper()
    for device_type in to_build["device_types"]:
        for image_type in to_build["image_types"]:
            for py_version in to_build["py_versions"]:
                env_variable = (
                    f"{framework_prefix}_{device_type.upper()}"
                    f"_{image_type.upper()}_{py_version.upper()}"
                )
                os.environ[env_variable] = "true"
def image_builder(buildspec):
    """
    Build the images described by a buildspec, push them, and export the test environment.

    Steps, in order:
      1. Load the buildspec; for huggingface / autogluon / trcomp frameworks, log in
         to ECR through the shell first.
      2. For every image entry whose "version" (if given) matches the buildspec
         version, assemble the build context, build args and labels, and create a
         pre-push stage DockerImage plus its common stage image.
      3. Process parent images, then child images, and assert that every image
         marked to be built and pushed was pushed.
      4. Print the build summary and errors, upload metrics, and set the
         environment for the test jobs.

    Parameters:
        buildspec: value handed to Buildspec.load (presumably the path to a
            buildspec file; confirm against Buildspec)

    Returns:
        None

    Raises:
        KeyError: a huggingface/trcomp image lacks 'transformers_version', or a
            training image lacks 'datasets_version'
        RuntimeError: the repository URI contains neither "training" nor "inference"
        AssertionError: an image expected to be pushed was not pushed
    """
    BUILDSPEC = Buildspec()
    BUILDSPEC.load(buildspec)
    PRE_PUSH_STAGE_IMAGES = []
    COMMON_STAGE_IMAGES = []

    framework = str(BUILDSPEC["framework"])

    if any(name in framework for name in ("huggingface", "autogluon", "trcomp")):
        os.system("echo login into public ECR")
        os.system(
            "aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 763104351884.dkr.ecr.us-west-2.amazonaws.com"
        )

    for image_name, image_config in BUILDSPEC["images"].items():
        ARTIFACTS = deepcopy(BUILDSPEC["context"]) if BUILDSPEC.get("context") else {}

        extra_build_args = {}
        labels = {}
        enable_datetime_tag = parse_dlc_developer_configs("build", "datetime_tag")

        # Skip image entries pinned to a different version than the buildspec's.
        if image_config.get("version") is not None:
            if BUILDSPEC["version"] != image_config.get("version"):
                continue

        if image_config.get("context") is not None:
            ARTIFACTS.update(image_config["context"])

        # NOTE(review): build_context is not assigned anywhere in this function;
        # presumably it is a module-level variable - confirm.
        image_tag = (
            tag_image_with_pr_number(image_config["tag"])
            if build_context == "PR"
            else image_config["tag"]
        )
        if enable_datetime_tag or build_context != "PR":
            image_tag = tag_image_with_datetime(image_tag)
        image_repo_uri = (
            image_config["repository"]
            if build_context == "PR"
            else modify_repository_name_for_context(
                str(image_config["repository"]), build_context
            )
        )

        # A child image is built on top of a pre-push image created earlier in this loop.
        base_image_uri = None
        if image_config.get("base_image_name") is not None:
            base_image_object = _find_image_object(
                PRE_PUSH_STAGE_IMAGES, image_config["base_image_name"]
            )
            base_image_uri = base_image_object.ecr_url

        if image_config.get("download_artifacts") is not None:
            for artifact_name, artifact in image_config.get(
                "download_artifacts"
            ).items():
                artifact_type = artifact["type"]
                uri = artifact["URI"]
                var = artifact["VAR_IN_DOCKERFILE"]

                try:
                    file_name = utils.download_file(uri, artifact_type).strip()
                except ValueError:
                    FORMATTER.print(
                        f"Artifact download failed: {uri} of type {artifact_type}."
                    )
                    # Skip this artifact: file_name was not assigned for it, so
                    # registering it would either raise UnboundLocalError or
                    # silently reuse the previous artifact's file name.
                    continue

                ARTIFACTS.update(
                    {
                        f"{artifact_name}": {
                            "source": os.path.join(
                                os.sep, os.path.abspath(os.getcwd()), file_name
                            ),
                            "target": file_name,
                        }
                    }
                )

                extra_build_args[var] = file_name
                labels[var] = file_name
                labels[f"{var}_URI"] = uri

        transformers_version = image_config.get("transformers_version")

        if framework.startswith("huggingface") or framework.endswith("trcomp"):
            if transformers_version:
                extra_build_args["TRANSFORMERS_VERSION"] = transformers_version
            else:
                raise KeyError(
                    "HuggingFace buildspec.yml must contain 'transformers_version' field for each image"
                )
            if "datasets_version" in image_config:
                extra_build_args["DATASETS_VERSION"] = image_config.get(
                    "datasets_version"
                )
            elif str(image_config["image_type"]) == "training":
                raise KeyError(
                    "HuggingFace buildspec.yml must contain 'datasets_version' field for each image"
                )

        ARTIFACTS.update(
            {
                "dockerfile": {
                    "source": image_config["docker_file"],
                    "target": "Dockerfile",
                }
            }
        )

        context = Context(ARTIFACTS, f"build/{image_name}.tar.gz", image_config["root"])

        if "labels" in image_config:
            labels.update(image_config.get("labels"))

        cx_type = utils.get_label_prefix_customer_type(image_tag)

        # Define label variables
        label_framework = framework.replace("_", "-")
        if image_config.get("framework_version"):
            label_framework_version = str(image_config["framework_version"]).replace(
                ".", "-"
            )
        else:
            label_framework_version = str(BUILDSPEC["version"]).replace(".", "-")
        label_device_type = str(image_config["device_type"])
        if label_device_type == "gpu":
            label_device_type = (
                f"{label_device_type}.{str(image_config['cuda_version'])}"
            )
        label_arch = str(BUILDSPEC["arch_type"])
        label_python_version = str(image_config["tag_python_version"])
        label_os_version = str(image_config.get("os_version")).replace(".", "-")
        # NOTE(review): str() turns a missing contributor into the string "None",
        # which is truthy, so the contributor check below never filters it out.
        # Left unchanged to keep the emitted labels identical - confirm intent.
        label_contributor = str(BUILDSPEC.get("contributor"))
        label_transformers_version = str(transformers_version).replace(".", "-")

        # job_type will be either inference or training, based on the repo URI
        if "training" in image_repo_uri:
            label_job_type = "training"
        elif "inference" in image_repo_uri:
            label_job_type = "inference"
        else:
            raise RuntimeError(
                f"Cannot find inference or training job type in {image_repo_uri}. "
                f"This is required to set job_type label."
            )

        if cx_type == "sagemaker":
            # Adding standard labels to all images
            labels[
                f"com.amazonaws.ml.engines.{cx_type}.dlc.framework.{label_framework}.{label_framework_version}"
            ] = "true"
            labels[
                f"com.amazonaws.ml.engines.{cx_type}.dlc.device.{label_device_type}"
            ] = "true"
            labels[f"com.amazonaws.ml.engines.{cx_type}.dlc.arch.{label_arch}"] = "true"
            # python version label will look like py_version.py36, for example
            labels[
                f"com.amazonaws.ml.engines.{cx_type}.dlc.python.{label_python_version}"
            ] = "true"
            labels[
                f"com.amazonaws.ml.engines.{cx_type}.dlc.os.{label_os_version}"
            ] = "true"

            labels[
                f"com.amazonaws.ml.engines.{cx_type}.dlc.job.{label_job_type}"
            ] = "true"

            if label_contributor:
                labels[
                    f"com.amazonaws.ml.engines.{cx_type}.dlc.contributor.{label_contributor}"
                ] = "true"
            if transformers_version:
                labels[
                    f"com.amazonaws.ml.engines.{cx_type}.dlc.lib.transformers.{label_transformers_version}"
                ] = "true"

        # Override parameters from parent in child.
        info = {
            "account_id": str(BUILDSPEC["account_id"]),
            "region": str(BUILDSPEC["region"]),
            "framework": framework,
            "version": str(BUILDSPEC["version"]),
            "root": str(image_config["root"]),
            "name": str(image_name),
            "device_type": str(image_config["device_type"]),
            "python_version": str(image_config["python_version"]),
            "image_type": str(image_config["image_type"]),
            "image_size_baseline": int(image_config["image_size_baseline"]),
            "base_image_uri": base_image_uri,
            "enable_test_promotion": image_config.get("enable_test_promotion", True),
            "labels": labels,
            "extra_build_args": extra_build_args,
        }

        # Create pre_push stage docker object
        pre_push_stage_image_object = DockerImage(
            info=info,
            dockerfile=image_config["docker_file"],
            repository=image_repo_uri,
            tag=append_tag(image_tag, "pre-push"),
            to_build=image_config["build"],
            stage=constants.PRE_PUSH_STAGE,
            context=context,
            additional_tags=[image_tag],
            target=image_config.get("target"),
        )

        ##### Create Common stage docker object #####
        # If for a pre_push stage image we create a common stage image, then we do not
        # push the pre_push stage image to the repository. Instead, we just push its
        # common stage image to the repository. Per the original note, the pre_push
        # stage image object is made non pushable inside the helper called below.
        common_stage_image_object = generate_common_stage_image_object(
            pre_push_stage_image_object, image_tag
        )
        COMMON_STAGE_IMAGES.append(common_stage_image_object)

        PRE_PUSH_STAGE_IMAGES.append(pre_push_stage_image_object)
        FORMATTER.separator()

    FORMATTER.banner("DLC")

    # Parent images do not inherit from any containers built in this job
    # Child images use one of the parent images as their base image
    parent_images = [
        image for image in PRE_PUSH_STAGE_IMAGES if not image.is_child_image
    ]
    child_images = [image for image in PRE_PUSH_STAGE_IMAGES if image.is_child_image]
    ALL_IMAGES = PRE_PUSH_STAGE_IMAGES + COMMON_STAGE_IMAGES
    IMAGES_TO_PUSH = [image for image in ALL_IMAGES if image.to_push and image.to_build]

    # Parents are processed first so that children can build on top of them.
    pushed_images = []
    pushed_images += process_images(parent_images, "Parent/Independent")
    pushed_images += process_images(child_images, "Child/Dependent")

    assert all(
        image in pushed_images for image in IMAGES_TO_PUSH
    ), "Few images could not be pushed."

    # After the build, display logs/summary for all the images.
    FORMATTER.banner("Summary")
    show_build_info(ALL_IMAGES)

    FORMATTER.banner("Errors")
    is_any_build_failed, is_any_build_failed_size_limit = show_build_errors(ALL_IMAGES)

    # From all images, filter the images that were supposed to be built and upload their metrics
    BUILT_IMAGES = [image for image in ALL_IMAGES if image.to_build]

    FORMATTER.banner("Upload Metrics")
    upload_metrics(
        BUILT_IMAGES, BUILDSPEC, is_any_build_failed, is_any_build_failed_size_limit
    )

    FORMATTER.banner("Test Env")
    # Set environment variables to be consumed by test jobs
    test_trigger_job = get_codebuild_project_name()
    # Tests should only run on images that were pushed to the repository
    if not is_build_enabled():
        # Ensure we have images populated if do_build is false, so that tests can proceed if needed
        images_to_test = [image for image in ALL_IMAGES if image.to_push]
    else:
        images_to_test = IMAGES_TO_PUSH

    utils.set_test_env(
        images_to_test,
        use_latest_additional_tag=True,
        BUILD_CONTEXT=os.getenv("BUILD_CONTEXT"),
        TEST_TRIGGER=test_trigger_job,
    )