def sagemaker_logs_metrics_policy_document(scope, id):
    """
    sagemaker_logs_metrics_policy_document creates an IAM policy that allows SageMaker jobs
    to write CloudWatch logs and metrics

    :scope: CDK Construct scope that's needed to create CDK resources
    :id: CDK resource id
    :return: iam.Policy CDK object
    """
    policy = iam.Policy(
        scope,
        id,
        statements=[
            iam.PolicyStatement(
                actions=[
                    "logs:CreateLogGroup",
                    "logs:CreateLogStream",
                    "logs:DescribeLogStreams",
                    "logs:GetLogEvents",
                    "logs:PutLogEvents",
                ],
                resources=[
                    f"arn:{core.Aws.PARTITION}:logs:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}:log-group:/aws/sagemaker/*"
                ],
            ),
            iam.PolicyStatement(
                actions=[
                    "cloudwatch:PutMetricData",
                ],
                resources=["*"],  # PutMetricData can not be bound to resources other than *
            ),
        ],
    )
    policy.node.default_child.cfn_options.metadata = suppress_cloudwatch_policy()
    return policy
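
# Illustrative usage of the helper above (a sketch, not part of the solution's
# stacks): the returned iam.Policy can be attached to any role that runs
# SageMaker jobs. `self` and `processing_role` are hypothetical names assumed
# to exist in a calling stack.
#
#   logs_metrics_policy = sagemaker_logs_metrics_policy_document(self, "SagemakerLogsMetricsPolicy")
#   logs_metrics_policy.attach_to_role(processing_role)
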
def create_monitoring_schedule(
    scope,
    blueprint_bucket,
    assets_bucket,
    baseline_job_output_location,
    baseline_job_name,
    monitoring_schedule_name,
    monitoring_output_location,
    schedule_expression,
    endpoint_name,
    instance_type,
    instance_volume_size,
    max_runtime_seconds,
    monitoring_type,
    stack_name,
):
    """
    create_monitoring_schedule creates a model monitoring schedule in a lambda invoked codepipeline action

    :scope: CDK Construct scope that's needed to create CDK resources
    :blueprint_bucket: CDK object of the blueprint bucket that contains resources for BYOM pipeline
    :assets_bucket: the bucket cdk object where pipeline assets are stored
    :baseline_job_output_location: S3 prefix in the S3 assets bucket to store the output of the baseline job
    :baseline_job_name: name of the baseline job
    :monitoring_schedule_name: name of the monitoring schedule to be created
    :monitoring_output_location: S3 prefix in the S3 assets bucket to store the output of the monitoring job
    :schedule_expression: cron expression that defines how often the monitoring job runs
    :endpoint_name: name of the deployed SageMaker endpoint to be monitored
    :instance_type: compute instance type for the baseline job, in the form of a CDK CfnParameter object
    :instance_volume_size: volume size of the EC2 instance
    :max_runtime_seconds: max time the job is allowed to run
    :monitoring_type: type of monitoring to be created
    :stack_name: name of the model monitoring stack
    :return: codepipeline action in a form of a CDK object that can be attached to a codepipeline stage
    """
    create_monitoring_schedule_policy = iam.PolicyStatement(
        actions=[
            "sagemaker:DescribeEndpointConfig",
            "sagemaker:DescribeEndpoint",
            "sagemaker:CreateMonitoringSchedule",
            "sagemaker:DescribeMonitoringSchedule",
            "sagemaker:StopMonitoringSchedule",
            "sagemaker:DeleteMonitoringSchedule",
            "sagemaker:DescribeProcessingJob",
        ],
        resources=[
            (
                f"arn:{core.Aws.PARTITION}:sagemaker:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}:"
                f"endpoint/{endpoint_name.value_as_string}*"
            ),
            (
                f"arn:{core.Aws.PARTITION}:sagemaker:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}:"
                f"endpoint-config/{endpoint_name.value_as_string}*"
            ),
            (
                f"arn:{core.Aws.PARTITION}:sagemaker:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}:"
                f"monitoring-schedule/{monitoring_schedule_name.value_as_string}"
            ),
            (
                f"arn:{core.Aws.PARTITION}:sagemaker:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}:"
                f"processing-job/{baseline_job_name.value_as_string}"
            ),
        ],
    )
    s3_policy = iam.PolicyStatement(
        actions=[
            "s3:ListBucket",
            "s3:GetObject",
            "s3:PutObject",
        ],
        resources=[
            assets_bucket.bucket_arn,
            assets_bucket.arn_for_objects("*"),
        ],
    )
    sagemaker_logs_policy = iam.PolicyStatement(
        actions=[
            "cloudwatch:PutMetricData",
            "logs:CreateLogStream",
            "logs:PutLogEvents",
            "logs:CreateLogGroup",
            "logs:DescribeLogStreams",
        ],
        resources=["*"],
    )
    # create the role that SageMaker assumes to run the monitoring schedule
    sagemaker_role = iam.Role(
        scope,
        "create_monitoring_schedule_sagemaker_role",
        assumed_by=iam.ServicePrincipal("sagemaker.amazonaws.com"),
        description="Role that SageMaker assumes to run the monitoring schedule created by the pipeline.",
    )
    # create a trust relation so the role can assume itself
    sagemaker_role.add_to_policy(
        iam.PolicyStatement(actions=["sts:AssumeRole"], resources=[sagemaker_role.role_arn])
    )
    # creating a role so that this lambda can create a monitoring schedule
    lambda_role = iam.Role(
        scope,
        "create_monitoring_schedule_role",
        assumed_by=iam.ServicePrincipal("lambda.amazonaws.com"),
        description="Role that the create_monitoring_schedule Lambda function assumes to create a monitoring schedule in the pipeline.",
    )
    sagemaker_role.add_to_policy(create_monitoring_schedule_policy)
    sagemaker_role.add_to_policy(sagemaker_logs_policy)
    sagemaker_role.add_to_policy(s3_policy)
    sagemaker_role_nodes = sagemaker_role.node.find_all()
    sagemaker_role_nodes[2].node.default_child.cfn_options.metadata = suppress_pipeline_policy()
    lambda_role.add_to_policy(
        iam.PolicyStatement(actions=["iam:PassRole"], resources=[sagemaker_role.role_arn])
    )
    lambda_role.add_to_policy(create_monitoring_schedule_policy)
    lambda_role.add_to_policy(s3_policy)
    add_logs_policy(lambda_role)

    # defining the lambda function that gets invoked in this stage
    create_monitoring_schedule_lambda = lambda_.Function(
        scope,
        "create_monitoring_schedule",
        runtime=lambda_.Runtime.PYTHON_3_8,
        handler="main.handler",
        role=lambda_role,
        code=lambda_.Code.from_bucket(
            blueprint_bucket, "blueprints/byom/lambdas/create_model_monitoring_schedule.zip"
        ),
        environment={
            "BASELINE_JOB_NAME": baseline_job_name.value_as_string,
            "BASELINE_JOB_OUTPUT_LOCATION": baseline_job_output_location.value_as_string,
            "ASSETS_BUCKET": assets_bucket.bucket_name,
            "SAGEMAKER_ENDPOINT_NAME": endpoint_name.value_as_string,
            "MONITORING_SCHEDULE_NAME": monitoring_schedule_name.value_as_string,
            "MONITORING_OUTPUT_LOCATION": monitoring_output_location.value_as_string,
            "SCHEDULE_EXPRESSION": schedule_expression.value_as_string,
            "INSTANCE_TYPE": instance_type.value_as_string,
            "INSTANCE_VOLUME_SIZE": instance_volume_size.value_as_string,
            "MAX_RUNTIME_SECONDS": max_runtime_seconds.value_as_string,
            "ROLE_ARN": sagemaker_role.role_arn,
            "MONITORING_TYPE": monitoring_type.value_as_string,
            "STACK_NAME": stack_name,
            "LOG_LEVEL": "INFO",
        },
        timeout=core.Duration.minutes(10),
    )
    create_monitoring_schedule_lambda.node.default_child.cfn_options.metadata = suppress_cloudwatch_policy()
    role_child_nodes = create_monitoring_schedule_lambda.role.node.find_all()
    role_child_nodes[2].node.default_child.cfn_options.metadata = suppress_pipeline_policy()

    # Create codepipeline action
    create_monitoring_schedule_action = codepipeline_actions.LambdaInvokeAction(
        action_name="create_monitoring_schedule",
        inputs=[],
        outputs=[],
        variables_namespace="monitoring_schedule",
        lambda_=create_monitoring_schedule_lambda,
        run_order=2,  # this runs second in the deploy stage
    )
    return (create_monitoring_schedule_lambda.function_arn, create_monitoring_schedule_action)
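
# Illustrative wiring of create_monitoring_schedule (a sketch; `pipeline`, the
# CfnParameter objects, and the buckets are assumed to be defined by a calling
# stack, and the stage layout is hypothetical):
#
#   schedule_lambda_arn, schedule_action = create_monitoring_schedule(
#       self, blueprint_bucket, assets_bucket, baseline_job_output_location,
#       baseline_job_name, monitoring_schedule_name, monitoring_output_location,
#       schedule_expression, endpoint_name, instance_type, instance_volume_size,
#       max_runtime_seconds, monitoring_type, core.Aws.STACK_NAME,
#   )
#   deploy_stage = pipeline.add_stage(stage_name="Deploy")
#   deploy_stage.add_action(schedule_action)  # runs second in its stage (run_order=2)
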
def create_model(
    scope,
    blueprint_bucket,
    assets_bucket,
    model_name,
    model_artifact_location,
    custom_container,
    model_framework,
    model_framework_version,
    container_uri,
    sm_layer,
):
    """
    create_model creates a sagemaker model in a lambda invoked codepipeline action

    :scope: CDK Construct scope that's needed to create CDK resources
    :blueprint_bucket: CDK object of the blueprint bucket that contains resources for BYOM pipeline
    :assets_bucket: the bucket cdk object where pipeline assets are stored
    :model_name: name of the sagemaker model to be created, in the form of a CDK CfnParameter object
    :model_artifact_location: path to the model artifact in the S3 bucket: assets_bucket
    :custom_container: whether the model uses a custom algorithm or a built-in sagemaker algorithm, in the form of a CDK CfnParameter object
    :model_framework: name of the framework if the model uses a built-in sagemaker algorithm, in the form of a CDK CfnParameter object
    :model_framework_version: version of the framework if the model uses a built-in sagemaker algorithm, in the form of a CDK CfnParameter object
    :container_uri: URI of the container registry image that serves the model if the model uses a custom algorithm
    :sm_layer: sagemaker lambda layer
    :return: codepipeline action in a form of a CDK object that can be attached to a codepipeline stage
    """
    create_model_policy = iam.PolicyStatement(
        actions=[
            "sagemaker:CreateModel",
            "sagemaker:DescribeModel",
            "sagemaker:DeleteModel",
        ],
        resources=[
            # The Lambda that uses this policy requires access to all objects in the assets bucket
            f"arn:{core.Aws.PARTITION}:s3:::{assets_bucket.bucket_name}/*",
            (
                f"arn:{core.Aws.PARTITION}:sagemaker:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}"
                f":model/{model_name.value_as_string}"
            ),
        ],
    )
    s3_policy = iam.PolicyStatement(
        actions=[
            "s3:GetObject",
            "s3:PutObject",
            "s3:ListBucket",
        ],
        resources=[
            assets_bucket.arn_for_objects("*"),
            assets_bucket.bucket_arn,
        ],
    )
    # creating this policy so that sagemaker can create an endpoint for a custom model
    ecr_policy = iam.PolicyStatement(
        actions=[
            "ecr:BatchGetImage",
            "ecr:BatchCheckLayerAvailability",
            "ecr:DescribeImages",
            "ecr:DescribeRepositories",
            "ecr:GetDownloadUrlForLayer",
        ],
        resources=[
            f"arn:{core.Aws.PARTITION}:ecr:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}:repository/awsmlopsmodels*"
        ],
    )
    ecr_token_policy = iam.PolicyStatement(
        actions=["ecr:GetAuthorizationToken"],
        resources=["*"],  # GetAuthorizationToken can not be bound to resources other than *
    )
    # creating a role for the lambda function so that it can create a model in sagemaker
    sagemaker_role = iam.Role(
        scope,
        "create_model_sagemaker_role",
        assumed_by=iam.ServicePrincipal("sagemaker.amazonaws.com"),
        description="Role that SageMaker assumes to create a model in the pipeline.",
    )
    lambda_role = iam.Role(
        scope,
        "create_model_lambda_role",
        assumed_by=iam.ServicePrincipal("lambda.amazonaws.com"),
        description="Role that the create sagemaker model Lambda function assumes to create a model in the pipeline.",
    )
    sagemaker_role.add_to_policy(create_model_policy)
    sagemaker_role.add_to_policy(s3_policy)
    sagemaker_role.add_to_policy(ecr_policy)
    sagemaker_role.add_to_policy(ecr_token_policy)
    sagemaker_role_nodes = sagemaker_role.node.find_all()
    sagemaker_role_nodes[2].node.default_child.cfn_options.metadata = suppress_ecr_policy()
    lambda_role.add_to_policy(
        iam.PolicyStatement(actions=["iam:PassRole"], resources=[sagemaker_role.role_arn])
    )
    lambda_role.add_to_policy(create_model_policy)
    lambda_role.add_to_policy(s3_policy)
    add_logs_policy(lambda_role)

    # defining the lambda function that gets invoked by codepipeline in this step
    create_sagemaker_model = lambda_.Function(
        scope,
        "create_sagemaker_model",
        runtime=lambda_.Runtime.PYTHON_3_8,
        handler="main.handler",
        timeout=core.Duration.seconds(60),
        code=lambda_.Code.from_bucket(
            blueprint_bucket, "blueprints/byom/lambdas/create_sagemaker_model.zip"
        ),
        layers=[sm_layer],
        role=lambda_role,
        environment={
            "custom_container": custom_container.value_as_string,
            "model_framework": model_framework.value_as_string,
            "model_framework_version": model_framework_version.value_as_string,
            "model_name": model_name.value_as_string,
            "model_artifact_location": assets_bucket.s3_url_for_object(model_artifact_location.value_as_string),
            "create_model_role_arn": sagemaker_role.role_arn,
            "container_uri": container_uri,
            "LOG_LEVEL": "INFO",
        },
    )
    create_sagemaker_model.node.default_child.cfn_options.metadata = suppress_cloudwatch_policy()
    role_child_nodes = create_sagemaker_model.role.node.find_all()
    role_child_nodes[2].node.default_child.cfn_options.metadata = suppress_pipeline_policy()

    # creating the codepipeline action that invokes the create model lambda
    create_sagemaker_model_action = codepipeline_actions.LambdaInvokeAction(
        action_name="create_sagemaker_model",
        inputs=[],
        outputs=[],
        lambda_=create_sagemaker_model,
        run_order=1,  # runs first in the Deploy stage
    )
    return (create_sagemaker_model.function_arn, create_sagemaker_model_action)
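
# Illustrative pairing (a sketch with hypothetical names): the returned action
# uses run_order=1, so actions created by the other helpers in this module
# (create_endpoint, batch_transform, create_monitoring_schedule) can follow it
# in the same stage at run_order=2.
#
#   model_lambda_arn, model_action = create_model(
#       self, blueprint_bucket, assets_bucket, model_name, model_artifact_location,
#       custom_container, model_framework, model_framework_version,
#       container_uri, sm_layer,
#   )
#   deploy_stage.add_action(model_action)  # runs first (run_order=1)
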
def batch_transform(
    scope,
    blueprint_bucket,
    assets_bucket,
    model_name,
    inference_instance,
    batch_inference_data,
    sm_layer,
):
    """
    batch_transform creates a sagemaker batch transform job in a lambda invoked codepipeline action

    :scope: CDK Construct scope that's needed to create CDK resources
    :blueprint_bucket: CDK object of the blueprint bucket that contains resources for BYOM pipeline
    :assets_bucket: the bucket cdk object where pipeline assets are stored
    :model_name: name of the sagemaker model to be created, in the form of a CDK CfnParameter object
    :inference_instance: compute instance type for the sagemaker batch transform job, in the form of a CDK CfnParameter object
    :batch_inference_data: location of the batch inference data in assets bucket, in the form of a CDK CfnParameter object
    :sm_layer: sagemaker lambda layer
    :return: codepipeline action in a form of a CDK object that can be attached to a codepipeline stage
    """
    batch_transform_policy = iam.PolicyStatement(
        actions=[
            "sagemaker:CreateTransformJob",
            "s3:ListBucket",
            "s3:GetObject",
            "s3:PutObject",
        ],
        resources=[
            assets_bucket.bucket_arn,
            assets_bucket.arn_for_objects("*"),
            (
                f"arn:{core.Aws.PARTITION}:sagemaker:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}:"
                f"transform-job/{model_name.value_as_string}-*"
            ),
        ],
    )
    lambda_role = iam.Role(
        scope,
        "batch_transform_lambda_role",
        assumed_by=iam.ServicePrincipal("lambda.amazonaws.com"),
        description=(
            "Role that the batch transform Lambda function assumes to create a sagemaker batch transform "
            "job in the aws mlops pipeline."
        ),
    )
    lambda_role.add_to_policy(batch_transform_policy)
    lambda_role.add_to_policy(codepipeline_policy())
    add_logs_policy(lambda_role)

    # defining the batch transform lambda function
    batch_transform_lambda = lambda_.Function(
        scope,
        "batch_transform",
        runtime=lambda_.Runtime.PYTHON_3_8,
        handler="main.handler",
        layers=[sm_layer],
        role=lambda_role,
        code=lambda_.Code.from_bucket(
            blueprint_bucket, "blueprints/byom/lambdas/batch_transform.zip"
        ),
        environment={
            "model_name": model_name.value_as_string,
            "inference_instance": inference_instance.value_as_string,
            "assets_bucket": assets_bucket.bucket_name,
            "batch_inference_data": batch_inference_data.value_as_string,
            "LOG_LEVEL": "INFO",
        },
    )
    batch_transform_lambda.node.default_child.cfn_options.metadata = suppress_cloudwatch_policy()
    role_child_nodes = batch_transform_lambda.role.node.find_all()
    role_child_nodes[2].node.default_child.cfn_options.metadata = suppress_pipeline_policy()

    batch_transform_action = codepipeline_actions.LambdaInvokeAction(
        action_name="batch_transform",
        inputs=[],
        outputs=[],
        variables_namespace="batch_transform",
        lambda_=batch_transform_lambda,
        run_order=2,  # this runs second in the deploy stage
    )
    return (batch_transform_lambda.function_arn, batch_transform_action)
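
# Illustrative choice between inference types (a sketch; a calling stack would
# typically attach either this batch transform action or the realtime endpoint
# actions below, e.g. behind a CfnCondition on a hypothetical `inference_type`
# parameter):
#
#   _, transform_action = batch_transform(
#       self, blueprint_bucket, assets_bucket, model_name,
#       inference_instance, batch_inference_data, sm_layer,
#   )
#   deploy_stage.add_action(transform_action)  # run_order=2, after create_model
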
def create_endpoint(scope, blueprint_bucket, assets_bucket, model_name, inference_instance):
    """
    create_endpoint creates a sagemaker inference endpoint in a lambda invoked codepipeline action

    :scope: CDK Construct scope that's needed to create CDK resources
    :blueprint_bucket: CDK object of the blueprint bucket that contains resources for BYOM pipeline
    :assets_bucket: the bucket cdk object where pipeline assets are stored
    :model_name: name of the sagemaker model to be created, in the form of a CDK CfnParameter object
    :inference_instance: compute instance type for the sagemaker inference endpoint, in the form of a CDK CfnParameter object
    :return: codepipeline action in a form of a CDK object that can be attached to a codepipeline stage
    """
    create_endpoint_policy = iam.PolicyStatement(
        actions=[
            "sagemaker:CreateEndpoint",
            "sagemaker:CreateEndpointConfig",
            "sagemaker:DeleteEndpointConfig",
            "sagemaker:DescribeEndpointConfig",
            "sagemaker:DescribeEndpoint",
        ],
        resources=[
            (
                f"arn:{core.Aws.PARTITION}:sagemaker:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}:"
                f"endpoint/{model_name.value_as_string}-endpoint"
            ),
            (
                f"arn:{core.Aws.PARTITION}:sagemaker:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}:"
                f"endpoint-config/{model_name.value_as_string}-endpoint-config"
            ),
        ],
    )
    # creating a role so that this lambda can create a sagemaker endpoint and endpoint config
    lambda_role = iam.Role(
        scope,
        "create_endpoint_lambda_role",
        assumed_by=iam.ServicePrincipal("lambda.amazonaws.com"),
        description="Role that the create endpoint Lambda function assumes to create a sagemaker endpoint in the pipeline.",
    )
    lambda_role.add_to_policy(create_endpoint_policy)
    add_logs_policy(lambda_role)

    # defining the lambda function that gets invoked in this stage
    create_sagemaker_endpoint = lambda_.Function(
        scope,
        "create_sagemaker_endpoint",
        runtime=lambda_.Runtime.PYTHON_3_8,
        handler="main.handler",
        role=lambda_role,
        code=lambda_.Code.from_bucket(
            blueprint_bucket, "blueprints/byom/lambdas/create_sagemaker_endpoint.zip"
        ),
        environment={
            "model_name": model_name.value_as_string,
            "inference_instance": inference_instance.value_as_string,
            "assets_bucket": assets_bucket.bucket_name,
            "LOG_LEVEL": "INFO",
        },
        timeout=core.Duration.minutes(10),
    )
    create_sagemaker_endpoint.node.default_child.cfn_options.metadata = suppress_cloudwatch_policy()
    role_child_nodes = create_sagemaker_endpoint.role.node.find_all()
    role_child_nodes[2].node.default_child.cfn_options.metadata = suppress_pipeline_policy()

    create_endpoint_action = codepipeline_actions.LambdaInvokeAction(
        action_name="create_sagemaker_endpoint",
        inputs=[],
        outputs=[],
        variables_namespace="sagemaker_endpoint",
        lambda_=create_sagemaker_endpoint,
        run_order=2,  # this runs second in the deploy stage
    )
    return (create_sagemaker_endpoint.function_arn, create_endpoint_action)
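
# Illustrative downstream reference (a sketch): because the action above sets
# variables_namespace="sagemaker_endpoint", later actions in the same pipeline
# can consume variables published by the Lambda, e.g. the
# "#{sagemaker_endpoint.endpointName}" token used by configure_inference below.
#
#   _, endpoint_action = create_endpoint(
#       self, blueprint_bucket, assets_bucket, model_name, inference_instance
#   )
#   deploy_stage.add_action(endpoint_action)  # run_order=2, after create_model
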
def configure_inference(scope, blueprint_bucket):
    """
    configure_inference updates the inference lambda function's environment variables with the
    deployed Sagemaker endpoint name as a lambda invoked codepipeline action

    :scope: CDK Construct scope that's needed to create CDK resources
    :blueprint_bucket: CDK object of the blueprint bucket that contains resources for BYOM pipeline
    :return: codepipeline action in a form of a CDK object that can be attached to a codepipeline stage
    """
    # provision api gateway and lambda for inference using solution constructs
    inference_api_gateway = aws_apigateway_lambda.ApiGatewayToLambda(
        scope,
        "BYOMInference",
        lambda_function_props={
            "runtime": lambda_.Runtime.PYTHON_3_8,
            "handler": "main.handler",
            "code": lambda_.Code.from_bucket(blueprint_bucket, "blueprints/byom/lambdas/inference.zip"),
        },
        api_gateway_props={
            "defaultMethodOptions": {
                "authorizationType": apigw.AuthorizationType.IAM,
            },
            "restApiName": f"{core.Aws.STACK_NAME}-inference",
            "proxy": False,
        },
    )
    provision_resource = inference_api_gateway.api_gateway.root.add_resource("inference")
    provision_resource.add_method("POST")
    inference_api_gateway.lambda_function.add_to_role_policy(
        iam.PolicyStatement(
            actions=[
                "sagemaker:InvokeEndpoint",
            ],
            resources=[
                f"arn:{core.Aws.PARTITION}:sagemaker:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}:endpoint/*",
            ],
        )
    )

    # lambda function that gets invoked from codepipeline
    configure_inference_lambda = lambda_.Function(
        scope,
        "configure_inference_lambda",
        runtime=lambda_.Runtime.PYTHON_3_8,
        handler="main.handler",
        code=lambda_.Code.from_bucket(blueprint_bucket, "blueprints/byom/lambdas/configure_inference_lambda.zip"),
        environment={
            "inference_lambda_arn": inference_api_gateway.lambda_function.function_arn,
            "LOG_LEVEL": "INFO",
        },
    )
    configure_inference_lambda.node.default_child.cfn_options.metadata = suppress_cloudwatch_policy()

    # iam permissions to respond to codepipeline and update the inference lambda
    configure_inference_lambda.add_to_role_policy(
        iam.PolicyStatement(
            actions=[
                "lambda:UpdateFunctionConfiguration",
            ],
            resources=[inference_api_gateway.lambda_function.function_arn],
        )
    )
    configure_inference_lambda.add_to_role_policy(codepipeline_policy())
    role_child_nodes = configure_inference_lambda.role.node.find_all()
    role_child_nodes[2].node.default_child.cfn_options.metadata = {
        "cfn_nag": {
            "rules_to_suppress": [
                {
                    "id": "W12",
                    "reason": (
                        "The codepipeline permissions PutJobSuccessResult and PutJobFailureResult "
                        "are not able to be bound to resources."
                    ),
                }
            ]
        }
    }

    # configuring the codepipeline action that invokes the lambda
    configure_inference_action = codepipeline_actions.LambdaInvokeAction(
        action_name="configure_inference_lambda",
        inputs=[],
        outputs=[],
        # passing the endpoint name from the previous action's variables namespace
        user_parameters=[{"endpointName": "#{sagemaker_endpoint.endpointName}"}],
        lambda_=configure_inference_lambda,
    )
    return (configure_inference_lambda.function_arn, configure_inference_action)
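
# Illustrative end-to-end Deploy stage for realtime inference (a sketch reusing
# the hypothetical actions from the earlier sketches; the
# "#{sagemaker_endpoint.endpointName}" token only resolves when the
# create_endpoint action runs in the same pipeline):
#
#   _, configure_action = configure_inference(self, blueprint_bucket)
#   pipeline.add_stage(
#       stage_name="Deploy",
#       actions=[model_action, endpoint_action, configure_action],
#   )
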