def get_notebook_prefix(self):
    """Return the base64-encoded S3 key prefix under which notebooks live.

    Solution builds derive the prefix from the SourceCode CFN mapping;
    otherwise a plain "notebooks" prefix is used.
    """
    if self._is_solution_build():
        key_prefix = Fn.find_in_map("SourceCode", "General", "KeyPrefix")
        prefix = Fn.sub("${prefix}/notebooks", variables={"prefix": key_prefix})
    else:
        prefix = "notebooks"
    return Fn.base64(prefix)
def get_notebook_source(self, data_bucket: IBucket):
    """Return the base64-encoded name of the bucket that holds notebook source.

    Solution builds use the regionalized bucket name from the SourceCode CFN
    mapping; otherwise the provided data bucket's name is used.
    """
    if not self._is_solution_build():
        return Fn.base64(data_bucket.bucket_name)
    regional_bucket = Fn.sub(
        "${bucket}-${region}",
        variables={
            "bucket": Fn.find_in_map("SourceCode", "General", "S3Bucket"),
            "region": Aws.REGION,
        },
    )
    return Fn.base64(regional_bucket)
def __init__(
    self,
    scope: Construct,
    construct_id: str,
    *,
    deploy_env: str,
    processing_assets_table: aws_dynamodb.Table,
):  # pylint: disable=too-many-locals
    """Create the AWS Batch processing infrastructure.

    Builds, in order: instance role/profile, an EC2 launch template that
    configures the ECS agent, a Batch compute environment inside an
    existing (looked-up) VPC, and a job queue exposed as ``self.job_queue``.

    Args:
        scope: Parent construct.
        construct_id: Logical ID of this construct.
        deploy_env: Deployment environment name; "prod" selects larger
            c5 instance types, anything else selects smaller m5 types.
        processing_assets_table: DynamoDB table the batch jobs read/write;
            the instance role is granted read/write access to it.
    """
    super().__init__(scope, construct_id)

    # Instance type fleet depends on environment: prod gets a range of c5
    # sizes, non-prod gets cheaper m5 sizes.
    if deploy_env == "prod":
        instance_types = [
            aws_ec2.InstanceType("c5.xlarge"),
            aws_ec2.InstanceType("c5.2xlarge"),
            aws_ec2.InstanceType("c5.4xlarge"),
            aws_ec2.InstanceType("c5.9xlarge"),
        ]
    else:
        instance_types = [
            aws_ec2.InstanceType("m5.large"),
            aws_ec2.InstanceType("m5.xlarge"),
        ]

    # Role assumed by the EC2 container instances; needs the AWS-managed
    # ECS-for-EC2 policy plus access to the processing assets table.
    ec2_policy = aws_iam.ManagedPolicy.from_aws_managed_policy_name(
        "service-role/AmazonEC2ContainerServiceforEC2Role")
    batch_instance_role = aws_iam.Role(
        self,
        "batch-instance-role",
        assumed_by=aws_iam.ServicePrincipal(
            "ec2.amazonaws.com"),  # type: ignore[arg-type]
        managed_policies=[ec2_policy],
    )
    processing_assets_table.grant_read_write_data(
        batch_instance_role)  # type: ignore[arg-type]
    batch_instance_profile = aws_iam.CfnInstanceProfile(
        self,
        "batch-instance-profile",
        roles=[batch_instance_role.role_name],
    )

    # cloud-init multipart user data: tell the ECS agent to prefer cached
    # images so container instances start jobs faster.
    # NOTE(review): exact blank-line layout of this MIME document was
    # reconstructed per standard multipart formatting — confirm against VCS.
    batch_launch_template_data = textwrap.dedent(
        """
        MIME-Version: 1.0
        Content-Type: multipart/mixed; boundary="==MYBOUNDARY=="

        --==MYBOUNDARY==
        Content-Type: text/x-shellscript; charset="us-ascii"

        #!/bin/bash
        echo ECS_IMAGE_PULL_BEHAVIOR=prefer-cached >> /etc/ecs/ecs.config

        --==MYBOUNDARY==--
        """)
    launch_template_data = aws_ec2.CfnLaunchTemplate.LaunchTemplateDataProperty(
        user_data=Fn.base64(batch_launch_template_data.strip()))
    cloudformation_launch_template = aws_ec2.CfnLaunchTemplate(
        self,
        "batch-launch-template",
        launch_template_name=f"{deploy_env}-datalake-batch-launch-template",
        launch_template_data=launch_template_data,
    )
    # launch_template_name was set explicitly above, so it cannot be None;
    # the assert narrows the Optional type for the spec below.
    assert cloudformation_launch_template.launch_template_name is not None
    launch_template = aws_batch.LaunchTemplateSpecification(
        launch_template_name=cloudformation_launch_template.
        launch_template_name)

    # use existing VPC in LINZ AWS account.
    # VPC with these tags is required to exist in AWS account before being deployed.
    # A VPC will not be deployed by this project.
    vpc = aws_ec2.Vpc.from_lookup(
        self,
        "datalake-vpc",
        tags={
            APPLICATION_NAME_TAG_NAME: APPLICATION_NAME,
            "ApplicationLayer": "networking",
        },
    )

    # Compute environment scales from 0 to 1000 vCPUs; BEST_FIT_PROGRESSIVE
    # lets Batch fall back to additional instance types when capacity is low.
    compute_resources = aws_batch.ComputeResources(
        vpc=vpc,
        minv_cpus=0,
        desiredv_cpus=0,
        maxv_cpus=1000,
        instance_types=instance_types,
        instance_role=batch_instance_profile.instance_profile_name,
        allocation_strategy=aws_batch.AllocationStrategy(
            "BEST_FIT_PROGRESSIVE"),
        launch_template=launch_template,
    )
    batch_service_policy = aws_iam.ManagedPolicy.from_aws_managed_policy_name(
        "service-role/AWSBatchServiceRole")
    service_role = aws_iam.Role(
        self,
        "batch-service-role",
        assumed_by=aws_iam.ServicePrincipal(
            "batch.amazonaws.com"),  # type: ignore[arg-type]
        managed_policies=[batch_service_policy],
    )
    compute_environment = aws_batch.ComputeEnvironment(
        self,
        "compute-environment",
        compute_resources=compute_resources,
        service_role=service_role,  # type: ignore[arg-type]
    )

    # Job queue is parented on `scope` (not self) so its logical ID is
    # derived from the construct_id prefix.
    self.job_queue = aws_batch.JobQueue(
        scope,
        f"{construct_id}-job-queue",
        compute_environments=[
            aws_batch.JobQueueComputeEnvironment(
                compute_environment=compute_environment,
                order=10  # type: ignore[arg-type]
            ),
        ],
        priority=10,
    )
def _add_compute_resource_launch_template(
    self,
    queue,
    compute_resource,
    instance_type,
    queue_pre_install_action,
    queue_post_install_action,
    queue_lt_security_groups,
    queue_placement_group,
):
    """Create the EC2 launch template for one (queue, instance_type) pair.

    The template carries the network interfaces (with optional EFA), spot
    market options when the queue uses SPOT capacity, block devices, IAM
    instance profile, instance/volume tags, and a user-data script whose
    placeholders are filled via Fn.sub from cluster configuration.
    """
    # LT network interfaces
    compute_lt_nw_interfaces = [
        ec2.CfnLaunchTemplate.NetworkInterfaceProperty(
            device_index=0,
            associate_public_ip_address=queue.networking.assign_public_ip
            if compute_resource.max_network_interface_count == 1
            else None,  # parameter not supported for instance types with multiple network interfaces
            interface_type="efa"
            if compute_resource.efa and compute_resource.efa.enabled
            else None,
            groups=queue_lt_security_groups,
            subnet_id=queue.networking.subnet_ids[0],
        )
    ]
    # Additional interfaces (one per network card) for instance types that
    # support more than one; all attach to the queue's first subnet.
    for device_index in range(
            1, compute_resource.max_network_interface_count):
        compute_lt_nw_interfaces.append(
            ec2.CfnLaunchTemplate.NetworkInterfaceProperty(
                device_index=device_index,
                network_card_index=device_index,
                interface_type="efa"
                if compute_resource.efa and compute_resource.efa.enabled
                else None,
                groups=queue_lt_security_groups,
                subnet_id=queue.networking.subnet_ids[0],
            ))

    # One-time spot requests that terminate on interruption; max_price is
    # only set when the config specifies a spot price.
    instance_market_options = None
    if queue.capacity_type == CapacityType.SPOT:
        instance_market_options = ec2.CfnLaunchTemplate.InstanceMarketOptionsProperty(
            market_type="spot",
            spot_options=ec2.CfnLaunchTemplate.SpotOptionsProperty(
                spot_instance_type="one-time",
                instance_interruption_behavior="terminate",
                max_price=None
                if compute_resource.spot_price is None
                else str(compute_resource.spot_price),
            ),
        )

    ec2.CfnLaunchTemplate(
        self.stack_scope,
        f"ComputeServerLaunchTemplate{create_hash_suffix(queue.name + instance_type)}",
        launch_template_name=
        f"{self.stack_name}-{queue.name}-{instance_type}",
        launch_template_data=ec2.CfnLaunchTemplate.
        LaunchTemplateDataProperty(
            instance_type=instance_type,
            # Pin one thread per core when CPU options must be passed in the
            # launch template (disables simultaneous multithreading).
            cpu_options=ec2.CfnLaunchTemplate.CpuOptionsProperty(
                core_count=compute_resource.vcpus, threads_per_core=1)
            if compute_resource.pass_cpu_options_in_launch_template
            else None,
            block_device_mappings=get_block_device_mappings(
                queue.compute_settings.local_storage, self.config.image.os),
            # key_name=,
            network_interfaces=compute_lt_nw_interfaces,
            placement=ec2.CfnLaunchTemplate.PlacementProperty(
                group_name=queue_placement_group),
            image_id=self.config.image_dict[queue.name],
            ebs_optimized=compute_resource.is_ebs_optimized,
            iam_instance_profile=ec2.CfnLaunchTemplate.
            IamInstanceProfileProperty(
                name=self.instance_profiles[queue.name]),
            instance_market_options=instance_market_options,
            # User data: substitute cluster config values into the compute
            # node bootstrap script. "NONE"/"" sentinels mark disabled or
            # absent features for the shell script to test against.
            user_data=Fn.base64(
                Fn.sub(
                    get_user_data_content(
                        "../resources/compute_node/user_data.sh"),
                    {
                        **{
                            "EnableEfa":
                            "efa" if compute_resource.efa
                            and compute_resource.efa.enabled else "NONE",
                            "RAIDOptions":
                            get_shared_storage_options_by_type(
                                self.shared_storage_options,
                                SharedStorageType.RAID),
                            "DisableHyperThreadingManually":
                            "true" if compute_resource.
                            disable_simultaneous_multithreading_manually
                            else "false",
                            "BaseOS":
                            self.config.image.os,
                            "PreInstallScript":
                            queue_pre_install_action.script
                            if queue_pre_install_action else "NONE",
                            "PreInstallArgs":
                            join_shell_args(queue_pre_install_action.args)
                            if queue_pre_install_action
                            and queue_pre_install_action.args else "NONE",
                            "PostInstallScript":
                            queue_post_install_action.script
                            if queue_post_install_action else "NONE",
                            "PostInstallArgs":
                            join_shell_args(queue_post_install_action.args)
                            if queue_post_install_action
                            and queue_post_install_action.args else "NONE",
                            "EFSId":
                            get_shared_storage_ids_by_type(
                                self.shared_storage_mappings,
                                SharedStorageType.EFS),
                            "EFSOptions":
                            get_shared_storage_options_by_type(
                                self.shared_storage_options,
                                SharedStorageType.EFS),  # FIXME
                            "FSXId":
                            get_shared_storage_ids_by_type(
                                self.shared_storage_mappings,
                                SharedStorageType.FSX),
                            "FSXMountName":
                            self.shared_storage_attributes[SharedStorageType.FSX].get(
                                "MountName", ""),
                            "FSXDNSName":
                            self.shared_storage_attributes[SharedStorageType.FSX].get(
                                "DNSName", ""),
                            "FSXOptions":
                            get_shared_storage_options_by_type(
                                self.shared_storage_options,
                                SharedStorageType.FSX),
                            "Scheduler":
                            self.config.scheduling.scheduler,
                            "EphemeralDir":
                            queue.compute_settings.local_storage.ephemeral_volume.mount_dir
                            if queue.compute_settings
                            and queue.compute_settings.local_storage
                            and queue.compute_settings.local_storage.ephemeral_volume
                            else "/scratch",
                            "EbsSharedDirs":
                            get_shared_storage_options_by_type(
                                self.shared_storage_options,
                                SharedStorageType.EBS),
                            "ClusterDNSDomain":
                            str(self.cluster_hosted_zone.name)
                            if self.cluster_hosted_zone else "",
                            "ClusterHostedZone":
                            str(self.cluster_hosted_zone.ref)
                            if self.cluster_hosted_zone else "",
                            "OSUser":
                            OS_MAPPING[self.config.image.os]["user"],
                            "DynamoDBTable":
                            self.dynamodb_table.ref,
                            "LogGroupName":
                            self.log_group.log_group_name
                            if self.config.monitoring.logs.cloud_watch.enabled
                            else "NONE",
                            "IntelHPCPlatform":
                            "true" if self.config.is_intel_hpc_platform_enabled
                            else "false",
                            "CWLoggingEnabled":
                            "true" if self.config.is_cw_logging_enabled
                            else "false",
                            "QueueName":
                            queue.name,
                            "EnableEfaGdr":
                            "compute" if compute_resource.efa
                            and compute_resource.efa.gdr_support else "NONE",
                            "CustomNodePackage":
                            self.config.custom_node_package or "",
                            "CustomAwsBatchCliPackage":
                            self.config.custom_aws_batch_cli_package or "",
                            "ExtraJson":
                            self.config.extra_chef_attributes,
                        },
                        # Shared variables common to all queues/compute
                        # resources, merged over the dict above.
                        **get_common_user_data_env(queue, self.config),
                    },
                )),
            # Detailed monitoring disabled for compute instances.
            monitoring=ec2.CfnLaunchTemplate.MonitoringProperty(
                enabled=False),
            # Tag both the instances and their volumes with defaults, the
            # queue-name tag, and any user-defined custom tags.
            tag_specifications=[
                ec2.CfnLaunchTemplate.TagSpecificationProperty(
                    resource_type="instance",
                    tags=get_default_instance_tags(
                        self.stack_name, self.config, compute_resource,
                        "Compute", self.shared_storage_mappings) + [
                            CfnTag(key=PCLUSTER_QUEUE_NAME_TAG,
                                   value=queue.name)
                        ] + get_custom_tags(self.config),
                ),
                ec2.CfnLaunchTemplate.TagSpecificationProperty(
                    resource_type="volume",
                    tags=get_default_volume_tags(
                        self.stack_name, "Compute") + [
                            CfnTag(key=PCLUSTER_QUEUE_NAME_TAG,
                                   value=queue.name)
                        ] + get_custom_tags(self.config),
                ),
            ],
        ),
    )
def __init__(
    self,
    scope: Construct,
    id: str,
    buckets: Union[List[IBucket], None] = None,
    instance_type: str = "ml.t2.medium",
    instance_volume_size: int = 10,
    notebook_path: Union[Path, None] = None,
    notebook_destination_bucket: IBucket = None,
    notebook_destination_prefix: str = None,
):
    """Create a SageMaker notebook instance for forecast visualization.

    Builds the instance IAM role, a lifecycle config that runs
    ``lifecycle_config.py`` on start, the notebook instance itself (tagged
    with the forecast/notebook bucket names and prefix), and — when a
    notebook path, destination bucket and prefix are all given — a bucket
    deployment that uploads the notebooks.

    Args:
        scope: Parent construct.
        id: Logical ID of this construct.
        buckets: Buckets the notebook role is granted S3 access to;
            defaults to none.
        instance_type: SageMaker instance type for the notebook.
        instance_volume_size: Notebook EBS volume size in GB.
        notebook_path: Local path of notebooks to deploy, if any.
        notebook_destination_bucket: Bucket to receive notebooks and to be
            referenced by the instance tags.
        notebook_destination_prefix: Key prefix for deployed notebooks.
    """
    super().__init__(scope, id)
    self.buckets = buckets if buckets else []
    self.deployment = None
    self.instance = None
    self.policies = NotebookInlinePolicies(self)

    # permissions for the notebook instance
    notebook_role = iam.Role(
        self,
        "InstanceRole",
        assumed_by=iam.ServicePrincipal("sagemaker.amazonaws.com"),
        inline_policies={
            "SagemakerNotebookCloudWatchLogs":
            self.policies.cloudwatch_logs_write(),
            # Use the normalized list so a call with buckets=None does not
            # pass None through to the policy builder (previously passed the
            # raw parameter).
            "ForecastBucketAccessPolicy":
            self.policies.s3_access(self.buckets),
            "SagemakerNotebookListTags":
            self.policies.sagemaker_tags_read(),
            "NotebookBucketAccessPolicy":
            self.policies.s3_solutions_access(),
        },
    )

    # lifecycle configuration: the OnStart script content is the verbatim
    # source of lifecycle_config.py, base64-encoded by CloudFormation.
    lifecycle_config_path = os.path.join(os.path.dirname(__file__),
                                         "lifecycle_config.py")
    # Distinct name for the file handle — previously shadowed by the
    # CfnNotebookInstanceLifecycleConfig construct below.
    with open(lifecycle_config_path, encoding="utf-8") as lifecycle_config_file:
        lifecycle_config_code = lifecycle_config_file.read()

    lifecycle_config = CfnNotebookInstanceLifecycleConfig(
        self, "LifecycleConfig")
    lifecycle_config.add_property_override("OnStart", [{
        "Content": {
            "Fn::Base64": lifecycle_config_code
        }
    }])

    # notebook instance
    # NOTE(review): notebook_destination_bucket defaults to None but is
    # dereferenced unconditionally below — callers must supply it; confirm
    # whether the default should be removed.
    self.instance = CfnNotebookInstance(
        self,
        "NotebookInstance",
        notebook_instance_name=
        f"{Aws.STACK_NAME}-aws-forecast-visualization",
        instance_type=instance_type,
        role_arn=notebook_role.role_arn,
        volume_size_in_gb=instance_volume_size,
        lifecycle_config_name=lifecycle_config.
        attr_notebook_instance_lifecycle_config_name,
        tags=[
            CfnTag(
                key="FORECAST_BUCKET",
                value=Fn.base64(notebook_destination_bucket.bucket_name),
            ),
            CfnTag(
                key="NOTEBOOK_BUCKET",
                value=self.get_notebook_source(notebook_destination_bucket),
            ),
            CfnTag(
                key="NOTEBOOK_PREFIX",
                value=self.get_notebook_prefix(),
            ),
        ],
    )
    add_cfn_nag_suppressions(
        self.instance,
        [
            CfnNagSuppression(
                "W1201",
                "Require access to all resources; Not all Amazon Forecast resources support resource based policy",
            )
        ],
    )
    self.instance.override_logical_id("NotebookInstance")

    # create notebook assets only when all deployment inputs are provided
    if (notebook_path and notebook_destination_prefix
            and notebook_destination_bucket):
        assets = [Source.asset(path=str(notebook_path))]
        self.deployment = BucketDeployment(
            self,
            "Notebooks",
            destination_bucket=notebook_destination_bucket,
            destination_key_prefix=notebook_destination_prefix,
            sources=assets,
        )