Example No. 1
    def __init__(
        self,
        scope: Construct,
        construct_id: str,
        *,
        deploy_env: str,
        directory: str,
        job_role: aws_iam.Role,
    ):
        if deploy_env == "prod":
            batch_job_definition_memory_limit = 3900
        else:
            batch_job_definition_memory_limit = 500

        image = aws_ecs.ContainerImage.from_asset(
            directory=".",
            build_args={"task": directory},
            file="backend/Dockerfile",
        )

        container = aws_batch.JobDefinitionContainer(
            image=image,
            job_role=job_role,  # type: ignore[arg-type]
            memory_limit_mib=batch_job_definition_memory_limit,
            vcpus=1,
            environment={
                "AWS_DEFAULT_REGION": job_role.stack.region,
                "DEPLOY_ENV": deploy_env,
            },
        )

        super().__init__(scope, construct_id, container=container)
Example No. 2
    def __init__(self, scope: core.Construct, id: str, config_dict,
                 **kwargs) -> None:
        super().__init__(scope, id, **kwargs)
        """ get Comp Reg ECR Image details """
        comp_reg_image_id = ecs.ContainerImage.from_ecr_repository(
            repository=ecr.Repository.from_repository_name(
                self,
                "GetCompRegRepoName",
                repository_name=config_dict['workflow_ecr_repo']),
            tag=config_dict['workflow_comp_reg_image_version'])
        """ Create Comp Reg Batch Job Definition """
        createCompRegJob = batch.JobDefinition(
            self,
            "createCompRegJob",
            job_definition_name="comp-reg-etl-job",
            retry_attempts=2,
            container=batch.JobDefinitionContainer(
                image=comp_reg_image_id,
                memory_limit_mib=4000,
                vcpus=1,
                environment=dict(
                    COMPREG_ORACLE_SECRET_NAME=config_dict[
                        'comp_reg_secret_name'],
                    COMPREG_BUCKET=config_dict['datalake_bucket_name'])))

        core.CfnOutput(self,
                       "createCompRegJobName",
                       value=createCompRegJob.job_definition_name)
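The construct above reads four keys from config_dict; a minimal sketch of the expected shape (all values below are placeholders, not taken from the original):

    config_dict = {
        'workflow_ecr_repo': 'my-workflow-repo',               # existing ECR repository name
        'workflow_comp_reg_image_version': 'v1.0.0',           # image tag to deploy
        'comp_reg_secret_name': 'comp-reg/oracle-credentials',
        'datalake_bucket_name': 'my-datalake-bucket',
    }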
Example No. 3
    def __init__(
        self,
        scope: Construct,
        construct_id: str,
        *,
        env_name: str,
        directory: str,
        job_role: aws_iam.Role,
    ):
        if env_name == PRODUCTION_ENVIRONMENT_NAME:
            batch_job_definition_memory_limit = 3900
        else:
            batch_job_definition_memory_limit = 500

        image = aws_ecs.ContainerImage.from_asset(
            directory=".",
            build_args={"task": directory},
            file=join(BACKEND_DIRECTORY, "Dockerfile"),
        )

        container = aws_batch.JobDefinitionContainer(
            image=image,
            job_role=job_role,  # type: ignore[arg-type]
            memory_limit_mib=batch_job_definition_memory_limit,
            vcpus=1,
            environment={
                "AWS_DEFAULT_REGION": job_role.stack.region,
                ENV_NAME_VARIABLE_NAME: env_name,
            },
        )

        super().__init__(scope, construct_id, container=container)
Example No. 4
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        job_name = 'npp-batch-job'

        # TODO pass this in somehow, SSM?
        job_role = iam.Role.from_role_arn(
            self,
            'job-role',
            role_arn='arn:aws:iam::138863487738:role/covariate-ingest-batch-job'
        )

        # TODO pass this in somehow, SSM?
        job_queue = batch.JobQueue.from_job_queue_arn(
            self,
            'batch-queue',
            job_queue_arn=
            'arn:aws:batch:us-east-1:138863487738:job-queue/covariate-ingest-cpu-queue'
        )

        npp_job = batch.JobDefinition(
            self,
            job_name,
            # Use a readable name for executing.
            job_definition_name=job_name,
            container=batch.JobDefinitionContainer(
                # https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_batch/JobDefinitionContainer.html
                image=ecs.ContainerImage.from_asset("./jobs/npp/"),
                job_role=job_role,
                memory_limit_mib=4096,
                vcpus=1,
                environment={
                    "ENV": "",
                    "AWS_BUCKET": "covariate-ingest-data",
                    "STAC_API": "https://discovery-cosmos.azurewebsites.net/stac/dev/addItem"
                },
                privileged=False,
            ),
        )

        events.Rule(
            self,
            'npp-ingest-trigger',
            description='Trigger for sample ingest',
            schedule=events.Schedule.cron(minute="0",
                                          hour="4"),  # Every day at 4am
            targets=[
                targets.BatchJob(
                    job_queue=job_queue,
                    job_definition=npp_job,
                )
            ],
            enabled=False)
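Note that the rule above is created with enabled=False, so the daily schedule will not fire until it is switched on; in the meantime the job can be submitted by hand. A hedged sketch using the standard boto3 Batch client (queue ARN and job definition name copied from the stack above):

    import boto3

    boto3.client('batch').submit_job(
        jobName='npp-batch-job-manual-run',
        jobQueue='arn:aws:batch:us-east-1:138863487738:job-queue/covariate-ingest-cpu-queue',
        jobDefinition='npp-batch-job',
    )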
Example No. 5
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # ========================
        # VPC
        # ========================
        
        # VPC
        vpc = ec2.Vpc(
            self, 'fetch-and-run-vpc',
            max_azs=2,
            subnet_configuration=[
                ec2.SubnetConfiguration(
                    name='public-subnet',
                    subnet_type=ec2.SubnetType.PUBLIC
                )
            ],
            nat_gateways=0
        )

        # Security Group
        sg = ec2.SecurityGroup(
            self, 'fetch-and-run-sg',
            vpc=vpc,
            description='SG for fetch and run',
            security_group_name='fetch-and-run-sg'
        )

        # Ingress from IP address via HTTP, SSH
        for port in PORTS:
            sg.add_ingress_rule(
                peer=ec2.Peer.ipv4(IP_ADDRESS),
                connection=ec2.Port.tcp(port)   
            )

        # ========================
        # IAM
        # ========================

        '''
        I. Batch Service Role
        - Makes calls to other AWS services on your behalf to
        manage the resources that you use with the service
        '''

        batch_service_role = iam.Role.from_role_arn(
            self, 'batch-service-role',
            role_arn=BATCH_SERVICE_ROLE_ARN
        )

        '''
        II. ECS Instance Role
        - Batch compute environments are populated with ECS container instances,
        which run the ECS container agent locally
        - ECS container agent makes calls to AWS APIs on your behalf
        - Container instances that run the agent require a policy and role for
        these services to know that the agent belongs to you

        - Instance Profile uses the batch instance role name
        - This is fed into the compute environment    
        '''

        batch_instance_role = iam.Role.from_role_arn(
            self, 'batch-instance-role',
            role_arn=ECS_INSTANCE_ROLE_ARN
        )

        instance_profile = iam.CfnInstanceProfile(
            self, 'instance-profile',
            roles=[batch_instance_role.role_name]
        )

        '''
        Job Role
        - Used in the job definition
        - IAM role that the container can assume for AWS permissions
        
        When the fetch_and_run image runs as an AWS Batch job, it fetches the job
        script from Amazon S3. You need an IAM role that the AWS Batch job can use
        to access S3

        Trusted Entity --> AWS service --> Elastic Container Service --> Elastic
        Container Service Task 
        - In the Role's trust relationship, this will be displayed as follows:
        {
            "Version": "2012-10-17",
            "Statement": [
                {
                "Sid": "",
                "Effect": "Allow",
                "Principal": {
                    "Service": "ecs-tasks.amazonaws.com"
                },
                "Action": "sts:AssumeRole"
                }
            ]
        }

        Default is for a role to be created
        '''
        batch_job_role = iam.Role.from_role_arn(
            self, 'batch-job-role',
            role_arn=BATCH_JOB_ROLE_ARN
        )
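        # Sketch only (not used here): instead of importing the role by ARN, an
        # equivalent role could be created in this stack with the ecs-tasks trust
        # relationship shown in the docstring above; S3 read access to the bucket
        # holding the job scripts would still need to be granted.
        # batch_job_role = iam.Role(
        #     self, 'batch-job-role',
        #     assumed_by=iam.ServicePrincipal('ecs-tasks.amazonaws.com')
        # )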

        # ========================
        # ECR
        # ========================
        '''
        Repository

        TODO: Evaluate integrating repository into CDK (in this stack or another)
        '''
        ecr_repository = ecr.Repository.from_repository_name(
            self, 'ecr-repository',
            repository_name=ECR_REPOSITORY_NAME
        )
        
        '''
        Container Image
        
        NOTE: We are pulling the image directly from ECR. Pushed before stack is created.
        - Can alternatively create the image from files in the stack (commented out)
        
        TODO: Evaluate ability to programmatically update the tag.
        - Manually updating the tag follows the approach of pushing the image before stack creation/updates
        - Review adding an alphanumeric tag as opposed to simply 'latest' --> more detail for auditing
        '''
        # image_asset = ecr_assets.DockerImageAsset(
        #     self, 'docker-image',
        #     directory='./fetch-and-run',
        #     file='./Dockerfile'
        # )
        # image = ecs.ContainerImage.from_docker_image_asset(image_asset)

        image = ecs.ContainerImage.from_ecr_repository(
            repository=ecr_repository,
            tag='latest'
        )

        # ========================
        # BATCH
        # ========================

        '''
        I. Compute Environment
        - Execution runtime of submitted batch jobs 
        '''
        compute_environment = batch.ComputeEnvironment(
            self, 'batch-compute-environment',
            compute_environment_name='batch-compute-environment',
            compute_resources=batch.ComputeResources(
                vpc=vpc,
                # BEST_FIT_PROGRESSIVE will select an additional instance type that is large enough to meet the requirements of the jobs in the queue, with a preference for an instance type with a lower cost.
                allocation_strategy=batch.AllocationStrategy.BEST_FIT_PROGRESSIVE,
                compute_resources_tags={
                    "name": "fetch-and-run"
                },
                ec2_key_pair=KEY_PAIR,
                instance_role=instance_profile.attr_arn,
                security_groups=[sg],
                type=batch.ComputeResourceType.ON_DEMAND,
                vpc_subnets=ec2.SubnetSelection(
                        subnet_type=ec2.SubnetType.PUBLIC)
            ),
            service_role=batch_service_role,
        )

        '''
        II. Job Queue
        - Queue where batch jobs can be submitted
        '''

        job_queue = batch.JobQueue(
            self, 'fetch-and-run-queue',
            compute_environments=[
                batch.JobQueueComputeEnvironment(
                    compute_environment=compute_environment,
                    order=1
                )],
            job_queue_name='fetch-and-run-queue'
        )

        '''
        III. Job Definition
        - Group various job properties (image, resource requirements, env variables)
        into a single definition. Definitions are used at job submission time
        
        TODO: Build out functionality for the following:
        - `command` => The command that is passed to the container. If you provide a shell command as a single string, you have to quote command-line arguments
        - `environment` => The environment variables to pass to the container
        - `mount_points` => The mount points for data volumes in your container
        - `volumes` => A list of data volumes used in a job.
        
        NOTE: Can optionally add command, environment variables directly in code
        - Alternatively can reference them in `fetch_and_run.sh`
        '''

        job_definition = batch.JobDefinition(
            self, 'fetch-and-run-job-definition',
            container=batch.JobDefinitionContainer(
                image=image,
                job_role=batch_job_role,
                # The hard limit (in MiB) of memory to present to the container
                memory_limit_mib=500,

                # The number of vCPUs reserved for the container. Each vCPU is equivalent to 1,024 CPU
                vcpus=1,
                user="******"
            )
        )
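A sketch of how the command, environment, mount_points and volumes listed in the TODO above could be supplied on the container definition (the same pattern appears in Example No. 6 below; the BATCH_FILE_* variable names follow the fetch_and_run helper convention and, like the paths, are illustrative assumptions):

        job_definition = batch.JobDefinition(
            self, 'fetch-and-run-job-definition',
            container=batch.JobDefinitionContainer(
                image=image,
                job_role=batch_job_role,
                memory_limit_mib=500,
                vcpus=1,
                command=["myjob.sh", "60"],
                environment={
                    "BATCH_FILE_TYPE": "script",
                    "BATCH_FILE_S3_URL": "s3://my-bucket/myjob.sh"
                },
                mount_points=[
                    ecs.MountPoint(container_path='/scratch',
                                   read_only=False,
                                   source_volume='scratch')
                ],
                volumes=[
                    ecs.Volume(name='scratch',
                               host=ecs.Host(source_path='/mnt/scratch'))
                ]
            )
        )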
Example No. 6
    def __init__(self, scope: core.Construct, id: str, props,
                 **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        ################################################################################
        # Set up permissions
        ro_buckets = set()
        for bucket in props['ro_buckets']:
            tmp_bucket = s3.Bucket.from_bucket_name(self,
                                                    bucket,
                                                    bucket_name=bucket)
            ro_buckets.add(tmp_bucket)

        rw_buckets = set()
        for bucket in props['rw_buckets']:
            tmp_bucket = s3.Bucket.from_bucket_name(self,
                                                    bucket,
                                                    bucket_name=bucket)
            rw_buckets.add(tmp_bucket)

        batch_service_role = iam.Role(
            self,
            'BatchServiceRole',
            assumed_by=iam.ServicePrincipal('batch.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AWSBatchServiceRole')
            ])

        spotfleet_role = iam.Role(
            self,
            'AmazonEC2SpotFleetRole',
            assumed_by=iam.ServicePrincipal('spotfleet.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AmazonEC2SpotFleetTaggingRole')
            ])

        # Create role for Batch instances
        batch_instance_role = iam.Role(
            self,
            'BatchInstanceRole',
            role_name='UmccriseBatchInstanceRole',
            assumed_by=iam.CompositePrincipal(
                iam.ServicePrincipal('ec2.amazonaws.com'),
                iam.ServicePrincipal('ecs.amazonaws.com')),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AmazonEC2RoleforSSM'),
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AmazonEC2ContainerServiceforEC2Role')
            ])
        batch_instance_role.add_to_policy(
            iam.PolicyStatement(actions=[
                "ec2:Describe*", "ec2:AttachVolume", "ec2:CreateVolume",
                "ec2:CreateTags", "ec2:ModifyInstanceAttribute"
            ],
                                resources=["*"]))
        batch_instance_role.add_to_policy(
            iam.PolicyStatement(actions=["ecs:ListClusters"], resources=["*"]))
        for bucket in ro_buckets:
            bucket.grant_read(batch_instance_role)
        for bucket in rw_buckets:
            # restrict write to paths matching */umccrised/*
            bucket.grant_read_write(batch_instance_role, '*/umccrised/*')

        # Turn the instance role into an Instance Profile
        batch_instance_profile = iam.CfnInstanceProfile(
            self,
            'BatchInstanceProfile',
            instance_profile_name='UmccriseBatchInstanceProfile',
            roles=[batch_instance_role.role_name])

        ################################################################################
        # Minimal networking
        # TODO: import resource created with TF
        vpc = props['vpc']

        ################################################################################
        # Setup Batch compute resources

        # Configure BlockDevice to expand instance disk space (if needed?)
        block_device_mappings = [{
            'deviceName': '/dev/xvdf',
            'ebs': {
                'deleteOnTermination': True,
                'volumeSize': 1024,
                'volumeType': 'gp2'
            }
        }]
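        # NOTE: user_data_script (the instance bootstrap script embedded in the launch
        # template below) is assumed to be defined or imported elsewhere in this module.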

        launch_template = ec2.CfnLaunchTemplate(
            self,
            'UmccriseBatchComputeLaunchTemplate',
            launch_template_name='UmccriseBatchComputeLaunchTemplate',
            launch_template_data={
                'userData': core.Fn.base64(user_data_script),
                'blockDeviceMappings': block_device_mappings
            })

        launch_template_spec = batch.LaunchTemplateSpecification(
            launch_template_name=launch_template.launch_template_name,
            version='$Latest')

        my_compute_res = batch.ComputeResources(
            type=batch.ComputeResourceType.SPOT,
            allocation_strategy=batch.AllocationStrategy.BEST_FIT_PROGRESSIVE,
            desiredv_cpus=0,
            maxv_cpus=128,
            minv_cpus=0,
            image=ec2.MachineImage.generic_linux(
                ami_map={'ap-southeast-2': props['compute_env_ami']}),
            launch_template=launch_template_spec,
            spot_fleet_role=spotfleet_role,
            instance_role=batch_instance_profile.instance_profile_name,
            vpc=vpc,
            #compute_resources_tags=core.Tag('Creator', 'Batch')
        )
        # XXX: How to add more than one tag above??
        # core.Tag.add(my_compute_res, 'Foo', 'Bar')

        my_compute_env = batch.ComputeEnvironment(
            self,
            'UmccriseBatchComputeEnv',
            compute_environment_name="cdk-umccrise-batch-compute-env",
            service_role=batch_service_role,
            compute_resources=my_compute_res)

        job_queue = batch.JobQueue(self,
                                   'UmccriseJobQueue',
                                   job_queue_name='cdk-umccrise_job_queue',
                                   compute_environments=[
                                       batch.JobQueueComputeEnvironment(
                                           compute_environment=my_compute_env,
                                           order=1)
                                   ],
                                   priority=10)

        job_container = batch.JobDefinitionContainer(
            image=ecs.ContainerImage.from_registry(
                name=props['container_image']),
            vcpus=2,
            memory_limit_mib=2048,
            command=["/opt/container/umccrise-wrapper.sh", "Ref::vcpus"],
            mount_points=[
                ecs.MountPoint(container_path='/work',
                               read_only=False,
                               source_volume='work'),
                ecs.MountPoint(container_path='/opt/container',
                               read_only=True,
                               source_volume='container')
            ],
            volumes=[
                ecs.Volume(name='container',
                           host=ecs.Host(source_path='/opt/container')),
                ecs.Volume(name='work', host=ecs.Host(source_path='/mnt'))
            ],
            privileged=True)

        job_definition = batch.JobDefinition(
            self,
            'UmccriseJobDefinition',
            job_definition_name='cdk-umccrise-job-definition',
            parameters={'vcpus': '1'},
            container=job_container,
            timeout=core.Duration.hours(5))

        ################################################################################
        # Set up job submission Lambda

        lambda_role = iam.Role(
            self,
            'UmccriseLambdaRole',
            assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AWSLambdaBasicExecutionRole'),
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'AWSBatchFullAccess')  # TODO: restrict!
            ])

        for bucket in ro_buckets:
            bucket.grant_read(lambda_role)
        for bucket in rw_buckets:
            bucket.grant_read(lambda_role)

        # TODO: support dev/prod split, i.e. image being configurable on dev, but fixed on prod
        #       may need a default JobDefinition to be set up
        lmbda.Function(self,
                       'UmccriseLambda',
                       function_name='umccrise_batch_lambda',
                       handler='umccrise.lambda_handler',
                       runtime=lmbda.Runtime.PYTHON_3_7,
                       code=lmbda.Code.from_asset('lambdas/umccrise'),
                       environment={
                           'JOBNAME_PREFIX': "UMCCRISE_",
                           'JOBQUEUE': job_queue.job_queue_name,
                           'REFDATA_BUCKET': props['refdata_bucket'],
                           'DATA_BUCKET': props['data_bucket'],
                           'UMCCRISE_MEM': '50000',
                           'UMCCRISE_VCPUS': '16'
                       },
                       role=lambda_role)
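Because the job definition declares parameters={'vcpus': '1'} and the container command references Ref::vcpus, a caller can override that value per job; the Lambda above does this via its UMCCRISE_VCPUS setting. A hedged boto3 sketch (names taken from the queue and job definition created above):

    import boto3

    boto3.client('batch').submit_job(
        jobName='UMCCRISE_example',
        jobQueue='cdk-umccrise_job_queue',
        jobDefinition='cdk-umccrise-job-definition',
        parameters={'vcpus': '16'},  # substituted into "Ref::vcpus" in the container command
    )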
Example No. 7
    def __init__(self, app: core.Construct, stack_name: str, vpc: aws_ec2.Vpc,
                 security_group: aws_ec2.SecurityGroup):
        super().__init__(scope=app, id=f"{stack_name}-batch")

        batch_role = aws_iam.Role(
            scope=self,
            id=f"batch_role",
            role_name=f"batch_role",
            assumed_by=aws_iam.ServicePrincipal("batch.amazonaws.com"))

        batch_role.add_managed_policy(
            aws_iam.ManagedPolicy.from_managed_policy_arn(
                scope=self,
                id=f"AWSBatchServiceRole",
                managed_policy_arn=
                "arn:aws:iam::aws:policy/service-role/AWSBatchServiceRole"))

        batch_role.add_to_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["arn:aws:logs:*:*:*"],
                                    actions=[
                                        "logs:CreateLogGroup",
                                        "logs:CreateLogStream",
                                        "logs:PutLogEvents",
                                        "logs:DescribeLogStreams"
                                    ]))

        # Role to attach EC2
        instance_role = aws_iam.Role(
            scope=self,
            id=f"instance_role",
            role_name=f"instance_role_for",
            assumed_by=aws_iam.ServicePrincipal("ec2.amazonaws.com"))

        instance_role.add_managed_policy(
            aws_iam.ManagedPolicy.from_managed_policy_arn(
                scope=self,
                id=f"AmazonEC2ContainerServiceforEC2Role",
                managed_policy_arn=
                "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role"
            ))

        # add policy to access S3
        instance_role.add_to_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["*"],
                                    actions=["s3:*"]))

        # add policy to access CloudWatch Logs
        instance_role.add_to_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["arn:aws:logs:*:*:*"],
                                    actions=[
                                        "logs:CreateLogGroup",
                                        "logs:CreateLogStream",
                                        "logs:PutLogEvents",
                                        "logs:DescribeLogStreams"
                                    ]))

        # attach role to EC2
        instance_profile = aws_iam.CfnInstanceProfile(
            scope=self,
            id=f"instance_profile",
            instance_profile_name=f"instance_profile",
            roles=[instance_role.role_name])

        # ===== #
        # batch #
        # ===== #
        batch_compute_resources = aws_batch.ComputeResources(
            vpc=vpc,
            maxv_cpus=4,
            minv_cpus=0,
            security_groups=[security_group],
            instance_role=instance_profile.attr_arn,
            type=aws_batch.ComputeResourceType.SPOT)

        batch_compute_environment = aws_batch.ComputeEnvironment(
            scope=self,
            id="batch_compute_environment",
            compute_environment_name="batch_compute_environment",
            compute_resources=batch_compute_resources,
            service_role=batch_role)

        job_role = aws_iam.Role(
            scope=self,
            id=f"job_role",
            role_name=f"job_role",
            assumed_by=aws_iam.ServicePrincipal("ecs-tasks.amazonaws.com"))

        job_role.add_managed_policy(
            aws_iam.ManagedPolicy.from_managed_policy_arn(
                scope=self,
                id=f"AmazonECSTaskExecutionRolePolicy",
                managed_policy_arn=
                "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
            ))

        job_role.add_managed_policy(
            aws_iam.ManagedPolicy.from_managed_policy_arn(
                scope=self,
                id=f"AmazonS3FullAccess",
                managed_policy_arn="arn:aws:iam::aws:policy/AmazonS3FullAccess"
            ))

        job_role.add_managed_policy(
            aws_iam.ManagedPolicy.from_managed_policy_arn(
                scope=self,
                id=f"CloudWatchLogsFullAccess",
                managed_policy_arn=
                "arn:aws:iam::aws:policy/CloudWatchLogsFullAccess"))

        self.batch_job_queue = aws_batch.JobQueue(
            scope=self,
            id=f"job_queue",
            job_queue_name=f"job_queue",
            compute_environments=[
                aws_batch.JobQueueComputeEnvironment(
                    compute_environment=batch_compute_environment, order=1)
            ],
            priority=1)

        # ECR repository
        ecr_repository = aws_ecr_assets.DockerImageAsset(
            scope=self,
            id=f"ecr_image",
            directory="./docker",
            repository_name=f"repository")

        # get image from ECR
        container_image = aws_ecs.ContainerImage.from_ecr_repository(
            repository=ecr_repository.repository)

        # job define
        # pass `S3_BUCKET` as environment argument.
        self.batch_job_definition = aws_batch.JobDefinition(
            scope=self,
            id=f"job_definition",
            job_definition_name=f"job_definition",
            container=aws_batch.JobDefinitionContainer(
                image=container_image,
                environment={"S3_BUCKET": f"{S3_BUCKET}"},
                job_role=job_role,
                vcpus=1,
                memory_limit_mib=1024))
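The queue and job definition are exposed as self.batch_job_queue and self.batch_job_definition so that another stack (for example a Step Functions stack like the one in Example No. 10) can reference them. A sketch of the wiring, where the class names and surrounding variables are assumptions since they are not shown in the snippet:

    batch_stack = BatchStack(app, stack_name="my-project", vpc=vpc, security_group=sg)
    # sfn_stack = SfnStack(app, "my-project",
    #                      job_queue=batch_stack.batch_job_queue,
    #                      job_definition=batch_stack.batch_job_definition)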
Example No. 8
    def __init__(self, scope: core.Construct, id: str, props, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        dirname = os.path.dirname(__file__)

        ecr_repo = ecr.Repository.from_repository_name(
            self,
            'UmccriseEcrRepo',
            repository_name='umccrise'
        )

        ################################################################################
        # Set up permissions
        ro_buckets = set()
        for bucket in props['ro_buckets']:
            tmp_bucket = s3.Bucket.from_bucket_name(
                self,
                bucket,
                bucket_name=bucket
            )
            ro_buckets.add(tmp_bucket)

        rw_buckets = set()
        for bucket in props['rw_buckets']:
            tmp_bucket = s3.Bucket.from_bucket_name(
                self,
                bucket,
                bucket_name=bucket
            )
            rw_buckets.add(tmp_bucket)

        batch_service_role = iam.Role(
            self,
            'BatchServiceRole',
            assumed_by=iam.ServicePrincipal('batch.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AWSBatchServiceRole')
            ]
        )

        spotfleet_role = iam.Role(
            self,
            'AmazonEC2SpotFleetRole',
            assumed_by=iam.ServicePrincipal('spotfleet.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AmazonEC2SpotFleetTaggingRole')
            ]
        )

        # Create role for Batch instances
        batch_instance_role = iam.Role(
            self,
            'BatchInstanceRole',
            role_name='UmccriseBatchInstanceRole',
            assumed_by=iam.CompositePrincipal(
                iam.ServicePrincipal('ec2.amazonaws.com'),
                iam.ServicePrincipal('ecs.amazonaws.com')
            ),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AmazonEC2RoleforSSM'),
                iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AmazonEC2ContainerServiceforEC2Role')
            ]
        )
        batch_instance_role.add_to_policy(
            iam.PolicyStatement(
                actions=[
                    "ec2:Describe*",
                    "ec2:AttachVolume",
                    "ec2:CreateVolume",
                    "ec2:CreateTags",
                    "ec2:ModifyInstanceAttribute"
                ],
                resources=["*"]
            )
        )
        batch_instance_role.add_to_policy(
            iam.PolicyStatement(
                actions=[
                    "ecs:ListClusters"
                ],
                resources=["*"]
            )
        )
        for bucket in ro_buckets:
            bucket.grant_read(batch_instance_role)
        for bucket in rw_buckets:
            # restrict write to paths matching */umccrised/*
            bucket.grant_read_write(batch_instance_role, '*/umccrised/*')

        # Turn the instance role into an Instance Profile
        batch_instance_profile = iam.CfnInstanceProfile(
            self,
            'BatchInstanceProfile',
            instance_profile_name='UmccriseBatchInstanceProfile',
            roles=[batch_instance_role.role_name]
        )

        ################################################################################
        # Network
        # Import common infrastructure (maintained via Terraform)

        # VPC
        vpc = ec2.Vpc.from_lookup(
            self,
            'UmccrMainVpc',
            tags={'Name': 'main-vpc', 'Stack': 'networking'}
        )

        batch_security_group = ec2.SecurityGroup(
            self,
            "BatchSecurityGroup",
            vpc=vpc,
            description="Allow all outbound, no inbound traffic"
        )
        ################################################################################
        # Setup Batch compute resources

        # Configure BlockDevice to expand instance disk space (if needed?)
        block_device_mappings = [
            {
                'deviceName': '/dev/xvdf',
                'ebs': {
                    'deleteOnTermination': True,
                    'encrypted': True,
                    'volumeSize': 2048,
                    'volumeType': 'gp2'
                }
            }
        ]

        # Set up custom user data to configure the Batch instances
        umccrise_wrapper_asset = assets.Asset(
            self,
            'UmccriseWrapperAsset',
            path=os.path.join(dirname, '..', 'assets', "umccrise-wrapper.sh")
        )
        umccrise_wrapper_asset.grant_read(batch_instance_role)

        user_data_asset = assets.Asset(
            self,
            'UserDataAsset',
            path=os.path.join(dirname, '..', 'assets', "batch-user-data.sh")
        )
        user_data_asset.grant_read(batch_instance_role)

        user_data = ec2.UserData.for_linux()
        local_path = user_data.add_s3_download_command(
            bucket=user_data_asset.bucket,
            bucket_key=user_data_asset.s3_object_key
        )
        user_data.add_execute_file_command(
            file_path=local_path,
            arguments=f"s3://{umccrise_wrapper_asset.bucket.bucket_name}/{umccrise_wrapper_asset.s3_object_key}"
        )

        # Wrap the user data in the MIME multi-part archive format that LaunchTemplate requires
        mime_wrapper = ec2.UserData.custom('MIME-Version: 1.0')
        mime_wrapper.add_commands('Content-Type: multipart/mixed; boundary="==MYBOUNDARY=="')
        mime_wrapper.add_commands('')
        mime_wrapper.add_commands('--==MYBOUNDARY==')
        mime_wrapper.add_commands('Content-Type: text/x-shellscript; charset="us-ascii"')
        mime_wrapper.add_commands('')
        # install the AWS CLI, as it's unexpectedly missing from the Amazon Linux 2 AMI...
        mime_wrapper.add_commands('yum -y install unzip')
        mime_wrapper.add_commands('cd /opt')
        mime_wrapper.add_commands('curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"')
        mime_wrapper.add_commands('unzip awscliv2.zip')
        mime_wrapper.add_commands('sudo ./aws/install --bin-dir /usr/bin')
        # insert our actual user data payload
        mime_wrapper.add_commands(user_data.render())
        mime_wrapper.add_commands('--==MYBOUNDARY==--')

        launch_template = ec2.CfnLaunchTemplate(
            self,
            'UmccriseBatchComputeLaunchTemplate',
            launch_template_name='UmccriseBatchComputeLaunchTemplate',
            launch_template_data={
                'userData': core.Fn.base64(mime_wrapper.render()),
                'blockDeviceMappings': block_device_mappings
            }
        )

        launch_template_spec = batch.LaunchTemplateSpecification(
            launch_template_name=launch_template.launch_template_name,
            version='$Latest'
        )

        my_compute_res = batch.ComputeResources(
            type=(batch.ComputeResourceType.SPOT if props['compute_env_type'].lower() == 'spot' else batch.ComputeResourceType.ON_DEMAND),
            allocation_strategy=batch.AllocationStrategy.BEST_FIT_PROGRESSIVE,
            desiredv_cpus=0,
            maxv_cpus=320,
            minv_cpus=0,
            image=ec2.MachineImage.generic_linux(ami_map={'ap-southeast-2': props['compute_env_ami']}),
            launch_template=launch_template_spec,
            spot_fleet_role=spotfleet_role,
            instance_role=batch_instance_profile.instance_profile_name,
            vpc=vpc,
            vpc_subnets=ec2.SubnetSelection(
                subnet_type=ec2.SubnetType.PRIVATE,
                # availability_zones=["ap-southeast-2a"]
            ),
            security_groups=[batch_security_group]
            # compute_resources_tags=core.Tag('Creator', 'Batch')
        )
        # XXX: How to add more than one tag above??
        # https://github.com/aws/aws-cdk/issues/7350
        # core.Tag.add(my_compute_res, 'Foo', 'Bar')

        my_compute_env = batch.ComputeEnvironment(
            self,
            'UmccriseBatchComputeEnv',
            compute_environment_name="cdk-umccr_ise-batch-compute-env",
            service_role=batch_service_role,
            compute_resources=my_compute_res
        )
        # child = my_compute_env.node.default_child
        # child_comp_res = child.compute_resources
        # child_comp_res.tags = "{'Foo': 'Bar'}"

        job_queue = batch.JobQueue(
            self,
            'UmccriseJobQueue',
            job_queue_name='cdk-umccrise_job_queue',
            compute_environments=[
                batch.JobQueueComputeEnvironment(
                    compute_environment=my_compute_env,
                    order=1
                )
            ],
            priority=10
        )

        job_container = batch.JobDefinitionContainer(
            image=ecs.ContainerImage.from_registry(name=props['container_image']),
            vcpus=32,
            memory_limit_mib=100000,
            command=[
                "/opt/container/umccrise-wrapper.sh",
                "Ref::vcpus"
            ],
            mount_points=[
                ecs.MountPoint(
                    container_path='/work',
                    read_only=False,
                    source_volume='work'
                ),
                ecs.MountPoint(
                    container_path='/opt/container',
                    read_only=True,
                    source_volume='container'
                )
            ],
            volumes=[
                ecs.Volume(
                    name='container',
                    host=ecs.Host(
                        source_path='/opt/container'
                    )
                ),
                ecs.Volume(
                    name='work',
                    host=ecs.Host(
                        source_path='/mnt'
                    )
                )
            ],
            privileged=True
        )

        job_definition = batch.JobDefinition(
            self,
            'UmccriseJobDefinition',
            job_definition_name='cdk-umccrise-job-definition',
            parameters={'vcpus': '1'},
            container=job_container,
            timeout=core.Duration.hours(5)
        )

        ################################################################################
        # Set up job submission Lambda

        lambda_role = iam.Role(
            self,
            'UmccriseLambdaRole',
            assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AWSLambdaBasicExecutionRole'),
                iam.ManagedPolicy.from_aws_managed_policy_name('AWSBatchFullAccess')  # TODO: restrict!
            ]
        )

        for bucket in ro_buckets:
            bucket.grant_read(lambda_role)
        for bucket in rw_buckets:
            bucket.grant_read(lambda_role)
        ecr_repo.grant(lambda_role, 'ecr:ListImages')

        # TODO: support dev/prod split, i.e. image being configurable on dev, but fixed on prod
        #       may need a default JobDefinition to be set up
        lmbda.Function(
            self,
            'UmccriseLambda',
            function_name='umccrise_batch_lambda',
            handler='umccrise.lambda_handler',
            runtime=lmbda.Runtime.PYTHON_3_7,
            code=lmbda.Code.from_asset('lambdas/umccrise'),
            environment={
                'JOBNAME_PREFIX': "UMCCRISE_",
                'JOBQUEUE': job_queue.job_queue_name,
                'UMCCRISE_MEM': '100000',
                'UMCCRISE_VCPUS': '32',
                'JOBDEF': job_definition.job_definition_name,
                'REFDATA_BUCKET': props['refdata_bucket'],
                'INPUT_BUCKET': props['input_bucket'],
                'RESULT_BUCKET': props['result_bucket'],
                'IMAGE_CONFIGURABLE': props['image_configurable']
            },
            role=lambda_role
        )
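For reference, the keys this stack reads from props; a minimal sketch of its shape (all values are placeholders):

    props = {
        'ro_buckets': ['umccr-refdata-dev'],
        'rw_buckets': ['umccr-results-dev'],
        'compute_env_type': 'SPOT',                  # anything else selects ON_DEMAND
        'compute_env_ami': 'ami-0123456789abcdef0',  # custom Batch AMI for ap-southeast-2
        'container_image': 'example/umccrise:latest',
        'refdata_bucket': 'umccr-refdata-dev',
        'input_bucket': 'umccr-input-dev',
        'result_bucket': 'umccr-results-dev',
        'image_configurable': 'true',
    }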
Example No. 9
    def __init__(self, scope: cdk.Construct, construct_id: str,
                 **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        # The code that defines your stack goes here
        table = dynamodb.Table(
            self,
            "TheTable",
            table_name="cdk-table",
            partition_key=dynamodb.Attribute(
                name="id", type=dynamodb.AttributeType.STRING),
            removal_policy=cdk.RemovalPolicy.DESTROY,
        )

        # compute_environment = batch.ComputeEnvironment(
        #     self,
        #     "MyComputeEnvironment",
        #     compute_environment_name="cdk-env",
        #     compute_resources=batch.ComputeResources(
        #         vpc=Vpc.from_lookup(self, "VPC", is_default=True),
        #     ),
        #     enabled=True,
        #     managed=True,
        # )

        job_role = Role(
            self,
            "BatchJobRole",
            assumed_by=ServicePrincipal("ecs-tasks.amazonaws.com"),
            description="Role for a container in a Batch job",
            role_name="CDK-BatchJobRole",
            managed_policies=[
                ManagedPolicy.from_aws_managed_policy_name(
                    managed_policy_name="AmazonDynamoDBFullAccess"),
            ],
        )

        repository = Repository(
            self,
            "MyRepository",
            removal_policy=cdk.RemovalPolicy.DESTROY,
            repository_name="cdk-my-repository",
            lifecycle_rules=[
                LifecycleRule(max_image_count=5, description="Max 5 images")
            ],
        )

        image: ContainerImage = ContainerImage.from_ecr_repository(
            repository=repository,
            tag="latest",
        )

        container = batch.JobDefinitionContainer(
            image=image,
            job_role=job_role,
            command=["python", "run.py", "--help"],
            environment={
                "READINGS_TABLE": table.table_name,
                "AWS_REGION": self.region,
            },
            vcpus=1,
            log_configuration=batch.LogConfiguration(
                log_driver=batch.LogDriver.AWSLOGS),
            memory_limit_mib=2048,
        )

        batch.JobDefinition(
            self,
            "JobDefinitionCreate",
            container=container,
            job_definition_name="create",
            retry_attempts=1,
        )
Example No. 10
    def __init__(self, app: core.App, stack_name: str, stack_env: str):
        super().__init__(scope=app, id=f"{stack_name}-{stack_env}")

        # CIDR
        cidr = "192.168.0.0/24"

        # === #
        # vpc #
        # === #
        vpc = aws_ec2.Vpc(
            self,
            id=f"{stack_name}-{stack_env}-vpc",
            cidr=cidr,
            subnet_configuration=[
                # Public Subnet
                aws_ec2.SubnetConfiguration(
                    cidr_mask=28,
                    name=f"{stack_name}-{stack_env}-public",
                    subnet_type=aws_ec2.SubnetType.PUBLIC,
                )
            ],
        )

        security_group = aws_ec2.SecurityGroup(
            self,
            id=f'security-group-for-{stack_name}-{stack_env}',
            vpc=vpc,
            security_group_name=f'security-group-for-{stack_name}-{stack_env}',
            allow_all_outbound=True)

        batch_role = aws_iam.Role(
            scope=self,
            id=f"batch_role_for_{stack_name}-{stack_env}",
            role_name=f"batch_role_for_{stack_name}-{stack_env}",
            assumed_by=aws_iam.ServicePrincipal("batch.amazonaws.com"))

        batch_role.add_managed_policy(
            aws_iam.ManagedPolicy.from_managed_policy_arn(
                scope=self,
                id=f"AWSBatchServiceRole-{stack_env}",
                managed_policy_arn=
                "arn:aws:iam::aws:policy/service-role/AWSBatchServiceRole"))

        batch_role.add_to_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["arn:aws:logs:*:*:*"],
                                    actions=[
                                        "logs:CreateLogGroup",
                                        "logs:CreateLogStream",
                                        "logs:PutLogEvents",
                                        "logs:DescribeLogStreams"
                                    ]))

        # Role to attach EC2
        instance_role = aws_iam.Role(
            scope=self,
            id=f"instance_role_for_{stack_name}-{stack_env}",
            role_name=f"instance_role_for_{stack_name}-{stack_env}",
            assumed_by=aws_iam.ServicePrincipal("ec2.amazonaws.com"))

        instance_role.add_managed_policy(
            aws_iam.ManagedPolicy.from_managed_policy_arn(
                scope=self,
                id=f"AmazonEC2ContainerServiceforEC2Role-{stack_env}",
                managed_policy_arn=
                "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role"
            ))

        # add policy to access S3
        instance_role.add_to_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["*"],
                                    actions=["s3:*"]))

        # add policy to access CloudWatch Logs
        instance_role.add_to_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["arn:aws:logs:*:*:*"],
                                    actions=[
                                        "logs:CreateLogGroup",
                                        "logs:CreateLogStream",
                                        "logs:PutLogEvents",
                                        "logs:DescribeLogStreams"
                                    ]))

        # attach role to EC2
        instance_profile = aws_iam.CfnInstanceProfile(
            scope=self,
            id=f"instance_profile_for_{stack_name}-{stack_env}",
            instance_profile_name=
            f"instance_profile_for_{stack_name}-{stack_env}",
            roles=[instance_role.role_name])

        # ===== #
        # batch #
        # ===== #
        batch_compute_resources = aws_batch.ComputeResources(
            vpc=vpc,
            maxv_cpus=4,
            minv_cpus=0,
            security_groups=[security_group],
            instance_role=instance_profile.attr_arn,
            type=aws_batch.ComputeResourceType.SPOT)

        batch_compute_environment = aws_batch.ComputeEnvironment(
            scope=self,
            id=f"ProjectEnvironment-{stack_env}",
            compute_environment_name=f"ProjectEnvironmentBatch-{stack_env}",
            compute_resources=batch_compute_resources,
            service_role=batch_role)

        job_role = aws_iam.Role(
            scope=self,
            id=f"job_role_{stack_name}-{stack_env}",
            role_name=f"job_role_{stack_name}-{stack_env}",
            assumed_by=aws_iam.ServicePrincipal("ecs-tasks.amazonaws.com"))

        job_role.add_managed_policy(
            aws_iam.ManagedPolicy.from_managed_policy_arn(
                scope=self,
                id=f"AmazonECSTaskExecutionRolePolicy_{stack_name}-{stack_env}",
                managed_policy_arn=
                "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
            ))

        job_role.add_managed_policy(
            aws_iam.ManagedPolicy.from_managed_policy_arn(
                scope=self,
                id=f"AmazonS3FullAccess_{stack_name}-{stack_env}",
                managed_policy_arn="arn:aws:iam::aws:policy/AmazonS3FullAccess"
            ))

        job_role.add_managed_policy(
            aws_iam.ManagedPolicy.from_managed_policy_arn(
                scope=self,
                id=f"CloudWatchLogsFullAccess_{stack_name}-{stack_env}",
                managed_policy_arn=
                "arn:aws:iam::aws:policy/CloudWatchLogsFullAccess"))

        batch_job_queue = aws_batch.JobQueue(
            scope=self,
            id=f"job_queue_for_{stack_name}-{stack_env}",
            job_queue_name=f"job_queue_for_{stack_name}-{stack_env}",
            compute_environments=[
                aws_batch.JobQueueComputeEnvironment(
                    compute_environment=batch_compute_environment, order=1)
            ],
            priority=1)

        # ECR repository
        ecr_repository = aws_ecr_assets.DockerImageAsset(
            scope=self,
            id=f"ecr_image_{stack_env}",
            directory="./docker",
            repository_name=f"repository_for_{stack_env}")

        # get image from ECR
        container_image = aws_ecs.ContainerImage.from_ecr_repository(
            repository=ecr_repository.repository)

        # job define
        # pass `S3_BUCKET` as environment argument.
        batch_job_definition = aws_batch.JobDefinition(
            scope=self,
            id=f"job_definition_for_{stack_env}",
            job_definition_name=f"job_definition_for_{stack_env}",
            container=aws_batch.JobDefinitionContainer(
                image=container_image,
                environment={"S3_BUCKET": f"{S3_BUCKET}"},
                job_role=job_role,
                vcpus=1,
                memory_limit_mib=1024))

        # ============= #
        # StepFunctions #
        # ============= #
        # Ref::{keyword} placeholders in the container command are replaced from the Step Functions input
        command_overrides = ["python", "__init__.py", "--time", "Ref::time"]

        batch_task = aws_sfn_tasks.BatchSubmitJob(
            scope=self,
            id=f"batch_job_{stack_env}",
            job_definition=batch_job_definition,
            job_name=f"batch_job_{stack_env}_today",
            job_queue=batch_job_queue,
            container_overrides=aws_sfn_tasks.BatchContainerOverrides(
                command=command_overrides),
            payload=aws_sfn.TaskInput.from_object({"time.$": "$.time"}))

        # `one step` for StepFunctions
        definition = batch_task

        sfn_daily_process = aws_sfn.StateMachine(
            scope=self,
            id=f"YourProjectSFn-{stack_env}",
            definition=definition)

        # ================ #
        # CloudWatch Event #
        # ================ #

        # Run every day at 21:30 JST
        # See https://docs.aws.amazon.com/lambda/latest/dg/tutorial-scheduled-events-schedule-expressions.html
        events_daily_process = aws_events.Rule(
            scope=self,
            id=f"DailySFnProcess-{stack_env}",
            schedule=aws_events.Schedule.cron(minute="30",
                                              hour="12",
                                              month='*',
                                              day="*",
                                              year='*'),
        )
        events_daily_process.add_target(
            aws_events_targets.SfnStateMachine(sfn_daily_process))
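The state machine maps $.time from its input onto the Ref::time parameter, so the container command resolves to python __init__.py --time <value>. A hedged sketch of starting an execution manually with boto3 (the state machine ARN is a placeholder):

    import boto3

    boto3.client('stepfunctions').start_execution(
        stateMachineArn='arn:aws:states:ap-northeast-1:123456789012:stateMachine:YourProjectSFn-dev',
        input='{"time": "2021-01-01 21:30"}',
    )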