def __init__(self, scope: core.Construct, id: str, config: Dict,
             vpc: ec2.Vpc, es_sg: ec2.SecurityGroup) -> None:
    """Create the Elasticsearch domain described by ``config``.

    Args:
        scope: Parent construct.
        id: Construct id.
        config: Deployment configuration. This constructor reads
            ``awsRegion``, ``awsAccount`` and the ``data.elasticsearch``
            section (``domainName``, ``version``, ``capacity``,
            ``subnetGroupName``, ``ebs``, ``zoneAwareness``, ``logging``).
        vpc: VPC whose subnet group named by ``subnetGroupName`` hosts
            the domain.
        es_sg: Security group attached to the domain.

    Raises:
        KeyError: If a required configuration key is missing.
    """
    super().__init__(scope, id)

    es_config = config['data']['elasticsearch']
    capacity = es_config['capacity']
    logging_config = es_config['logging']

    # Build ES domain construct parameters
    capacity_config = es.CapacityConfig(
        master_node_instance_type=capacity['masterNodes']['instanceType'],
        master_nodes=capacity['masterNodes']['count'],
        data_node_instance_type=capacity['dataNodes']['instanceType'],
        data_nodes=capacity['dataNodes']['count'],
    )

    vpc_options = es.VpcOptions(
        security_groups=[es_sg],
        subnets=vpc.select_subnets(
            subnet_group_name=es_config['subnetGroupName']).subnets,
    )

    ebs_options = es.EbsOptions(volume_size=es_config['ebs']['volumeSize'])

    zone_awareness = es.ZoneAwarenessConfig(
        availability_zone_count=es_config['zoneAwareness']['count'],
        enabled=es_config['zoneAwareness']['enabled'],
    )

    # The slow-search flag used to be read from the misspelled key
    # 'slowIearchLogEnabled'. Prefer the correct spelling, but keep
    # accepting the legacy key so existing configuration files still work.
    if 'slowSearchLogEnabled' in logging_config:
        slow_search_log_enabled = logging_config['slowSearchLogEnabled']
    else:
        slow_search_log_enabled = logging_config['slowIearchLogEnabled']

    logging_options = es.LoggingOptions(
        app_log_enabled=logging_config['appLogEnabled'],
        audit_log_enabled=logging_config['auditLogEnabled'],
        slow_index_log_enabled=logging_config['slowIndexLogEnabled'],
        slow_search_log_enabled=slow_search_log_enabled,
    )

    # NOTE(review): this resource policy allows every principal to call
    # es:* on the domain; access is presumably restricted by the VPC
    # placement and security group -- confirm this is intended.
    access_policy = iam.PolicyStatement(
        effect=iam.Effect.ALLOW,
        principals=[iam.AnyPrincipal()],
        actions=['es:*'],
        resources=[
            "arn:aws:es:" + config['awsRegion'] + ":" +
            config['awsAccount'] + ":domain/" +
            es_config['domainName'] + "/*"
        ])

    # Create ES domain
    es.Domain(
        self,
        'Domain',
        domain_name=es_config['domainName'],
        version=es.ElasticsearchVersion.of(es_config['version']),
        capacity=capacity_config,
        ebs=ebs_options,
        zone_awareness=zone_awareness,
        vpc_options=vpc_options,
        logging=logging_options,
        access_policies=[access_policy],
    )
def __init__(self, scope: core.Construct, id: str, vpc: ec2.Vpc,
             region: str) -> None:
    """Provision one single-instance Squid Auto Scaling group per AZ.

    Creates the instance role, the S3 bucket holding the Squid
    configuration files, and, for every availability zone of ``vpc``, an
    Auto Scaling group in a public subnet with its user data, security
    group rules, launch lifecycle hook and ``RouteTableIds`` tag. The
    groups are exposed as ``self.squid_asgs``.

    Args:
        scope: Parent construct.
        id: Construct id.
        vpc: VPC that hosts the Squid instances.
        region: Not used by this constructor.

    Raises:
        ValueError: If ``vpc`` has no public subnets.
    """
    super().__init__(scope, id)

    # IAM role attached to the Squid instances
    managed_policies = [
        iam.ManagedPolicy.from_aws_managed_policy_name(policy_name)
        for policy_name in ("CloudWatchAgentServerPolicy",
                            "service-role/AmazonEC2RoleforSSM")
    ]
    instance_role = iam.Role(
        self,
        "squid-role",
        assumed_by=iam.ServicePrincipal("ec2.amazonaws.com"),
        managed_policies=managed_policies)

    # Let the instances update EC2 instance attributes
    modify_attribute_statement = iam.PolicyStatement(
        effect=iam.Effect.ALLOW,
        actions=['ec2:ModifyInstanceAttribute'],
        resources=['*'])
    instance_role.add_to_policy(statement=modify_attribute_statement)

    # Bucket holding the Squid config and whitelist files
    config_bucket = s3.Bucket(
        self, "squid-config", encryption=s3.BucketEncryption.KMS_MANAGED)

    # Upload the config and whitelist files to the bucket
    config_assets = s3_deployment.Source.asset(
        path='./squid_app/squid_config_files/config_files_s3')
    s3_deployment.BucketDeployment(self,
                                   "config",
                                   destination_bucket=config_bucket,
                                   sources=[config_assets])

    # The instance role may read from and write to the bucket
    config_bucket.grant_read_write(identity=instance_role)

    # Latest Amazon Linux 2 AMI
    machine_image = ec2.MachineImage.latest_amazon_linux(
        generation=ec2.AmazonLinuxGeneration.AMAZON_LINUX_2,
        edition=ec2.AmazonLinuxEdition.STANDARD,
        virtualization=ec2.AmazonLinuxVirt.HVM,
        storage=ec2.AmazonLinuxStorage.GENERAL_PURPOSE)

    if not vpc.public_subnets:
        raise ValueError("No public subnets in VPC")

    # One ASG with exactly one instance in each availability zone
    self.squid_asgs = []
    for index, zone in enumerate(vpc.availability_zones, start=1):
        public_subnet_in_zone = ec2.SubnetSelection(
            availability_zones=[zone],
            one_per_az=True,
            subnet_type=ec2.SubnetType.PUBLIC)
        squid_asg = autoscaling.AutoScalingGroup(
            self,
            f"asg-{index}",
            vpc=vpc,
            instance_type=ec2.InstanceType("t3.nano"),
            desired_capacity=1,
            max_capacity=1,
            min_capacity=1,
            machine_image=machine_image,
            role=instance_role,
            vpc_subnets=public_subnet_in_zone,
            health_check=autoscaling.HealthCheck.ec2(
                grace=core.Duration.minutes(5)),
            resource_signal_count=1,
            resource_signal_timeout=core.Duration.minutes(10))

        # Placeholders substituted into the user data script
        substitutions = {
            "__S3BUCKET__": config_bucket.bucket_name,
            "__ASG__": squid_asg.node.default_child.logical_id,
            "__CW_ASG__": "${aws:AutoScalingGroupName}"
        }
        with open(
                "./squid_app/squid_config_files/user_data/squid_user_data.sh",
                'r') as script_file:
            script_template = script_file.read()

        # Attach the substituted script to the ASG launch configuration
        squid_asg.add_user_data(core.Fn.sub(script_template, substitutions))

        # Security group of the Squid instances:
        # inbound from the VPC CIDR on ports 80 and 443
        for description, tcp_port in (("HTTP from VPC", 80),
                                      ("HTTPS from VPC", 443)):
            squid_asg.connections.allow_from(
                other=ec2.Peer.ipv4(vpc.vpc_cidr_block),
                port_range=ec2.Port(protocol=ec2.Protocol.TCP,
                                    string_representation=description,
                                    from_port=tcp_port,
                                    to_port=tcp_port))

        # Launch lifecycle hook publishing to a dedicated SNS topic, so a
        # Lambda can update the route table once the instance is healthy
        hook_topic = sns.Topic(
            self,
            f"squid-asg-{index}-lifecycle-hook-topic",
            display_name=f"Squid ASG {index} Lifecycle Hook topic")
        autoscaling.LifecycleHook(
            self,
            f"asg-hook-{index}",
            auto_scaling_group=squid_asg,
            lifecycle_transition=autoscaling.LifecycleTransition.
            INSTANCE_LAUNCHING,
            notification_target=hooktargets.TopicHook(hook_topic),
            default_result=autoscaling.DefaultResult.ABANDON,
            heartbeat_timeout=core.Duration.minutes(5))

        # Collect the private and isolated subnets of this zone; their
        # route table ids are stored on the ASG as a tag that the Squid
        # Lambda function uses to find the route tables to update
        private_in_zone = (vpc.select_subnets(
            availability_zones=[zone],
            subnet_type=ec2.SubnetType.PRIVATE).subnets
                           if vpc.private_subnets else [])
        isolated_in_zone = (vpc.select_subnets(
            availability_zones=[zone],
            subnet_type=ec2.SubnetType.ISOLATED).subnets
                            if vpc.isolated_subnets else [])

        # Comma-separated route table ids, isolated subnets first
        route_table_ids = ",".join(
            subnet.route_table.route_table_id
            for subnet in isolated_in_zone + private_in_zone)

        core.Tag.add(squid_asg,
                     key='RouteTableIds',
                     value=route_table_ids,
                     apply_to_launched_instances=False)

        self.squid_asgs.append(squid_asg)
class VPCConstruct(core.Construct):
    """VPC with private and public subnet groups plus service endpoints.

    Attributes set by the constructor:
        audit_vpc: The created VPC.
        lambdas_sg: Security group named ``Audit-Lambda`` in that VPC.
    """

    def __init__(self, scope: core.Construct, id_: str,
                 num_of_azs: int) -> None:
        """Build the VPC, its gateway/interface endpoints and the Lambda SG.

        Args:
            scope: Parent construct.
            id_: Construct id, also used as the id of the VPC.
            num_of_azs: Maximum number of availability zones for the VPC.
        """
        super().__init__(scope, id_)

        # IoT, AppConfig and Cloud Map are currently not reachable through
        # a VPC endpoint, so they are accessed through the NAT gateways.
        subnet_groups = [
            SubnetConfiguration(name=PRIVATE_SUBNET_GROUP,
                                subnet_type=SubnetType.PRIVATE,
                                cidr_mask=24),
            SubnetConfiguration(name=PUBLIC_NAT_GWS_SUBNET_GROUP,
                                subnet_type=SubnetType.PUBLIC,
                                cidr_mask=24),
        ]

        # Gateway endpoints are attached to the private subnet group only.
        gateway_services = (
            ('S3', GatewayVpcEndpointAwsService.S3),
            ('DynamoDb', GatewayVpcEndpointAwsService.DYNAMODB),
        )
        gateway_endpoints = {
            endpoint_id: GatewayVpcEndpointOptions(
                service=gateway_service,
                subnets=[
                    SubnetSelection(subnet_group_name=PRIVATE_SUBNET_GROUP)
                ])
            for endpoint_id, gateway_service in gateway_services
        }

        # DNS support and hostnames are both enabled; the original note
        # ties this to the ElasticSearch public domain.
        self.audit_vpc = Vpc(self,
                             id_,
                             max_azs=num_of_azs,
                             subnet_configuration=subnet_groups,
                             gateway_endpoints=gateway_endpoints,
                             enable_dns_support=True,
                             enable_dns_hostnames=True)

        # Interface endpoints, one subnet per availability zone each.
        interface_services = (
            ('SsmVpcEndpoint', InterfaceVpcEndpointAwsService.SSM),
            ('SqsVpcEndpoint', InterfaceVpcEndpointAwsService.SQS),
            ('Ec2VpcEndpoint', InterfaceVpcEndpointAwsService.EC2),
            ('LambdaVpcEndpoint', InterfaceVpcEndpointAwsService.LAMBDA_),
        )
        for endpoint_id, interface_service in interface_services:
            self.audit_vpc.add_interface_endpoint(
                endpoint_id,
                service=interface_service,
                subnets=SubnetSelection(one_per_az=True))

        self.lambdas_sg = SecurityGroup(self,
                                        id='LambdaSg',
                                        vpc=self.audit_vpc,
                                        security_group_name='Audit-Lambda')

    def _get_subnets(self, subnet_group: str) -> List[ISubnet]:
        """Return the subnets of ``audit_vpc`` in the named subnet group."""
        selection = self.audit_vpc.select_subnets(
            subnet_group_name=subnet_group)
        return selection.subnets
def __init__(self, scope: core.Construct, id: str, log_bucket: _s3.Bucket,
             config_table: _dynamodb.Table, tshirt_size: str,
             sink_bucket: _s3.Bucket, vpc: _ec2.Vpc, **kwargs) -> None:
    """Define the batch data generator: EMR roles, state machine, triggers.

    Creates the EMR service and cluster roles, the EMR security groups, a
    configuration Lambda, a Step Functions state machine (configure ->
    create cluster -> add datagen step -> update iterator -> terminate
    cluster), a 30-minute schedule rule for it, and two custom resources
    backed by Lambda functions.

    Args:
        scope: Parent construct.
        id: Construct id.
        log_bucket: Bucket that receives EMR logs under ``data-generator``.
        config_table: DynamoDB table holding the generator configuration
            and the ``batch_iterator`` item.
        tshirt_size: Key into ``DataGenConfig.BATCH_CLUSTER_SIZE`` and
            ``DataGenConfig.BATCH_DATA_SIZE``.
        sink_bucket: Bucket the cluster role may read from and write to.
        vpc: VPC hosting the EMR cluster and its security groups.
        **kwargs: Forwarded to the parent constructor.
    """
    super().__init__(scope, id, **kwargs)

    # --- IAM roles -------------------------------------------------------
    service_role = _iam.Role(
        self, 'BatchEmrServiceRole',
        assumed_by=_iam.ServicePrincipal('elasticmapreduce.amazonaws.com')
    )
    service_role.add_managed_policy(
        _iam.ManagedPolicy.from_aws_managed_policy_name(
            'service-role/AmazonElasticMapReduceRole'))

    cluster_role = _iam.Role(
        self, 'BatchEmrClusterRole',
        assumed_by=_iam.ServicePrincipal("ec2.amazonaws.com")
    )

    # Actions granted on all resources
    unscoped_actions = [
        "glue:CreateDatabase",
        "glue:UpdateDatabase",
        "glue:DeleteDatabase",
        "glue:GetDatabase",
        "glue:GetDatabases",
        "glue:CreateTable",
        "glue:UpdateTable",
        "glue:DeleteTable",
        "glue:GetTable",
        "glue:GetTables",
        "glue:GetTableVersions",
        "glue:CreatePartition",
        "glue:BatchCreatePartition",
        "glue:UpdatePartition",
        "glue:DeletePartition",
        "glue:BatchDeletePartition",
        "glue:GetPartition",
        "glue:GetPartitions",
        "glue:BatchGetPartition",
        "glue:CreateUserDefinedFunction",
        "glue:UpdateUserDefinedFunction",
        "glue:DeleteUserDefinedFunction",
        "glue:GetUserDefinedFunction",
        "glue:GetUserDefinedFunctions",
        "cloudwatch:PutMetricData",
        "dynamodb:ListTables",
        "s3:HeadBucket",
        "ec2:Describe*",
    ]
    # Actions granted on the sink bucket and its objects
    sink_bucket_actions = [
        "s3:AbortMultipartUpload",
        "s3:CreateBucket",
        "s3:DeleteObject",
        "s3:GetBucketVersioning",
        "s3:GetObject",
        "s3:GetObjectTagging",
        "s3:GetObjectVersion",
        "s3:ListBucket",
        "s3:ListBucketMultipartUploads",
        "s3:ListBucketVersions",
        "s3:ListMultipartUploadParts",
        "s3:PutBucketVersioning",
        "s3:PutObject",
        "s3:PutObjectTagging"
    ]
    binaries_arn_prefix = 'arn:aws:s3:::' + ARA_BUCKET_NAME + BINARIES

    cluster_statements = [
        _iam.PolicyStatement(actions=unscoped_actions, resources=['*']),
        _iam.PolicyStatement(
            actions=['s3:GetObject'],
            resources=[
                binaries_arn_prefix + DataGenConfig.DSDGEN_INSTALL_SCRIPT,
                binaries_arn_prefix + DataGenConfig.JAR_FILE
            ]
        ),
        _iam.PolicyStatement(
            actions=['s3:PutObject'],
            resources=[log_bucket.bucket_arn + "/data-generator/*"]
        ),
        _iam.PolicyStatement(
            actions=sink_bucket_actions,
            resources=[
                sink_bucket.bucket_arn + '/*',
                sink_bucket.bucket_arn
            ]
        ),
    ]
    _iam.Policy(
        self, 'BatchEmrClusterPolicy',
        statements=cluster_statements,
        roles=[cluster_role]
    )

    cluster_role.add_managed_policy(
        _iam.ManagedPolicy.from_aws_managed_policy_name(
            'AmazonSSMManagedInstanceCore'))

    # The instance profile carries the same name as the cluster role
    _iam.CfnInstanceProfile(
        self, 'BatchEmrClusterInstanceProfile',
        roles=[cluster_role.role_name],
        instance_profile_name=cluster_role.role_name
    )

    # --- Security groups for the EMR cluster (private subnet) -------------
    # https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-man-sec-groups.html#emr-sg-elasticmapreduce-master-private
    master_sg = _ec2.SecurityGroup(
        self, 'ElasticMapReduce-Master-Private', vpc=vpc)
    slave_sg = _ec2.SecurityGroup(
        self, 'ElasticMapReduce-Slave-Private', vpc=vpc)
    service_sg = _ec2.SecurityGroup(
        self, 'ElasticMapReduce-ServiceAccess', vpc=vpc,
        allow_all_outbound=False)

    # Service SG used by the proxy instance
    service_sg.add_ingress_rule(master_sg, _ec2.Port.tcp(9443))
    for node_sg in (master_sg, slave_sg):
        service_sg.add_egress_rule(node_sg, _ec2.Port.tcp(8443))

    # EMR master, then EMR slave: all ICMP/TCP/UDP from both node groups,
    # plus TCP 8443 from the service access group
    all_traffic = (_ec2.Port.all_icmp, _ec2.Port.all_tcp, _ec2.Port.all_udp)
    for node_sg in (master_sg, slave_sg):
        for peer_sg in (master_sg, slave_sg):
            for make_port in all_traffic:
                node_sg.add_ingress_rule(peer_sg, make_port())
        node_sg.add_ingress_rule(service_sg, _ec2.Port.tcp(8443))

    # --- Lambda that prepares the datagen step parameters ------------------
    with open('common/common_cdk/lambda/datagen_config.py', 'r') as source_file:
        datagen_config_source = source_file.read()

    configure_datagen_function = _lambda.SingletonFunction(
        self, 'BatchConfigureDatagenLambda',
        uuid="58a9a222-ff07-11ea-adc1-0242ac120002",
        runtime=_lambda.Runtime.PYTHON_3_7,
        code=_lambda.Code.inline(datagen_config_source),
        handler='index.handler',
        function_name='datagen-config',
        environment={
            'TABLE_NAME': config_table.table_name,
            'JAR_LOCATION': BINARIES_LOCATION + DataGenConfig.JAR_FILE,
        },
        timeout=core.Duration.seconds(10)
    )
    configure_datagen_function.role.add_to_policy(
        _iam.PolicyStatement(
            actions=['dynamodb:GetItem', 'dynamodb:PutItem'],
            resources=[config_table.table_arn]
        )
    )

    # --- State machine tasks -----------------------------------------------
    terminate_cluster = _sfn_tasks.EmrTerminateCluster(
        self, 'BatchDeleteCluster',
        cluster_id=_sfn.TaskInput.from_data_at("$.Emr.Cluster.Id").value,
        integration_pattern=_sfn.IntegrationPattern.RUN_JOB,
    )

    # Error path: terminate the cluster, then fail the execution
    terminate_after_error = _sfn_tasks.EmrTerminateCluster(
        self, 'BatchDeleteClusterError',
        cluster_id=_sfn.TaskInput.from_data_at("$.Emr.Cluster.Id").value,
        integration_pattern=_sfn.IntegrationPattern.RUN_JOB,
    )
    terminate_cluster_error = terminate_after_error.next(
        _sfn.Fail(self, 'StepFailure'))

    emr = _sfn_tasks.EmrCreateCluster

    def instance_types(weighted_types):
        """Build instance type configs from (type, weight) pairs."""
        return [
            emr.InstanceTypeConfigProperty(
                instance_type=instance_type,
                weighted_capacity=weight
            )
            for instance_type, weight in weighted_types
        ]

    def spot_with_on_demand_fallback():
        """Spot provisioning that switches to on-demand after 5 minutes."""
        return emr.InstanceFleetProvisioningSpecificationsProperty(
            spot_specification=emr.SpotProvisioningSpecificationProperty(
                timeout_action=emr.SpotTimeoutAction.SWITCH_TO_ON_DEMAND,
                timeout_duration_minutes=5
            )
        )

    master_fleet = emr.InstanceFleetConfigProperty(
        instance_fleet_type=emr.InstanceRoleType.MASTER,
        instance_type_configs=instance_types((
            ('m5.xlarge', 1),
            ('m5a.xlarge', 1),
            ('m4.xlarge', 1),
            ('m5d.xlarge', 1),
        )),
        launch_specifications=spot_with_on_demand_fallback(),
        target_on_demand_capacity=0,
        target_spot_capacity=1
    )
    core_fleet = emr.InstanceFleetConfigProperty(
        instance_fleet_type=emr.InstanceRoleType.CORE,
        instance_type_configs=instance_types((
            ('m5.xlarge', 1),
            ('m5.2xlarge', 2),
            ('m5a.xlarge', 1),
            ('m5a.2xlarge', 2),
            ('m4.xlarge', 1),
        )),
        launch_specifications=spot_with_on_demand_fallback(),
        target_on_demand_capacity=0,
        target_spot_capacity=DataGenConfig.BATCH_CLUSTER_SIZE[tshirt_size]
    )

    install_dsdgen = emr.BootstrapActionConfigProperty(
        name="dsdgen-install",
        script_bootstrap_action=emr.ScriptBootstrapActionConfigProperty(
            path=BINARIES_LOCATION + DataGenConfig.DSDGEN_INSTALL_SCRIPT,
        )
    )
    cluster_applications = [
        emr.ApplicationConfigProperty(name=application_name)
        for application_name in ("spark", "hadoop")
    ]
    cluster_instances = emr.InstancesConfigProperty(
        emr_managed_master_security_group=master_sg.security_group_id,
        emr_managed_slave_security_group=slave_sg.security_group_id,
        service_access_security_group=service_sg.security_group_id,
        ec2_subnet_ids=vpc.select_subnets().subnet_ids,
        instance_fleets=[master_fleet, core_fleet]
    )

    create_cluster = emr(
        self, "BatchCreateEMRCluster",
        name="BatchDatagenCluster",
        result_path="$.Emr",
        release_label='emr-5.30.1',
        log_uri=log_bucket.s3_url_for_object() + "/data-generator",
        cluster_role=cluster_role,
        service_role=service_role,
        bootstrap_actions=[install_dsdgen],
        applications=cluster_applications,
        instances=cluster_instances
    ).add_catch(handler=terminate_cluster_error, result_path="$.error")

    # Payload passed as text to the configuration Lambda; parallelism is
    # twice the configured data size
    data_size = DataGenConfig.BATCH_DATA_SIZE[tshirt_size]
    datagen_payload = ''.join((
        '{',
        '"Param": "batch_iterator",',
        '"Module": "batch",',
        '"SinkBucket": "', sink_bucket.s3_url_for_object(), '",',
        '"Parallelism": "', str(int(data_size) * 2), '",',
        '"DataSize": "', data_size, '",',
        '"TmpBucket": "fake-bucket"',
        '}',
    ))
    configure_datagen = _sfn_tasks.LambdaInvoke(
        self, "BatchConfigureDatagenTask",
        lambda_function=configure_datagen_function,
        payload=_sfn.TaskInput.from_text(datagen_payload),
        result_path='$.Config'
    ).add_catch(handler=terminate_cluster_error, result_path="$.error")

    # Raw state definition; it refers to the update-iterator and
    # error-termination states by their state names
    add_step_state = {
        "Type": "Task",
        "Resource": "arn:aws:states:::elasticmapreduce:addStep.sync",
        "Parameters": {
            "ClusterId.$": "$.Emr.Cluster.Id",
            "Step": {
                "Name": "DatagenStep",
                "ActionOnFailure": "CONTINUE",
                "HadoopJarStep": {
                    "Jar": "command-runner.jar",
                    "Args.$": "$.Config.Payload.StepParam"
                }
            }
        },
        "ResultPath": "$.Step",
        "Next": "BatchUpdateIterator",
        "Catch": [
            {
                "ErrorEquals": ["States.ALL"],
                "Next": "BatchDeleteClusterError",
                "ResultPath": "$.error"
            }
        ]
    }
    add_datagen_step = _sfn.CustomState(
        self, 'BatchAddDataGenStep',
        state_json=add_step_state
    )

    # Increment the batch iterator, starting from 0 when it is absent
    update_iterator = _sfn_tasks.DynamoUpdateItem(
        self, 'BatchUpdateIterator',
        table=config_table,
        key={
            'param': _sfn_tasks.DynamoAttributeValue.from_string('batch_iterator')
        },
        update_expression='SET iterator = if_not_exists(iterator, :start) + :inc',
        expression_attribute_values={
            ":inc": _sfn_tasks.DynamoAttributeValue.from_number(1),
            ":start": _sfn_tasks.DynamoAttributeValue.from_number(0)
        },
        result_path=_sfn.JsonPath.DISCARD
    )

    definition = configure_datagen.next(create_cluster)
    definition = definition.next(add_datagen_step)
    definition = definition.next(update_iterator)
    definition = definition.next(terminate_cluster)

    datagen_stepfunctions = _sfn.StateMachine(
        self, "BatchDataGenStepFunctions",
        definition=definition,
        timeout=core.Duration.minutes(30)
    )

    # Permissions needed by the custom addStep state
    datagen_stepfunctions.add_to_role_policy(
        _iam.PolicyStatement(
            actions=[
                'elasticmapreduce:AddJobFlowSteps',
                'elasticmapreduce:DescribeStep'
            ],
            resources=['*']
        )
    )
    emr_cleanup_role_arn = (
        "arn:aws:iam::*:role/aws-service-role/"
        "elasticmapreduce.amazonaws.com*/AWSServiceRoleForEMRCleanup*"
    )
    datagen_stepfunctions.add_to_role_policy(
        _iam.PolicyStatement(
            actions=[
                "iam:CreateServiceLinkedRole",
                "iam:PutRolePolicy"
            ],
            resources=[emr_cleanup_role_arn],
            conditions={
                "StringLike": {
                    "iam:AWSServiceName": [
                        "elasticmapreduce.amazonaws.com",
                        "elasticmapreduce.amazonaws.com.cn"
                    ]
                }
            }
        )
    )

    # --- Triggers ----------------------------------------------------------
    # Scheduled rule targeting the state machine (minute field '0/30')
    step_trigger = _events.Rule(
        self, 'BatchSteptrigger',
        schedule=_events.Schedule.cron(minute='0/30',
                                       hour='*',
                                       month='*',
                                       week_day='*',
                                       year='*')
    )
    step_trigger.add_target(
        _events_targets.SfnStateMachine(machine=datagen_stepfunctions))

    # Custom resource whose handler is allowed to start executions
    with open('common/common_cdk/lambda/stepfunctions_trigger.py', 'r') as source_file:
        trigger_source = source_file.read()

    stepfunctions_trigger_lambda = _lambda.SingletonFunction(
        self, 'BatchStepFunctionsTriggerLambda',
        uuid="9597f6f2-f840-11ea-adc1-0242ac120002",
        runtime=_lambda.Runtime.PYTHON_3_7,
        code=_lambda.Code.inline(trigger_source),
        handler='index.handler',
        function_name='stepfunctions-batch-datagen-trigger'
    )
    stepfunctions_trigger_lambda.role.add_to_policy(
        _iam.PolicyStatement(
            actions=["states:StartExecution"],
            resources=['*']
        )
    )

    trigger_step_lambda_provider = _custom_resources.Provider(
        self, 'StepFunctionsTriggerLambdaProvider',
        on_event_handler=stepfunctions_trigger_lambda
    )
    core.CustomResource(
        self, 'StepFunctionsTrigger',
        service_token=trigger_step_lambda_provider.service_token,
        properties={
            "stepArn": datagen_stepfunctions.state_machine_arn
        }
    )

    # --- Terminate clusters ------------------------------------------------
    with open('common/common_cdk/lambda/stepfunctions_terminate_emr.py', 'r') as source_file:
        terminate_source = source_file.read()

    sfn_terminate = _lambda.SingletonFunction(
        self, 'StepFuncTerminateBatch',
        uuid='58a9a422-ff07-11ea-adc1-0242ac120002',
        runtime=_lambda.Runtime.PYTHON_3_7,
        code=_lambda.Code.inline(terminate_source),
        handler='index.handler',
        timeout=core.Duration.minutes(5)
    )
    sfn_terminate.role.add_to_policy(
        _iam.PolicyStatement(
            actions=[
                'elasticmapreduce:ListClusters',
                'elasticmapreduce:TerminateJobFlows',
                'states:ListStateMachines',
                'states:ListExecutions',
                'states:StopExecution'
            ],
            resources=['*']
        )
    )

    sfn_terminate_provider = _custom_resources.Provider(
        self, 'StepFuncTerminateBatchLambdaProvider',
        on_event_handler=sfn_terminate
    )
    core.CustomResource(
        self, 'StepFuncTerminateBatchCustomResource',
        service_token=sfn_terminate_provider.service_token,
        properties={
            "state_machine": 'BatchDatagen'
        })
class DataLakeFoundations(NestedStack):
    """Nested stack with the shared data lake resources.

    Creates the Glue databases, the S3 buckets, the Glue audit trail, the
    VPC with an S3 gateway endpoint, and three IAM groups, and exposes
    them through read-only properties.
    """

    def __init__(self, scope: Construct, id: str, **kwargs) -> None:
        """Create all foundation resources.

        Args:
            scope: Parent construct.
            id: Construct id.
            **kwargs: Forwarded to ``NestedStack``.
        """
        super().__init__(scope, id, **kwargs)

        # Glue data catalog databases used in the data lake
        data_catalog = DataLakeCatalog(self, 'DataLakeCatalog')
        self.__raw_glue_db = data_catalog.raw_database
        self.__clean_glue_db = data_catalog.clean_database
        self.__curated_glue_db = data_catalog.transform_database
        audit_database_name = 'ara_audit_data_' + self.account
        self.__audit_glue_db = Database(self, 'AuditGlueDB',
                                        database_name=audit_database_name)

        # S3 buckets for the data lake, plus a separate logs bucket
        data_storage = DataLakeStorage(self, 'DataLakeStorage')
        logs_auto_empty_bucket = AutoEmptyBucket(
            self, 'Logs',
            bucket_name='ara-logs-' + self.account,
            uuid=AutoEmptyConfig.FOUNDATIONS_UUID
        )
        self.__logs_s3_bucket = logs_auto_empty_bucket.bucket
        self.__raw_s3_bucket = data_storage.raw_bucket
        self.__clean_s3_bucket = data_storage.clean_bucket
        self.__curated_s3_bucket = data_storage.transform_bucket

        # Audit trail: the curated bucket is the audit bucket and its name
        # is passed as the audit table
        AuditTrailGlue(
            self, 'GlueAudit',
            log_bucket=self.__logs_s3_bucket,
            audit_bucket=self.__curated_s3_bucket,
            audit_db=self.__audit_glue_db,
            audit_table=self.__curated_s3_bucket.bucket_name
        )

        # VPC used for the overall data lake
        # (same VPC, different subnets for the modules)
        self.__vpc = Vpc(self, 'Vpc')
        self.__public_subnets = self.__vpc.select_subnets(
            subnet_type=SubnetType.PUBLIC)
        self.__private_subnets = self.__vpc.select_subnets(
            subnet_type=SubnetType.PRIVATE)
        s3_endpoint_subnets = [
            SubnetSelection(subnet_type=SubnetType.PUBLIC),
            SubnetSelection(subnet_type=SubnetType.PRIVATE),
        ]
        self.__vpc.add_gateway_endpoint(
            "S3GatewayEndpoint",
            service=GatewayVpcEndpointAwsService.S3,
            subnets=s3_endpoint_subnets)

        # IAM groups
        self.__admin_group = Group(
            self, 'GroupAdmins', group_name='ara-admins')
        self.__analysts_group = Group(
            self, 'GroupAnalysts', group_name='ara-analysts')
        self.__developers_group = Group(
            self, 'GroupDevelopers', group_name='ara-developers')

    @property
    def raw_s3_bucket(self):
        """Raw bucket taken from ``DataLakeStorage``."""
        return self.__raw_s3_bucket

    @property
    def clean_s3_bucket(self):
        """Clean bucket taken from ``DataLakeStorage``."""
        return self.__clean_s3_bucket

    @property
    def curated_s3_bucket(self):
        """Transform bucket of ``DataLakeStorage``, exposed as curated."""
        return self.__curated_s3_bucket

    @property
    def raw_glue_db(self):
        """Raw database taken from ``DataLakeCatalog``."""
        return self.__raw_glue_db

    @property
    def clean_glue_db(self):
        """Clean database taken from ``DataLakeCatalog``."""
        return self.__clean_glue_db

    @property
    def curated_glue_db(self):
        """Transform database of ``DataLakeCatalog``, exposed as curated."""
        return self.__curated_glue_db

    @property
    def audit_glue_db(self):
        """Glue database created as ``AuditGlueDB``."""
        return self.__audit_glue_db

    @property
    def logs_s3_bucket(self):
        """Bucket of the ``Logs`` auto-empty bucket."""
        return self.__logs_s3_bucket

    @property
    def vpc(self):
        """VPC created for the data lake."""
        return self.__vpc

    @property
    def private_subnets_selection(self):
        """Selection of the VPC's private subnets."""
        return self.__private_subnets

    @property
    def public_subnets_selection(self):
        """Selection of the VPC's public subnets."""
        return self.__public_subnets

    @property
    def admin_group(self):
        """IAM group named ``ara-admins``."""
        return self.__admin_group

    @property
    def analysts_group(self):
        """IAM group named ``ara-analysts``."""
        return self.__analysts_group

    @property
    def developers_group(self):
        """IAM group named ``ara-developers``."""
        return self.__developers_group