def __init__(self, scope: core.Construct, id: str, vpc_cidr: str, **kwargs) -> None:
    """Provision a two-AZ VPC with three subnet groups per AZ.

    Subnet groups: "BASTION" (public, hosts the NAT gateways), "ECS"
    (private, egress via NAT), and "DBS" (public).  An S3 gateway endpoint
    is attached to the public subnets.

    :param scope: parent construct.
    :param id: construct id; also reused as the Vpc construct id.
    :param vpc_cidr: CIDR block for the VPC (e.g. "10.0.0.0/16").
    """
    super().__init__(scope, id, **kwargs)
    self._vpc = ec2.Vpc(
        self, id,
        cidr=vpc_cidr,
        # DNS support + hostnames are required by several AWS services
        # (e.g. interface endpoints, EFS mount targets).
        enable_dns_hostnames=True,
        enable_dns_support=True,
        max_azs=2,
        subnet_configuration=[
            ec2.SubnetConfiguration(subnet_type=ec2.SubnetType.PUBLIC, name="BASTION", cidr_mask=24),
            ec2.SubnetConfiguration(subnet_type=ec2.SubnetType.PRIVATE, name="ECS", cidr_mask=24),
            # NOTE(review): the "DBS" (database) group is declared PUBLIC,
            # unlike the private "ECS" group — confirm this exposure is
            # intentional; database subnets are usually PRIVATE/ISOLATED.
            ec2.SubnetConfiguration(subnet_type=ec2.SubnetType.PUBLIC, name="DBS", cidr_mask=24)
        ],
        # Place one NAT gateway per AZ inside the public "BASTION" subnets.
        nat_gateway_provider=ec2.NatProvider.gateway(),
        nat_gateway_subnets=ec2.SubnetSelection(
            one_per_az=True, subnet_group_name="BASTION"),
        # Keep S3 traffic on the AWS backbone via a gateway endpoint;
        # route-table entries are added for the selected public subnets.
        gateway_endpoints={
            's3': ec2.GatewayVpcEndpointOptions(
                service=ec2.GatewayVpcEndpointAwsService.S3,
                subnets=[
                    ec2.SubnetSelection(one_per_az=True,
                                        subnet_type=ec2.SubnetType.PUBLIC)
                ])
        })
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    """Stack with a VPC (DynamoDB gateway endpoint) and a sample table."""
    super().__init__(scope, construct_id, **kwargs)

    # Route DynamoDB traffic through a gateway VPC endpoint so it never
    # leaves the AWS network.  (vpc.add_gateway_endpoint("DynamoDB", ...)
    # after construction would achieve the same thing.)
    dynamodb_endpoint = aws_ec2.GatewayVpcEndpointOptions(
        service=aws_ec2.GatewayVpcEndpointAwsService.DYNAMODB)
    demo_vpc = aws_ec2.Vpc(
        self,
        "DynamodbVPC",
        max_azs=2,
        gateway_endpoints={"DynamoDB": dynamodb_endpoint})

    # Composite key: string partition key + numeric sort key.
    hash_key = aws_dynamodb.Attribute(
        name="pkid", type=aws_dynamodb.AttributeType.STRING)
    range_key = aws_dynamodb.Attribute(
        name="sortkey", type=aws_dynamodb.AttributeType.NUMBER)

    # Provisioned-capacity table with TTL enabled on the "ttl" attribute.
    simple_table = aws_dynamodb.Table(
        self,
        "SimpleDynamoDbTable",
        table_name="SimpleTable",
        partition_key=hash_key,
        sort_key=range_key,
        time_to_live_attribute="ttl",
        billing_mode=aws_dynamodb.BillingMode.PROVISIONED,
        read_capacity=15,
        write_capacity=5,
    )
def __init__(self, scope: core.Construct, id: str, props, **kwargs) -> None:
    """Build the shared network stack and publish the VPC via ``props``.

    Creates a /16 VPC spanning up to three AZs with a public and a private
    subnet group per AZ, two NAT gateways, an S3 gateway endpoint, and a
    flow log (REJECT traffic only) delivered to S3.
    """
    super().__init__(scope, id, **kwargs)

    # Two groups across up to 3 AZs => up to 6 subnets in total.
    subnet_groups = [
        ec2.SubnetConfiguration(subnet_type=ec2.SubnetType.PUBLIC,
                                name="PublicSubnet",
                                cidr_mask=24),
        ec2.SubnetConfiguration(subnet_type=ec2.SubnetType.PRIVATE,
                                name="PrivateSubnet",
                                cidr_mask=24),
    ]
    self.vpc = ec2.Vpc(
        self,
        "VPC",
        max_azs=3,
        cidr="10.10.0.0/16",
        subnet_configuration=subnet_groups,
        nat_gateways=2,
        gateway_endpoints={
            "S3": ec2.GatewayVpcEndpointOptions(
                service=ec2.GatewayVpcEndpointAwsService.S3)
        })

    # Only capture rejected traffic; ship the records straight to S3.
    self.vpc.add_flow_log(
        "FlowLogS3",
        destination=ec2.FlowLogDestination.to_s3(),
        traffic_type=ec2.FlowLogTrafficType.REJECT)

    # Hand the VPC to downstream stacks through the shared props mapping.
    props["vpc"] = self.vpc
def __init__(self, scope: core.Construct, construct_id: str, **kwargs) -> None:
    """Create a three-tier VPC: public, private (NAT egress), and isolated
    subnets per AZ, plus an S3 gateway endpoint and VPC flow logs.
    """
    super().__init__(scope, construct_id, **kwargs)
    self.vpc = ec2.Vpc(
        self,
        "VPC",
        # Single shared NAT gateway (cost over AZ-level redundancy).
        nat_gateways=1,
        subnet_configuration=[
            ec2.SubnetConfiguration(name="public",
                                    subnet_type=ec2.SubnetType.PUBLIC,
                                    cidr_mask=24),
            ec2.SubnetConfiguration(name="private",
                                    subnet_type=ec2.SubnetType.PRIVATE,
                                    cidr_mask=24),
            # ISOLATED subnets get no internet route at all (no NAT/IGW).
            ec2.SubnetConfiguration(name="isolated",
                                    subnet_type=ec2.SubnetType.ISOLATED,
                                    cidr_mask=24)
        ],
        # Keep S3 traffic inside the AWS network via a gateway endpoint.
        gateway_endpoints={
            "S3": ec2.GatewayVpcEndpointOptions(
                service=ec2.GatewayVpcEndpointAwsService.S3)
        },
        # Empty FlowLogOptions() -> CDK defaults (presumably ALL traffic
        # to a CloudWatch Logs group — confirm against the CDK version).
        flow_logs={"FlowLogs": ec2.FlowLogOptions()})
def provision_vpc(self, name: str, vpc: VPC):
    """Create (or look up) the VPC, pod subnets, and interface endpoints.

    When ``vpc.create`` is false, an existing VPC is imported by id and the
    method returns early.  Otherwise a new VPC is created with public and
    private subnet groups, an S3 gateway endpoint, NAT gateways, a secondary
    100.64.0.0/16 CIDR carved into one /18 pod subnet per AZ, and interface
    endpoints for the services EKS nodes need in private subnets.

    :param name: base name used for subnet group names and tags.
    :param vpc: VPC settings (create flag, id, cidr, max_azs).
    """
    self.public_subnet_name = f"{name}-public"
    self.private_subnet_name = f"{name}-private"
    if not vpc.create:
        # BUG FIX: Vpc.from_lookup is a static factory whose first two
        # positional parameters are (scope, id).  The original call passed
        # "Vpc" as the scope and omitted the construct id, which fails at
        # synth time.
        self.vpc = ec2.Vpc.from_lookup(self.scope, "Vpc", vpc_id=vpc.id)
        return

    nat_provider = ec2.NatProvider.gateway()
    self.vpc = ec2.Vpc(
        self.scope,
        "VPC",
        max_azs=vpc.max_azs,
        cidr=vpc.cidr,
        subnet_configuration=[
            ec2.SubnetConfiguration(
                subnet_type=ec2.SubnetType.PUBLIC,
                name=self.public_subnet_name,
                cidr_mask=24,  # fixed mask: can't use token ids here
            ),
            ec2.SubnetConfiguration(
                subnet_type=ec2.SubnetType.PRIVATE,
                name=self.private_subnet_name,
                cidr_mask=24,  # fixed mask: can't use token ids here
            ),
        ],
        gateway_endpoints={
            "S3": ec2.GatewayVpcEndpointOptions(
                service=ec2.GatewayVpcEndpointAwsService.S3),
        },
        nat_gateway_provider=nat_provider,
    )
    cdk.Tags.of(self.vpc).add("Name", name)
    cdk.CfnOutput(self.scope, "vpc-output", value=self.vpc.vpc_cidr_block)

    # Secondary CIDR for pod networking.
    # Approach taken from: https://github.com/aws/aws-cdk/issues/9573
    pod_cidr = ec2.CfnVPCCidrBlock(self.scope,
                                   "PodCidr",
                                   vpc_id=self.vpc.vpc_id,
                                   cidr_block="100.64.0.0/16")
    # One /18 pod subnet per AZ: 100.64.0.0/18, 100.64.64.0/18, ...
    # (a /18 spans 64 "third octet" values, hence the += 64 stride).
    octet = 0
    for az in self.vpc.availability_zones:
        pod_subnet = ec2.PrivateSubnet(
            self.scope,
            f"{name}-pod-{octet}",  # can't use a parameter/token in this name
            vpc_id=self.vpc.vpc_id,
            availability_zone=az,
            cidr_block=f"100.64.{octet}.0/18",
        )
        # Egress through the NAT gateway that lives in the same AZ.
        pod_subnet.add_default_nat_route(
            next(gw for gw in nat_provider.configured_gateways
                 if gw.az == az).gateway_id)
        # The subnet can only exist once the secondary CIDR is associated.
        pod_subnet.node.add_dependency(pod_cidr)
        # TODO: need to tag
        octet += 64

    for endpoint in [
            "ec2",  # Only these first three have predefined consts
            "sts",
            "ecr.api",
            "autoscaling",
            "ecr.dkr",
    ]:
        # TODO: Do we need an s3 interface as well? or just the gateway?
        self.vpc_endpoint = ec2.InterfaceVpcEndpoint(
            self.scope,
            f"{endpoint}-ENDPOINT",
            vpc=self.vpc,
            service=ec2.InterfaceVpcEndpointAwsService(endpoint, port=443),
            # private_dns_enabled=True,
            subnets=ec2.SubnetSelection(
                subnet_type=ec2.SubnetType.PRIVATE),
        )
def __init__(self, scope: Construct, id: str, **kwargs) -> None:
    """Neptune hands-on-lab stack: VPC, Neptune cluster with one replica,
    and a SageMaker notebook (Neptune workbench) wired to the cluster.
    """
    super().__init__(scope, id, **kwargs)

    # Two-AZ VPC with an S3 gateway endpoint (used by the notebook's
    # lifecycle script to pull the graph-notebook tarball from S3).
    vpc = aws_ec2.Vpc(self, "NeptuneHolVPC",
        max_azs=2,
        gateway_endpoints={
            "S3": aws_ec2.GatewayVpcEndpointOptions(
                service=aws_ec2.GatewayVpcEndpointAwsService.S3
            )
        }
    )

    # Client-side SG: attached to anything that talks to Neptune.
    sg_use_graph_db = aws_ec2.SecurityGroup(self, "NeptuneClientSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for neptune client',
        security_group_name='use-neptune-client'
    )
    cdk.Tags.of(sg_use_graph_db).add('Name', 'use-neptune-client')

    # Server-side SG: attached to the Neptune cluster itself.
    sg_graph_db = aws_ec2.SecurityGroup(self, "NeptuneSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for neptune',
        security_group_name='neptune-server'
    )
    cdk.Tags.of(sg_graph_db).add('Name', 'neptune-server')

    # Allow Neptune's port 8182 from cluster members and from clients.
    sg_graph_db.add_ingress_rule(peer=sg_graph_db, connection=aws_ec2.Port.tcp(8182), description='neptune-server')
    sg_graph_db.add_ingress_rule(peer=sg_use_graph_db, connection=aws_ec2.Port.tcp(8182), description='use-neptune-client')

    # Neptune lives in the private (NAT-egress) subnets only.
    graph_db_subnet_group = aws_neptune.CfnDBSubnetGroup(self, 'NeptuneHolSubnetGroup',
        db_subnet_group_description='subnet group for neptune hol',
        subnet_ids=vpc.select_subnets(subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).subnet_ids,
        db_subnet_group_name='neptune-hol'
    )

    graph_db = aws_neptune.CfnDBCluster(self, 'NeptuneHol',
        availability_zones=vpc.availability_zones,
        db_subnet_group_name=graph_db_subnet_group.db_subnet_group_name,
        db_cluster_identifier='neptune-hol',
        backup_retention_period=1,
        preferred_backup_window='08:45-09:15',
        preferred_maintenance_window='sun:18:00-sun:18:30',
        vpc_security_group_ids=[sg_graph_db.security_group_id]
    )
    # Cfn resources don't infer this ordering; make it explicit.
    graph_db.add_depends_on(graph_db_subnet_group)

    # Primary (writer) instance in the first AZ.
    graph_db_instance = aws_neptune.CfnDBInstance(self, 'NeptuneHolInstance',
        db_instance_class='db.r5.large',
        allow_major_version_upgrade=False,
        auto_minor_version_upgrade=False,
        availability_zone=vpc.availability_zones[0],
        db_cluster_identifier=graph_db.db_cluster_identifier,
        db_instance_identifier='neptune-hol',
        preferred_maintenance_window='sun:18:00-sun:18:30'
    )
    graph_db_instance.add_depends_on(graph_db)

    # Read replica in the last AZ for availability.
    graph_db_replica_instance = aws_neptune.CfnDBInstance(self, 'NeptuneHolReplicaInstance',
        db_instance_class='db.r5.large',
        allow_major_version_upgrade=False,
        auto_minor_version_upgrade=False,
        availability_zone=vpc.availability_zones[-1],
        db_cluster_identifier=graph_db.db_cluster_identifier,
        db_instance_identifier='neptune-hol-replica',
        preferred_maintenance_window='sun:18:00-sun:18:30'
    )
    graph_db_replica_instance.add_depends_on(graph_db)
    graph_db_replica_instance.add_depends_on(graph_db_instance)

    # Notebook role: read the shared aws-neptune-notebook bucket and
    # connect to this specific cluster via IAM ("neptune-db:connect").
    sagemaker_notebook_role_policy_doc = aws_iam.PolicyDocument()
    sagemaker_notebook_role_policy_doc.add_statements(aws_iam.PolicyStatement(**{
        "effect": aws_iam.Effect.ALLOW,
        "resources": ["arn:aws:s3:::aws-neptune-notebook",
        "arn:aws:s3:::aws-neptune-notebook/*"],
        "actions": ["s3:GetObject",
        "s3:ListBucket"]
    }))
    sagemaker_notebook_role_policy_doc.add_statements(aws_iam.PolicyStatement(**{
        "effect": aws_iam.Effect.ALLOW,
        "resources": ["arn:aws:neptune-db:{region}:{account}:{cluster_id}/*".format(
            region=cdk.Aws.REGION, account=cdk.Aws.ACCOUNT_ID, cluster_id=graph_db.attr_cluster_resource_id)],
        "actions": ["neptune-db:connect"]
    }))

    sagemaker_notebook_role = aws_iam.Role(self, 'SageMakerNotebookForNeptuneWorkbenchRole',
        role_name='AWSNeptuneNotebookRole-NeptuneHol',
        assumed_by=aws_iam.ServicePrincipal('sagemaker.amazonaws.com'),
        #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
        inline_policies={
            'AWSNeptuneNotebook': sagemaker_notebook_role_policy_doc
        }
    )

    # on-start script: point the graph-notebook at this cluster, then
    # install the Neptune workbench tooling from S3.
    neptune_wb_lifecycle_content = '''#!/bin/bash
sudo -u ec2-user -i <<'EOF'
echo "export GRAPH_NOTEBOOK_AUTH_MODE=DEFAULT" >> ~/.bashrc
echo "export GRAPH_NOTEBOOK_HOST={NeptuneClusterEndpoint}" >> ~/.bashrc
echo "export GRAPH_NOTEBOOK_PORT={NeptuneClusterPort}" >> ~/.bashrc
echo "export NEPTUNE_LOAD_FROM_S3_ROLE_ARN=''" >> ~/.bashrc
echo "export AWS_REGION={AWS_Region}" >> ~/.bashrc
aws s3 cp s3://aws-neptune-notebook/graph_notebook.tar.gz /tmp/graph_notebook.tar.gz
rm -rf /tmp/graph_notebook
tar -zxvf /tmp/graph_notebook.tar.gz -C /tmp
/tmp/graph_notebook/install.sh
EOF
'''.format(NeptuneClusterEndpoint=graph_db.attr_endpoint,
    NeptuneClusterPort=graph_db.attr_port,
    AWS_Region=cdk.Aws.REGION)

    neptune_wb_lifecycle_config_prop = aws_sagemaker.CfnNotebookInstanceLifecycleConfig.NotebookInstanceLifecycleHookProperty(
        content=cdk.Fn.base64(neptune_wb_lifecycle_content)
    )

    # NOTE(review): construct id 'Npetune...' is misspelled; left as-is
    # because renaming the id would replace the deployed resource.
    neptune_wb_lifecycle_config = aws_sagemaker.CfnNotebookInstanceLifecycleConfig(self, 'NpetuneWorkbenchLifeCycleConfig',
        notebook_instance_lifecycle_config_name='NeptuneWorkbenchLifeCycleConfig',
        on_start=[neptune_wb_lifecycle_config_prop]
    )

    # Workbench notebook lives in the first subnet of the Neptune subnet
    # group and uses the *client* SG so it can reach port 8182.
    neptune_workbench = aws_sagemaker.CfnNotebookInstance(self, 'NeptuneWorkbench',
        instance_type='ml.t2.medium',
        role_arn=sagemaker_notebook_role.role_arn,
        lifecycle_config_name=neptune_wb_lifecycle_config.notebook_instance_lifecycle_config_name,
        notebook_instance_name='NeptuneHolWorkbench',
        root_access='Disabled',
        security_group_ids=[sg_use_graph_db.security_group_id],
        subnet_id=graph_db_subnet_group.subnet_ids[0]
    )
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None: super().__init__(scope, id, **kwargs) vpc = aws_ec2.Vpc( self, "OctemberVPC", max_azs=2, # subnet_configuration=[{ # "cidrMask": 24, # "name": "Public", # "subnetType": aws_ec2.SubnetType.PUBLIC, # }, # { # "cidrMask": 24, # "name": "Private", # "subnetType": aws_ec2.SubnetType.PRIVATE # }, # { # "cidrMask": 28, # "name": "Isolated", # "subnetType": aws_ec2.SubnetType.ISOLATED, # "reserved": True # } # ], gateway_endpoints={ "S3": aws_ec2.GatewayVpcEndpointOptions( service=aws_ec2.GatewayVpcEndpointAwsService.S3) }) dynamo_db_endpoint = vpc.add_gateway_endpoint( "DynamoDbEndpoint", service=aws_ec2.GatewayVpcEndpointAwsService.DYNAMODB) s3_bucket = s3.Bucket( self, "s3bucket", bucket_name="octember-bizcard-{region}-{account}".format( region=core.Aws.REGION, account=core.Aws.ACCOUNT_ID)) api = apigw.RestApi( self, "BizcardImageUploader", rest_api_name="BizcardImageUploader", description="This service serves uploading bizcard images into s3.", endpoint_types=[apigw.EndpointType.REGIONAL], binary_media_types=["image/png", "image/jpg"], deploy=True, deploy_options=apigw.StageOptions(stage_name="v1")) rest_api_role = aws_iam.Role( self, "ApiGatewayRoleForS3", role_name="ApiGatewayRoleForS3FullAccess", assumed_by=aws_iam.ServicePrincipal("apigateway.amazonaws.com"), managed_policies=[ aws_iam.ManagedPolicy.from_aws_managed_policy_name( "AmazonS3FullAccess") ]) list_objects_responses = [ apigw.IntegrationResponse( status_code="200", #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_apigateway/IntegrationResponse.html#aws_cdk.aws_apigateway.IntegrationResponse.response_parameters # The response parameters from the backend response that API Gateway sends to the method response. # Use the destination as the key and the source as the value: # - The destination must be an existing response parameter in the MethodResponse property. 
# - The source must be an existing method request parameter or a static value. response_parameters={ 'method.response.header.Timestamp': 'integration.response.header.Date', 'method.response.header.Content-Length': 'integration.response.header.Content-Length', 'method.response.header.Content-Type': 'integration.response.header.Content-Type' }), apigw.IntegrationResponse(status_code="400", selection_pattern="4\d{2}"), apigw.IntegrationResponse(status_code="500", selection_pattern="5\d{2}") ] list_objects_integration_options = apigw.IntegrationOptions( credentials_role=rest_api_role, integration_responses=list_objects_responses) get_s3_integration = apigw.AwsIntegration( service="s3", integration_http_method="GET", path='/', options=list_objects_integration_options) api.root.add_method( "GET", get_s3_integration, authorization_type=apigw.AuthorizationType.IAM, api_key_required=False, method_responses=[ apigw.MethodResponse( status_code="200", response_parameters={ 'method.response.header.Timestamp': False, 'method.response.header.Content-Length': False, 'method.response.header.Content-Type': False }, response_models={'application/json': apigw.EmptyModel()}), apigw.MethodResponse(status_code="400"), apigw.MethodResponse(status_code="500") ], request_parameters={'method.request.header.Content-Type': False}) get_s3_folder_integration_options = apigw.IntegrationOptions( credentials_role=rest_api_role, integration_responses=list_objects_responses, #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_apigateway/IntegrationOptions.html#aws_cdk.aws_apigateway.IntegrationOptions.request_parameters # Specify request parameters as key-value pairs (string-to-string mappings), with a destination as the key and a source as the value. # The source must be an existing method request parameter or a static value. 
request_parameters={ "integration.request.path.bucket": "method.request.path.folder" }) get_s3_folder_integration = apigw.AwsIntegration( service="s3", integration_http_method="GET", path="{bucket}", options=get_s3_folder_integration_options) s3_folder = api.root.add_resource('{folder}') s3_folder.add_method( "GET", get_s3_folder_integration, authorization_type=apigw.AuthorizationType.IAM, api_key_required=False, method_responses=[ apigw.MethodResponse( status_code="200", response_parameters={ 'method.response.header.Timestamp': False, 'method.response.header.Content-Length': False, 'method.response.header.Content-Type': False }, response_models={'application/json': apigw.EmptyModel()}), apigw.MethodResponse(status_code="400"), apigw.MethodResponse(status_code="500") ], request_parameters={ 'method.request.header.Content-Type': False, 'method.request.path.folder': True }) get_s3_item_integration_options = apigw.IntegrationOptions( credentials_role=rest_api_role, integration_responses=list_objects_responses, request_parameters={ "integration.request.path.bucket": "method.request.path.folder", "integration.request.path.object": "method.request.path.item" }) get_s3_item_integration = apigw.AwsIntegration( service="s3", integration_http_method="GET", path="{bucket}/{object}", options=get_s3_item_integration_options) s3_item = s3_folder.add_resource('{item}') s3_item.add_method( "GET", get_s3_item_integration, authorization_type=apigw.AuthorizationType.IAM, api_key_required=False, method_responses=[ apigw.MethodResponse( status_code="200", response_parameters={ 'method.response.header.Timestamp': False, 'method.response.header.Content-Length': False, 'method.response.header.Content-Type': False }, response_models={'application/json': apigw.EmptyModel()}), apigw.MethodResponse(status_code="400"), apigw.MethodResponse(status_code="500") ], request_parameters={ 'method.request.header.Content-Type': False, 'method.request.path.folder': True, 'method.request.path.item': True 
}) put_s3_item_integration_options = apigw.IntegrationOptions( credentials_role=rest_api_role, integration_responses=[ apigw.IntegrationResponse(status_code="200"), apigw.IntegrationResponse(status_code="400", selection_pattern="4\d{2}"), apigw.IntegrationResponse(status_code="500", selection_pattern="5\d{2}") ], request_parameters={ "integration.request.header.Content-Type": "method.request.header.Content-Type", "integration.request.path.bucket": "method.request.path.folder", "integration.request.path.object": "method.request.path.item" }) put_s3_item_integration = apigw.AwsIntegration( service="s3", integration_http_method="PUT", path="{bucket}/{object}", options=put_s3_item_integration_options) s3_item.add_method( "PUT", put_s3_item_integration, authorization_type=apigw.AuthorizationType.IAM, api_key_required=False, method_responses=[ apigw.MethodResponse( status_code="200", response_parameters={ 'method.response.header.Content-Type': False }, response_models={'application/json': apigw.EmptyModel()}), apigw.MethodResponse(status_code="400"), apigw.MethodResponse(status_code="500") ], request_parameters={ 'method.request.header.Content-Type': False, 'method.request.path.folder': True, 'method.request.path.item': True }) ddb_table = dynamodb.Table( self, "BizcardImageMetaInfoDdbTable", table_name="OctemberBizcardImgMeta", partition_key=dynamodb.Attribute( name="image_id", type=dynamodb.AttributeType.STRING), billing_mode=dynamodb.BillingMode.PROVISIONED, read_capacity=15, write_capacity=5) img_kinesis_stream = kinesis.Stream( self, "BizcardImagePath", stream_name="octember-bizcard-image") # create lambda function trigger_textract_lambda_fn = _lambda.Function( self, "TriggerTextExtractorFromImage", runtime=_lambda.Runtime.PYTHON_3_7, function_name="TriggerTextExtractorFromImage", handler="trigger_text_extract_from_s3_image.lambda_handler", description="Trigger to extract text from an image in S3", code=_lambda.Code.asset( 
"./src/main/python/TriggerTextExtractFromS3Image"), environment={ 'REGION_NAME': core.Aws.REGION, 'DDB_TABLE_NAME': ddb_table.table_name, 'KINESIS_STREAM_NAME': img_kinesis_stream.stream_name }, timeout=core.Duration.minutes(5)) ddb_table_rw_policy_statement = aws_iam.PolicyStatement( effect=aws_iam.Effect.ALLOW, resources=[ddb_table.table_arn], actions=[ "dynamodb:BatchGetItem", "dynamodb:Describe*", "dynamodb:List*", "dynamodb:GetItem", "dynamodb:Query", "dynamodb:Scan", "dynamodb:BatchWriteItem", "dynamodb:DeleteItem", "dynamodb:PutItem", "dynamodb:UpdateItem", "dax:Describe*", "dax:List*", "dax:GetItem", "dax:BatchGetItem", "dax:Query", "dax:Scan", "dax:BatchWriteItem", "dax:DeleteItem", "dax:PutItem", "dax:UpdateItem" ]) trigger_textract_lambda_fn.add_to_role_policy( ddb_table_rw_policy_statement) trigger_textract_lambda_fn.add_to_role_policy( aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW, resources=[img_kinesis_stream.stream_arn], actions=[ "kinesis:Get*", "kinesis:List*", "kinesis:Describe*", "kinesis:PutRecord", "kinesis:PutRecords" ])) # assign notification for the s3 event type (ex: OBJECT_CREATED) s3_event_filter = s3.NotificationKeyFilter(prefix="bizcard-raw-img/", suffix=".jpg") s3_event_source = S3EventSource(s3_bucket, events=[s3.EventType.OBJECT_CREATED], filters=[s3_event_filter]) trigger_textract_lambda_fn.add_event_source(s3_event_source) #XXX: https://github.com/aws/aws-cdk/issues/2240 # To avoid to create extra Lambda Functions with names like LogRetentionaae0aa3c5b4d4f87b02d85b201efdd8a # if log_retention=aws_logs.RetentionDays.THREE_DAYS is added to the constructor props log_group = aws_logs.LogGroup( self, "TriggerTextractLogGroup", log_group_name="/aws/lambda/TriggerTextExtractorFromImage", retention=aws_logs.RetentionDays.THREE_DAYS) log_group.grant_write(trigger_textract_lambda_fn) text_kinesis_stream = kinesis.Stream( self, "BizcardTextData", stream_name="octember-bizcard-txt") textract_lambda_fn = _lambda.Function( self, 
"GetTextFromImage", runtime=_lambda.Runtime.PYTHON_3_7, function_name="GetTextFromImage", handler="get_text_from_s3_image.lambda_handler", description="extract text from an image in S3", code=_lambda.Code.asset("./src/main/python/GetTextFromS3Image"), environment={ 'REGION_NAME': core.Aws.REGION, 'DDB_TABLE_NAME': ddb_table.table_name, 'KINESIS_STREAM_NAME': text_kinesis_stream.stream_name }, timeout=core.Duration.minutes(5)) textract_lambda_fn.add_to_role_policy(ddb_table_rw_policy_statement) textract_lambda_fn.add_to_role_policy( aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW, resources=[text_kinesis_stream.stream_arn], actions=[ "kinesis:Get*", "kinesis:List*", "kinesis:Describe*", "kinesis:PutRecord", "kinesis:PutRecords" ])) textract_lambda_fn.add_to_role_policy( aws_iam.PolicyStatement( **{ "effect": aws_iam.Effect.ALLOW, "resources": [ s3_bucket.bucket_arn, "{}/*".format( s3_bucket.bucket_arn) ], "actions": [ "s3:AbortMultipartUpload", "s3:GetBucketLocation", "s3:GetObject", "s3:ListBucket", "s3:ListBucketMultipartUploads", "s3:PutObject" ] })) textract_lambda_fn.add_to_role_policy( aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW, resources=["*"], actions=["textract:*"])) img_kinesis_event_source = KinesisEventSource( img_kinesis_stream, batch_size=100, starting_position=_lambda.StartingPosition.LATEST) textract_lambda_fn.add_event_source(img_kinesis_event_source) log_group = aws_logs.LogGroup( self, "GetTextFromImageLogGroup", log_group_name="/aws/lambda/GetTextFromImage", retention=aws_logs.RetentionDays.THREE_DAYS) log_group.grant_write(textract_lambda_fn) sg_use_bizcard_es = aws_ec2.SecurityGroup( self, "BizcardSearchClientSG", vpc=vpc, allow_all_outbound=True, description= 'security group for octember bizcard elasticsearch client', security_group_name='use-octember-bizcard-es') core.Tags.of(sg_use_bizcard_es).add('Name', 'use-octember-bizcard-es') sg_bizcard_es = aws_ec2.SecurityGroup( self, "BizcardSearchSG", vpc=vpc, allow_all_outbound=True, 
description='security group for octember bizcard elasticsearch', security_group_name='octember-bizcard-es') core.Tags.of(sg_bizcard_es).add('Name', 'octember-bizcard-es') sg_bizcard_es.add_ingress_rule(peer=sg_bizcard_es, connection=aws_ec2.Port.all_tcp(), description='octember-bizcard-es') sg_bizcard_es.add_ingress_rule(peer=sg_use_bizcard_es, connection=aws_ec2.Port.all_tcp(), description='use-octember-bizcard-es') sg_ssh_access = aws_ec2.SecurityGroup( self, "BastionHostSG", vpc=vpc, allow_all_outbound=True, description='security group for bastion host', security_group_name='octember-bastion-host-sg') core.Tags.of(sg_ssh_access).add('Name', 'octember-bastion-host') sg_ssh_access.add_ingress_rule(peer=aws_ec2.Peer.any_ipv4(), connection=aws_ec2.Port.tcp(22), description='ssh access') bastion_host = aws_ec2.BastionHostLinux( self, "BastionHost", vpc=vpc, instance_type=aws_ec2.InstanceType('t3.nano'), security_group=sg_ssh_access, subnet_selection=aws_ec2.SubnetSelection( subnet_type=aws_ec2.SubnetType.PUBLIC)) bastion_host.instance.add_security_group(sg_use_bizcard_es) #XXX: aws cdk elastsearch example - https://github.com/aws/aws-cdk/issues/2873 es_cfn_domain = aws_elasticsearch.CfnDomain( self, 'BizcardSearch', elasticsearch_cluster_config={ "dedicatedMasterCount": 3, "dedicatedMasterEnabled": True, "dedicatedMasterType": "t2.medium.elasticsearch", "instanceCount": 2, "instanceType": "t2.medium.elasticsearch", "zoneAwarenessEnabled": True }, ebs_options={ "ebsEnabled": True, "volumeSize": 10, "volumeType": "gp2" }, domain_name="octember-bizcard", elasticsearch_version="7.9", encryption_at_rest_options={"enabled": False}, access_policies={ "Version": "2012-10-17", "Statement": [{ "Effect": "Allow", "Principal": { "AWS": "*" }, "Action": ["es:Describe*", "es:List*", "es:Get*", "es:ESHttp*"], "Resource": self.format_arn(service="es", resource="domain", resource_name="octember-bizcard/*") }] }, snapshot_options={"automatedSnapshotStartHour": 17}, vpc_options={ 
"securityGroupIds": [sg_bizcard_es.security_group_id], "subnetIds": vpc.select_subnets( subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids }) core.Tags.of(es_cfn_domain).add('Name', 'octember-bizcard-es') s3_lib_bucket_name = self.node.try_get_context("lib_bucket_name") #XXX: https://github.com/aws/aws-cdk/issues/1342 s3_lib_bucket = s3.Bucket.from_bucket_name(self, id, s3_lib_bucket_name) es_lib_layer = _lambda.LayerVersion( self, "ESLib", layer_version_name="es-lib", compatible_runtimes=[_lambda.Runtime.PYTHON_3_7], code=_lambda.Code.from_bucket(s3_lib_bucket, "var/octember-es-lib.zip")) redis_lib_layer = _lambda.LayerVersion( self, "RedisLib", layer_version_name="redis-lib", compatible_runtimes=[_lambda.Runtime.PYTHON_3_7], code=_lambda.Code.from_bucket(s3_lib_bucket, "var/octember-redis-lib.zip")) #XXX: Deploy lambda in VPC - https://github.com/aws/aws-cdk/issues/1342 upsert_to_es_lambda_fn = _lambda.Function( self, "UpsertBizcardToES", runtime=_lambda.Runtime.PYTHON_3_7, function_name="UpsertBizcardToElasticSearch", handler="upsert_bizcard_to_es.lambda_handler", description="Upsert bizcard text into elasticsearch", code=_lambda.Code.asset("./src/main/python/UpsertBizcardToES"), environment={ 'ES_HOST': es_cfn_domain.attr_domain_endpoint, 'ES_INDEX': 'octember_bizcard', 'ES_TYPE': 'bizcard' }, timeout=core.Duration.minutes(5), layers=[es_lib_layer], security_groups=[sg_use_bizcard_es], vpc=vpc) text_kinesis_event_source = KinesisEventSource( text_kinesis_stream, batch_size=99, starting_position=_lambda.StartingPosition.LATEST) upsert_to_es_lambda_fn.add_event_source(text_kinesis_event_source) log_group = aws_logs.LogGroup( self, "UpsertBizcardToESLogGroup", log_group_name="/aws/lambda/UpsertBizcardToElasticSearch", retention=aws_logs.RetentionDays.THREE_DAYS) log_group.grant_write(upsert_to_es_lambda_fn) firehose_role_policy_doc = aws_iam.PolicyDocument() firehose_role_policy_doc.add_statements( aws_iam.PolicyStatement( **{ "effect": aws_iam.Effect.ALLOW, 
"resources": [ s3_bucket.bucket_arn, "{}/*".format( s3_bucket.bucket_arn) ], "actions": [ "s3:AbortMultipartUpload", "s3:GetBucketLocation", "s3:GetObject", "s3:ListBucket", "s3:ListBucketMultipartUploads", "s3:PutObject" ] })) firehose_role_policy_doc.add_statements( aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW, resources=["*"], actions=[ "glue:GetTable", "glue:GetTableVersion", "glue:GetTableVersions" ])) firehose_role_policy_doc.add_statements( aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW, resources=[text_kinesis_stream.stream_arn], actions=[ "kinesis:DescribeStream", "kinesis:GetShardIterator", "kinesis:GetRecords" ])) firehose_log_group_name = "/aws/kinesisfirehose/octember-bizcard-txt-to-s3" firehose_role_policy_doc.add_statements( aws_iam.PolicyStatement( effect=aws_iam.Effect.ALLOW, #XXX: The ARN will be formatted as follows: # arn:{partition}:{service}:{region}:{account}:{resource}{sep}}{resource-name} resources=[ self.format_arn(service="logs", resource="log-group", resource_name="{}:log-stream:*".format( firehose_log_group_name), sep=":") ], actions=["logs:PutLogEvents"])) firehose_role = aws_iam.Role( self, "FirehoseDeliveryRole", role_name="FirehoseDeliveryRole", assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"), #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221 inline_policies={"firehose_role_policy": firehose_role_policy_doc}) bizcard_text_to_s3_delivery_stream = aws_kinesisfirehose.CfnDeliveryStream( self, "BizcardTextToS3", delivery_stream_name="octember-bizcard-txt-to-s3", delivery_stream_type="KinesisStreamAsSource", kinesis_stream_source_configuration={ "kinesisStreamArn": text_kinesis_stream.stream_arn, "roleArn": firehose_role.role_arn }, extended_s3_destination_configuration={ "bucketArn": s3_bucket.bucket_arn, "bufferingHints": { "intervalInSeconds": 60, "sizeInMBs": 1 }, "cloudWatchLoggingOptions": { "enabled": True, "logGroupName": firehose_log_group_name, "logStreamName": 
"S3Delivery" }, "compressionFormat": "GZIP", "prefix": "bizcard-text/", "roleArn": firehose_role.role_arn }) sg_use_bizcard_es_cache = aws_ec2.SecurityGroup( self, "BizcardSearchCacheClientSG", vpc=vpc, allow_all_outbound=True, description= 'security group for octember bizcard search query cache client', security_group_name='use-octember-bizcard-es-cache') core.Tags.of(sg_use_bizcard_es_cache).add( 'Name', 'use-octember-bizcard-es-cache') sg_bizcard_es_cache = aws_ec2.SecurityGroup( self, "BizcardSearchCacheSG", vpc=vpc, allow_all_outbound=True, description= 'security group for octember bizcard search query cache', security_group_name='octember-bizcard-es-cache') core.Tags.of(sg_bizcard_es_cache).add('Name', 'octember-bizcard-es-cache') sg_bizcard_es_cache.add_ingress_rule( peer=sg_use_bizcard_es_cache, connection=aws_ec2.Port.tcp(6379), description='use-octember-bizcard-es-cache') es_query_cache_subnet_group = aws_elasticache.CfnSubnetGroup( self, "QueryCacheSubnetGroup", description="subnet group for octember-bizcard-es-cache", subnet_ids=vpc.select_subnets( subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids, cache_subnet_group_name='octember-bizcard-es-cache') es_query_cache = aws_elasticache.CfnCacheCluster( self, "BizcardSearchQueryCache", cache_node_type="cache.t3.small", num_cache_nodes=1, engine="redis", engine_version="5.0.5", auto_minor_version_upgrade=False, cluster_name="octember-bizcard-es-cache", snapshot_retention_limit=3, snapshot_window="17:00-19:00", preferred_maintenance_window="mon:19:00-mon:20:30", #XXX: Do not use referece for "cache_subnet_group_name" - https://github.com/aws/aws-cdk/issues/3098 #cache_subnet_group_name=es_query_cache_subnet_group.cache_subnet_group_name, # Redis cluster goes to wrong VPC cache_subnet_group_name='octember-bizcard-es-cache', vpc_security_group_ids=[sg_bizcard_es_cache.security_group_id]) #XXX: If you're going to launch your cluster in an Amazon VPC, you need to create a subnet group before you start creating a 
cluster. # https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-elasticache-cache-cluster.html#cfn-elasticache-cachecluster-cachesubnetgroupname es_query_cache.add_depends_on(es_query_cache_subnet_group) #XXX: add more than 2 security groups # https://github.com/aws/aws-cdk/blob/ea10f0d141a48819ec0000cd7905feda993870a9/packages/%40aws-cdk/aws-lambda/lib/function.ts#L387 # https://github.com/aws/aws-cdk/issues/1555 # https://github.com/aws/aws-cdk/pull/5049 bizcard_search_lambda_fn = _lambda.Function( self, "BizcardSearchServer", runtime=_lambda.Runtime.PYTHON_3_7, function_name="BizcardSearchProxy", handler="es_search_bizcard.lambda_handler", description="Proxy server to search bizcard text", code=_lambda.Code.asset("./src/main/python/SearchBizcard"), environment={ 'ES_HOST': es_cfn_domain.attr_domain_endpoint, 'ES_INDEX': 'octember_bizcard', 'ES_TYPE': 'bizcard', 'ELASTICACHE_HOST': es_query_cache.attr_redis_endpoint_address }, timeout=core.Duration.minutes(1), layers=[es_lib_layer, redis_lib_layer], security_groups=[sg_use_bizcard_es, sg_use_bizcard_es_cache], vpc=vpc) #XXX: create API Gateway + LambdaProxy search_api = apigw.LambdaRestApi( self, "BizcardSearchAPI", handler=bizcard_search_lambda_fn, proxy=False, rest_api_name="BizcardSearch", description="This service serves searching bizcard text.", endpoint_types=[apigw.EndpointType.REGIONAL], deploy=True, deploy_options=apigw.StageOptions(stage_name="v1")) bizcard_search = search_api.root.add_resource('search') bizcard_search.add_method( "GET", method_responses=[ apigw.MethodResponse( status_code="200", response_models={'application/json': apigw.EmptyModel()}), apigw.MethodResponse(status_code="400"), apigw.MethodResponse(status_code="500") ]) sg_use_bizcard_graph_db = aws_ec2.SecurityGroup( self, "BizcardGraphDbClientSG", vpc=vpc, allow_all_outbound=True, description='security group for octember bizcard graph db client', security_group_name='use-octember-bizcard-neptune') 
core.Tags.of(sg_use_bizcard_graph_db).add( 'Name', 'use-octember-bizcard-neptune') sg_bizcard_graph_db = aws_ec2.SecurityGroup( self, "BizcardGraphDbSG", vpc=vpc, allow_all_outbound=True, description='security group for octember bizcard graph db', security_group_name='octember-bizcard-neptune') core.Tags.of(sg_bizcard_graph_db).add('Name', 'octember-bizcard-neptune') sg_bizcard_graph_db.add_ingress_rule( peer=sg_bizcard_graph_db, connection=aws_ec2.Port.tcp(8182), description='octember-bizcard-neptune') sg_bizcard_graph_db.add_ingress_rule( peer=sg_use_bizcard_graph_db, connection=aws_ec2.Port.tcp(8182), description='use-octember-bizcard-neptune') bizcard_graph_db_subnet_group = aws_neptune.CfnDBSubnetGroup( self, "NeptuneSubnetGroup", db_subnet_group_description= "subnet group for octember-bizcard-neptune", subnet_ids=vpc.select_subnets( subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids, db_subnet_group_name='octember-bizcard-neptune') bizcard_graph_db = aws_neptune.CfnDBCluster( self, "BizcardGraphDB", availability_zones=vpc.availability_zones, db_subnet_group_name=bizcard_graph_db_subnet_group. 
db_subnet_group_name, db_cluster_identifier="octember-bizcard", backup_retention_period=1, preferred_backup_window="08:45-09:15", preferred_maintenance_window="sun:18:00-sun:18:30", vpc_security_group_ids=[sg_bizcard_graph_db.security_group_id]) bizcard_graph_db.add_depends_on(bizcard_graph_db_subnet_group) bizcard_graph_db_instance = aws_neptune.CfnDBInstance( self, "BizcardGraphDBInstance", db_instance_class="db.r5.large", allow_major_version_upgrade=False, auto_minor_version_upgrade=False, availability_zone=vpc.availability_zones[0], db_cluster_identifier=bizcard_graph_db.db_cluster_identifier, db_instance_identifier="octember-bizcard", preferred_maintenance_window="sun:18:00-sun:18:30") bizcard_graph_db_instance.add_depends_on(bizcard_graph_db) bizcard_graph_db_replica_instance = aws_neptune.CfnDBInstance( self, "BizcardGraphDBReplicaInstance", db_instance_class="db.r5.large", allow_major_version_upgrade=False, auto_minor_version_upgrade=False, availability_zone=vpc.availability_zones[-1], db_cluster_identifier=bizcard_graph_db.db_cluster_identifier, db_instance_identifier="octember-bizcard-replica", preferred_maintenance_window="sun:18:00-sun:18:30") bizcard_graph_db_replica_instance.add_depends_on(bizcard_graph_db) bizcard_graph_db_replica_instance.add_depends_on( bizcard_graph_db_instance) gremlinpython_lib_layer = _lambda.LayerVersion( self, "GremlinPythonLib", layer_version_name="gremlinpython-lib", compatible_runtimes=[_lambda.Runtime.PYTHON_3_7], code=_lambda.Code.from_bucket( s3_lib_bucket, "var/octember-gremlinpython-lib.zip")) #XXX: https://github.com/aws/aws-cdk/issues/1342 upsert_to_neptune_lambda_fn = _lambda.Function( self, "UpsertBizcardToGraphDB", runtime=_lambda.Runtime.PYTHON_3_7, function_name="UpsertBizcardToNeptune", handler="upsert_bizcard_to_graph_db.lambda_handler", description="Upsert bizcard into neptune", code=_lambda.Code.asset( "./src/main/python/UpsertBizcardToGraphDB"), environment={ 'REGION_NAME': core.Aws.REGION, 
'NEPTUNE_ENDPOINT': bizcard_graph_db.attr_endpoint, 'NEPTUNE_PORT': bizcard_graph_db.attr_port }, timeout=core.Duration.minutes(5), layers=[gremlinpython_lib_layer], security_groups=[sg_use_bizcard_graph_db], vpc=vpc) upsert_to_neptune_lambda_fn.add_event_source(text_kinesis_event_source) log_group = aws_logs.LogGroup( self, "UpsertBizcardToGraphDBLogGroup", log_group_name="/aws/lambda/UpsertBizcardToNeptune", retention=aws_logs.RetentionDays.THREE_DAYS) log_group.grant_write(upsert_to_neptune_lambda_fn) sg_use_bizcard_neptune_cache = aws_ec2.SecurityGroup( self, "BizcardNeptuneCacheClientSG", vpc=vpc, allow_all_outbound=True, description= 'security group for octember bizcard recommendation query cache client', security_group_name='use-octember-bizcard-neptune-cache') core.Tags.of(sg_use_bizcard_neptune_cache).add( 'Name', 'use-octember-bizcard-es-cache') sg_bizcard_neptune_cache = aws_ec2.SecurityGroup( self, "BizcardNeptuneCacheSG", vpc=vpc, allow_all_outbound=True, description= 'security group for octember bizcard recommendation query cache', security_group_name='octember-bizcard-neptune-cache') core.Tags.of(sg_bizcard_neptune_cache).add( 'Name', 'octember-bizcard-neptune-cache') sg_bizcard_neptune_cache.add_ingress_rule( peer=sg_use_bizcard_neptune_cache, connection=aws_ec2.Port.tcp(6379), description='use-octember-bizcard-neptune-cache') recomm_query_cache_subnet_group = aws_elasticache.CfnSubnetGroup( self, "RecommQueryCacheSubnetGroup", description="subnet group for octember-bizcard-neptune-cache", subnet_ids=vpc.select_subnets( subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids, cache_subnet_group_name='octember-bizcard-neptune-cache') recomm_query_cache = aws_elasticache.CfnCacheCluster( self, "BizcardRecommQueryCache", cache_node_type="cache.t3.small", num_cache_nodes=1, engine="redis", engine_version="5.0.5", auto_minor_version_upgrade=False, cluster_name="octember-bizcard-neptune-cache", snapshot_retention_limit=3, snapshot_window="17:00-19:00", 
preferred_maintenance_window="mon:19:00-mon:20:30", #XXX: Do not use referece for "cache_subnet_group_name" - https://github.com/aws/aws-cdk/issues/3098 #cache_subnet_group_name=recomm_query_cache_subnet_group.cache_subnet_group_name, # Redis cluster goes to wrong VPC cache_subnet_group_name='octember-bizcard-neptune-cache', vpc_security_group_ids=[ sg_bizcard_neptune_cache.security_group_id ]) recomm_query_cache.add_depends_on(recomm_query_cache_subnet_group) bizcard_recomm_lambda_fn = _lambda.Function( self, "BizcardRecommender", runtime=_lambda.Runtime.PYTHON_3_7, function_name="BizcardRecommender", handler="neptune_recommend_bizcard.lambda_handler", description="This service serves PYMK(People You May Know).", code=_lambda.Code.asset("./src/main/python/RecommendBizcard"), environment={ 'REGION_NAME': core.Aws.REGION, 'NEPTUNE_ENDPOINT': bizcard_graph_db.attr_read_endpoint, 'NEPTUNE_PORT': bizcard_graph_db.attr_port, 'ELASTICACHE_HOST': recomm_query_cache.attr_redis_endpoint_address }, timeout=core.Duration.minutes(1), layers=[gremlinpython_lib_layer, redis_lib_layer], security_groups=[ sg_use_bizcard_graph_db, sg_use_bizcard_neptune_cache ], vpc=vpc) #XXX: create API Gateway + LambdaProxy recomm_api = apigw.LambdaRestApi( self, "BizcardRecommendAPI", handler=bizcard_recomm_lambda_fn, proxy=False, rest_api_name="BizcardRecommend", description="This service serves PYMK(People You May Know).", endpoint_types=[apigw.EndpointType.REGIONAL], deploy=True, deploy_options=apigw.StageOptions(stage_name="v1")) bizcard_recomm = recomm_api.root.add_resource('pymk') bizcard_recomm.add_method( "GET", method_responses=[ apigw.MethodResponse( status_code="200", response_models={'application/json': apigw.EmptyModel()}), apigw.MethodResponse(status_code="400"), apigw.MethodResponse(status_code="500") ]) sagemaker_notebook_role_policy_doc = aws_iam.PolicyDocument() sagemaker_notebook_role_policy_doc.add_statements( aws_iam.PolicyStatement( **{ "effect": aws_iam.Effect.ALLOW, 
"resources": [ "arn:aws:s3:::aws-neptune-notebook", "arn:aws:s3:::aws-neptune-notebook/*" ], "actions": ["s3:GetObject", "s3:ListBucket"] })) sagemaker_notebook_role_policy_doc.add_statements( aws_iam.PolicyStatement( **{ "effect": aws_iam.Effect.ALLOW, "resources": [ "arn:aws:neptune-db:{region}:{account}:{cluster_id}/*". format(region=core.Aws.REGION, account=core.Aws.ACCOUNT_ID, cluster_id=bizcard_graph_db. attr_cluster_resource_id) ], "actions": ["neptune-db:connect"] })) sagemaker_notebook_role = aws_iam.Role( self, 'SageMakerNotebookForNeptuneWorkbenchRole', role_name='AWSNeptuneNotebookRole-OctemberBizcard', assumed_by=aws_iam.ServicePrincipal('sagemaker.amazonaws.com'), #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221 inline_policies={ 'AWSNeptuneNotebook': sagemaker_notebook_role_policy_doc }) neptune_wb_lifecycle_content = '''#!/bin/bash sudo -u ec2-user -i <<'EOF' echo "export GRAPH_NOTEBOOK_AUTH_MODE=DEFAULT" >> ~/.bashrc echo "export GRAPH_NOTEBOOK_HOST={NeptuneClusterEndpoint}" >> ~/.bashrc echo "export GRAPH_NOTEBOOK_PORT={NeptuneClusterPort}" >> ~/.bashrc echo "export NEPTUNE_LOAD_FROM_S3_ROLE_ARN=''" >> ~/.bashrc echo "export AWS_REGION={AWS_Region}" >> ~/.bashrc aws s3 cp s3://aws-neptune-notebook/graph_notebook.tar.gz /tmp/graph_notebook.tar.gz rm -rf /tmp/graph_notebook tar -zxvf /tmp/graph_notebook.tar.gz -C /tmp /tmp/graph_notebook/install.sh EOF '''.format(NeptuneClusterEndpoint=bizcard_graph_db.attr_endpoint, NeptuneClusterPort=bizcard_graph_db.attr_port, AWS_Region=core.Aws.REGION) neptune_wb_lifecycle_config_prop = aws_sagemaker.CfnNotebookInstanceLifecycleConfig.NotebookInstanceLifecycleHookProperty( content=core.Fn.base64(neptune_wb_lifecycle_content)) neptune_wb_lifecycle_config = aws_sagemaker.CfnNotebookInstanceLifecycleConfig( self, 'NpetuneWorkbenchLifeCycleConfig', notebook_instance_lifecycle_config_name= 'AWSNeptuneWorkbenchOctemberBizcardLCConfig', on_start=[neptune_wb_lifecycle_config_prop]) 
neptune_workbench = aws_sagemaker.CfnNotebookInstance( self, 'NeptuneWorkbench', instance_type='ml.t2.medium', role_arn=sagemaker_notebook_role.role_arn, lifecycle_config_name=neptune_wb_lifecycle_config. notebook_instance_lifecycle_config_name, notebook_instance_name='OctemberBizcard-NeptuneWorkbench', root_access='Disabled', security_group_ids=[sg_use_bizcard_graph_db.security_group_name], subnet_id=bizcard_graph_db_subnet_group.subnet_ids[0])
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    """Provision the analytics-workshop data pipeline stack.

    Resources created (in dependency order):
      * a 2-AZ VPC with an S3 gateway endpoint,
      * a bastion host in a public subnet (SSH open to the world — see TODO),
      * security groups for the Elasticsearch domain and its clients,
      * an S3 bucket, a Kinesis stream, and a Firehose delivery stream
        writing JSON from the stream into the bucket,
      * a VPC-attached Elasticsearch 7.4 domain,
      * an ``UpsertToES`` Lambda fed by the Kinesis stream, and
      * a ``MergeSmallFiles`` Lambda compacting S3 objects via Athena CTAS
        on an hourly schedule.

    NOTE(review): ``kwargs['env']`` is read directly for ``.region`` and
    ``.account`` (bucket name, Lambda env) — callers must pass an ``env``
    object carrying both attributes, or this raises ``KeyError``.
    """
    super().__init__(scope, id, **kwargs)

    # The code that defines your stack goes here

    # 2-AZ VPC; the S3 gateway endpoint keeps S3 traffic off the internet.
    vpc = aws_ec2.Vpc(
        self,
        "AnalyticsWorkshopVPC",
        max_azs=2,
        gateway_endpoints={
            "S3":
            aws_ec2.GatewayVpcEndpointOptions(
                service=aws_ec2.GatewayVpcEndpointAwsService.S3)
        })

    sg_bastion_host = aws_ec2.SecurityGroup(
        self,
        "BastionHostSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for an bastion host',
        security_group_name='bastion-host-sg')
    core.Tag.add(sg_bastion_host, 'Name', 'bastion-host-sg')

    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceClass.html
    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceSize.html#aws_cdk.aws_ec2.InstanceSize
    # t3.medium for the bastion host.
    ec2_instance_type = aws_ec2.InstanceType.of(
        aws_ec2.InstanceClass.BURSTABLE3, aws_ec2.InstanceSize.MEDIUM)

    #XXX: As there are no SSH public keys deployed on this machine,
    # you need to use EC2 Instance Connect with the command
    # 'aws ec2-instance-connect send-ssh-public-key' to provide your SSH public key.
    # https://aws.amazon.com/de/blogs/compute/new-using-amazon-ec2-instance-connect-for-ssh-access-to-your-ec2-instances/
    bastion_host = aws_ec2.BastionHostLinux(
        self,
        "BastionHost",
        vpc=vpc,
        instance_type=ec2_instance_type,
        subnet_selection=aws_ec2.SubnetSelection(
            subnet_type=aws_ec2.SubnetType.PUBLIC),
        security_group=sg_bastion_host)

    #TODO: SHOULD restrict IP range allowed to ssh access
    bastion_host.allow_ssh_access_from(aws_ec2.Peer.ipv4("0.0.0.0/0"))

    #XXX: In order to test data pipeline, add {Kinesis, KinesisFirehose}FullAccess Policy to the bastion host.
    # Broad kinesis:*/firehose:* on all resources — workshop convenience,
    # not production-grade least privilege.
    bastion_host.role.add_to_policy(
        aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                resources=["*"],
                                actions=["kinesis:*"]))
    bastion_host.role.add_to_policy(
        aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                resources=["*"],
                                actions=["firehose:*"]))

    # Client-side SG: attached to anything that must reach the ES domain.
    sg_use_es = aws_ec2.SecurityGroup(
        self,
        "ElasticSearchClientSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for an elasticsearch client',
        security_group_name='use-es-cluster-sg')
    core.Tag.add(sg_use_es, 'Name', 'use-es-cluster-sg')

    # Cluster-side SG: admits the client SG, itself, and the bastion host.
    sg_es = aws_ec2.SecurityGroup(
        self,
        "ElasticSearchSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for an elasticsearch cluster',
        security_group_name='es-cluster-sg')
    core.Tag.add(sg_es, 'Name', 'es-cluster-sg')

    sg_es.add_ingress_rule(peer=sg_es,
                           connection=aws_ec2.Port.all_tcp(),
                           description='es-cluster-sg')
    sg_es.add_ingress_rule(peer=sg_use_es,
                           connection=aws_ec2.Port.all_tcp(),
                           description='use-es-cluster-sg')
    sg_es.add_ingress_rule(peer=sg_bastion_host,
                           connection=aws_ec2.Port.all_tcp(),
                           description='bastion-host-sg')

    # Destination bucket for Firehose; name embeds region/account for
    # global uniqueness.
    s3_bucket = s3.Bucket(
        self,
        "s3bucket",
        bucket_name="aws-analytics-immersion-day-{region}-{account}".format(
            region=kwargs['env'].region, account=kwargs['env'].account))

    trans_kinesis_stream = kinesis.Stream(
        self, "AnalyticsWorkshopKinesisStreams", stream_name='retail-trans')

    # IAM policy for the Firehose delivery role, built statement by
    # statement: S3 write, Glue table read, Kinesis read, CW Logs write.
    firehose_role_policy_doc = aws_iam.PolicyDocument()
    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            **{
                "effect": aws_iam.Effect.ALLOW,
                "resources": [
                    s3_bucket.bucket_arn, "{}/*".format(s3_bucket.bucket_arn)
                ],
                "actions": [
                    "s3:AbortMultipartUpload", "s3:GetBucketLocation",
                    "s3:GetObject", "s3:ListBucket",
                    "s3:ListBucketMultipartUploads", "s3:PutObject"
                ]
            }))
    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                resources=["*"],
                                actions=[
                                    "glue:GetTable", "glue:GetTableVersion",
                                    "glue:GetTableVersions"
                                ]))
    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=[trans_kinesis_stream.stream_arn],
            actions=[
                "kinesis:DescribeStream", "kinesis:GetShardIterator",
                "kinesis:GetRecords"
            ]))

    firehose_log_group_name = "/aws/kinesisfirehose/retail-trans"
    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            #XXX: The ARN will be formatted as follows:
            # arn:{partition}:{service}:{region}:{account}:{resource}{sep}}{resource-name}
            resources=[
                self.format_arn(service="logs",
                                resource="log-group",
                                resource_name="{}:log-stream:*".format(
                                    firehose_log_group_name),
                                sep=":")
            ],
            actions=["logs:PutLogEvents"]))

    firehose_role = aws_iam.Role(
        self,
        "FirehoseDeliveryRole",
        role_name="FirehoseDeliveryRole",
        assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"),
        #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
        inline_policies={"firehose_role_policy": firehose_role_policy_doc})

    # Firehose reads the Kinesis stream and lands hourly-partitioned JSON
    # under json-data/ (errors under error-json/) in the bucket.
    trans_to_s3_delivery_stream = aws_kinesisfirehose.CfnDeliveryStream(
        self,
        "KinesisFirehoseToS3",
        delivery_stream_name="retail-trans",
        delivery_stream_type="KinesisStreamAsSource",
        kinesis_stream_source_configuration={
            "kinesisStreamArn": trans_kinesis_stream.stream_arn,
            "roleArn": firehose_role.role_arn
        },
        extended_s3_destination_configuration={
            "bucketArn": s3_bucket.bucket_arn,
            # Flush every 60s or 1 MB, whichever comes first.
            "bufferingHints": {
                "intervalInSeconds": 60,
                "sizeInMBs": 1
            },
            "cloudWatchLoggingOptions": {
                "enabled": True,
                "logGroupName": firehose_log_group_name,
                "logStreamName": "S3Delivery"
            },
            "compressionFormat":
            "UNCOMPRESSED",  # [GZIP | HADOOP_SNAPPY | Snappy | UNCOMPRESSED | ZIP]
            "prefix":
            "json-data/year=!{timestamp:yyyy}/month=!{timestamp:MM}/day=!{timestamp:dd}/hour=!{timestamp:HH}/",
            "errorOutputPrefix":
            "error-json/year=!{timestamp:yyyy}/month=!{timestamp:MM}/day=!{timestamp:dd}/hour=!{timestamp:HH}/!{firehose:error-output-type}",
            "roleArn": firehose_role.role_arn
        })

    #XXX: aws cdk elastsearch example - https://github.com/aws/aws-cdk/issues/2873
    # VPC-attached ES 7.4 domain: 3 dedicated masters + 2 data nodes across
    # the private subnets; access policy is open ("AWS": "*") because
    # reachability is already gated by sg_es inside the VPC.
    es_domain_name = 'retail'
    es_cfn_domain = aws_elasticsearch.CfnDomain(
        self,
        "ElasticSearch",
        elasticsearch_cluster_config={
            "dedicatedMasterCount": 3,
            "dedicatedMasterEnabled": True,
            "dedicatedMasterType": "t2.medium.elasticsearch",
            "instanceCount": 2,
            "instanceType": "t2.medium.elasticsearch",
            "zoneAwarenessEnabled": True
        },
        ebs_options={
            "ebsEnabled": True,
            "volumeSize": 10,
            "volumeType": "gp2"
        },
        domain_name=es_domain_name,
        elasticsearch_version="7.4",
        encryption_at_rest_options={"enabled": False},
        access_policies={
            "Version":
            "2012-10-17",
            "Statement": [{
                "Effect":
                "Allow",
                "Principal": {
                    "AWS": "*"
                },
                "Action":
                ["es:Describe*", "es:List*", "es:Get*", "es:ESHttp*"],
                "Resource":
                self.format_arn(service="es",
                                resource="domain",
                                resource_name="{}/*".format(es_domain_name))
            }]
        },
        snapshot_options={"automatedSnapshotStartHour": 17},
        vpc_options={
            "securityGroupIds": [sg_es.security_group_id],
            "subnetIds":
            vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids
        })
    core.Tag.add(es_cfn_domain, 'Name', 'analytics-workshop-es')

    #XXX: https://github.com/aws/aws-cdk/issues/1342
    # Pre-existing bucket holding the packaged Lambda-layer zip.
    s3_lib_bucket = s3.Bucket.from_bucket_name(self, id,
                                               S3_BUCKET_LAMBDA_LAYER_LIB)
    es_lib_layer = _lambda.LayerVersion(
        self,
        "ESLib",
        layer_version_name="es-lib",
        compatible_runtimes=[_lambda.Runtime.PYTHON_3_7],
        code=_lambda.Code.from_bucket(s3_lib_bucket, "var/es-lib.zip"))

    #XXX: add more than 2 security groups
    # https://github.com/aws/aws-cdk/blob/ea10f0d141a48819ec0000cd7905feda993870a9/packages/%40aws-cdk/aws-lambda/lib/function.ts#L387
    # https://github.com/aws/aws-cdk/issues/1555
    # https://github.com/aws/aws-cdk/pull/5049
    #XXX: Deploy lambda in VPC - https://github.com/aws/aws-cdk/issues/1342
    # Stream consumer: indexes each Kinesis record into the ES domain.
    upsert_to_es_lambda_fn = _lambda.Function(
        self,
        "UpsertToES",
        runtime=_lambda.Runtime.PYTHON_3_7,
        function_name="UpsertToES",
        handler="upsert_to_es.lambda_handler",
        description="Upsert records into elasticsearch",
        code=_lambda.Code.asset("./src/main/python/UpsertToES"),
        environment={
            'ES_HOST': es_cfn_domain.attr_domain_endpoint,
            #TODO: MUST set appropriate environment variables for your workloads.
            'ES_INDEX': 'retail',
            'ES_TYPE': 'trans',
            'REQUIRED_FIELDS': 'Invoice,StockCode,Customer_ID',
            'REGION_NAME': kwargs['env'].region,
            'DATE_TYPE_FIELDS': 'InvoiceDate'
        },
        timeout=core.Duration.minutes(5),
        layers=[es_lib_layer],
        security_groups=[sg_use_es],
        vpc=vpc)

    trans_kinesis_event_source = KinesisEventSource(
        trans_kinesis_stream,
        batch_size=1000,
        starting_position=_lambda.StartingPosition.LATEST)
    upsert_to_es_lambda_fn.add_event_source(trans_kinesis_event_source)

    # Explicit log group so we control the 3-day retention.
    log_group = aws_logs.LogGroup(
        self,
        "UpsertToESLogGroup",
        log_group_name="/aws/lambda/UpsertToES",
        retention=aws_logs.RetentionDays.THREE_DAYS)
    log_group.grant_write(upsert_to_es_lambda_fn)

    # Scheduled compaction: Athena CTAS rewrites the small JSON objects
    # into Parquet under OUTPUT_PREFIX.
    merge_small_files_lambda_fn = _lambda.Function(
        self,
        "MergeSmallFiles",
        runtime=_lambda.Runtime.PYTHON_3_7,
        function_name="MergeSmallFiles",
        handler="athena_ctas.lambda_handler",
        description="Merge small files in S3",
        code=_lambda.Code.asset("./src/main/python/MergeSmallFiles"),
        environment={
            #TODO: MUST set appropriate environment variables for your workloads.
            'OLD_DATABASE':
            'mydatabase',
            'OLD_TABLE_NAME':
            'retail_trans_json',
            'NEW_DATABASE':
            'mydatabase',
            'NEW_TABLE_NAME':
            'ctas_retail_trans_parquet',
            'WORK_GROUP':
            'primary',
            'OUTPUT_PREFIX':
            's3://{}'.format(
                os.path.join(s3_bucket.bucket_name, 'parquet-retail-trans')),
            'STAGING_OUTPUT_PREFIX':
            's3://{}'.format(os.path.join(s3_bucket.bucket_name, 'tmp')),
            'COLUMN_NAMES':
            'invoice,stockcode,description,quantity,invoicedate,price,customer_id,country',
        },
        timeout=core.Duration.minutes(5))

    # Athena, S3, Glue-catalog and Lake Formation permissions the CTAS
    # Lambda needs; resources are "*" — workshop convenience.
    merge_small_files_lambda_fn.add_to_role_policy(
        aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                resources=["*"],
                                actions=["athena:*"]))
    merge_small_files_lambda_fn.add_to_role_policy(
        aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                resources=["*"],
                                actions=[
                                    "s3:Get*",
                                    "s3:List*",
                                    "s3:AbortMultipartUpload",
                                    "s3:PutObject",
                                ]))
    merge_small_files_lambda_fn.add_to_role_policy(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=["*"],
            actions=[
                "glue:CreateDatabase", "glue:DeleteDatabase",
                "glue:GetDatabase", "glue:GetDatabases", "glue:UpdateDatabase",
                "glue:CreateTable", "glue:DeleteTable", "glue:BatchDeleteTable",
                "glue:UpdateTable", "glue:GetTable", "glue:GetTables",
                "glue:BatchCreatePartition", "glue:CreatePartition",
                "glue:DeletePartition", "glue:BatchDeletePartition",
                "glue:UpdatePartition", "glue:GetPartition",
                "glue:GetPartitions", "glue:BatchGetPartition"
            ]))
    merge_small_files_lambda_fn.add_to_role_policy(
        aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                resources=["*"],
                                actions=["lakeformation:GetDataAccess"]))

    lambda_fn_target = aws_events_targets.LambdaFunction(
        merge_small_files_lambda_fn)
    # cron(minute="5") -> fires at minute 5 of every hour.
    aws_events.Rule(self,
                    "ScheduleRule",
                    schedule=aws_events.Schedule.cron(minute="5"),
                    targets=[lambda_fn_target])

    log_group = aws_logs.LogGroup(
        self,
        "MergeSmallFilesLogGroup",
        log_group_name="/aws/lambda/MergeSmallFiles",
        retention=aws_logs.RetentionDays.THREE_DAYS)
    log_group.grant_write(merge_small_files_lambda_fn)
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    """Provision the Elasticsearch hands-on-lab stack.

    Creates a 2-AZ VPC with an S3 gateway endpoint, a bastion host in a
    public subnet (SSH open to the world — see TODO), security groups for
    the Elasticsearch domain and its clients, a VPC-attached
    Elasticsearch 7.7 domain, and CfnOutputs exposing the bastion host
    and the domain endpoint.
    """
    super().__init__(scope, construct_id, **kwargs)

    # 2-AZ VPC; the S3 gateway endpoint keeps S3 traffic off the internet.
    vpc = aws_ec2.Vpc(
        self,
        "ElasticsearchHolVPC",
        max_azs=2,
        gateway_endpoints={
            "S3":
            aws_ec2.GatewayVpcEndpointOptions(
                service=aws_ec2.GatewayVpcEndpointAwsService.S3)
        })

    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceClass.html
    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceSize.html#aws_cdk.aws_ec2.InstanceSize
    # t3.medium for the bastion host.
    ec2_instance_type = aws_ec2.InstanceType.of(
        aws_ec2.InstanceClass.BURSTABLE3, aws_ec2.InstanceSize.MEDIUM)

    sg_bastion_host = aws_ec2.SecurityGroup(
        self,
        "BastionHostSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for an bastion host',
        security_group_name='bastion-host-sg')
    cdk.Tags.of(sg_bastion_host).add('Name', 'bastion-host-sg')

    #XXX: As there are no SSH public keys deployed on this machine,
    # you need to use EC2 Instance Connect with the command
    # 'aws ec2-instance-connect send-ssh-public-key' to provide your SSH public key.
    # https://aws.amazon.com/de/blogs/compute/new-using-amazon-ec2-instance-connect-for-ssh-access-to-your-ec2-instances/
    bastion_host = aws_ec2.BastionHostLinux(
        self,
        "BastionHost",
        vpc=vpc,
        instance_type=ec2_instance_type,
        subnet_selection=aws_ec2.SubnetSelection(
            subnet_type=aws_ec2.SubnetType.PUBLIC),
        security_group=sg_bastion_host)

    #TODO: SHOULD restrict IP range allowed to ssh access
    bastion_host.allow_ssh_access_from(aws_ec2.Peer.ipv4("0.0.0.0/0"))

    # Client-side SG: attached to anything that must reach the ES domain.
    sg_use_es = aws_ec2.SecurityGroup(
        self,
        "ElasticSearchClientSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for an elasticsearch client',
        security_group_name='use-es-cluster-sg')
    cdk.Tags.of(sg_use_es).add('Name', 'use-es-cluster-sg')

    # Cluster-side SG: admits the client SG, itself, and the bastion host.
    sg_es = aws_ec2.SecurityGroup(
        self,
        "ElasticSearchSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for an elasticsearch cluster',
        security_group_name='es-cluster-sg')
    cdk.Tags.of(sg_es).add('Name', 'es-cluster-sg')

    sg_es.add_ingress_rule(peer=sg_es,
                           connection=aws_ec2.Port.all_tcp(),
                           description='es-cluster-sg')
    sg_es.add_ingress_rule(peer=sg_use_es,
                           connection=aws_ec2.Port.all_tcp(),
                           description='use-es-cluster-sg')
    sg_es.add_ingress_rule(peer=sg_bastion_host,
                           connection=aws_ec2.Port.all_tcp(),
                           description='bastion-host-sg')

    #XXX: aws cdk elastsearch example - https://github.com/aws/aws-cdk/issues/2873
    # VPC-attached ES 7.7 domain: 3 dedicated masters + 2 data nodes in the
    # private-with-NAT subnets; access policy is open ("AWS": "*") because
    # reachability is already gated by sg_es inside the VPC.
    es_domain_name = 'es-hol'
    es_cfn_domain = aws_elasticsearch.CfnDomain(
        self,
        "ElasticSearch",
        elasticsearch_cluster_config={
            "dedicatedMasterCount": 3,
            "dedicatedMasterEnabled": True,
            "dedicatedMasterType": "t2.medium.elasticsearch",
            "instanceCount": 2,
            "instanceType": "t2.medium.elasticsearch",
            "zoneAwarenessEnabled": True
        },
        ebs_options={
            "ebsEnabled": True,
            "volumeSize": 10,
            "volumeType": "gp2"
        },
        domain_name=es_domain_name,
        elasticsearch_version="7.7",
        encryption_at_rest_options={"enabled": False},
        access_policies={
            "Version":
            "2012-10-17",
            "Statement": [{
                "Effect":
                "Allow",
                "Principal": {
                    "AWS": "*"
                },
                "Action":
                ["es:Describe*", "es:List*", "es:Get*", "es:ESHttp*"],
                "Resource":
                self.format_arn(service="es",
                                resource="domain",
                                resource_name="{}/*".format(es_domain_name))
            }]
        },
        snapshot_options={"automatedSnapshotStartHour": 17},
        vpc_options={
            "securityGroupIds": [sg_es.security_group_id],
            "subnetIds":
            vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).subnet_ids
        })
    cdk.Tags.of(es_cfn_domain).add('Name', 'es-hol')

    # Stack outputs: bastion host identifiers and the ES endpoint/URL.
    cdk.CfnOutput(self,
                  'BastionHostId',
                  value=bastion_host.instance_id,
                  export_name='BastionHostId')
    cdk.CfnOutput(self,
                  'BastionHostPublicDNSName',
                  value=bastion_host.instance_public_dns_name,
                  export_name='BastionHostPublicDNSName')
    cdk.CfnOutput(self,
                  'ESDomainEndpoint',
                  value=es_cfn_domain.attr_domain_endpoint,
                  export_name='ESDomainEndpoint')
    cdk.CfnOutput(
        self,
        'ESDashboardsURL',
        value=f"{es_cfn_domain.attr_domain_endpoint}/_dashboards/",
        export_name='ESDashboardsURL')
def __init__(self, scope: cdk.Construct, id: str, **kwargs) -> None: super().__init__(scope, id, **kwargs) # Version of ParallelCluster for Cloud9. pcluster_version = cdk.CfnParameter( self, 'ParallelClusterVersion', description= 'Specify a custom parallelcluster version. See https://pypi.org/project/aws-parallelcluster/#history for options.', default='2.8.0', type='String', allowed_values=get_version_list('aws-parallelcluster')) # S3 URI for Config file config = cdk.CfnParameter( self, 'ConfigS3URI', description='Set a custom parallelcluster config file.', default= 'https://notearshpc-quickstart.s3.amazonaws.com/{0}/config.ini'. format(__version__)) # Password password = cdk.CfnParameter( self, 'UserPasswordParameter', description='Set a password for the hpc-quickstart user', no_echo=True) # create a VPC vpc = ec2.Vpc( self, 'VPC', cidr='10.0.0.0/16', gateway_endpoints={ "S3": ec2.GatewayVpcEndpointOptions( service=ec2.GatewayVpcEndpointAwsService.S3), "DynamoDB": ec2.GatewayVpcEndpointOptions( service=ec2.GatewayVpcEndpointAwsService.DYNAMODB) }, max_azs=99) # create a private and public subnet per vpc selection = vpc.select_subnets(subnet_type=ec2.SubnetType.PRIVATE) # Output created subnets for i, public_subnet in enumerate(vpc.public_subnets): cdk.CfnOutput(self, 'PublicSubnet%i' % i, value=public_subnet.subnet_id) for i, private_subnet in enumerate(vpc.private_subnets): cdk.CfnOutput(self, 'PrivateSubnet%i' % i, value=private_subnet.subnet_id) cdk.CfnOutput(self, 'VPCId', value=vpc.vpc_id) # Create a Bucket data_bucket = s3.Bucket(self, "DataRepository") cdk.CfnOutput(self, 'DataRespository', value=data_bucket.bucket_name) cloudtrail_bucket = s3.Bucket(self, "CloudTrailLogs") quickstart_bucket = s3.Bucket.from_bucket_name(self, 'QuickStartBucket', 'aws-quickstart') # Upload Bootstrap Script to that bucket bootstrap_script = assets.Asset(self, 'BootstrapScript', path='scripts/bootstrap.sh') # Upload parallel cluster post_install_script to that bucket 
pcluster_post_install_script = assets.Asset( self, 'PclusterPostInstallScript', path='scripts/post_install_script.sh') # Upload parallel cluster post_install_script to that bucket pcluster_config_script = assets.Asset(self, 'PclusterConfigScript', path='scripts/config.ini') # Setup CloudTrail cloudtrail.Trail(self, 'CloudTrail', bucket=cloudtrail_bucket) # Create a Cloud9 instance # Cloud9 doesn't have the ability to provide userdata # Because of this we need to use SSM run command cloud9_instance = cloud9.Ec2Environment( self, 'ResearchWorkspace', vpc=vpc, instance_type=ec2.InstanceType( instance_type_identifier='c5.large')) cdk.CfnOutput(self, 'Research Workspace URL', value=cloud9_instance.ide_url) # Create a keypair in lambda and store the private key in SecretsManager c9_createkeypair_role = iam.Role( self, 'Cloud9CreateKeypairRole', assumed_by=iam.ServicePrincipal('lambda.amazonaws.com')) c9_createkeypair_role.add_managed_policy( iam.ManagedPolicy.from_aws_managed_policy_name( 'service-role/AWSLambdaBasicExecutionRole')) # Add IAM permissions to the lambda role c9_createkeypair_role.add_to_policy( iam.PolicyStatement( actions=['ec2:CreateKeyPair', 'ec2:DeleteKeyPair'], resources=['*'], )) # Lambda for Cloud9 keypair c9_createkeypair_lambda = _lambda.Function( self, 'C9CreateKeyPairLambda', runtime=_lambda.Runtime.PYTHON_3_6, handler='lambda_function.handler', timeout=cdk.Duration.seconds(300), role=c9_createkeypair_role, code=_lambda.Code.asset('functions/source/c9keypair'), ) c9_createkeypair_provider = cr.Provider( self, "C9CreateKeyPairProvider", on_event_handler=c9_createkeypair_lambda) c9_createkeypair_cr = cfn.CustomResource( self, "C9CreateKeyPair", provider=c9_createkeypair_provider, properties={'ServiceToken': c9_createkeypair_lambda.function_arn}) #c9_createkeypair_cr.node.add_dependency(instance_id) c9_ssh_private_key_secret = secretsmanager.CfnSecret( self, 'SshPrivateKeySecret', secret_string=c9_createkeypair_cr.get_att_string('PrivateKey')) # 
The iam policy has a <REGION> parameter that needs to be replaced. # We do it programmatically so future versions of the synth'd stack # template include all regions. with open('iam/ParallelClusterUserPolicy.json') as json_file: data = json.load(json_file) for s in data['Statement']: if s['Sid'] == 'S3ParallelClusterReadOnly': s['Resource'] = [] for r in region_info.RegionInfo.regions: s['Resource'].append( 'arn:aws:s3:::{0}-aws-parallelcluster*'.format( r.name)) parallelcluster_user_policy = iam.CfnManagedPolicy( self, 'ParallelClusterUserPolicy', policy_document=iam.PolicyDocument.from_json(data)) # Cloud9 IAM Role cloud9_role = iam.Role( self, 'Cloud9Role', assumed_by=iam.ServicePrincipal('ec2.amazonaws.com')) cloud9_role.add_managed_policy( iam.ManagedPolicy.from_aws_managed_policy_name( 'AmazonSSMManagedInstanceCore')) cloud9_role.add_managed_policy( iam.ManagedPolicy.from_aws_managed_policy_name('AWSCloud9User')) cloud9_role.add_managed_policy( iam.ManagedPolicy.from_managed_policy_arn( self, 'AttachParallelClusterUserPolicy', parallelcluster_user_policy.ref)) cloud9_role.add_to_policy( iam.PolicyStatement(resources=['*'], actions=[ 'ec2:DescribeInstances', 'ec2:DescribeVolumes', 'ec2:ModifyVolume' ])) cloud9_role.add_to_policy( iam.PolicyStatement(resources=[c9_ssh_private_key_secret.ref], actions=['secretsmanager:GetSecretValue'])) cloud9_role.add_to_policy( iam.PolicyStatement( actions=["s3:Get*", "s3:List*"], resources=[ "arn:aws:s3:::%s/*" % (data_bucket.bucket_name), "arn:aws:s3:::%s" % (data_bucket.bucket_name) ])) bootstrap_script.grant_read(cloud9_role) pcluster_post_install_script.grant_read(cloud9_role) pcluster_config_script.grant_read(cloud9_role) # Admin Group admin_group = iam.Group(self, 'AdminGroup') admin_group.add_managed_policy( iam.ManagedPolicy.from_aws_managed_policy_name( 'AdministratorAccess')) admin_group.add_managed_policy( iam.ManagedPolicy.from_aws_managed_policy_name( 'AWSCloud9Administrator')) # PowerUser Group poweruser_group = 
iam.Group(self, 'PowerUserGroup') poweruser_group.add_managed_policy( iam.ManagedPolicy.from_aws_managed_policy_name('PowerUserAccess')) poweruser_group.add_managed_policy( iam.ManagedPolicy.from_aws_managed_policy_name( 'AWSCloud9Administrator')) # HPC User user = iam.CfnUser( self, 'Researcher', groups=[admin_group.node.default_child.ref], login_profile=iam.CfnUser.LoginProfileProperty( password_reset_required=True, password=cdk.SecretValue.cfn_parameter(password).to_string())) create_user = cdk.CfnParameter(self, "CreateUser", default="false", type="String", allowed_values=['true', 'false' ]).value_as_string user_condition = cdk.CfnCondition(self, "UserCondition", expression=cdk.Fn.condition_equals( create_user, "true")) user.cfn_options.condition = user_condition cdk.CfnOutput(self, 'UserLoginUrl', value="".join([ "https://", self.account, ".signin.aws.amazon.com/console" ]), condition=user_condition) cdk.CfnOutput(self, 'UserName', value=user.ref, condition=user_condition) # Cloud9 Setup IAM Role cloud9_setup_role = iam.Role( self, 'Cloud9SetupRole', assumed_by=iam.ServicePrincipal('lambda.amazonaws.com')) cloud9_setup_role.add_managed_policy( iam.ManagedPolicy.from_aws_managed_policy_name( 'service-role/AWSLambdaBasicExecutionRole')) # Allow pcluster to be run in bootstrap cloud9_setup_role.add_managed_policy( iam.ManagedPolicy.from_managed_policy_arn( self, 'AttachParallelClusterUserPolicySetup', parallelcluster_user_policy.ref)) # Add IAM permissions to the lambda role cloud9_setup_role.add_to_policy( iam.PolicyStatement( actions=[ 'cloudformation:DescribeStackResources', 'ec2:AssociateIamInstanceProfile', 'ec2:AuthorizeSecurityGroupIngress', 'ec2:DescribeInstances', 'ec2:DescribeInstanceStatus', 'ec2:DescribeInstanceAttribute', 'ec2:DescribeIamInstanceProfileAssociations', 'ec2:DescribeVolumes', 'ec2:DesctibeVolumeAttribute', 'ec2:DescribeVolumesModifications', 'ec2:DescribeVolumeStatus', 'ssm:DescribeInstanceInformation', 'ec2:ModifyVolume', 
'ec2:ReplaceIamInstanceProfileAssociation', 'ec2:ReportInstanceStatus', 'ssm:SendCommand', 'ssm:GetCommandInvocation', 's3:GetObject', 'lambda:AddPermission', 'lambda:RemovePermission', 'events:PutRule', 'events:DeleteRule', 'events:PutTargets', 'events:RemoveTargets', 'cloud9:CreateEnvironmentMembership', ], resources=['*'], )) cloud9_setup_role.add_to_policy( iam.PolicyStatement(actions=['iam:PassRole'], resources=[cloud9_role.role_arn])) cloud9_setup_role.add_to_policy( iam.PolicyStatement( actions=['lambda:AddPermission', 'lambda:RemovePermission'], resources=['*'])) # Cloud9 Instance Profile c9_instance_profile = iam.CfnInstanceProfile( self, "Cloud9InstanceProfile", roles=[cloud9_role.role_name]) # Lambda to add Instance Profile to Cloud9 c9_instance_profile_lambda = _lambda.Function( self, 'C9InstanceProfileLambda', runtime=_lambda.Runtime.PYTHON_3_6, handler='lambda_function.handler', timeout=cdk.Duration.seconds(900), role=cloud9_setup_role, code=_lambda.Code.asset('functions/source/c9InstanceProfile'), ) c9_instance_profile_provider = cr.Provider( self, "C9InstanceProfileProvider", on_event_handler=c9_instance_profile_lambda, ) instance_id = cfn.CustomResource(self, "C9InstanceProfile", provider=c9_instance_profile_provider, properties={ 'InstanceProfile': c9_instance_profile.ref, 'Cloud9Environment': cloud9_instance.environment_id, }) instance_id.node.add_dependency(cloud9_instance) # Lambda for Cloud9 Bootstrap c9_bootstrap_lambda = _lambda.Function( self, 'C9BootstrapLambda', runtime=_lambda.Runtime.PYTHON_3_6, handler='lambda_function.handler', timeout=cdk.Duration.seconds(900), role=cloud9_setup_role, code=_lambda.Code.asset('functions/source/c9bootstrap'), ) c9_bootstrap_provider = cr.Provider( self, "C9BootstrapProvider", on_event_handler=c9_bootstrap_lambda) c9_bootstrap_cr = cfn.CustomResource( self, "C9Bootstrap", provider=c9_bootstrap_provider, properties={ 'Cloud9Environment': cloud9_instance.environment_id, 'BootstrapPath': 's3://%s/%s' % 
(bootstrap_script.s3_bucket_name, bootstrap_script.s3_object_key), 'Config': config, 'VPCID': vpc.vpc_id, 'MasterSubnetID': vpc.public_subnets[0].subnet_id, 'ComputeSubnetID': vpc.private_subnets[0].subnet_id, 'PostInstallScriptS3Url': "".join([ 's3://', pcluster_post_install_script.s3_bucket_name, "/", pcluster_post_install_script.s3_object_key ]), 'PostInstallScriptBucket': pcluster_post_install_script.s3_bucket_name, 'S3ReadWriteResource': data_bucket.bucket_arn, 'S3ReadWriteUrl': 's3://%s' % (data_bucket.bucket_name), 'KeyPairId': c9_createkeypair_cr.ref, 'KeyPairSecretArn': c9_ssh_private_key_secret.ref, 'UserArn': user.attr_arn, 'PclusterVersion': pcluster_version.value_as_string }) c9_bootstrap_cr.node.add_dependency(instance_id) c9_bootstrap_cr.node.add_dependency(c9_createkeypair_cr) c9_bootstrap_cr.node.add_dependency(c9_ssh_private_key_secret) c9_bootstrap_cr.node.add_dependency(data_bucket) enable_budget = cdk.CfnParameter(self, "EnableBudget", default="true", type="String", allowed_values=['true', 'false' ]).value_as_string # Budgets budget_properties = { 'budgetType': "COST", 'timeUnit': "ANNUALLY", 'budgetLimit': { 'amount': cdk.CfnParameter( self, 'BudgetLimit', description= 'The initial budget for this project in USD ($).', default=2000, type='Number').value_as_number, 'unit': "USD", }, 'costFilters': None, 'costTypes': { 'includeCredit': False, 'includeDiscount': True, 'includeOtherSubscription': True, 'includeRecurring': True, 'includeRefund': True, 'includeSubscription': True, 'includeSupport': True, 'includeTax': True, 'includeUpfront': True, 'useAmortized': False, 'useBlended': False, }, 'plannedBudgetLimits': None, 'timePeriod': None, } email = { 'notification': { 'comparisonOperator': "GREATER_THAN", 'notificationType': "ACTUAL", 'threshold': 80, 'thresholdType': "PERCENTAGE", }, 'subscribers': [{ 'address': cdk.CfnParameter( self, 'NotificationEmail', description= 'This email address will receive billing alarm notifications when 80% of the 
budget limit is reached.', default='*****@*****.**').value_as_string, 'subscriptionType': "EMAIL", }] } overall_budget = budgets.CfnBudget( self, "HPCBudget", budget=budget_properties, notifications_with_subscribers=[email], ) overall_budget.cfn_options.condition = cdk.CfnCondition( self, "BudgetCondition", expression=cdk.Fn.condition_equals(enable_budget, "true"))
def __init__(
    self,
    scope: core.Construct,
    id: str,
    cidr_block: str,
    platform_identifier: str = 'covariate-ingest',
    **kwargs
) -> None:
    """Provision the shared network and compute plane for the platform.

    Creates a 3-AZ VPC (public /24 and private /20 subnets, S3 gateway
    endpoint, ECR-Docker interface endpoint, CloudWatch flow logs),
    public/private network ACLs, two AWS Batch compute environments
    (spot + on-demand) behind one job queue, IAM roles for Lambda
    functions and Batch jobs, an S3 data bucket, and an ECS cluster.

    :param scope: parent construct.
    :param id: construct id for this stack.
    :param cidr_block: CIDR range for the new VPC.
    :param platform_identifier: prefix for role, queue, and bucket names.
    """
    super().__init__(scope, id, **kwargs)

    # Role names are published into CDK context so sibling stacks can
    # reference them by name without a construct-level dependency.
    self.lambda_function_role_name = f'{platform_identifier}-lambda-function'
    self.node.set_context('lambda_function_role_name',
                          self.lambda_function_role_name)
    self.batch_job_role_name = f'{platform_identifier}-batch-job'
    self.node.set_context('batch_job_role_name', self.batch_job_role_name)

    self.vpc = ec2.Vpc(
        self,
        "vpc",
        enable_dns_hostnames=True,
        enable_dns_support=True,
        # All VPC flow logs go to CloudWatch Logs.
        flow_logs={
            "default":
            ec2.FlowLogOptions(
                destination=ec2.FlowLogDestination.to_cloud_watch_logs()
            )
        },
        # max_azs=99, # Means use all AZs
        max_azs=3,
        cidr=cidr_block,
        # configuration will create a subnet for each config, in each AZ.
        # So us-east-1 3 public, and 3 private
        subnet_configuration=[
            ec2.SubnetConfiguration(
                name="Public",
                cidr_mask=24,
                subnet_type=ec2.SubnetType.PUBLIC,
            ),
            # Larger /20 private subnets: Batch instances live here.
            ec2.SubnetConfiguration(
                subnet_type=ec2.SubnetType.PRIVATE,
                name="Private",
                cidr_mask=20
            )
        ],
        gateway_endpoints={
            "S3":
            ec2.GatewayVpcEndpointOptions(
                service=ec2.GatewayVpcEndpointAwsService.S3
            )
        },
    )
    # Pull container images over the VPC endpoint rather than the NAT.
    self.vpc.add_interface_endpoint(
        "EcrDockerEndpoint",
        service=ec2.InterfaceVpcEndpointAwsService.ECR_DOCKER
    )

    # Public NACL: allow all TCP in/out for public subnets.
    self.nacl_public = ec2.NetworkAcl(
        self,
        "nacl_public",
        vpc=self.vpc,
        subnet_selection=ec2.SubnetSelection(
            subnet_type=ec2.SubnetType.PUBLIC
        )
    )
    self.nacl_public.add_entry(
        "in-rule",
        rule_number=95,
        cidr=ec2.AclCidr.any_ipv4(),
        rule_action=ec2.Action.ALLOW,
        direction=ec2.TrafficDirection.INGRESS,
        traffic=ec2.AclTraffic.tcp_port_range(start_port=0, end_port=65535)
    )
    self.nacl_public.add_entry(
        "out-rule",
        rule_number=95,
        cidr=ec2.AclCidr.any_ipv4(),
        rule_action=ec2.Action.ALLOW,
        direction=ec2.TrafficDirection.EGRESS,
        traffic=ec2.AclTraffic.tcp_port_range(start_port=0, end_port=65535)
    )

    # Private NACL.
    # NOTE(review): these entries allow ports 0-65432, not 0-65535 as in
    # the public NACL above — looks like a typo for 65535; confirm
    # whether excluding ports 65433-65535 is intentional.
    self.nacl_private = ec2.NetworkAcl(
        self,
        "nacl_private",
        vpc=self.vpc,
        subnet_selection=ec2.SubnetSelection(
            subnet_type=ec2.SubnetType.PRIVATE
        )
    )
    self.nacl_private.add_entry(
        "in-rule",
        rule_number=95,
        cidr=ec2.AclCidr.any_ipv4(),
        rule_action=ec2.Action.ALLOW,
        direction=ec2.TrafficDirection.INGRESS,
        traffic=ec2.AclTraffic.tcp_port_range(start_port=0, end_port=65432)
    )
    self.nacl_private.add_entry(
        "out-rule",
        rule_number=95,
        cidr=ec2.AclCidr.any_ipv4(),
        rule_action=ec2.Action.ALLOW,
        direction=ec2.TrafficDirection.EGRESS,
        traffic=ec2.AclTraffic.tcp_port_range(start_port=0, end_port=65432)
    )

    # Add Batch Compute Envs
    # Instance families Batch may launch; the allocation strategy on each
    # compute environment picks among them.
    cpu_instances = [
        ec2.InstanceType('c5.large'),
        ec2.InstanceType('c5.xlarge'),
        ec2.InstanceType('c5.2xlarge'),
        ec2.InstanceType('c5.4xlarge'),
        ec2.InstanceType('m5.large'),
        ec2.InstanceType('m5.xlarge'),
        ec2.InstanceType('m5.2xlarge'),
        ec2.InstanceType('m5.4xlarge'),
    ]
    self.cpu_on_demand = batch.ComputeEnvironment(
        self,
        'batch-cpu-on-demand',
        managed=True,
        enabled=True,
        compute_resources=batch.ComputeResources(
            vpc=self.vpc,  # Will select only private subnets.
            type=batch.ComputeResourceType.ON_DEMAND,
            allocation_strategy=batch.AllocationStrategy.
            BEST_FIT_PROGRESSIVE,
            minv_cpus=0,
            maxv_cpus=640,
            desiredv_cpus=0,
            instance_types=cpu_instances,
            image=ecs.EcsOptimizedImage.amazon_linux2(
                hardware_type=ecs.AmiHardwareType.STANDARD
            ),
        ),
    )
    self.cpu_spot = batch.ComputeEnvironment(
        self,
        'batch-cpu-spot',
        managed=True,
        enabled=True,
        compute_resources=batch.ComputeResources(
            vpc=self.vpc,  # Will select only private subnets.
            type=batch.ComputeResourceType.SPOT,
            allocation_strategy=batch.AllocationStrategy.
            SPOT_CAPACITY_OPTIMIZED,
            # Bid at most 80% of the on-demand price.
            bid_percentage=80,
            minv_cpus=0,
            maxv_cpus=640,
            desiredv_cpus=0,
            instance_types=cpu_instances,
            image=ecs.EcsOptimizedImage.amazon_linux2(
                hardware_type=ecs.AmiHardwareType.STANDARD
            ),
        ),
    )
    # Single queue: try spot capacity first (order=1), fall back to
    # on-demand (order=2).
    self.cpu_spot_first = batch.JobQueue(
        self,
        'cpu-spot-first',
        job_queue_name=f'{platform_identifier}-cpu-queue',
        compute_environments=[
            batch.JobQueueComputeEnvironment(
                compute_environment=self.cpu_spot, order=1
            ),
            batch.JobQueueComputeEnvironment(
                compute_environment=self.cpu_on_demand, order=2
            ),
        ],
        enabled=True,
        priority=10
    )

    # Identity roles; S3 permissions are granted below, other permissions
    # presumably attached elsewhere via the published role names — verify.
    self.lambda_function_role = iam.Role(
        self,
        'lambda-function-role',
        role_name=self.lambda_function_role_name,
        description='',
        assumed_by=iam.ServicePrincipal(service='lambda.amazonaws.com'),
    )
    self.batch_job_role = iam.Role(
        self,
        'batch-job-role',
        role_name=self.batch_job_role_name,
        description='',
        assumed_by=iam.ServicePrincipal(service='ecs-tasks.amazonaws.com'),
    )

    # NOTE(review): all four S3 public-access blocks are disabled here —
    # confirm this data bucket is really meant to allow public
    # policies/ACLs.
    self.intermediate_bucket = s3.Bucket(
        self,
        f'{platform_identifier}-data-bucket',
        bucket_name=f'{platform_identifier}-data-dev',
        block_public_access=s3.BlockPublicAccess(
            block_public_acls=False,
            block_public_policy=False,
            ignore_public_acls=False,
            restrict_public_buckets=False
        ),
    )
    self.intermediate_bucket.grant_read_write(self.lambda_function_role)
    self.intermediate_bucket.grant_read_write(self.batch_job_role)

    cluster = ecs.Cluster(
        self,
        "covar-api-cluster",
        cluster_name='covar-service-cluster',
        vpc=self.vpc
    )
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    """Provision an Amazon OpenSearch Service domain inside a VPC.

    Creates a 3-AZ VPC with an S3 gateway endpoint, a public bastion
    host (SSH from anywhere — see TODO), security groups for the
    cluster and its clients, a Secrets Manager secret holding the
    master user credentials, and a 3-master/3-data OpenSearch domain
    with fine-grained access control, then exports the endpoints as
    CloudFormation outputs.
    """
    super().__init__(scope, construct_id, **kwargs)

    # NOTE(review): the pattern permits uppercase letters, but the
    # generated default is lowercase; OpenSearch domain-name rules may
    # reject uppercase — confirm the allowed_pattern is what's intended.
    OPENSEARCH_DOMAIN_NAME = cdk.CfnParameter(
        self,
        'OpenSearchDomainName',
        type='String',
        description='Amazon OpenSearch Service domain name',
        default='opensearch-{}'.format(''.join(
            random.sample((string.ascii_letters), k=5))),
        allowed_pattern='[a-z]+[A-Za-z0-9\-]+')

    EC2_KEY_PAIR_NAME = cdk.CfnParameter(
        self,
        'EC2KeyPairName',
        type='String',
        description='Amazon EC2 Instance KeyPair name')

    #XXX: For creating this stack in an existing VPC,
    # remove comments from the below codes and
    # comment out the vpc = aws_ec2.Vpc(..) codes,
    # then pass -c vpc_name=your-existing-vpc to cdk command
    # for example,
    # cdk -c vpc_name=your-existing-vpc synth
    #
    # vpc_name = self.node.try_get_context('vpc_name')
    # vpc = aws_ec2.Vpc.from_lookup(self, 'ExistingVPC',
    #   is_default=True,
    #   vpc_name=vpc_name
    # )
    vpc = aws_ec2.Vpc(
        self,
        "OpenSearchVPC",
        max_azs=3,
        gateway_endpoints={
            "S3":
            aws_ec2.GatewayVpcEndpointOptions(
                service=aws_ec2.GatewayVpcEndpointAwsService.S3)
        })

    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceClass.html
    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceSize.html#aws_cdk.aws_ec2.InstanceSize
    # t3.medium for the bastion host.
    ec2_instance_type = aws_ec2.InstanceType.of(
        aws_ec2.InstanceClass.BURSTABLE3, aws_ec2.InstanceSize.MEDIUM)

    sg_bastion_host = aws_ec2.SecurityGroup(
        self,
        "BastionHostSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for an bastion host',
        security_group_name='bastion-host-sg')
    cdk.Tags.of(sg_bastion_host).add('Name', 'bastion-host-sg')

    #TODO: SHOULD restrict IP range allowed to ssh access
    sg_bastion_host.add_ingress_rule(peer=aws_ec2.Peer.ipv4("0.0.0.0/0"),
                                     connection=aws_ec2.Port.tcp(22),
                                     description='SSH access')

    # Public bastion used to reach the (VPC-only) OpenSearch endpoint.
    bastion_host = aws_ec2.Instance(
        self,
        "BastionHost",
        vpc=vpc,
        instance_type=ec2_instance_type,
        machine_image=aws_ec2.MachineImage.latest_amazon_linux(),
        vpc_subnets=aws_ec2.SubnetSelection(
            subnet_type=aws_ec2.SubnetType.PUBLIC),
        security_group=sg_bastion_host,
        key_name=EC2_KEY_PAIR_NAME.value_as_string)

    # Attach this SG to anything that should be allowed to talk to the
    # cluster (443 and 9200-9300 are opened for it below).
    sg_use_opensearch = aws_ec2.SecurityGroup(
        self,
        "OpenSearchClientSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for an opensearch client',
        security_group_name='use-opensearch-cluster-sg')
    cdk.Tags.of(sg_use_opensearch).add('Name', 'use-opensearch-cluster-sg')

    sg_opensearch_cluster = aws_ec2.SecurityGroup(
        self,
        "OpenSearchSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for an opensearch cluster',
        security_group_name='opensearch-cluster-sg')
    cdk.Tags.of(sg_opensearch_cluster).add('Name', 'opensearch-cluster-sg')

    # Node-to-node traffic within the cluster SG.
    sg_opensearch_cluster.add_ingress_rule(
        peer=sg_opensearch_cluster,
        connection=aws_ec2.Port.all_tcp(),
        description='opensearch-cluster-sg')

    # Clients and the bastion may reach HTTPS and the REST/transport ports.
    sg_opensearch_cluster.add_ingress_rule(
        peer=sg_use_opensearch,
        connection=aws_ec2.Port.tcp(443),
        description='use-opensearch-cluster-sg')
    sg_opensearch_cluster.add_ingress_rule(
        peer=sg_use_opensearch,
        connection=aws_ec2.Port.tcp_range(9200, 9300),
        description='use-opensearch-cluster-sg')
    sg_opensearch_cluster.add_ingress_rule(
        peer=sg_bastion_host,
        connection=aws_ec2.Port.tcp(443),
        description='bastion-host-sg')
    sg_opensearch_cluster.add_ingress_rule(
        peer=sg_bastion_host,
        connection=aws_ec2.Port.tcp_range(9200, 9300),
        description='bastion-host-sg')

    # Secrets Manager generates the master password; the username is
    # fixed in the secret template.
    master_user_secret = aws_secretsmanager.Secret(
        self,
        "OpenSearchMasterUserSecret",
        generate_secret_string=aws_secretsmanager.SecretStringGenerator(
            secret_string_template=json.dumps({"username": "******"}),
            generate_string_key="password",
            # Master password must be at least 8 characters long and contain at least one uppercase letter,
            # one lowercase letter, one number, and one special character.
            password_length=8))

    #XXX: aws cdk elasticsearch example - https://github.com/aws/aws-cdk/issues/2873
    # You should camelCase the property names instead of PascalCase
    opensearch_domain = aws_opensearchservice.Domain(
        self,
        "OpenSearch",
        domain_name=OPENSEARCH_DOMAIN_NAME.value_as_string,
        version=aws_opensearchservice.EngineVersion.OPENSEARCH_1_0,
        capacity={
            "master_nodes": 3,
            "master_node_instance_type": "r6g.large.search",
            "data_nodes": 3,
            "data_node_instance_type": "r6g.large.search"
        },
        ebs={
            "volume_size": 10,
            "volume_type": aws_ec2.EbsDeviceVolumeType.GP2
        },
        #XXX: az_count must be equal to vpc subnets count.
        zone_awareness={"availability_zone_count": 3},
        logging={
            "slow_search_log_enabled": True,
            "app_log_enabled": True,
            "slow_index_log_enabled": True
        },
        fine_grained_access_control=aws_opensearchservice.
        AdvancedSecurityOptions(
            master_user_name=master_user_secret.secret_value_from_json(
                "username").to_string(),
            master_user_password=master_user_secret.secret_value_from_json(
                "password")),
        # Enforce HTTPS is required when fine-grained access control is enabled.
        enforce_https=True,
        # Node-to-node encryption is required when fine-grained access control is enabled
        node_to_node_encryption=True,
        # Encryption-at-rest is required when fine-grained access control is enabled.
        encryption_at_rest={"enabled": True},
        use_unsigned_basic_auth=True,
        security_groups=[sg_opensearch_cluster],
        automated_snapshot_start_hour=17,  # 2 AM (GTM+9)
        vpc=vpc,
        vpc_subnets=[
            aws_ec2.SubnetSelection(
                one_per_az=True,
                subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT)
        ],
        removal_policy=cdk.RemovalPolicy.
        DESTROY  # default: cdk.RemovalPolicy.RETAIN
    )
    cdk.Tags.of(opensearch_domain).add(
        'Name', f'{OPENSEARCH_DOMAIN_NAME.value_as_string}')

    # Exported values for other stacks / operators.
    cdk.CfnOutput(self, 'BastionHostId',
                  value=bastion_host.instance_id,
                  export_name='BastionHostId')
    cdk.CfnOutput(self, 'OpenSearchDomainEndpoint',
                  value=opensearch_domain.domain_endpoint,
                  export_name='OpenSearchDomainEndpoint')
    cdk.CfnOutput(
        self,
        'OpenSearchDashboardsURL',
        value=f"{opensearch_domain.domain_endpoint}/_dashboards/",
        export_name='OpenSearchDashboardsURL')
    cdk.CfnOutput(self, 'MasterUserSecretId',
                  value=master_user_secret.secret_name,
                  export_name='MasterUserSecretId')
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    """Provision an Amazon MWAA (managed Airflow) environment.

    Creates a 2-AZ VPC with an S3 gateway endpoint, a self-referencing
    security group for MWAA, an execution role with the documented MWAA
    permission set, and the MWAA environment itself (DAGs read from the
    S3 bucket named in the ``s3_bucket_for_dag_code`` context value).

    Requires ``env`` (with ``account`` and ``region``) in ``kwargs``:
    ``kwargs['env']`` is read below and raises ``KeyError`` otherwise.
    """
    super().__init__(scope, construct_id, **kwargs)

    #XXX: For creating Amazon MWAA in an existing VPC,
    # remove comments from the below codes and
    # comment out the vpc = aws_ec2.Vpc(..) codes,
    # then pass -c vpc_name=your-existing-vpc to cdk command
    # for example,
    # cdk -c vpc_name=your-existing-vpc synth
    #
    # vpc_name = self.node.try_get_context('vpc_name')
    # vpc = aws_ec2.Vpc.from_lookup(self, 'ExistingVPC',
    #   is_default=True,
    #   vpc_name=vpc_name
    # )

    #XXX: To use more than 2 AZs, be sure to specify the account and region on your stack.
    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/Vpc.html
    vpc = aws_ec2.Vpc(
        self,
        'MwaaStack',
        max_azs=2,
        gateway_endpoints={
            "S3":
            aws_ec2.GatewayVpcEndpointOptions(
                service=aws_ec2.GatewayVpcEndpointAwsService.S3)
        })

    # Pre-existing bucket holding DAG code; referenced, not created.
    s3_bucket_name = self.node.try_get_context('s3_bucket_for_dag_code')
    s3_bucket = s3.Bucket.from_bucket_name(self, "S3BucketForDAGCode",
                                           s3_bucket_name)

    # Environment name from context, else a random default.
    DEFAULT_MWAA_ENV_NAME = 'MyAirflowEnv-{}'.format(''.join(
        random.sample((string.ascii_letters), k=5)))
    MY_MWAA_ENV_NAME = self.node.try_get_context('airflow_env_name')
    MY_MWAA_ENV_NAME = MY_MWAA_ENV_NAME if MY_MWAA_ENV_NAME else DEFAULT_MWAA_ENV_NAME

    # Self-referencing SG, as required for MWAA's internal traffic.
    sg_mwaa = aws_ec2.SecurityGroup(
        self,
        "AirflowSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for Amazon MWAA Environment {}'.format(
            MY_MWAA_ENV_NAME),
        security_group_name='airflow-sg-{}'.format(MY_MWAA_ENV_NAME))
    sg_mwaa.add_ingress_rule(peer=sg_mwaa,
                             connection=aws_ec2.Port.all_traffic(),
                             description='airflow security group')
    cdk.Tags.of(sg_mwaa).add('Name',
                             'airflow-sg-{}'.format(MY_MWAA_ENV_NAME))

    # Execution-role policy, following the MWAA execution-role template.
    mwaa_execution_policy_doc = aws_iam.PolicyDocument()
    mwaa_execution_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            **{
                "effect": aws_iam.Effect.ALLOW,
                # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
                "resources": [
                    self.format_arn(
                        service="airflow",
                        resource="environment",
                        resource_name=MY_MWAA_ENV_NAME,
                        arn_format=cdk.ArnFormat.SLASH_RESOURCE_NAME)
                ],
                "actions": ["airflow:PublishMetrics"]
            }))
    # NOTE(review): this DENY scopes s3:ListAllMyBuckets to the DAG
    # bucket ARNs, mirroring the MWAA sample policy — verify against the
    # current MWAA execution-role documentation.
    mwaa_execution_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            **{
                "effect": aws_iam.Effect.DENY,
                "resources": [
                    s3_bucket.bucket_arn, "{}/*".format(
                        s3_bucket.bucket_arn)
                ],
                "actions": ["s3:ListAllMyBuckets"]
            }))
    # Read access to the DAG bucket.
    mwaa_execution_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            **{
                "effect": aws_iam.Effect.ALLOW,
                "resources": [
                    s3_bucket.bucket_arn, "{}/*".format(
                        s3_bucket.bucket_arn)
                ],
                "actions": ["s3:GetObject*", "s3:GetBucket*", "s3:List*"]
            }))
    # CloudWatch Logs access for the environment's own log groups.
    mwaa_execution_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            **{
                "effect": aws_iam.Effect.ALLOW,
                # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
                "resources": [
                    self.format_arn(
                        service="logs",
                        resource="log-group",
                        resource_name="airflow-{}-*".format(
                            MY_MWAA_ENV_NAME),
                        arn_format=cdk.ArnFormat.COLON_RESOURCE_NAME)
                ],
                "actions": [
                    "logs:CreateLogStream", "logs:CreateLogGroup",
                    "logs:PutLogEvents", "logs:GetLogEvents",
                    "logs:GetLogRecord", "logs:GetLogGroupFields",
                    "logs:GetQueryResults"
                ]
            }))
    mwaa_execution_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            **{
                "effect": aws_iam.Effect.ALLOW,
                "resources": ["*"],
                "actions": ["logs:DescribeLogGroups"]
            }))
    mwaa_execution_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            **{
                "effect": aws_iam.Effect.ALLOW,
                "resources": ["*"],
                "actions": ["cloudwatch:PutMetricData"]
            }))
    # Celery queues MWAA maintains on the caller's behalf.
    mwaa_execution_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            **{
                "effect": aws_iam.Effect.ALLOW,
                # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
                "resources": [
                    self.format_arn(service="sqs",
                                    account="*",
                                    resource="airflow-celery-*")
                ],
                "actions": [
                    "sqs:ChangeMessageVisibility", "sqs:DeleteMessage",
                    "sqs:GetQueueAttributes", "sqs:GetQueueUrl",
                    "sqs:ReceiveMessage", "sqs:SendMessage"
                ]
            }))
    # KMS for SQS encryption; not_resources excludes customer keys,
    # matching the MWAA sample policy.
    mwaa_execution_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            **{
                "effect": aws_iam.Effect.ALLOW,
                "actions": [
                    "kms:Decrypt", "kms:DescribeKey",
                    "kms:GenerateDataKey*", "kms:Encrypt"
                ],
                "not_resources": [
                    self.format_arn(
                        service="kms",
                        region="*",
                        resource="key",
                        resource_name="*",
                        arn_format=cdk.ArnFormat.SLASH_RESOURCE_NAME)
                ],
                "conditions": {
                    "StringLike": {
                        "kms:ViaService": [
                            "sqs.{region}.amazonaws.com".format(
                                region=kwargs['env'].region)
                        ]
                    }
                }
            }))

    mwaa_execution_role = aws_iam.Role(
        self,
        'MWAAExecutionRole',
        role_name='AmazonMWAA-{name}-{suffix}'.format(
            name=MY_MWAA_ENV_NAME, suffix=str(kwargs['env'].account)[-5:]),
        assumed_by=aws_iam.ServicePrincipal('airflow.amazonaws.com'),
        path='/service-role/',
        inline_policies={
            'MWAA-Execution-Policy': mwaa_execution_policy_doc
        })

    #XXX: https://github.com/aws/aws-cdk/issues/3227
    # The role must be assumable by both airflow.amazonaws.com and
    # airflow-env.amazonaws.com; the second principal is added manually.
    mwaa_execution_role.assume_role_policy.add_statements(
        aws_iam.PolicyStatement(
            **{
                "effect": aws_iam.Effect.ALLOW,
                "actions": ["sts:AssumeRole"],
                "principals":
                [aws_iam.ServicePrincipal('airflow-env.amazonaws.com')]
            }))

    #XXX: NetworkConfiguration.SubnetIds: expected maximum item count: 2
    MAX_SUBNET_IDS = 2
    mwaa_network_conf = mwaa.CfnEnvironment.NetworkConfigurationProperty(
        subnet_ids=vpc.select_subnets(
            subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).
        subnet_ids[:MAX_SUBNET_IDS],
        security_group_ids=[sg_mwaa.security_group_id])

    mwaa_logging_conf = mwaa.CfnEnvironment.LoggingConfigurationProperty(
        dag_processing_logs=mwaa.CfnEnvironment.
        ModuleLoggingConfigurationProperty(enabled=True,
                                           log_level="WARNING"),
        scheduler_logs=mwaa.CfnEnvironment.
        ModuleLoggingConfigurationProperty(enabled=True,
                                           log_level="WARNING"),
        task_logs=mwaa.CfnEnvironment.ModuleLoggingConfigurationProperty(
            enabled=True, log_level="INFO"),
        webserver_logs=mwaa.CfnEnvironment.
        ModuleLoggingConfigurationProperty(enabled=True,
                                           log_level="WARNING"),
        worker_logs=mwaa.CfnEnvironment.ModuleLoggingConfigurationProperty(
            enabled=True, log_level="WARNING"))

    # Airflow configuration overrides passed straight to MWAA.
    mwaa_conf_options = {
        "logging.logging_level": "INFO",
        "core.default_timezone": "utc"
    }

    airflow_env = mwaa.CfnEnvironment(
        self,
        "MyAirflow",
        name=MY_MWAA_ENV_NAME,
        airflow_configuration_options=mwaa_conf_options,
        airflow_version="2.0.2",  #XXX: Valid values=[2.0.2, 1.10.12]
        dag_s3_path="dags",
        environment_class=
        "mw1.small",  #XXX: Valid values=[mw1.small, mw1.medium, mw1.large]
        execution_role_arn=mwaa_execution_role.role_arn,
        logging_configuration=mwaa_logging_conf,
        max_workers=2,
        min_workers=1,
        network_configuration=mwaa_network_conf,
        requirements_s3_path="requirements/requirements.txt",
        source_bucket_arn=s3_bucket.bucket_arn,
        #tags={"env": "staging", "service": "airflow"}, #XXX: https://github.com/aws/aws-cdk/issues/13772
        webserver_access_mode="PUBLIC_ONLY",
        weekly_maintenance_window_start="SUN:03:30")

    # Exported values for other stacks / operators.
    cdk.CfnOutput(self, 'StackName',
                  value=self.stack_name,
                  export_name='StackName')
    cdk.CfnOutput(self, 'VpcId', value=vpc.vpc_id, export_name='VpcId')
    cdk.CfnOutput(self, 'AirflowEnvName',
                  value=airflow_env.name,
                  export_name='AirflowEnvName')
    cdk.CfnOutput(self, 'AirflowVersion',
                  value=airflow_env.airflow_version,
                  export_name='AirflowVersion')
    cdk.CfnOutput(self, 'AirflowSourceBucketArn',
                  value=airflow_env.source_bucket_arn,
                  export_name='AirflowSourceBucketArn')
    cdk.CfnOutput(self, 'AirflowDagS3Path',
                  value=airflow_env.dag_s3_path,
                  export_name='AirflowDagS3Path')
    cdk.CfnOutput(self, 'MWAAEnvironmentClass',
                  value=airflow_env.environment_class,
                  export_name='MWAAEnvironmentClass')
    cdk.CfnOutput(self, 'MWAASecurityGroupID',
                  value=sg_mwaa.security_group_id,
                  export_name='MWAASecurityGroupID')
    cdk.CfnOutput(self, 'MWAAExecutionRoleArn',
                  value=airflow_env.execution_role_arn,
                  export_name='MWAAExecutionRoleArn')
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    """Demonstrate Lambda destinations.

    A caller Lambda (triggered by an SNS topic) invokes a callee Lambda
    asynchronously on success and publishes to an EventBridge event bus
    on failure; an event rule forwards bus events to a CloudWatch log
    group. Log groups for both functions are created explicitly with a
    3-day retention.
    """
    super().__init__(scope, construct_id, **kwargs)

    # vpc_name = self.node.try_get_context("vpc_name")
    # vpc = aws_ec2.Vpc.from_lookup(self, "ExistingVPC",
    #   is_default=True,
    #   vpc_name=vpc_name)

    # NOTE(review): construct id "FirehoseToS3VPC" looks copy-pasted
    # from another stack; renaming would replace the deployed VPC, so
    # it is left as-is. The VPC is not referenced again in this stack.
    vpc = aws_ec2.Vpc(
        self,
        "FirehoseToS3VPC",
        max_azs=2,
        gateway_endpoints={
            "S3":
            aws_ec2.GatewayVpcEndpointOptions(
                service=aws_ec2.GatewayVpcEndpointAwsService.S3)
        })

    ASYNC_CALLEE_LAMBDA_FN_NAME = "LambdaAsyncCallee"
    async_callee_lambda_fn = aws_lambda.Function(
        self,
        "LambdaAsyncCallee",
        runtime=aws_lambda.Runtime.PYTHON_3_7,
        function_name="LambdaAsyncCallee",
        handler="lambda_aync_callee.lambda_handler",
        description=
        "Lambda function asynchrously invoked by LambdaAsyncCaller",
        code=aws_lambda.Code.from_asset(
            os.path.join(os.path.dirname(__file__), 'src/main/python')),
        timeout=cdk.Duration.minutes(5))

    log_group = aws_logs.LogGroup(
        self,
        "LambdaAsyncCalleeLogGroup",
        #XXX: Circular dependency between resources occurs
        # if aws_lambda.Function.function_name is used
        # instead of literal name of lambda function such as "LambdaAsyncCallee"
        log_group_name="/aws/lambda/{}".format(
            ASYNC_CALLEE_LAMBDA_FN_NAME),
        retention=aws_logs.RetentionDays.THREE_DAYS,
        removal_policy=cdk.RemovalPolicy.DESTROY)
    log_group.grant_write(async_callee_lambda_fn)

    # Event bus receiving on-failure destination events from the caller.
    event_bus = aws_events.EventBus(
        self,
        "EventBusForLambda",
        event_bus_name="EventBusForLambdaDestinations",
    )
    event_bus.apply_removal_policy(cdk.RemovalPolicy.DESTROY)

    # `log_group` is re-bound here; the previous reference is no longer
    # needed once grant_write has been called.
    log_group = aws_logs.LogGroup(
        self,
        "EventBusLogGroup",
        log_group_name="/aws/events/{}".format(event_bus.event_bus_name),
        retention=aws_logs.RetentionDays.THREE_DAYS,
        removal_policy=cdk.RemovalPolicy.DESTROY)

    # Forward every event from this account on the bus into the log group.
    event_rule = aws_events.Rule(
        self,
        "EventRuleForLambdaDestinations",
        rule_name="EventRuleForLambdaDestinations",
        event_bus=event_bus,
        event_pattern={"account": [self.account]})
    event_rule.add_target(aws_events_targets.CloudWatchLogGroup(log_group))
    event_rule.apply_removal_policy(cdk.RemovalPolicy.DESTROY)

    CALLER_LAMBDA_FN_NAME = "LambdaAsyncCaller"
    caller_lambda_fn = aws_lambda.Function(
        self,
        "LambdaAsyncCaller",
        runtime=aws_lambda.Runtime.PYTHON_3_7,
        function_name="LambdaAsyncCaller",
        handler="lambda_caller.lambda_handler",
        description="Asynchronusly call lambda function",
        code=aws_lambda.Code.from_asset(
            os.path.join(os.path.dirname(__file__), 'src/main/python')),
        timeout=cdk.Duration.minutes(5),
        #XXX: Uncomments out if you want to use different lambda function version
        # current_version_options=aws_lambda.VersionOptions(
        #   on_success=aws_lambda_destinations.LambdaDestination(async_callee_lambda_fn, response_only=False),
        #   on_failure=aws_lambda_destinations.EventBridgeDestination(event_bus),
        #   max_event_age=cdk.Duration.hours(6), # Minimum: 60 seconds, Maximum: 6 hours, Default: 6 hours
        #   retry_attempts=0 # Minimum: 0, Maximum: 2, Default: 2
        # ),
        on_success=aws_lambda_destinations.LambdaDestination(
            async_callee_lambda_fn, response_only=False),
        on_failure=aws_lambda_destinations.EventBridgeDestination(
            event_bus),
        max_event_age=cdk.Duration.hours(
            6),  # Minimum: 60 seconds Maximum: 6 hours, Default: 6 hours
        #XXX: Set retry_attempts to 0 in order to invoke other lambda function as soon as a error occurred
        retry_attempts=0  # Minimum: 0, Maximum: 2, Default: 2
    )

    # SNS topic is the (asynchronous) trigger for the caller function.
    sns_topic = aws_sns.Topic(self,
                              'SnsTopicForLambda',
                              topic_name='LambdaSourceEvent',
                              display_name='lambda source event')
    caller_lambda_fn.add_event_source(
        aws_lambda_event_sources.SnsEventSource(sns_topic))

    # Allow the caller to invoke the callee (on-success destination).
    caller_lambda_fn.add_to_role_policy(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            #XXX: The ARN will be formatted as follows:
            # arn:{partition}:{service}:{region}:{account}:{resource}{sep}}{resource-name}
            resources=[
                self.format_arn(
                    partition="aws",
                    service="lambda",
                    region=cdk.Aws.REGION,
                    account=cdk.Aws.ACCOUNT_ID,
                    resource="function",
                    resource_name="{}*".format(
                        async_callee_lambda_fn.function_name),
                    arn_format=cdk.ArnFormat.COLON_RESOURCE_NAME)
            ],
            actions=["lambda:InvokeFunction"]))

    # Allow the caller to publish to the bus (on-failure destination).
    caller_lambda_fn.add_to_role_policy(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            #XXX: The ARN will be formatted as follows:
            # arn:{partition}:{service}:{region}:{account}:{resource}{sep}}{resource-name}
            resources=[event_bus.event_bus_arn],
            actions=["events:PutEvents"]))

    log_group = aws_logs.LogGroup(
        self,
        "LambdaAsyncCallerLogGroup",
        #XXX: Circular dependency between resources occurs
        # if aws_lambda.Function.function_name is used
        # instead of literal name of lambda function such as "LambdaAsyncCaller"
        log_group_name="/aws/lambda/{}".format(CALLER_LAMBDA_FN_NAME),
        retention=aws_logs.RetentionDays.THREE_DAYS,
        removal_policy=cdk.RemovalPolicy.DESTROY)
    log_group.grant_write(caller_lambda_fn)

    cdk.CfnOutput(self, 'SNSTopicName',
                  value=sns_topic.topic_name,
                  export_name='SNSTopicName')
    cdk.CfnOutput(self, 'SNSTopicArn',
                  value=sns_topic.topic_arn,
                  export_name='SNSTopicArn')
def __init__(self, scope: core.Construct, id: str, stage={}, **kwargs) -> None:
    """Create a customer VPC whose subnet layout is driven by *stage*.

    ``stage`` keys read here: ``name_stage``, ``vpc_cidr``,
    ``subnet_prefix``, ``max_azs``, ``layers`` (mapping layer name ->
    'PUBLIC' | 'PRIVATE' | 'ISOLATED'), ``layer_endpoints``,
    ``layer_nats``, ``active_flowlogs``.

    Besides the VPC, provisions an S3 gateway endpoint plus EC2,
    EC2-messages, SSM and SSM-messages interface endpoints (HTTPS open
    to any IPv4), and an SSM-enabled EC2 instance role and profile.

    :raises ValueError: if a layer declares an unknown subnet type, or
        the VPC CIDR cannot hold one /prefix subnet per layer per AZ.
    :raises KeyError: if a required ``stage`` key is missing.
    """
    super().__init__(scope, id, **kwargs)
    # NOTE: `stage={}` is a mutable default, but it is never mutated
    # here, so the shared-default pitfall does not apply; the default is
    # kept for interface compatibility.

    # parameters from context
    customer = self.node.try_get_context("customer")
    namestage = stage['name_stage']
    vpccidr = stage['vpc_cidr']  # VPC CIDR
    vpcname = "vpc-" + customer + "-" + namestage
    subnetprefix = int(stage['subnet_prefix'])  # Subnets Prefix /XX
    maxazs = int(stage['max_azs'])  # Nro of AZs
    layers = stage['layers']  # Names for Layers
    layerendpoints = stage['layer_endpoints']  # Layer name for Endpoints
    layersnat = stage['layer_nats']  # Layer name for the Nat Subnets
    activeflowlogs = stage['active_flowlogs']  # Active flowlogs?

    # flags subnets types
    flg_public = False
    flg_private = False
    flg_isolated = False

    # Verify the VPC CIDR has room for one /prefix subnet per layer per
    # AZ (resolves the old "HANDLE ERROR for insufficient space" TODO).
    nro_subnets = len(layers.keys()) * maxazs
    subnets = list(ip.ip_network(vpccidr).subnets(new_prefix=subnetprefix))
    if nro_subnets > len(subnets):
        raise ValueError(
            "VPC CIDR %s only fits %d /%d subnets; %d layers x %d AZs "
            "require %d" % (vpccidr, len(subnets), subnetprefix,
                            len(layers), maxazs, nro_subnets))

    # subnets configuration - layers * azs
    subnetsconfs = []
    for layer in layers:
        layertype = layers[layer]
        if layertype == 'PUBLIC':
            sntype = _ec2.SubnetType.PUBLIC
            flg_public = True
        elif layertype == 'PRIVATE':
            sntype = _ec2.SubnetType.PRIVATE
            flg_private = True
        elif layertype == 'ISOLATED':
            flg_isolated = True
            sntype = _ec2.SubnetType.ISOLATED
        else:
            # Previously an unknown type silently reused the prior
            # layer's subnet type (or raised UnboundLocalError on the
            # first layer); fail loudly instead.
            raise ValueError("unknown subnet type %r for layer %r" %
                             (layertype, layer))
        subnetsconfs.append(
            _ec2.SubnetConfiguration(name=layer,
                                     subnet_type=sntype,
                                     cidr_mask=subnetprefix))

    # selection subnets nat: only honoured when the NAT layer exists and
    # is PUBLIC (NAT gateways must live in public subnets).
    natsubnets = None
    if layersnat in layers and layers[layersnat] == 'PUBLIC':
        natsubnets = _ec2.SubnetSelection(subnet_group_name=layersnat)

    # vpc tenancy (context key "vpc_tenacy" is the project's spelling)
    vpctenacy = _ec2.DefaultInstanceTenancy.DEFAULT
    if self.node.try_get_context("vpc_tenacy") == 'DEDICATED':
        vpctenacy = _ec2.DefaultInstanceTenancy.DEDICATED

    # creation vpc
    sn_layer_endpoints = [
        _ec2.SubnetSelection(one_per_az=True,
                             subnet_group_name=layerendpoints)
    ]
    vpc = _ec2.Vpc(self,
                   vpcname,
                   max_azs=maxazs,
                   cidr=vpccidr,
                   subnet_configuration=subnetsconfs,
                   nat_gateway_subnets=natsubnets,
                   default_instance_tenancy=vpctenacy,
                   gateway_endpoints={
                       "S3":
                       _ec2.GatewayVpcEndpointOptions(
                           service=_ec2.GatewayVpcEndpointAwsService.S3,
                           subnets=sn_layer_endpoints)
                   })

    # Config Route Tables
    # TODO: create RT by subnets type
    publicsubnets = vpc.select_subnets(
        subnet_type=_ec2.SubnetType.PUBLIC) if flg_public else ""
    privatesubnets = vpc.select_subnets(
        subnet_type=_ec2.SubnetType.PRIVATE) if flg_private else ""
    isolatedsubnets = vpc.select_subnets(
        subnet_type=_ec2.SubnetType.ISOLATED) if flg_isolated else ""

    # Endpoints
    # s3 Endpoint (handled via gateway_endpoints above)
    sn_layer_endpoints = _ec2.SubnetSelection(
        one_per_az=True, subnet_group_name=layerendpoints)
    #vpc.add_s3_endpoint(vpcname+"-S3Endpoint",subnets=sn_layer_endpoints)
    #vpc.add_gateway_endpoint(vpcname + "-S3Endpoint", service=_ec2.GatewayVpcEndpointAwsService.S3,
    #                         subnets=sn_layer_endpoints)

    def _add_https_interface_endpoint(suffix, service):
        # Add an interface endpoint in the endpoint layer and open
        # HTTPS (443) to any IPv4 address.
        endpoint = vpc.add_interface_endpoint(vpcname + suffix,
                                              service=service,
                                              subnets=sn_layer_endpoints)
        endpoint.connections.allow_from_any_ipv4(
            port_range=_ec2.Port(from_port=443,
                                 to_port=443,
                                 protocol=_ec2.Protocol.TCP,
                                 string_representation="https"))
        return endpoint

    # ec2 endpoint
    ec2_endpoint = _add_https_interface_endpoint(
        "-ec2_endpoint", _ec2.InterfaceVpcEndpointAwsService.E_C2)
    # ec2 messages endpoint
    ec2messages_endpoint = _add_https_interface_endpoint(
        "-ec2message_endpoint",
        _ec2.InterfaceVpcEndpointAwsService.E_C2_MESSAGES)
    # ssm endpoint
    ssm_endpoint = _add_https_interface_endpoint(
        "-ssm_endpoint", _ec2.InterfaceVpcEndpointAwsService.SSM)
    # ssm messages endpoint
    ssmmessages_endpoint = _add_https_interface_endpoint(
        "-ssmmessages_endpoint",
        _ec2.InterfaceVpcEndpointAwsService.SSM_MESSAGES)

    # SSM IAM Role for EC2 instances managed through Systems Manager.
    ec2_ssm_iam_role = iam.Role(
        self,
        "ssm_ec2_iam_role",
        assumed_by=iam.ServicePrincipal('ec2.amazonaws.com'),
        role_name="ssm_ec2_iam_role_" + stage['name_stage'])
    ec2_ssm_iam_role.add_managed_policy(
        iam.ManagedPolicy.from_aws_managed_policy_name(
            'service-role/AmazonEC2RoleforSSM'))
    ec2_ssm_iam_role.add_managed_policy(
        iam.ManagedPolicy.from_aws_managed_policy_name(
            'CloudWatchAgentServerPolicy'))

    # add Role to instance profile
    iam.CfnInstanceProfile(
        self,
        "instance_profile",
        roles=["ssm_ec2_iam_role_" + stage['name_stage']],
        instance_profile_name="ssm_ec2_iam_role_" + stage['name_stage'])

    # add policies to iam role: read access to the regional SSM buckets.
    ec2_ssm_iam_role.add_to_policy(
        iam.PolicyStatement(resources=[
            "arn:aws:s3:::aws-ssm-" + self.region + "/*",
            "arn:aws:s3:::aws-windows-downloads-" + self.region + "/*",
            "arn:aws:s3:::amazon-ssm-" + self.region + "/*",
            "arn:aws:s3:::amazon-ssm-packages-" + self.region + "/*",
            "arn:aws:s3:::" + self.region + "-birdwatcher-prod/*",
            "arn:aws:s3:::patch-baseline-snapshot-" + self.region + "/*"
        ],
                            actions=["s3:GetObject"]))
    # Session Manager channels plus encryption helpers.
    ec2_ssm_iam_role.add_to_policy(
        iam.PolicyStatement(resources=["*"],
                            actions=[
                                "ssmmessages:CreateControlChannel",
                                "ssmmessages:CreateDataChannel",
                                "ssmmessages:OpenControlChannel",
                                "ssmmessages:OpenDataChannel",
                                "s3:GetEncryptionConfiguration",
                                "kms:Decrypt"
                            ]))
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    """Provision a Kinesis Data Firehose delivery stream that lands records in S3.

    Resources created: a 2-AZ VPC with an S3 gateway endpoint, the destination
    S3 bucket (random-suffixed name), a metadata-extractor Lambda used for
    dynamic partitioning (plus an explicitly named log group), an IAM service
    role for Firehose, and the CfnDeliveryStream itself. Buffering knobs are
    exposed as CloudFormation parameters.
    """
    super().__init__(scope, construct_id, **kwargs)

    # To reuse an existing VPC instead of creating one, look it up by name:
    # vpc_name = self.node.try_get_context("vpc_name")
    # vpc = aws_ec2.Vpc.from_lookup(self, "ExistingVPC",
    #   is_default=True,
    #   vpc_name=vpc_name)
    vpc = aws_ec2.Vpc(self, "FirehoseToS3VPC",
      max_azs=2,
      gateway_endpoints={
        "S3": aws_ec2.GatewayVpcEndpointOptions(
          service=aws_ec2.GatewayVpcEndpointAwsService.S3)
      })

    # Random suffix keeps the bucket name globally unique across accounts/regions.
    S3_BUCKET_SUFFIX = ''.join(
      random.sample((string.ascii_lowercase + string.digits), k=7))
    s3_bucket = s3.Bucket(self, "s3bucket",
      removal_policy=cdk.RemovalPolicy.DESTROY, #XXX: Default: cdk.RemovalPolicy.RETAIN - The bucket will be orphaned
      bucket_name="firehose-to-s3-{region}-{suffix}".format(
        region=cdk.Aws.REGION, suffix=S3_BUCKET_SUFFIX))

    # ---- Deploy-time (CloudFormation) parameters -------------------------
    FIREHOSE_STREAM_NAME = cdk.CfnParameter(self, 'FirehoseStreamName',
      type='String',
      description='kinesis data firehose stream name',
      default='PUT-S3-{}'.format(''.join(
        random.sample((string.ascii_letters), k=5))))

    # S3 destination buffering: 1-128 MiB / 60-300 s.
    FIREHOSE_BUFFER_SIZE = cdk.CfnParameter(self, 'FirehoseBufferSize',
      type='Number',
      description='kinesis data firehose buffer size',
      min_value=1,
      max_value=128,
      default=128)

    FIREHOSE_BUFFER_INTERVAL = cdk.CfnParameter(self, 'FirehoseBufferInterval',
      type='Number',
      description='kinesis data firehose buffer interval',
      min_value=60,
      max_value=300,
      default=60)

    # Lambda-transform buffering: 1-3 MiB / 60-900 s.
    FIREHOSE_LAMBDA_BUFFER_SIZE = cdk.CfnParameter(self, 'FirehoseLambdaBufferSize',
      type='Number',
      description='kinesis data firehose buffer size for AWS Lambda to transform records',
      min_value=1,
      max_value=3,
      default=3)

    FIREHOSE_LAMBDA_BUFFER_INTERVAL = cdk.CfnParameter(self, 'FirehoseLambdaBufferInterval',
      type='Number',
      description='kinesis data firehose buffer interval for AWS Lambda to transform records',
      min_value=60,
      max_value=900,
      default=300)

    FIREHOSE_LAMBDA_NUMBER_OF_RETRIES = cdk.CfnParameter(self, 'FirehoseLambdaNumberOfRetries',
      type='Number',
      description='Number of retries for AWS Lambda to transform records in kinesis data firehose',
      min_value=1,
      max_value=5,
      default=3)

    FIREHOSE_TO_S3_PREFIX = cdk.CfnParameter(self, 'FirehosePrefix',
      type='String',
      description='kinesis data firehose S3 prefix')

    FIREHOSE_TO_S3_ERROR_OUTPUT_PREFIX = cdk.CfnParameter(self, 'FirehoseErrorOutputPrefix',
      type='String',
      description='kinesis data firehose S3 error output prefix',
      default='error/year=!{timestamp:yyyy}/month=!{timestamp:MM}/day=!{timestamp:dd}/hour=!{timestamp:HH}/!{firehose:error-output-type}')

    # ---- Record-transform Lambda -----------------------------------------
    METADATA_EXTRACT_LAMBDA_FN_NAME = "MetadataExtractor"
    metadata_extract_lambda_fn = aws_lambda.Function(self, "MetadataExtractor",
      runtime=aws_lambda.Runtime.PYTHON_3_7,
      function_name="MetadataExtractor",
      handler="metadata_extractor.lambda_handler",
      description="Extract partition keys from records",
      code=aws_lambda.Code.from_asset(
        os.path.join(os.path.dirname(__file__), 'src/main/python')),
      timeout=cdk.Duration.minutes(5))

    log_group = aws_logs.LogGroup(self, "MetadataExtractorLogGroup",
      #XXX: Circular dependency between resources occurs
      # if aws_lambda.Function.function_name is used
      # instead of literal name of lambda function such as "MetadataExtractor"
      log_group_name="/aws/lambda/{}".format(METADATA_EXTRACT_LAMBDA_FN_NAME),
      retention=aws_logs.RetentionDays.THREE_DAYS,
      removal_policy=cdk.RemovalPolicy.DESTROY)
    log_group.grant_write(metadata_extract_lambda_fn)

    # ---- IAM policy for the Firehose service role ------------------------
    firehose_role_policy_doc = aws_iam.PolicyDocument()
    # Write access to the destination bucket and its objects.
    firehose_role_policy_doc.add_statements(
      aws_iam.PolicyStatement(**{
        "effect": aws_iam.Effect.ALLOW,
        "resources": [
          s3_bucket.bucket_arn, "{}/*".format(s3_bucket.bucket_arn)
        ],
        "actions": [
          "s3:AbortMultipartUpload", "s3:GetBucketLocation", "s3:GetObject",
          "s3:ListBucket", "s3:ListBucketMultipartUploads", "s3:PutObject"
        ]
      }))
    # ENI management permissions Firehose needs for VPC delivery.
    firehose_role_policy_doc.add_statements(
      aws_iam.PolicyStatement(
        effect=aws_iam.Effect.ALLOW,
        resources=["*"],
        actions=[
          "ec2:DescribeVpcs", "ec2:DescribeVpcAttribute",
          "ec2:DescribeSubnets", "ec2:DescribeSecurityGroups",
          "ec2:DescribeNetworkInterfaces", "ec2:CreateNetworkInterface",
          "ec2:CreateNetworkInterfacePermission",
          "ec2:DeleteNetworkInterface"
        ]))

    #XXX: https://docs.aws.amazon.com/ko_kr/cdk/latest/guide/tokens.html
    # String-encoded tokens:
    #  Avoid manipulating the string in other ways. For example,
    #  taking a substring of a string is likely to break the string token.
    firehose_log_group_name = f"/aws/kinesisfirehose/{FIREHOSE_STREAM_NAME.value_as_string}"
    firehose_role_policy_doc.add_statements(
      aws_iam.PolicyStatement(
        effect=aws_iam.Effect.ALLOW,
        #XXX: The ARN will be formatted as follows:
        # arn:{partition}:{service}:{region}:{account}:{resource}{sep}}{resource-name}
        resources=[
          self.format_arn(
            service="logs",
            resource="log-group",
            resource_name="{}:log-stream:*".format(firehose_log_group_name),
            arn_format=cdk.ArnFormat.COLON_RESOURCE_NAME)
        ],
        actions=["logs:PutLogEvents"]))

    # Allow Firehose to invoke the transform Lambda (all qualifiers: ":*").
    firehose_role_policy_doc.add_statements(
      aws_iam.PolicyStatement(**{
        "effect": aws_iam.Effect.ALLOW,
        #XXX: The ARN will be formatted as follows:
        # arn:{partition}:{service}:{region}:{account}:{resource}{sep}}{resource-name}
        "resources": [
          self.format_arn(
            partition="aws",
            service="lambda",
            region=cdk.Aws.REGION,
            account=cdk.Aws.ACCOUNT_ID,
            resource="function",
            resource_name="{}:*".format(metadata_extract_lambda_fn.function_name),
            arn_format=cdk.ArnFormat.COLON_RESOURCE_NAME)
        ],
        "actions": [
          "lambda:InvokeFunction", "lambda:GetFunctionConfiguration"
        ]
      }))

    firehose_role = aws_iam.Role(self, "KinesisFirehoseServiceRole",
      role_name="KinesisFirehoseServiceRole-{stream_name}-{region}".format(
        stream_name=FIREHOSE_STREAM_NAME.value_as_string, region=cdk.Aws.REGION),
      assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"),
      path='/service-role/',
      #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
      inline_policies={"firehose_role_policy": firehose_role_policy_doc})

    # ---- Firehose processing pipeline ------------------------------------
    # Lambda processor: invokes the metadata-extractor at a pinned version.
    lambda_proc = cfn.ProcessorProperty(
      type="Lambda",
      parameters=[
        cfn.ProcessorParameterProperty(
          parameter_name="LambdaArn",
          parameter_value='{}:{}'.format(
            metadata_extract_lambda_fn.function_arn,
            metadata_extract_lambda_fn.current_version.version)),
        cfn.ProcessorParameterProperty(
          parameter_name="NumberOfRetries",
          parameter_value=FIREHOSE_LAMBDA_NUMBER_OF_RETRIES.value_as_string),
        cfn.ProcessorParameterProperty(
          parameter_name="RoleArn",
          parameter_value=firehose_role.role_arn),
        cfn.ProcessorParameterProperty(
          parameter_name="BufferSizeInMBs",
          parameter_value=FIREHOSE_LAMBDA_BUFFER_SIZE.value_as_string),
        cfn.ProcessorParameterProperty(
          parameter_name="BufferIntervalInSeconds",
          parameter_value=FIREHOSE_LAMBDA_BUFFER_INTERVAL.value_as_string)
      ])

    # De-aggregate multi-record (JSON) payloads before partitioning.
    record_deaggregation_proc = cfn.ProcessorProperty(
      type="RecordDeAggregation",
      parameters=[
        cfn.ProcessorParameterProperty(
          parameter_name="SubRecordType",
          parameter_value="JSON")
      ])

    #XXX: Adding a new line delimiter when delivering data to S3
    # This is also particularly useful when dynamic partitioning is applied to aggregated data
    # because multirecord deaggregation (which must be applied to aggregated data
    # before it can be dynamically partitioned) removes new lines from records as part of the parsing process.
    # https://docs.aws.amazon.com/firehose/latest/dev/dynamic-partitioning.html#dynamic-partitioning-new-line-delimiter
    append_delim_to_record_proc = cfn.ProcessorProperty(
      type="AppendDelimiterToRecord",
      parameters=[])

    # Processor order matters: de-aggregate, re-delimit, then transform.
    firehose_processing_config = cfn.ProcessingConfigurationProperty(
      enabled=True,
      processors=[
        record_deaggregation_proc, append_delim_to_record_proc, lambda_proc
      ])

    ext_s3_dest_config = cfn.ExtendedS3DestinationConfigurationProperty(
      bucket_arn=s3_bucket.bucket_arn,
      role_arn=firehose_role.role_arn,
      buffering_hints={
        "intervalInSeconds": FIREHOSE_BUFFER_INTERVAL.value_as_number,
        "sizeInMBs": FIREHOSE_BUFFER_SIZE.value_as_number
      },
      cloud_watch_logging_options={
        "enabled": True,
        "logGroupName": firehose_log_group_name,
        "logStreamName": "DestinationDelivery"
      },
      compression_format="UNCOMPRESSED", # [GZIP | HADOOP_SNAPPY | Snappy | UNCOMPRESSED | ZIP]
      data_format_conversion_configuration={
        "enabled": False
      },
      dynamic_partitioning_configuration={
        "enabled": True,
        "retryOptions": {
          "durationInSeconds": 300
        }
      },
      error_output_prefix=FIREHOSE_TO_S3_ERROR_OUTPUT_PREFIX.value_as_string,
      prefix=FIREHOSE_TO_S3_PREFIX.value_as_string,
      processing_configuration=firehose_processing_config)

    firehose_to_s3_delivery_stream = aws_kinesisfirehose.CfnDeliveryStream(self, "FirehoseToS3",
      delivery_stream_name=FIREHOSE_STREAM_NAME.value_as_string,
      delivery_stream_type="DirectPut",
      extended_s3_destination_configuration=ext_s3_dest_config,
      tags=[{
        "key": "Name",
        "value": FIREHOSE_STREAM_NAME.value_as_string
      }])

    # Stack outputs for cross-stack/CLI consumption.
    cdk.CfnOutput(self, 'StackName',
      value=self.stack_name,
      export_name='StackName')
    cdk.CfnOutput(self, '{}_S3DestBucket'.format(self.stack_name),
      value=s3_bucket.bucket_name,
      export_name='S3DestBucket')
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    """Provision an Amazon OpenSearch Service domain inside a VPC, fed by a
    Kinesis Data Firehose delivery stream, with a bastion host for access.

    Resources created: a 3-AZ VPC with an S3 gateway endpoint, a bastion EC2
    instance (public subnet, SSH open to 0.0.0.0/0 — see TODO), security
    groups for the cluster and its clients, a Secrets Manager secret holding
    the master user credentials, the OpenSearch domain with fine-grained
    access control, a backup S3 bucket, and the Firehose stream delivering
    into the domain.
    """
    super().__init__(scope, construct_id, **kwargs)

    OPENSEARCH_DOMAIN_NAME = cdk.CfnParameter(self, 'OpenSearchDomainName',
      type='String',
      description='Amazon OpenSearch Service domain name',
      # Domain names must start with a lowercase letter.
      default='opensearch-{}'.format(''.join(
        random.sample((string.ascii_letters), k=5))),
      allowed_pattern='[a-z]+[A-Za-z0-9\-]+')

    OPENSEARCH_INDEX_NAME = cdk.CfnParameter(self, 'SearchIndexName',
      type='String',
      description='Amazon OpenSearch Service index name')

    EC2_KEY_PAIR_NAME = cdk.CfnParameter(self, 'EC2KeyPairName',
      type='String',
      description='Amazon EC2 Instance KeyPair name')

    # To reuse an existing VPC instead of creating one, look it up by name:
    # vpc_name = self.node.try_get_context("vpc_name")
    # vpc = aws_ec2.Vpc.from_lookup(self, "ExistingVPC",
    #   is_default=True,
    #   vpc_name=vpc_name)
    #
    vpc = aws_ec2.Vpc(self, "EKKStackVPC",
      max_azs=3,
      gateway_endpoints={
        "S3": aws_ec2.GatewayVpcEndpointOptions(
          service=aws_ec2.GatewayVpcEndpointAwsService.S3)
      })

    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceClass.html
    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceSize.html#aws_cdk.aws_ec2.InstanceSize
    ec2_instance_type = aws_ec2.InstanceType.of(
      aws_ec2.InstanceClass.BURSTABLE3, aws_ec2.InstanceSize.MEDIUM)

    # ---- Bastion host ----------------------------------------------------
    sg_bastion_host = aws_ec2.SecurityGroup(self, "BastionHostSG",
      vpc=vpc,
      allow_all_outbound=True,
      description='security group for an bastion host',
      security_group_name='bastion-host-sg')
    cdk.Tags.of(sg_bastion_host).add('Name', 'bastion-host-sg')

    #TODO: SHOULD restrict IP range allowed to ssh acces
    sg_bastion_host.add_ingress_rule(peer=aws_ec2.Peer.ipv4("0.0.0.0/0"),
      connection=aws_ec2.Port.tcp(22),
      description='SSH access')

    bastion_host = aws_ec2.Instance(self, "BastionHost",
      vpc=vpc,
      instance_type=ec2_instance_type,
      machine_image=aws_ec2.MachineImage.latest_amazon_linux(),
      vpc_subnets=aws_ec2.SubnetSelection(subnet_type=aws_ec2.SubnetType.PUBLIC),
      security_group=sg_bastion_host,
      key_name=EC2_KEY_PAIR_NAME.value_as_string)

    # ---- Security groups for the OpenSearch cluster ----------------------
    # Attached to clients (e.g. Firehose ENIs) that talk to the cluster.
    sg_use_opensearch = aws_ec2.SecurityGroup(self, "OpenSearchClientSG",
      vpc=vpc,
      allow_all_outbound=True,
      description='security group for an opensearch client',
      security_group_name='use-opensearch-cluster-sg')
    cdk.Tags.of(sg_use_opensearch).add('Name', 'use-opensearch-cluster-sg')

    sg_opensearch_cluster = aws_ec2.SecurityGroup(self, "OpenSearchSG",
      vpc=vpc,
      allow_all_outbound=True,
      description='security group for an opensearch cluster',
      security_group_name='opensearch-cluster-sg')
    cdk.Tags.of(sg_opensearch_cluster).add('Name', 'opensearch-cluster-sg')

    # Intra-cluster traffic plus HTTPS/transport (9200-9300) from clients
    # and from the bastion host.
    sg_opensearch_cluster.add_ingress_rule(peer=sg_opensearch_cluster,
      connection=aws_ec2.Port.all_tcp(),
      description='opensearch-cluster-sg')

    sg_opensearch_cluster.add_ingress_rule(peer=sg_use_opensearch,
      connection=aws_ec2.Port.tcp(443),
      description='use-opensearch-cluster-sg')
    sg_opensearch_cluster.add_ingress_rule(peer=sg_use_opensearch,
      connection=aws_ec2.Port.tcp_range(9200, 9300),
      description='use-opensearch-cluster-sg')

    sg_opensearch_cluster.add_ingress_rule(peer=sg_bastion_host,
      connection=aws_ec2.Port.tcp(443),
      description='bastion-host-sg')
    sg_opensearch_cluster.add_ingress_rule(peer=sg_bastion_host,
      connection=aws_ec2.Port.tcp_range(9200, 9300),
      description='bastion-host-sg')

    # ---- Master user credentials -----------------------------------------
    master_user_secret = aws_secretsmanager.Secret(self, "OpenSearchMasterUserSecret",
      generate_secret_string=aws_secretsmanager.SecretStringGenerator(
        secret_string_template=json.dumps({"username": "******"}),
        generate_string_key="password",
        # Master password must be at least 8 characters long and contain at least one uppercase letter,
        # one lowercase letter, one number, and one special character.
        password_length=8))

    #XXX: aws cdk elastsearch example - https://github.com/aws/aws-cdk/issues/2873
    # You should camelCase the property names instead of PascalCase
    opensearch_domain = aws_opensearchservice.Domain(self, "OpenSearch",
      domain_name=OPENSEARCH_DOMAIN_NAME.value_as_string,
      version=aws_opensearchservice.EngineVersion.OPENSEARCH_1_0,
      #XXX: You cannot use graviton instances with non-graviton instances.
      # Use graviton instances as data nodes or use non-graviton instances as master nodes.
      capacity={
        "master_nodes": 3,
        "master_node_instance_type": "r6g.large.search",
        "data_nodes": 3,
        "data_node_instance_type": "r6g.large.search"
      },
      ebs={
        "volume_size": 10,
        "volume_type": aws_ec2.EbsDeviceVolumeType.GP2
      },
      #XXX: az_count must be equal to vpc subnets count.
      zone_awareness={
        "availability_zone_count": 3
      },
      logging={
        "slow_search_log_enabled": True,
        "app_log_enabled": True,
        "slow_index_log_enabled": True
      },
      fine_grained_access_control=aws_opensearchservice.AdvancedSecurityOptions(
        master_user_name=master_user_secret.secret_value_from_json("username").to_string(),
        master_user_password=master_user_secret.secret_value_from_json("password")),
      # Enforce HTTPS is required when fine-grained access control is enabled.
      enforce_https=True,
      # Node-to-node encryption is required when fine-grained access control is enabled
      node_to_node_encryption=True,
      # Encryption-at-rest is required when fine-grained access control is enabled.
      encryption_at_rest={
        "enabled": True
      },
      use_unsigned_basic_auth=True,
      security_groups=[sg_opensearch_cluster],
      automated_snapshot_start_hour=17, # 2 AM (GTM+9)
      vpc=vpc,
      vpc_subnets=[
        aws_ec2.SubnetSelection(one_per_az=True,
          subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT)
      ],
      removal_policy=cdk.RemovalPolicy.DESTROY # default: cdk.RemovalPolicy.RETAIN
    )
    cdk.Tags.of(opensearch_domain).add('Name',
      f'{OPENSEARCH_DOMAIN_NAME.value_as_string}')

    # ---- S3 backup bucket for Firehose -----------------------------------
    # Random suffix keeps the bucket name globally unique.
    S3_BUCKET_SUFFIX = ''.join(
      random.sample((string.ascii_lowercase + string.digits), k=7))
    s3_bucket = s3.Bucket(self, "s3bucket",
      removal_policy=cdk.RemovalPolicy.DESTROY, #XXX: Default: core.RemovalPolicy.RETAIN - The bucket will be orphaned
      bucket_name="opskk-stack-{region}-{suffix}".format(
        region=cdk.Aws.REGION, suffix=S3_BUCKET_SUFFIX))

    # ---- IAM policy for the Firehose service role ------------------------
    firehose_role_policy_doc = aws_iam.PolicyDocument()
    # Write access to the backup bucket and its objects.
    firehose_role_policy_doc.add_statements(
      aws_iam.PolicyStatement(**{
        "effect": aws_iam.Effect.ALLOW,
        "resources": [
          s3_bucket.bucket_arn, "{}/*".format(s3_bucket.bucket_arn)
        ],
        "actions": [
          "s3:AbortMultipartUpload", "s3:GetBucketLocation", "s3:GetObject",
          "s3:ListBucket", "s3:ListBucketMultipartUploads", "s3:PutObject"
        ]
      }))

    # ENI management permissions Firehose needs for VPC delivery.
    firehose_role_policy_doc.add_statements(
      aws_iam.PolicyStatement(
        effect=aws_iam.Effect.ALLOW,
        resources=["*"],
        actions=[
          "ec2:DescribeVpcs", "ec2:DescribeVpcAttribute",
          "ec2:DescribeSubnets", "ec2:DescribeSecurityGroups",
          "ec2:DescribeNetworkInterfaces", "ec2:CreateNetworkInterface",
          "ec2:CreateNetworkInterfacePermission",
          "ec2:DeleteNetworkInterface"
        ]))

    # Write access to the OpenSearch domain (legacy "es:" actions).
    firehose_role_policy_doc.add_statements(
      aws_iam.PolicyStatement(
        effect=aws_iam.Effect.ALLOW,
        resources=[
          opensearch_domain.domain_arn,
          "{}/*".format(opensearch_domain.domain_arn)
        ],
        actions=[
          "es:DescribeElasticsearchDomain", "es:DescribeElasticsearchDomains",
          "es:DescribeElasticsearchDomainConfig", "es:ESHttpPost",
          "es:ESHttpPut"
        ]))

    firehose_role_policy_doc.add_statements(
      aws_iam.PolicyStatement(
        effect=aws_iam.Effect.ALLOW,
        #XXX: https://aws.amazon.com/premiumsupport/knowledge-center/kinesis-data-firehose-delivery-failure/
        resources=[
          opensearch_domain.domain_arn,
          f"{opensearch_domain.domain_arn}/_all/_settings",
          f"{opensearch_domain.domain_arn}/_cluster/stats",
          f"{opensearch_domain.domain_arn}/{OPENSEARCH_INDEX_NAME.value_as_string}*/_mapping/%FIREHOSE_POLICY_TEMPLATE_PLACEHOLDER%",
          f"{opensearch_domain.domain_arn}/_nodes",
          f"{opensearch_domain.domain_arn}/_nodes/stats",
          f"{opensearch_domain.domain_arn}/_nodes/*/stats",
          f"{opensearch_domain.domain_arn}/_stats",
          f"{opensearch_domain.domain_arn}/{OPENSEARCH_INDEX_NAME.value_as_string}*/_stats"
        ],
        actions=["es:ESHttpGet"]))

    firehose_log_group_name = f"/aws/kinesisfirehose/{OPENSEARCH_INDEX_NAME.value_as_string}"
    firehose_role_policy_doc.add_statements(
      aws_iam.PolicyStatement(
        effect=aws_iam.Effect.ALLOW,
        #XXX: The ARN will be formatted as follows:
        # arn:{partition}:{service}:{region}:{account}:{resource}{sep}}{resource-name}
        resources=[
          self.format_arn(
            service="logs",
            resource="log-group",
            resource_name="{}:log-stream:*".format(firehose_log_group_name),
            arn_format=cdk.ArnFormat.COLON_RESOURCE_NAME)
        ],
        actions=["logs:PutLogEvents"]))

    firehose_role = aws_iam.Role(self, "KinesisFirehoseServiceRole",
      role_name=f"KinesisFirehoseServiceRole-{OPENSEARCH_INDEX_NAME.value_as_string}-{cdk.Aws.REGION}",
      assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"),
      #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
      inline_policies={"firehose_role_policy": firehose_role_policy_doc})

    # ---- Firehose delivery stream into OpenSearch ------------------------
    # Firehose ENIs live in the private subnets, using the client SG.
    opensearch_dest_vpc_config = aws_kinesisfirehose.CfnDeliveryStream.VpcConfigurationProperty(
      role_arn=firehose_role.role_arn,
      security_group_ids=[sg_use_opensearch.security_group_id],
      subnet_ids=vpc.select_subnets(
        subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).subnet_ids)

    opensearch_dest_config = aws_kinesisfirehose.CfnDeliveryStream.ElasticsearchDestinationConfigurationProperty(
      index_name=OPENSEARCH_INDEX_NAME.value_as_string,
      role_arn=firehose_role.role_arn,
      # Failed documents are backed up to S3 (see s3_backup_mode below).
      s3_configuration={
        "bucketArn": s3_bucket.bucket_arn,
        "bufferingHints": {
          "intervalInSeconds": 60,
          "sizeInMBs": 1
        },
        "cloudWatchLoggingOptions": {
          "enabled": True,
          "logGroupName": firehose_log_group_name,
          "logStreamName": "S3Backup"
        },
        "compressionFormat": "UNCOMPRESSED", # [GZIP | HADOOP_SNAPPY | Snappy | UNCOMPRESSED | ZIP]
        # Kinesis Data Firehose automatically appends the “YYYY/MM/dd/HH/” UTC prefix to delivered S3 files. You can also specify
        # an extra prefix in front of the time format and add "/" to the end to have it appear as a folder in the S3 console.
        "prefix": f"{OPENSEARCH_INDEX_NAME.value_as_string}/",
        "roleArn": firehose_role.role_arn
      },
      buffering_hints={
        "intervalInSeconds": 60,
        "sizeInMBs": 1
      },
      cloud_watch_logging_options={
        "enabled": True,
        "logGroupName": firehose_log_group_name,
        "logStreamName": "ElasticsearchDelivery"
      },
      domain_arn=opensearch_domain.domain_arn,
      index_rotation_period="NoRotation", # [NoRotation | OneDay | OneHour | OneMonth | OneWeek]
      retry_options={
        "durationInSeconds": 60
      },
      s3_backup_mode="FailedDocumentsOnly", # [AllDocuments | FailedDocumentsOnly]
      vpc_configuration=opensearch_dest_vpc_config)

    firehose_to_ops_delivery_stream = aws_kinesisfirehose.CfnDeliveryStream(self, "KinesisFirehoseToES",
      delivery_stream_name=OPENSEARCH_INDEX_NAME.value_as_string,
      delivery_stream_type="DirectPut",
      elasticsearch_destination_configuration=opensearch_dest_config,
      tags=[{
        "key": "Name",
        "value": OPENSEARCH_INDEX_NAME.value_as_string
      }])

    # Stack outputs for cross-stack/CLI consumption.
    cdk.CfnOutput(self, 'BastionHostId',
      value=bastion_host.instance_id,
      export_name='BastionHostId')
    cdk.CfnOutput(self, 'OpenSearchDomainEndpoint',
      value=opensearch_domain.domain_endpoint,
      export_name='OpenSearchDomainEndpoint')
    cdk.CfnOutput(self, 'OpenSearchDashboardsURL',
      value=f"{opensearch_domain.domain_endpoint}/_dashboards/",
      export_name='OpenSearchDashboardsURL')
    cdk.CfnOutput(self, 'MasterUserSecretId',
      value=master_user_secret.secret_name,
      export_name='MasterUserSecretId')
    cdk.CfnOutput(self, '{}_S3DestBucket'.format(self.stack_name),
      value=s3_bucket.bucket_name,
      export_name='S3DestBucket')
    cdk.CfnOutput(self, 'FirehoseRoleArn',
      value=firehose_role.role_arn,
      export_name='FirehoseRoleArn')
def __init__(self, scope: core.Construct, id: str, stage=None, **kwargs) -> None:
    """Build a stage-configured VPC and name-tag its subnets, route tables,
    NAT gateways and EIPs.

    ``stage`` is a per-environment config dict read for: ``stage_name``,
    ``vpc_cidr``, ``vpc_prefix``, ``subnet_prefix``, ``max_azs``,
    ``nat_number``, ``layers`` (subnet-group name -> PUBLIC|PRIVATE|ISOLATED),
    ``layer_endpoints`` (subnet group that hosts the S3 gateway endpoint) and
    ``layer_nats`` (subnet group that hosts the NAT gateways).

    Raises KeyError if a required key is missing, ValueError if a numeric
    value is not int-parseable.
    """
    super().__init__(scope, id, **kwargs)
    # Avoid a mutable default argument; behavior with an empty dict is
    # unchanged (required keys still raise KeyError).
    stage = {} if stage is None else stage

    customer = self.node.try_get_context("customer")
    stage_name = stage["stage_name"]
    vpc_cidr = stage["vpc_cidr"]
    vpc_prefix = stage["vpc_prefix"]
    prefix_name = f'{vpc_prefix}-{stage_name}-{customer}'
    subnet_prefix = int(stage['subnet_prefix'])
    max_azs = int(stage['max_azs'])
    nat_number = int(stage['nat_number'])
    layers = stage['layers']
    layer_endpoints = stage['layer_endpoints']
    layers_nat = stage['layer_nats']

    # One SubnetConfiguration per configured layer.
    # NOTE(review): if a layer's type is not one of PUBLIC/PRIVATE/ISOLATED,
    # the previous iteration's subnet_type is silently reused (NameError on
    # the first layer) — kept as-is; confirm config is always well-formed.
    subnets_config = []
    for layer_name, layer_type in layers.items():
        if layer_type == 'PUBLIC':
            subnet_type = _ec2.SubnetType.PUBLIC
        elif layer_type == 'PRIVATE':
            subnet_type = _ec2.SubnetType.PRIVATE
        elif layer_type == 'ISOLATED':
            subnet_type = _ec2.SubnetType.ISOLATED
        subnets_config.append(
            _ec2.SubnetConfiguration(name=layer_name,
                                     subnet_type=subnet_type,
                                     cidr_mask=subnet_prefix))

    # Place NAT gateways in the configured layer only if it is PUBLIC.
    nat_subnets = None
    if layers_nat in layers and layers[layers_nat] == 'PUBLIC':
        nat_subnets = _ec2.SubnetSelection(subnet_group_name=layers_nat)

    # Instance tenancy comes from CDK context (key spelling kept for
    # backward compatibility with existing cdk.json files).
    vpc_tenancy = _ec2.DefaultInstanceTenancy.DEFAULT
    if self.node.try_get_context("vpc_tenacy") == 'DEDICATED':
        vpc_tenancy = _ec2.DefaultInstanceTenancy.DEDICATED

    subnet_layer_endpoints = [
        _ec2.SubnetSelection(one_per_az=True,
                             subnet_group_name=layer_endpoints)
    ]

    self.vpc = _ec2.Vpc(
        self,
        prefix_name,
        max_azs=max_azs,
        cidr=vpc_cidr,
        subnet_configuration=subnets_config,
        nat_gateway_subnets=nat_subnets,
        nat_gateways=nat_number,
        default_instance_tenancy=vpc_tenancy,
        gateway_endpoints={
            "S3":
            _ec2.GatewayVpcEndpointOptions(
                service=_ec2.GatewayVpcEndpointAwsService.S3,
                subnets=subnet_layer_endpoints)
        })

    # ---- Name tagging ----------------------------------------------------
    core.Tags.of(self.vpc.node.default_child).add("Name", f'{prefix_name}-vpc')
    core.Tags.of(self.vpc.node.find_child('IGW')).add("Name", f'{prefix_name}-igw')

    # BUG FIX: the original looped `for nat in stage['nat_number']:` over the
    # raw config value — iterating an int raises TypeError, and iterating the
    # string "2" loops once per character, not once per NAT gateway. Use the
    # parsed nat_number instead.
    # NOTE(review): child ids like 'publicSubnet1' assume the public layer is
    # named 'public' in the stage config — confirm against cdk.json.
    for idx in range(1, nat_number + 1):
        public_subnet_node = self.vpc.node.find_child('publicSubnet' + str(idx)).node
        core.Tags.of(public_subnet_node.find_child('NATGateway')).add(
            "Name", f'{prefix_name}-nat')
        core.Tags.of(public_subnet_node.find_child("EIP")).add(
            "Name", f'{prefix_name}-public-eip-{idx}')

    # Tag each subnet and its route table with the AZ suffix (e.g. "1a").
    for idx, subnet in enumerate(self.vpc.private_subnets, start=1):
        az_end = subnet.availability_zone[-2:]
        core.Tags.of(subnet.node.default_child).add(
            "Name", f'{prefix_name}-private-{az_end}')
        core.Tags.of(
            self.vpc.node.find_child('privateSubnet' + str(idx)).node.
            find_child('RouteTable')).add("Name", f'{prefix_name}-private-rt-{az_end}')

    for idx, subnet in enumerate(self.vpc.public_subnets, start=1):
        az_end = subnet.availability_zone[-2:]
        core.Tags.of(subnet.node.default_child).add(
            "Name", f'{prefix_name}-public-{az_end}')
        core.Tags.of(
            self.vpc.node.find_child('publicSubnet' + str(idx)).node.
            find_child('RouteTable')).add("Name", f'{prefix_name}-public-rt-{az_end}')

    # Isolated subnets are tagged as the "database" layer.
    for idx, subnet in enumerate(self.vpc.isolated_subnets, start=1):
        az_end = subnet.availability_zone[-2:]
        core.Tags.of(subnet.node.default_child).add(
            "Name", f'{prefix_name}-database-{az_end}')
        core.Tags.of(
            self.vpc.node.find_child('databaseSubnet' + str(idx)).node.
            find_child('RouteTable')).add("Name", f'{prefix_name}-database-rt-{az_end}')

    core.CfnOutput(self, "Output", value=self.vpc.vpc_id)
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    """Build an Amazon API Gateway REST API that proxies DynamoDB directly
    (no Lambda), secured by an Amazon Cognito user pool authorizer.

    Resources created: a 2-AZ VPC with S3 and DynamoDB gateway endpoints, a
    "Comments" DynamoDB table (plus a pageId GSI), a Cognito user pool and
    app client, an IAM role API Gateway assumes to call DynamoDB, and the
    REST API with POST /comments (PutItem) and GET /comments/{pageId}
    (Query via the GSI) mapped through VTL templates.
    """
    super().__init__(scope, constru_id, **kwargs) if False else super().__init__(scope, construct_id, **kwargs)
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    """Provision an EKK-style stack: a 2-AZ VPC with an S3 gateway endpoint,
    a public bastion host, an Amazon Elasticsearch domain in private subnets,
    an S3 backup bucket, and a Kinesis Data Firehose delivery stream
    (DirectPut) that indexes records into the domain.

    Requires CDK context keys "es_domain_name" and "es_index_name";
    try_get_context returns None if they are missing, which would fail at
    synth time -- NOTE(review): consider validating them explicitly.
    """
    super().__init__(scope, construct_id, **kwargs)

    # Alternative: look up an existing VPC via context instead of creating one.
    # vpc_name = self.node.try_get_context("vpc_name")
    # vpc = aws_ec2.Vpc.from_lookup(self, "ExistingVPC",
    #   is_default=True,
    #   vpc_name=vpc_name)

    # New 2-AZ VPC with an S3 gateway endpoint so S3 traffic from private
    # subnets stays off the public internet.
    vpc = aws_ec2.Vpc(self, "EKKStackVPC",
        max_azs=2,
        gateway_endpoints={
            "S3": aws_ec2.GatewayVpcEndpointOptions(
                service=aws_ec2.GatewayVpcEndpointAwsService.S3)
        })

    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceClass.html
    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceSize.html#aws_cdk.aws_ec2.InstanceSize
    # t3.medium for the bastion host.
    ec2_instance_type = aws_ec2.InstanceType.of(
        aws_ec2.InstanceClass.BURSTABLE3, aws_ec2.InstanceSize.MEDIUM)

    # Security group for the bastion host; egress is unrestricted.
    sg_bastion_host = aws_ec2.SecurityGroup(self, "BastionHostSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for an bastion host',
        security_group_name='bastion-host-sg')
    cdk.Tags.of(sg_bastion_host).add('Name', 'bastion-host-sg')

    #XXX: As there are no SSH public keys deployed on this machine,
    # you need to use EC2 Instance Connect with the command
    # 'aws ec2-instance-connect send-ssh-public-key' to provide your SSH public key.
    # https://aws.amazon.com/de/blogs/compute/new-using-amazon-ec2-instance-connect-for-ssh-access-to-your-ec2-instances/
    bastion_host = aws_ec2.BastionHostLinux(self, "BastionHost",
        vpc=vpc,
        instance_type=ec2_instance_type,
        # Bastion must sit in a public subnet to be reachable from outside.
        subnet_selection=aws_ec2.SubnetSelection(
            subnet_type=aws_ec2.SubnetType.PUBLIC),
        security_group=sg_bastion_host)

    #TODO: SHOULD restrict IP range allowed to ssh access
    # NOTE(review): SSH is currently open to the whole internet.
    bastion_host.allow_ssh_access_from(aws_ec2.Peer.ipv4("0.0.0.0/0"))

    # Security group attached to clients of the ES cluster (used below by the
    # Firehose VPC configuration).
    sg_use_es = aws_ec2.SecurityGroup(self, "ElasticSearchClientSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for an elasticsearch client',
        security_group_name='use-es-cluster-sg')
    cdk.Tags.of(sg_use_es).add('Name', 'use-es-cluster-sg')

    # Security group attached to the ES domain itself.
    sg_es = aws_ec2.SecurityGroup(self, "ElasticSearchSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for an elasticsearch cluster',
        security_group_name='es-cluster-sg')
    cdk.Tags.of(sg_es).add('Name', 'es-cluster-sg')

    # Allow all TCP from: the cluster itself (node-to-node), ES clients, and
    # the bastion host. NOTE(review): all_tcp is broader than the usual
    # 443-only ES access -- confirm this is intended.
    sg_es.add_ingress_rule(peer=sg_es,
        connection=aws_ec2.Port.all_tcp(),
        description='es-cluster-sg')
    sg_es.add_ingress_rule(peer=sg_use_es,
        connection=aws_ec2.Port.all_tcp(),
        description='use-es-cluster-sg')
    sg_es.add_ingress_rule(peer=sg_bastion_host,
        connection=aws_ec2.Port.all_tcp(),
        description='bastion-host-sg')

    #XXX: aws cdk elasticsearch example - https://github.com/aws/aws-cdk/issues/2873
    ES_DOMAIN_NAME = self.node.try_get_context("es_domain_name")

    # VPC-scoped Elasticsearch 7.10 domain: 3 dedicated masters + 2 data
    # nodes spread across the two AZs (zone awareness enabled).
    es_cfn_domain = aws_elasticsearch.CfnDomain(self, "ElasticSearch",
        elasticsearch_cluster_config={
            "dedicatedMasterCount": 3,
            "dedicatedMasterEnabled": True,
            "dedicatedMasterType": "t2.medium.elasticsearch",
            "instanceCount": 2,
            "instanceType": "t2.medium.elasticsearch",
            "zoneAwarenessEnabled": True
        },
        # 10 GiB gp2 EBS volume per data node.
        ebs_options={
            "ebsEnabled": True,
            "volumeSize": 10,
            "volumeType": "gp2"
        },
        domain_name=ES_DOMAIN_NAME,
        elasticsearch_version="7.10",
        # NOTE(review): encryption at rest is disabled -- confirm acceptable
        # for this workload.
        encryption_at_rest_options={"enabled": False},
        # Wildcard principal; presumably relies on VPC placement + security
        # groups for access control -- verify against security requirements.
        access_policies={
            "Version": "2012-10-17",
            "Statement": [{
                "Effect": "Allow",
                "Principal": {
                    "AWS": "*"
                },
                "Action": ["es:Describe*", "es:List*", "es:Get*", "es:ESHttp*"],
                "Resource": self.format_arn(
                    service="es",
                    resource="domain",
                    resource_name="{}/*".format(ES_DOMAIN_NAME))
            }]
        },
        # Daily automated snapshot start hour.
        snapshot_options={"automatedSnapshotStartHour": 17},
        # Place the domain endpoints in the private (NAT-routed) subnets.
        vpc_options={
            "securityGroupIds": [sg_es.security_group_id],
            "subnetIds": vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).subnet_ids
        })
    cdk.Tags.of(es_cfn_domain).add('Name', ES_DOMAIN_NAME)

    # Random 7-char suffix so the (globally unique) bucket name won't clash.
    # NOTE(review): random.sample is not cryptographically secure; fine for
    # name uniqueness only.
    S3_BUCKET_SUFFIX = ''.join(
        random.sample((string.ascii_lowercase + string.digits), k=7))
    s3_bucket = s3.Bucket(self, "s3bucket",
        removal_policy=cdk.RemovalPolicy.DESTROY, #XXX: Default: cdk.RemovalPolicy.RETAIN - The bucket will be orphaned
        bucket_name="ekk-stack-{region}-{suffix}".format(
            region=cdk.Aws.REGION, suffix=S3_BUCKET_SUFFIX))

    firehose_role_policy_doc = aws_iam.PolicyDocument()

    # S3 permissions Firehose needs to write backup objects into the bucket.
    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            **{
                "effect": aws_iam.Effect.ALLOW,
                "resources": [
                    s3_bucket.bucket_arn,
                    "{}/*".format(s3_bucket.bucket_arn)
                ],
                "actions": [
                    "s3:AbortMultipartUpload", "s3:GetBucketLocation",
                    "s3:GetObject", "s3:ListBucket",
                    "s3:ListBucketMultipartUploads", "s3:PutObject"
                ]
            }))

    # ENI permissions required for Firehose to deliver into a VPC
    # destination (it creates network interfaces in the chosen subnets).
    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=["*"],
            actions=[
                "ec2:DescribeVpcs", "ec2:DescribeVpcAttribute",
                "ec2:DescribeSubnets", "ec2:DescribeSecurityGroups",
                "ec2:DescribeNetworkInterfaces", "ec2:CreateNetworkInterface",
                "ec2:CreateNetworkInterfacePermission",
                "ec2:DeleteNetworkInterface"
            ]))

    # Permissions to describe the domain and write documents into it.
    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=[
                es_cfn_domain.attr_arn,
                "{}/*".format(es_cfn_domain.attr_arn)
            ],
            actions=[
                "es:DescribeElasticsearchDomain",
                "es:DescribeElasticsearchDomains",
                "es:DescribeElasticsearchDomainConfig",
                "es:ESHttpPost", "es:ESHttpPut"
            ]))

    ES_INDEX_NAME = self.node.try_get_context("es_index_name")

    # Read-only ES HTTP GET access; the commented-out list below shows the
    # narrower per-path alternative to the domain-wide grant.
    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=[
                es_cfn_domain.attr_arn,
                "{}/*".format(es_cfn_domain.attr_arn)
            ],
            # resources=[
            #   "{aes_arn}/_all/_settings".format(aes_arn=es_cfn_domain.attr_arn),
            #   "{aes_arn}/_cluster/stats".format(aes_arn=es_cfn_domain.attr_arn),
            #   "{aes_arn}/{es_index_name}*/_mapping".format(aes_arn=es_cfn_domain.attr_arn, es_index_name=ES_INDEX_NAME),
            #   "{aes_arn}/_nodes".format(aes_arn=es_cfn_domain.attr_arn),
            #   "{aes_arn}/_nodes/*/stats".format(aes_arn=es_cfn_domain.attr_arn),
            #   "{aes_arn}/_stats".format(aes_arn=es_cfn_domain.attr_arn),
            #   "{aes_arn}/{es_index_name}*/_stats".format(aes_arn=es_cfn_domain.attr_arn, es_index_name=ES_INDEX_NAME)
            # ],
            actions=["es:ESHttpGet"]))

    firehose_log_group_name = "/aws/kinesisfirehose/{}".format(ES_INDEX_NAME)

    # Allow Firehose to write delivery/error events to its CloudWatch Logs
    # log streams.
    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            #XXX: The ARN will be formatted as follows:
            # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
            resources=[
                self.format_arn(
                    service="logs",
                    resource="log-group",
                    resource_name="{}:log-stream:*".format(
                        firehose_log_group_name),
                    arn_format=cdk.ArnFormat.COLON_RESOURCE_NAME)
            ],
            actions=["logs:PutLogEvents"]))

    # Service role assumed by Firehose; name embeds index and region.
    firehose_role = aws_iam.Role(self, "KinesisFirehoseServiceRole",
        role_name="KinesisFirehoseServiceRole-{es_index}-{region}".format(
            es_index=ES_INDEX_NAME, region=cdk.Aws.REGION),
        assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"),
        #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
        inline_policies={"firehose_role_policy": firehose_role_policy_doc})

    # Firehose places its delivery ENIs in the private subnets, using the
    # ES *client* security group so sg_es's ingress rule admits it.
    es_dest_vpc_config = aws_kinesisfirehose.CfnDeliveryStream.VpcConfigurationProperty(
        role_arn=firehose_role.role_arn,
        security_group_ids=[sg_use_es.security_group_id],
        subnet_ids=vpc.select_subnets(
            subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).subnet_ids)

    es_dest_config = aws_kinesisfirehose.CfnDeliveryStream.ElasticsearchDestinationConfigurationProperty(
        index_name=ES_INDEX_NAME,
        role_arn=firehose_role.role_arn,
        # S3 backup target; with s3_backup_mode=FailedDocumentsOnly below,
        # only documents that fail ES delivery land here.
        s3_configuration={
            "bucketArn": s3_bucket.bucket_arn,
            # Buffer up to 60 s / 1 MiB before flushing to S3.
            "bufferingHints": {
                "intervalInSeconds": 60,
                "sizeInMBs": 1
            },
            "cloudWatchLoggingOptions": {
                "enabled": True,
                "logGroupName": firehose_log_group_name,
                "logStreamName": "S3Backup"
            },
            "compressionFormat": "UNCOMPRESSED", # [GZIP | HADOOP_SNAPPY | Snappy | UNCOMPRESSED | ZIP]
            # Kinesis Data Firehose automatically appends the "YYYY/MM/dd/HH/" UTC prefix to delivered S3 files. You can also specify
            # an extra prefix in front of the time format and add "/" to the end to have it appear as a folder in the S3 console.
            "prefix": "{}/".format(ES_INDEX_NAME),
            "roleArn": firehose_role.role_arn
        },
        # Buffer up to 60 s / 1 MiB before posting a bulk request to ES.
        buffering_hints={
            "intervalInSeconds": 60,
            "sizeInMBs": 1
        },
        cloud_watch_logging_options={
            "enabled": True,
            "logGroupName": firehose_log_group_name,
            "logStreamName": "ElasticsearchDelivery"
        },
        domain_arn=es_cfn_domain.attr_arn,
        # Single fixed index name, never rotated by timestamp.
        index_rotation_period="NoRotation", # [NoRotation | OneDay | OneHour | OneMonth | OneWeek]
        retry_options={"durationInSeconds": 60},
        s3_backup_mode="FailedDocumentsOnly", # [AllDocuments | FailedDocumentsOnly]
        vpc_configuration=es_dest_vpc_config)

    # DirectPut stream: producers write records straight to Firehose (no
    # Kinesis Data Stream source).
    firehose_to_es_delivery_stream = aws_kinesisfirehose.CfnDeliveryStream(
        self, "KinesisFirehoseToES",
        delivery_stream_name=ES_INDEX_NAME,
        delivery_stream_type="DirectPut",
        elasticsearch_destination_configuration=es_dest_config,
        tags=[{
            "key": "Name",
            "value": ES_DOMAIN_NAME
        }])