def __init__(
    self,
    scope: core.Construct,
    id: str,
    **kwargs,
) -> None:
    """Create a public Elasticsearch domain restricted by client source IP.

    The access policy allows every ``es:*`` action for any principal, but
    only when the request originates from ``constants["ES_CLIENT_IP"]``.
    The domain ARN is exported as the CloudFormation export ``esarn``.

    Args:
        scope: parent construct.
        id: construct id.
    """
    super().__init__(scope, id, **kwargs)

    # IP-restricted access policy: any principal may call the domain, but
    # only from the configured client IP.
    elastic_policy = iam.PolicyStatement(
        effect=iam.Effect.ALLOW,
        actions=["es:*"],
        resources=["*"],
        conditions={
            "IpAddress": {"aws:SourceIp": constants["ES_CLIENT_IP"]}
        },
    )
    elastic_policy.add_any_principal()
    elastic_document = iam.PolicyDocument()
    elastic_document.add_statements(elastic_policy)

    # Cluster configuration: zone awareness disabled, so a single-AZ
    # deployment sized by the project constants.
    cluster_config = {
        "instanceCount": constants["ELASTIC_INSTANCE_COUNT"],
        "instanceType": constants["ELASTIC_INSTANCE_TYPE"],
        "zoneAwarenessEnabled": False,
    }

    # Create the Elasticsearch domain.
    self.elastic_domain = aes.CfnDomain(
        self,
        "elastic_domain",
        domain_name=constants["ELASTIC_NAME"],
        elasticsearch_cluster_config=cluster_config,
        elasticsearch_version=constants["ELASTIC_VERSION"],
        ebs_options={"ebsEnabled": True, "volumeSize": 10},
        access_policies=elastic_document,
    )
    # core.Tags.of is the non-deprecated tagging API.
    core.Tags.of(self.elastic_domain).add("project", constants["PROJECT_TAG"])

    # Export the domain ARN for consumption by other stacks.
    core.CfnOutput(
        self,
        "DomainArn",
        export_name="esarn",
        value=self.elastic_domain.attr_arn,
        description="elasticsearch domain arn",
    )
def _build_elastic_search(self, *, stack, vpc_db_instance):
    """Provision a VPC-internal Elasticsearch domain and its security groups.

    Layout follows https://github.com/aws-samples/aws-cdk-managed-elkk
    """
    # TODO: fine-tune policies for data access
    statement = iam.PolicyStatement(
        effect=iam.Effect.ALLOW,
        actions=["es:*"],
        resources=["*"],
    )
    statement.add_any_principal()
    self.elastic_policy = statement

    document = iam.PolicyDocument()
    document.add_statements(statement)
    self.elastic_document = document

    vpc = vpc_db_instance.vpc

    # Security group through which the bastion host reaches ES.
    self.es_security_group = ec2.SecurityGroup(
        stack,
        "BastiontoES",
        security_group_name="BastiontoES",
        vpc=vpc,
        allow_all_outbound=True,
    )
    # Security group for Kinesis Firehose delivery into ES.
    self.kfh_security_group = ec2.SecurityGroup(
        stack,
        "KFHtoES",
        security_group_name="KFHtoES",
        vpc=vpc,
        allow_all_outbound=True,
    )
    # HTTPS from the bastion host and from Firehose.
    self.es_security_group.add_ingress_rule(
        vpc_db_instance.bastion_host_security_group,
        ec2.Port.tcp(443),
        'es',
    )
    self.es_security_group.add_ingress_rule(
        self.kfh_security_group,
        ec2.Port.tcp(443),
        'kfh',
    )

    # Place the domain in the first private subnet of the VPC.
    first_private_subnet_id = vpc.select_subnets(
        subnet_type=ec2.SubnetType.PRIVATE
    ).subnet_ids[0]

    self.elastic_search = es.CfnDomain(
        stack,
        'exampledomain',
        domain_name='exampledomaines',
        elasticsearch_cluster_config={
            "instanceCount": 1,
            "instanceType": 't2.small.elasticsearch',
        },
        elasticsearch_version="7.7",
        ebs_options={"ebsEnabled": True, "volumeSize": 10},
        vpc_options={
            "securityGroupIds": [self.es_security_group.security_group_id],
            "subnetIds": [first_private_subnet_id],
        },
        access_policies=self.elastic_document,
    )
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    """Create an encrypted, zone-aware Elasticsearch domain.

    The domain name is derived from the stack class name (underscores
    removed, lower-cased). Outputs the logical id, domain name, version
    and VPC options of the created domain.

    Args:
        scope: parent construct.
        id: construct id.
    """
    super().__init__(scope, id, **kwargs)
    instance_name = self.__class__.__name__

    # NOTE: CDK L1 dict properties use camelCase keys; PascalCase keys
    # (e.g. "EBSEnabled") are not recognized by the JSII layer and would
    # be silently dropped, so all keys below are camelCase.
    disk_options = {
        "ebsEnabled": True,
        "volumeSize": 10,
        "volumeType": "gp2",
    }
    cluster_config = {
        "dedicatedMasterCount": 3,
        "dedicatedMasterEnabled": True,
        "dedicatedMasterType": "m4.large.elasticsearch",
        "instanceCount": 3,
        "instanceType": "m4.large.elasticsearch",
        "zoneAwarenessEnabled": True,
    }

    # Look up the existing VPC and deploy into its public subnets.
    vpc = ec2.Vpc.from_lookup(
        self, id="Zach_VPC_Stack_A", vpc_id="vpc-01e73b4b5c6f9f98a"
    )
    subnet_ids = [subnet.subnet_id for subnet in vpc.public_subnets]
    vpc_options = {"subnetIds": subnet_ids, "SecurityGroupIds".replace("SecurityGroupIds", "securityGroupIds"): []}

    # EBS storage must be selected for m4.large.elasticsearch
    # (Service: AWSElasticsearch; Status Code: 400;
    #  Error Code: ValidationException)
    domain = es.CfnDomain(
        self,
        id=instance_name,
        domain_name=instance_name.replace("_", "").lower(),
        ebs_options=disk_options,
        elasticsearch_version='7.1',
        snapshot_options={"automatedSnapshotStartHour": 2},
        vpc_options=vpc_options,
        elasticsearch_cluster_config=cluster_config,
        tags=[core.Tag(key="Environment", value="prod"),
              core.Tag(key="Type", value="Log")],
        # KMS key is generated by a project helper on this stack.
        encryption_at_rest_options={
            "enabled": True,
            "kmsKeyId": self.GenerateKmsKey(instance_name),
        },
        node_to_node_encryption_options={"enabled": True},
        advanced_options={"indices.query.bool.max_clause_count": "2000"},
    )

    core.CfnOutput(self, instance_name + "ID", value=domain.logical_id)
    core.CfnOutput(self, instance_name + "Domain", value=domain.domain_name)
    core.CfnOutput(self, instance_name + "Ver",
                   value=domain.elasticsearch_version)
    core.CfnOutput(self, instance_name + "VPC",
                   value=domain.vpc_options.to_string())
def __init__(self, *args, **kwargs): super(LoggignResources, self).__init__(*args, **kwargs) # Netowrk self.vpc = ec2.Vpc(self, 'logging-vpc') self.backup_bucket = s3.Bucket(self, 'logging-backup', bucket_name='logging-backup-bucket') self.elastic_domain = es.CfnDomain(self, 'logging-es-cluster') self.stream = firehose.CfnDeliveryStream( self, 'logging-stream', delivery_stream_name='logging-stream-firehose', delivery_stream_type='DirectPut', elasticsearch_destination_configuration=self.elastic_domain, s3_destination_configuration=self.backup_bucket)
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    """Create the CDK-managed Elasticsearch domain for this stack.

    The domain lives in a fixed VPC subnet/security group, uses dedicated
    masters, and is governed by an account-scoped ``es:*`` access policy.
    """
    super().__init__(scope, id, **kwargs)

    # Resource policy: any AWS principal, limited to the cdk-test domain ARN.
    access_policy = {
        "Version": "2012-10-17",
        "Statement": [{
            "Effect": "Allow",
            "Principal": {"AWS": ["*"]},
            "Action": ["es:*"],
            "Resource": "arn:aws:es:us-east-1:065035205697:domain/cdk-test/*"
        }],
    }

    # Even numbers of dedicated masters are not recommended.
    cluster = ES.CfnDomain.ElasticsearchClusterConfigProperty(
        dedicated_master_enabled=True,
        dedicated_master_count=3,
        dedicated_master_type="t2.small.elasticsearch",
        instance_count=1,
        instance_type="t2.small.elasticsearch",
        zone_awareness_enabled=False,
    )

    self.elasticSearch = ES.CfnDomain(
        self,
        "CDKElasticSearch",
        domain_name="cdk-elasticsearch",
        elasticsearch_version="7.4",
        access_policies=access_policy,
        elasticsearch_cluster_config=cluster,
        vpc_options=ES.CfnDomain.VPCOptionsProperty(
            security_group_ids=["sg-e0fea685"],
            subnet_ids=["subnet-01e89264"],
        ),
        ebs_options=ES.CfnDomain.EBSOptionsProperty(
            ebs_enabled=True,
            iops=0,
            volume_size=20,
            volume_type="gp2",
        ),
        snapshot_options=ES.CfnDomain.SnapshotOptionsProperty(
            automated_snapshot_start_hour=0,
        ),
        cognito_options=ES.CfnDomain.CognitoOptionsProperty(enabled=False),
    )
def __init__(self, scope: core.Construct, id: str, vpc: ec2.Vpc, kibanasg, **kwargs) -> None:
    """Single-node Elasticsearch domain in a private subnet.

    Network access is limited to the supplied Kibana security group; the
    resource policy itself is wide open (VPC placement is the boundary).
    """
    super().__init__(scope, id, **kwargs)

    prj_name = self.node.try_get_context("project_name")
    env_name = self.node.try_get_context("env")

    # Collect private subnet ids; the domain is placed in the last one.
    subnets = [subnet.subnet_id for subnet in vpc.private_subnets]

    cluster = es.CfnDomain.ElasticsearchClusterConfigProperty(
        dedicated_master_enabled=False,
        instance_count=1,
        instance_type='t2.small.elasticsearch',
    )
    storage = es.CfnDomain.EBSOptionsProperty(
        ebs_enabled=True,
        volume_type='gp2',
        volume_size=10,
    )
    network = es.CfnDomain.VPCOptionsProperty(
        security_group_ids=[kibanasg.security_group_id],
        subnet_ids=[subnets.pop()],
    )

    es_domain = es.CfnDomain(
        self,
        'esdomain',
        domain_name=prj_name + '-' + env_name + '-domain',
        elasticsearch_cluster_config=cluster,
        ebs_options=storage,
        vpc_options=network,
        elasticsearch_version='7.4',
    )
    # Assigned after construction; CDK picks this up on the L1 resource.
    es_domain.access_policies = {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Principal": {"AWS": "*"},
                "Action": "es:*",
                "Resource": "*"
            }
        ]
    }
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    """Create two domains: an L2 queueing domain and an L1 test domain."""
    super().__init__(scope, id, **kwargs)

    # High-level (L2) construct with in-place version upgrades enabled.
    dev_domain = es.Domain(
        self,
        "QueueingDomain",
        version=es.ElasticsearchVersion.V7_9,
        enable_version_upgrade=True,
    )

    # Resource policy for the L1 domain: any principal, scoped to the
    # cdk-test domain ARN.
    access_policy = {
        "Version": "2012-10-17",
        "Statement": [{
            "Effect": "Allow",
            "Principal": {"AWS": ["*"]},
            "Action": ["es:*"],
            "Resource": "arn:aws:es:us-east-1:755723993001:domain/cdk-test/*"
        }],
    }

    self.elasticSearch = es.CfnDomain(
        self,
        "CDKElasticSearch",
        domain_name="cdk-elasticsearch",
        elasticsearch_version="7.4",
        access_policies=access_policy,
        elasticsearch_cluster_config=es.CfnDomain.ElasticsearchClusterConfigProperty(
            instance_count=1,
            instance_type="t3.small.elasticsearch",
            zone_awareness_enabled=False,
        ),
        ebs_options=es.CfnDomain.EBSOptionsProperty(
            ebs_enabled=True,
            iops=0,
            volume_size=20,
            volume_type="gp2",
        ),
        snapshot_options=es.CfnDomain.SnapshotOptionsProperty(
            automated_snapshot_start_hour=0,
        ),
    )
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    """Bastion EC2 host plus an IP-restricted Elasticsearch domain."""
    super().__init__(scope, id, **kwargs)
    # Discover our public IP at synth time so it can be whitelisted.
    # NOTE(review): this HTTP call runs on every synth and makes the
    # template non-deterministic/offline-unfriendly — consider a context
    # parameter instead.
    r = requests.get("http://ifconfig.me")
    myip = r.text + "/32"
    # Public-subnet-only VPC, no NAT gateways.
    vpc = ec2.Vpc(self, "VPC",
                  nat_gateways=0,
                  max_azs=3,
                  subnet_configuration=[
                      ec2.SubnetConfiguration(
                          name="public", subnet_type=ec2.SubnetType.PUBLIC)
                  ])
    amzn_linux = ec2.MachineImage.latest_amazon_linux(
        generation=ec2.AmazonLinuxGeneration.AMAZON_LINUX_2,
        edition=ec2.AmazonLinuxEdition.STANDARD,
        virtualization=ec2.AmazonLinuxVirt.HVM,
        storage=ec2.AmazonLinuxStorage.GENERAL_PURPOSE)
    sg = ec2.SecurityGroup(self, "greg-sg", vpc=vpc, allow_all_outbound=True)
    # sg.add_ingress_rule(ec2.Peer.ipv4(myip), ec2.Port.tcp(22))
    # NOTE(review): with the ingress rule above commented out, no SSH
    # access to the instance is possible through this security group.
    instance = ec2.Instance(self, "greg-ec2",
                            instance_type=ec2.InstanceType('c5.xlarge'),
                            machine_image=amzn_linux,
                            vpc=vpc,
                            key_name='gregkey',
                            security_group=sg)
    core.CfnOutput(self, "output_ssh_bastion_public_ip",
                   value=instance.instance_public_ip)
    core.CfnOutput(self, "output_ssh_bastion_private_ip",
                   value=instance.instance_private_ip)
    # es domain helpful links
    # https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-elasticsearch-domain.html#cfn-elasticsearch-domain-elasticsearchclusterconfig
    # https://github.com/aws/aws-cdk/issues/2873
    # https://sourcecodequery.com/example-method/core.Tag
    # https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-elasticsearch-domain.html#cfn-elasticsearch-domain-elasticsearchclusterconfig
    es_cluster_config = {
        "InstanceCount": 3,
        "InstanceType": "m4.xlarge.elasticsearch",
        "DedicatedMasterEnabled": True,
        "DedicatedMasterCount": 3
    }
    # NOTE(review): this statement has no "Resource" key; confirm the
    # resulting access policy is valid for the domain.
    es_access_policy = {
        "Version": "2012-10-17",
        "Statement": [{
            "Effect": "Allow",
            "Principal": {
                "AWS": "*"
            },
            "Action": ["es:*"],
            "Condition": {
                "IpAddress": {
                    "aws:SourceIp": [
                        myip, instance.instance_public_ip,
                        instance.instance_private_ip
                    ]
                }
            },
        }]
    }
    es_storage = {
        "ebsEnabled": True,
        "volumeSize": 50,
        "volumeType": "gp2"
    }
    es_domain = elasticsearch.CfnDomain(
        self, "greg-es",
        elasticsearch_version="7.4",
        elasticsearch_cluster_config=es_cluster_config,
        access_policies=es_access_policy,
        ebs_options=es_storage,
    )
    core.CfnOutput(self, "output_es_domain_endpoint",
                   value=es_domain.attr_domain_endpoint)
def __init__(
    self,
    scope: core.Construct,
    id: str,
    vpc_stack,
    # kafka_stack,
    client: bool = True,
    **kwargs,
) -> None:
    """Elasticsearch domain in the VPC plus an optional EC2 client host.

    Args:
        scope: parent construct.
        id: construct id.
        vpc_stack: stack exposing ``get_vpc`` and
            ``get_vpc_private_subnet_ids``.
        client: when True, also create an EC2 client instance reachable
            over SSH with a 9200 tunnel port from the caller's public IP.
    """
    super().__init__(scope, id, **kwargs)

    # ensure that the service linked role exists
    ensure_service_linked_role("es.amazonaws.com")

    # cloudwatch log group
    elastic_log_group = logs.LogGroup(
        self,
        "elastic_log_group",
        log_group_name="elkk/elastic/aes",
        removal_policy=core.RemovalPolicy.DESTROY,
        retention=logs.RetentionDays.ONE_WEEK,
    )

    # security group for elastic client
    elastic_client_security_group = ec2.SecurityGroup(
        self,
        "elastic_client_security_group",
        vpc=vpc_stack.get_vpc,
        description="elastic client security group",
        allow_all_outbound=True,
    )
    # core.Tag.add is deprecated — use core.Tags.of, as the other stacks
    # in this project already do.
    core.Tags.of(elastic_client_security_group).add(
        "project", constants["PROJECT_TAG"])
    core.Tags.of(elastic_client_security_group).add(
        "Name", "elastic_client_sg")

    # Open port 22 for SSH
    elastic_client_security_group.add_ingress_rule(
        ec2.Peer.ipv4(f"{external_ip}/32"),
        ec2.Port.tcp(22),
        "from own public ip",
    )
    # Open port for tunnel
    elastic_client_security_group.add_ingress_rule(
        ec2.Peer.ipv4(f"{external_ip}/32"),
        ec2.Port.tcp(9200),
        "for ssh tunnel",
    )

    # security group for elastic
    self.elastic_security_group = ec2.SecurityGroup(
        self,
        "elastic_security_group",
        vpc=vpc_stack.get_vpc,
        description="elastic security group",
        allow_all_outbound=True,
    )
    core.Tags.of(self.elastic_security_group).add(
        "project", constants["PROJECT_TAG"])
    core.Tags.of(self.elastic_security_group).add("Name", "elastic_sg")

    # ingress for elastic from self
    self.elastic_security_group.connections.allow_from(
        self.elastic_security_group,
        ec2.Port.all_traffic(),
        "within elastic",
    )
    # ingress for elastic from elastic client
    self.elastic_security_group.connections.allow_from(
        elastic_client_security_group,
        ec2.Port.all_traffic(),
        "from elastic client",
    )
    # ingress for elastic client from elastic
    elastic_client_security_group.connections.allow_from(
        self.elastic_security_group,
        ec2.Port.all_traffic(),
        "from elastic",
    )

    # elastic policy (TODO: tighten beyond es:* on *)
    elastic_policy = iam.PolicyStatement(
        effect=iam.Effect.ALLOW,
        actions=["es:*"],
        resources=["*"],
    )
    elastic_policy.add_any_principal()
    elastic_document = iam.PolicyDocument()
    elastic_document.add_statements(elastic_policy)

    # cluster config: three AZs, optional dedicated masters
    cluster_config = {
        "instanceCount": constants["ELASTIC_INSTANCE_COUNT"],
        "instanceType": constants["ELASTIC_INSTANCE"],
        "zoneAwarenessEnabled": True,
        "zoneAwarenessConfig": {"availabilityZoneCount": 3},
    }
    if constants["ELASTIC_DEDICATED_MASTER"]:
        cluster_config["dedicatedMasterEnabled"] = True
        cluster_config["dedicatedMasterType"] = constants[
            "ELASTIC_MASTER_INSTANCE"]
        cluster_config["dedicatedMasterCount"] = constants[
            "ELASTIC_MASTER_COUNT"]

    # create the elastic cluster
    self.elastic_domain = aes.CfnDomain(
        self,
        "elastic_domain",
        elasticsearch_cluster_config=cluster_config,
        elasticsearch_version=constants["ELASTIC_VERSION"],
        ebs_options={"ebsEnabled": True, "volumeSize": 10},
        vpc_options={
            "securityGroupIds":
                [self.elastic_security_group.security_group_id],
            "subnetIds": vpc_stack.get_vpc_private_subnet_ids,
        },
        access_policies=elastic_document,
    )
    core.Tags.of(self.elastic_domain).add("project", constants["PROJECT_TAG"])

    # instance for elasticsearch
    if client:
        # userdata for the elastic client
        elastic_userdata = user_data_init(
            log_group_name="elkk/elastic/instance")
        # create the instance
        elastic_instance = ec2.Instance(
            self,
            "elastic_client",
            instance_type=ec2.InstanceType(
                constants["ELASTIC_CLIENT_INSTANCE"]),
            machine_image=ec2.AmazonLinuxImage(
                generation=ec2.AmazonLinuxGeneration.AMAZON_LINUX_2),
            vpc=vpc_stack.get_vpc,
            vpc_subnets={"subnet_type": ec2.SubnetType.PUBLIC},
            key_name=constants["KEY_PAIR"],
            security_group=elastic_client_security_group,
            user_data=elastic_userdata,
        )
        core.Tags.of(elastic_instance).add(
            "project", constants["PROJECT_TAG"])
        # needs elastic domain to be available
        elastic_instance.node.add_dependency(self.elastic_domain)

        # create policies for EC2 to connect to Elastic
        access_elastic_policy = iam.PolicyStatement(
            effect=iam.Effect.ALLOW,
            actions=[
                "es:ListDomainNames",
                "es:DescribeElasticsearchDomain",
                "es:ESHttpPut",
            ],
            resources=["*"],
        )
        # add the role permissions
        elastic_instance.add_to_role_policy(
            statement=access_elastic_policy)
        # add log permissions
        instance_add_log_permissions(elastic_instance)
        # add the signal so CloudFormation waits for userdata completion
        elastic_userdata.add_signal_on_exit_command(
            resource=elastic_instance)
        # add creation policy for instance
        elastic_instance.instance.cfn_options.creation_policy = \
            core.CfnCreationPolicy(
                resource_signal=core.CfnResourceSignal(
                    count=1, timeout="PT10M"))
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    """Elasticsearch hands-on-lab stack: VPC, bastion host, SGs, domain."""
    super().__init__(scope, construct_id, **kwargs)

    # Two-AZ VPC with an S3 gateway endpoint.
    vpc = aws_ec2.Vpc(
        self,
        "ElasticsearchHolVPC",
        max_azs=2,
        gateway_endpoints={
            "S3": aws_ec2.GatewayVpcEndpointOptions(
                service=aws_ec2.GatewayVpcEndpointAwsService.S3)
        })

    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceClass.html
    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceSize.html#aws_cdk.aws_ec2.InstanceSize
    ec2_instance_type = aws_ec2.InstanceType.of(
        aws_ec2.InstanceClass.BURSTABLE3, aws_ec2.InstanceSize.MEDIUM)

    sg_bastion_host = aws_ec2.SecurityGroup(
        self,
        "BastionHostSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for an bastion host',
        security_group_name='bastion-host-sg')
    cdk.Tags.of(sg_bastion_host).add('Name', 'bastion-host-sg')

    #XXX: As there are no SSH public keys deployed on this machine,
    # you need to use EC2 Instance Connect with the command
    # 'aws ec2-instance-connect send-ssh-public-key' to provide your SSH public key.
    # https://aws.amazon.com/de/blogs/compute/new-using-amazon-ec2-instance-connect-for-ssh-access-to-your-ec2-instances/
    bastion_host = aws_ec2.BastionHostLinux(
        self,
        "BastionHost",
        vpc=vpc,
        instance_type=ec2_instance_type,
        subnet_selection=aws_ec2.SubnetSelection(
            subnet_type=aws_ec2.SubnetType.PUBLIC),
        security_group=sg_bastion_host)

    #TODO: SHOULD restrict IP range allowed to ssh acces
    bastion_host.allow_ssh_access_from(aws_ec2.Peer.ipv4("0.0.0.0/0"))

    # Client-side security group: membership grants access to the cluster.
    sg_use_es = aws_ec2.SecurityGroup(
        self,
        "ElasticSearchClientSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for an elasticsearch client',
        security_group_name='use-es-cluster-sg')
    cdk.Tags.of(sg_use_es).add('Name', 'use-es-cluster-sg')

    # Cluster security group: allows TCP from itself, clients and bastion.
    sg_es = aws_ec2.SecurityGroup(
        self,
        "ElasticSearchSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for an elasticsearch cluster',
        security_group_name='es-cluster-sg')
    cdk.Tags.of(sg_es).add('Name', 'es-cluster-sg')

    sg_es.add_ingress_rule(peer=sg_es,
                           connection=aws_ec2.Port.all_tcp(),
                           description='es-cluster-sg')
    sg_es.add_ingress_rule(peer=sg_use_es,
                           connection=aws_ec2.Port.all_tcp(),
                           description='use-es-cluster-sg')
    sg_es.add_ingress_rule(peer=sg_bastion_host,
                           connection=aws_ec2.Port.all_tcp(),
                           description='bastion-host-sg')

    #XXX: aws cdk elastsearch example - https://github.com/aws/aws-cdk/issues/2873
    es_domain_name = 'es-hol'
    es_cfn_domain = aws_elasticsearch.CfnDomain(
        self,
        "ElasticSearch",
        elasticsearch_cluster_config={
            "dedicatedMasterCount": 3,
            "dedicatedMasterEnabled": True,
            "dedicatedMasterType": "t2.medium.elasticsearch",
            "instanceCount": 2,
            "instanceType": "t2.medium.elasticsearch",
            "zoneAwarenessEnabled": True
        },
        ebs_options={
            "ebsEnabled": True,
            "volumeSize": 10,
            "volumeType": "gp2"
        },
        domain_name=es_domain_name,
        elasticsearch_version="7.7",
        encryption_at_rest_options={"enabled": False},
        # Read-and-HTTP-only policy, scoped to this domain's ARN.
        access_policies={
            "Version": "2012-10-17",
            "Statement": [{
                "Effect": "Allow",
                "Principal": {
                    "AWS": "*"
                },
                "Action": ["es:Describe*", "es:List*",
                           "es:Get*", "es:ESHttp*"],
                "Resource": self.format_arn(
                    service="es",
                    resource="domain",
                    resource_name="{}/*".format(es_domain_name))
            }]
        },
        snapshot_options={"automatedSnapshotStartHour": 17},
        vpc_options={
            "securityGroupIds": [sg_es.security_group_id],
            "subnetIds": vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).subnet_ids
        })
    cdk.Tags.of(es_cfn_domain).add('Name', 'es-hol')

    cdk.CfnOutput(self, 'BastionHostId',
                  value=bastion_host.instance_id,
                  export_name='BastionHostId')
    cdk.CfnOutput(self, 'BastionHostPublicDNSName',
                  value=bastion_host.instance_public_dns_name,
                  export_name='BastionHostPublicDNSName')
    cdk.CfnOutput(self, 'ESDomainEndpoint',
                  value=es_cfn_domain.attr_domain_endpoint,
                  export_name='ESDomainEndpoint')
    cdk.CfnOutput(
        self,
        'ESDashboardsURL',
        value=f"{es_cfn_domain.attr_domain_endpoint}/_dashboards/",
        export_name='ESDashboardsURL')
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    """Analytics-workshop pipeline: Kinesis -> Firehose -> S3 and Lambda -> ES.

    Builds the VPC, bastion host, security groups, S3 bucket, Kinesis
    stream, Firehose delivery stream, Elasticsearch domain, and two Lambda
    functions (upsert-to-ES and Athena small-file merge on a schedule).
    """
    super().__init__(scope, id, **kwargs)

    # The code that defines your stack goes here
    vpc = aws_ec2.Vpc(
        self,
        "AnalyticsWorkshopVPC",
        max_azs=2,
        gateway_endpoints={
            "S3": aws_ec2.GatewayVpcEndpointOptions(
                service=aws_ec2.GatewayVpcEndpointAwsService.S3)
        })

    sg_bastion_host = aws_ec2.SecurityGroup(
        self,
        "BastionHostSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for an bastion host',
        security_group_name='bastion-host-sg')
    # NOTE(review): core.Tag.add is deprecated in favor of core.Tags.of.
    core.Tag.add(sg_bastion_host, 'Name', 'bastion-host-sg')

    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceClass.html
    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceSize.html#aws_cdk.aws_ec2.InstanceSize
    ec2_instance_type = aws_ec2.InstanceType.of(
        aws_ec2.InstanceClass.BURSTABLE3, aws_ec2.InstanceSize.MEDIUM)

    #XXX: As there are no SSH public keys deployed on this machine,
    # you need to use EC2 Instance Connect with the command
    # 'aws ec2-instance-connect send-ssh-public-key' to provide your SSH public key.
    # https://aws.amazon.com/de/blogs/compute/new-using-amazon-ec2-instance-connect-for-ssh-access-to-your-ec2-instances/
    bastion_host = aws_ec2.BastionHostLinux(
        self,
        "BastionHost",
        vpc=vpc,
        instance_type=ec2_instance_type,
        subnet_selection=aws_ec2.SubnetSelection(
            subnet_type=aws_ec2.SubnetType.PUBLIC),
        security_group=sg_bastion_host)

    #TODO: SHOULD restrict IP range allowed to ssh acces
    bastion_host.allow_ssh_access_from(aws_ec2.Peer.ipv4("0.0.0.0/0"))

    #XXX: In order to test data pipeline, add {Kinesis, KinesisFirehose}FullAccess Policy to the bastion host.
    bastion_host.role.add_to_policy(
        aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                resources=["*"],
                                actions=["kinesis:*"]))
    bastion_host.role.add_to_policy(
        aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                resources=["*"],
                                actions=["firehose:*"]))

    # Client-side security group: membership grants access to the cluster.
    sg_use_es = aws_ec2.SecurityGroup(
        self,
        "ElasticSearchClientSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for an elasticsearch client',
        security_group_name='use-es-cluster-sg')
    core.Tag.add(sg_use_es, 'Name', 'use-es-cluster-sg')

    # Cluster security group: TCP from itself, clients and bastion host.
    sg_es = aws_ec2.SecurityGroup(
        self,
        "ElasticSearchSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for an elasticsearch cluster',
        security_group_name='es-cluster-sg')
    core.Tag.add(sg_es, 'Name', 'es-cluster-sg')

    sg_es.add_ingress_rule(peer=sg_es,
                           connection=aws_ec2.Port.all_tcp(),
                           description='es-cluster-sg')
    sg_es.add_ingress_rule(peer=sg_use_es,
                           connection=aws_ec2.Port.all_tcp(),
                           description='use-es-cluster-sg')
    sg_es.add_ingress_rule(peer=sg_bastion_host,
                           connection=aws_ec2.Port.all_tcp(),
                           description='bastion-host-sg')

    # Destination bucket; requires kwargs['env'] to carry region/account.
    s3_bucket = s3.Bucket(
        self,
        "s3bucket",
        bucket_name="aws-analytics-immersion-day-{region}-{account}".
        format(region=kwargs['env'].region, account=kwargs['env'].account))

    trans_kinesis_stream = kinesis.Stream(
        self, "AnalyticsWorkshopKinesisStreams", stream_name='retail-trans')

    # Inline policy for the Firehose delivery role: S3 write, Glue table
    # reads, Kinesis source reads, and CloudWatch log writes.
    firehose_role_policy_doc = aws_iam.PolicyDocument()
    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            **{
                "effect": aws_iam.Effect.ALLOW,
                "resources": [
                    s3_bucket.bucket_arn, "{}/*".format(
                        s3_bucket.bucket_arn)
                ],
                "actions": [
                    "s3:AbortMultipartUpload", "s3:GetBucketLocation",
                    "s3:GetObject", "s3:ListBucket",
                    "s3:ListBucketMultipartUploads", "s3:PutObject"
                ]
            }))
    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                resources=["*"],
                                actions=[
                                    "glue:GetTable", "glue:GetTableVersion",
                                    "glue:GetTableVersions"
                                ]))
    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=[trans_kinesis_stream.stream_arn],
            actions=[
                "kinesis:DescribeStream", "kinesis:GetShardIterator",
                "kinesis:GetRecords"
            ]))

    firehose_log_group_name = "/aws/kinesisfirehose/retail-trans"
    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            #XXX: The ARN will be formatted as follows:
            # arn:{partition}:{service}:{region}:{account}:{resource}{sep}}{resource-name}
            resources=[
                self.format_arn(service="logs",
                                resource="log-group",
                                resource_name="{}:log-stream:*".format(
                                    firehose_log_group_name),
                                sep=":")
            ],
            actions=["logs:PutLogEvents"]))

    firehose_role = aws_iam.Role(
        self,
        "FirehoseDeliveryRole",
        role_name="FirehoseDeliveryRole",
        assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"),
        #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
        inline_policies={"firehose_role_policy": firehose_role_policy_doc})

    # Firehose: Kinesis stream -> partitioned JSON objects in S3.
    trans_to_s3_delivery_stream = aws_kinesisfirehose.CfnDeliveryStream(
        self,
        "KinesisFirehoseToS3",
        delivery_stream_name="retail-trans",
        delivery_stream_type="KinesisStreamAsSource",
        kinesis_stream_source_configuration={
            "kinesisStreamArn": trans_kinesis_stream.stream_arn,
            "roleArn": firehose_role.role_arn
        },
        extended_s3_destination_configuration={
            "bucketArn": s3_bucket.bucket_arn,
            "bufferingHints": {
                "intervalInSeconds": 60,
                "sizeInMBs": 1
            },
            "cloudWatchLoggingOptions": {
                "enabled": True,
                "logGroupName": firehose_log_group_name,
                "logStreamName": "S3Delivery"
            },
            "compressionFormat": "UNCOMPRESSED",  # [GZIP | HADOOP_SNAPPY | Snappy | UNCOMPRESSED | ZIP]
            "prefix":
            "json-data/year=!{timestamp:yyyy}/month=!{timestamp:MM}/day=!{timestamp:dd}/hour=!{timestamp:HH}/",
            "errorOutputPrefix":
            "error-json/year=!{timestamp:yyyy}/month=!{timestamp:MM}/day=!{timestamp:dd}/hour=!{timestamp:HH}/!{firehose:error-output-type}",
            "roleArn": firehose_role.role_arn
        })

    #XXX: aws cdk elastsearch example - https://github.com/aws/aws-cdk/issues/2873
    es_domain_name = 'retail'
    es_cfn_domain = aws_elasticsearch.CfnDomain(
        self,
        "ElasticSearch",
        elasticsearch_cluster_config={
            "dedicatedMasterCount": 3,
            "dedicatedMasterEnabled": True,
            "dedicatedMasterType": "t2.medium.elasticsearch",
            "instanceCount": 2,
            "instanceType": "t2.medium.elasticsearch",
            "zoneAwarenessEnabled": True
        },
        ebs_options={
            "ebsEnabled": True,
            "volumeSize": 10,
            "volumeType": "gp2"
        },
        domain_name=es_domain_name,
        elasticsearch_version="7.4",
        encryption_at_rest_options={"enabled": False},
        access_policies={
            "Version": "2012-10-17",
            "Statement": [{
                "Effect": "Allow",
                "Principal": {
                    "AWS": "*"
                },
                "Action": ["es:Describe*", "es:List*", "es:Get*",
                           "es:ESHttp*"],
                "Resource": self.format_arn(
                    service="es",
                    resource="domain",
                    resource_name="{}/*".format(es_domain_name))
            }]
        },
        snapshot_options={"automatedSnapshotStartHour": 17},
        vpc_options={
            "securityGroupIds": [sg_es.security_group_id],
            "subnetIds": vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids
        })
    core.Tag.add(es_cfn_domain, 'Name', 'analytics-workshop-es')

    #XXX: https://github.com/aws/aws-cdk/issues/1342
    # NOTE(review): reusing the constructor `id` as the construct id here
    # risks an id collision with the stack itself — consider a literal id.
    s3_lib_bucket = s3.Bucket.from_bucket_name(self, id,
                                               S3_BUCKET_LAMBDA_LAYER_LIB)
    # Lambda layer holding the elasticsearch client library.
    es_lib_layer = _lambda.LayerVersion(
        self,
        "ESLib",
        layer_version_name="es-lib",
        compatible_runtimes=[_lambda.Runtime.PYTHON_3_7],
        code=_lambda.Code.from_bucket(s3_lib_bucket, "var/es-lib.zip"))

    #XXX: add more than 2 security groups
    # https://github.com/aws/aws-cdk/blob/ea10f0d141a48819ec0000cd7905feda993870a9/packages/%40aws-cdk/aws-lambda/lib/function.ts#L387
    # https://github.com/aws/aws-cdk/issues/1555
    # https://github.com/aws/aws-cdk/pull/5049
    #XXX: Deploy lambda in VPC - https://github.com/aws/aws-cdk/issues/1342
    upsert_to_es_lambda_fn = _lambda.Function(
        self,
        "UpsertToES",
        runtime=_lambda.Runtime.PYTHON_3_7,
        function_name="UpsertToES",
        handler="upsert_to_es.lambda_handler",
        description="Upsert records into elasticsearch",
        code=_lambda.Code.asset("./src/main/python/UpsertToES"),
        environment={
            'ES_HOST': es_cfn_domain.attr_domain_endpoint,
            #TODO: MUST set appropriate environment variables for your workloads.
            'ES_INDEX': 'retail',
            'ES_TYPE': 'trans',
            'REQUIRED_FIELDS': 'Invoice,StockCode,Customer_ID',
            'REGION_NAME': kwargs['env'].region,
            'DATE_TYPE_FIELDS': 'InvoiceDate'
        },
        timeout=core.Duration.minutes(5),
        layers=[es_lib_layer],
        security_groups=[sg_use_es],
        vpc=vpc)

    # Trigger the upsert Lambda from the Kinesis stream.
    trans_kinesis_event_source = KinesisEventSource(
        trans_kinesis_stream,
        batch_size=1000,
        starting_position=_lambda.StartingPosition.LATEST)
    upsert_to_es_lambda_fn.add_event_source(trans_kinesis_event_source)

    log_group = aws_logs.LogGroup(
        self,
        "UpsertToESLogGroup",
        log_group_name="/aws/lambda/UpsertToES",
        retention=aws_logs.RetentionDays.THREE_DAYS)
    log_group.grant_write(upsert_to_es_lambda_fn)

    # Scheduled Lambda that compacts small JSON files via Athena CTAS.
    merge_small_files_lambda_fn = _lambda.Function(
        self,
        "MergeSmallFiles",
        runtime=_lambda.Runtime.PYTHON_3_7,
        function_name="MergeSmallFiles",
        handler="athena_ctas.lambda_handler",
        description="Merge small files in S3",
        code=_lambda.Code.asset("./src/main/python/MergeSmallFiles"),
        environment={
            #TODO: MUST set appropriate environment variables for your workloads.
            'OLD_DATABASE': 'mydatabase',
            'OLD_TABLE_NAME': 'retail_trans_json',
            'NEW_DATABASE': 'mydatabase',
            'NEW_TABLE_NAME': 'ctas_retail_trans_parquet',
            'WORK_GROUP': 'primary',
            'OUTPUT_PREFIX': 's3://{}'.format(
                os.path.join(s3_bucket.bucket_name, 'parquet-retail-trans')),
            'STAGING_OUTPUT_PREFIX':
            's3://{}'.format(os.path.join(s3_bucket.bucket_name, 'tmp')),
            'COLUMN_NAMES':
            'invoice,stockcode,description,quantity,invoicedate,price,customer_id,country',
        },
        timeout=core.Duration.minutes(5))

    merge_small_files_lambda_fn.add_to_role_policy(
        aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                resources=["*"],
                                actions=["athena:*"]))
    merge_small_files_lambda_fn.add_to_role_policy(
        aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                resources=["*"],
                                actions=[
                                    "s3:Get*",
                                    "s3:List*",
                                    "s3:AbortMultipartUpload",
                                    "s3:PutObject",
                                ]))
    merge_small_files_lambda_fn.add_to_role_policy(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=["*"],
            actions=[
                "glue:CreateDatabase", "glue:DeleteDatabase",
                "glue:GetDatabase", "glue:GetDatabases",
                "glue:UpdateDatabase", "glue:CreateTable",
                "glue:DeleteTable", "glue:BatchDeleteTable",
                "glue:UpdateTable", "glue:GetTable", "glue:GetTables",
                "glue:BatchCreatePartition", "glue:CreatePartition",
                "glue:DeletePartition", "glue:BatchDeletePartition",
                "glue:UpdatePartition", "glue:GetPartition",
                "glue:GetPartitions", "glue:BatchGetPartition"
            ]))
    merge_small_files_lambda_fn.add_to_role_policy(
        aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                resources=["*"],
                                actions=["lakeformation:GetDataAccess"]))

    # Run the merge Lambda at minute 5 of every hour.
    lambda_fn_target = aws_events_targets.LambdaFunction(
        merge_small_files_lambda_fn)
    aws_events.Rule(self,
                    "ScheduleRule",
                    schedule=aws_events.Schedule.cron(minute="5"),
                    targets=[lambda_fn_target])

    log_group = aws_logs.LogGroup(
        self,
        "MergeSmallFilesLogGroup",
        log_group_name="/aws/lambda/MergeSmallFiles",
        retention=aws_logs.RetentionDays.THREE_DAYS)
    log_group.grant_write(merge_small_files_lambda_fn)
def __init__(self, scope: core.Construct, id: str, domain_prefix: str,
             other_account: str, **kwargs) -> None:
    """Cognito-authenticated Elasticsearch domain with a cross-account role.

    Args:
        scope: parent construct.
        id: construct id.
        domain_prefix: prefix used for the Cognito domain and ES domain name.
        other_account: AWS account id allowed to assume the logger role.
    """
    super().__init__(scope, id, **kwargs)

    # User pool: admin-created users only, email as username.
    user_pool = cognito.CfnUserPool(
        scope=self,
        id="user-pool",
        admin_create_user_config=cognito.CfnUserPool.AdminCreateUserConfigProperty(
            allow_admin_create_user_only=True,
        ),
        policies=cognito.CfnUserPool.PoliciesProperty(
            password_policy=cognito.CfnUserPool.PasswordPolicyProperty(
                minimum_length=20,
            )),
        username_attributes=["email"],
        auto_verified_attributes=["email"],
    )

    # Hosted UI domain; account id appended for global uniqueness.
    cognito.CfnUserPoolDomain(
        scope=self,
        id="cognito-user-pool-domain",
        domain=f"{domain_prefix}-{core.Aws.ACCOUNT_ID}",
        user_pool_id=user_pool.ref,
    )

    id_pool = cognito.CfnIdentityPool(
        scope=self,
        id="identity-pool",
        allow_unauthenticated_identities=False,
        cognito_identity_providers=[],
    )

    # Role assumed by authenticated identities from the identity pool.
    auth_role = iam.Role(
        scope=self,
        id="auth-role",
        assumed_by=iam.FederatedPrincipal(
            federated="cognito-identity.amazonaws.com",
            conditions={
                "StringEquals": {"cognito-identity.amazonaws.com:aud": id_pool.ref},
                "ForAnyValue:StringLike": {"cognito-identity.amazonaws.com:amr": "authenticated"},
            },
            assume_role_action="sts:AssumeRoleWithWebIdentity"),
    )

    # Service role ES uses to configure Cognito for Kibana.
    es_role = iam.Role(
        scope=self,
        id="es-role",
        assumed_by=iam.ServicePrincipal('es.amazonaws.com'),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name(
                managed_policy_name="AmazonESCognitoAccess"
            )
        ],
    )

    # Encrypted two-node domain; HTTP access limited to the auth role.
    es_domain = elasticsearch.CfnDomain(
        scope=self,
        id="search-domain",
        elasticsearch_cluster_config=elasticsearch.CfnDomain.ElasticsearchClusterConfigProperty(
            instance_count=2,
            instance_type="m5.large.elasticsearch",
        ),
        node_to_node_encryption_options=elasticsearch.CfnDomain.NodeToNodeEncryptionOptionsProperty(
            enabled=True),
        encryption_at_rest_options=elasticsearch.CfnDomain.EncryptionAtRestOptionsProperty(
            enabled=True),
        ebs_options=elasticsearch.CfnDomain.EBSOptionsProperty(
            ebs_enabled=True,
            volume_size=20),
        elasticsearch_version="7.4",
        domain_name=domain_prefix,
        access_policies={
            "Version": "2012-10-17",
            "Statement": [
                {
                    "Effect": "Allow",
                    "Principal": {
                        "AWS": auth_role.role_arn
                    },
                    "Action": [
                        "es:ESHttpGet",
                        "es:ESHttpPut",
                        "es:ESHttpPost",
                        "es:ESHttpDelete"
                    ],
                    "Resource": "arn:aws:es:" + core.Aws.REGION + ":"
                                + core.Aws.ACCOUNT_ID + ":domain/"
                                + domain_prefix + "/*"
                },
            ]
        },
    )

    # CognitoOptions set via raw overrides on the L1 resource.
    es_domain.add_property_override(
        'CognitoOptions.Enabled', True)
    es_domain.add_property_override(
        'CognitoOptions.IdentityPoolId', id_pool.ref)
    es_domain.add_property_override(
        'CognitoOptions.RoleArn', es_role.role_arn)
    es_domain.add_property_override(
        'CognitoOptions.UserPoolId', user_pool.ref)

    cognito.CfnIdentityPoolRoleAttachment(
        scope=self,
        id='user-pool-role-attachment',
        identity_pool_id=id_pool.ref,
        roles={
            'authenticated': auth_role.role_arn
        }
    )

    # Role for Lambda (here and in `other_account`) to POST into the domain.
    es_external_role = iam.Role(
        scope=self,
        id="logger-role",
        assumed_by=iam.CompositePrincipal(
            iam.ServicePrincipal("lambda.amazonaws.com"),
            iam.AccountPrincipal(other_account),
        ),
        description="role to use elastic search assumed by lambda",
        inline_policies={
            "es_policy": iam.PolicyDocument(statements=[
                iam.PolicyStatement(
                    actions=[
                        "es:ESHttpPost",
                    ],
                    resources=[
                        es_domain.attr_arn + "/*",
                    ],
                )]),
        },
    )

    core.CfnOutput(
        scope=self,
        id="es-host",
        value=es_domain.attr_domain_endpoint,
    )
    core.CfnOutput(
        scope=self,
        id="es-region",
        value=core.Aws.REGION,
    )
    core.CfnOutput(
        scope=self,
        id="es-external-role",
        value=es_external_role.role_arn,
    )
def __init__(self, scope: core.Construct, id: str, vpc, cognito_for_es, es_domain_name: str, stack_log_level: str, **kwargs) -> None:
    """Provision an Amazon Elasticsearch domain for log search.

    Builds the domain's security group (FluentBit ingest on 9200, Kibana on
    443 from within the VPC), the Cognito-backed ES domain, SSM parameters
    advertising the domain endpoint/region, and stack outputs.

    :param scope: parent construct.
    :param id: construct id of this stack.
    :param vpc: wrapper exposing ``get_vpc`` (used for the security group).
    :param cognito_for_es: wrapper exposing the Cognito identity/user pool
        and role ARNs the domain authenticates with.
    :param es_domain_name: name for the Elasticsearch domain.
    :param stack_log_level: retained for interface compatibility (unused here).
    """
    super().__init__(scope, id, **kwargs)

    # Security group for the ES domain. Only effective if the domain is
    # placed in the VPC (vpc_options below is currently commented out),
    # but kept so the stack's resource set is unchanged.
    self.elastic_security_group = _ec2.SecurityGroup(
        self,
        "elastic_security_group",
        vpc=vpc.get_vpc,
        description="elastic security group",
        allow_all_outbound=True,
    )
    self.elastic_security_group.connections.allow_from(
        other=_ec2.Peer.ipv4(vpc.get_vpc.vpc_cidr_block),
        port_range=_ec2.Port.tcp(9200),
        description="Allow Incoming FluentBit Traffic")
    self.elastic_security_group.connections.allow_from(
        other=_ec2.Peer.ipv4(vpc.get_vpc.vpc_cidr_block),
        port_range=_ec2.Port.tcp(443),
        description="Allow Kibana Access")

    # Amazon ElasticSearch Cluster.
    # FIX: the original passed a PolicyDocument as access_policies and then
    # immediately overwrote the property with a plain dict, leaving the
    # PolicyDocument (and its PolicyStatement) dead code. The final policy
    # dict is now set directly in the constructor.
    # FIX: "dedicated_master_enabled" was snake_case among camelCase keys in
    # the raw Cfn property dict; raw dicts use camelCase keys like the rest.
    es_log_search = _es.CfnDomain(
        self,
        "logSearcher",
        domain_name=f"{es_domain_name}",
        elasticsearch_cluster_config={
            # "dedicatedMasterCount": 1,
            "dedicatedMasterEnabled": False,
            "instanceCount": 2,
            "instanceType": "t3.small.elasticsearch",
            "zoneAwarenessEnabled": True,
            # "zoneAwarenessConfig": {"availabilityZoneCount": 2},
        },
        elasticsearch_version="7.1",
        ebs_options=_es.CfnDomain.EBSOptionsProperty(ebs_enabled=True,
                                                     volume_size=10),
        # vpc_options={
        #     "securityGroupIds": [self.elastic_security_group.security_group_id],
        #     "subnetIds": vpc.get_vpc_private_subnet_ids,
        # },
        # TODO: fine-tune — "es:*" for the Cognito auth role is broad.
        access_policies={
            "Version": "2012-10-17",
            "Statement": [{
                "Effect": "Allow",
                "Principal": {
                    "AWS": cognito_for_es.get_es_auth_role_arn
                },
                "Action": "es:*",
                "Resource": f"arn:aws:es:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}:domain/{es_domain_name}/*"
            }]
        },
        cognito_options=_es.CfnDomain.CognitoOptionsProperty(
            enabled=True,
            identity_pool_id=cognito_for_es.get_es_identity_pool_ref,
            user_pool_id=cognito_for_es.get_es_user_pool_id,
            role_arn=cognito_for_es.get_es_role_arn))

    # Advertise the domain endpoint and region via SSM for consumers.
    CreateSsmStringParameter(
        self,
        "esEndpointSsmParameter",
        _param_desc="ElasticSearch Domain Endpoint",
        _param_name="/miztiik-automation/es/endpoint",
        _param_value=f"{es_log_search.attr_domain_endpoint}")
    CreateSsmStringParameter(
        self,
        "esRegionSsmParameter",
        _param_desc="ElasticSearch Domain Region",
        _param_name="/miztiik-automation/es/region",
        _param_value=f"{core.Aws.REGION}")

    # Parameter names exposed to dependent stacks.
    self.es_endpoint_param_name = "/miztiik-automation/es/endpoint"
    self.es_region_param_name = "/miztiik-automation/es/region"

    ###########################################
    ################# OUTPUTS #################
    ###########################################
    core.CfnOutput(
        self,
        "AutomationFrom",
        value=f"{GlobalArgs.SOURCE_INFO}",
        description=
        "To know more about this automation stack, check out our github page."
    )
    core.CfnOutput(
        self,
        "LogSearchInEsDomainEndpoint",
        value=f"{es_log_search.attr_domain_endpoint}",
        description="ElasticSearch Domain Endpoint")
    core.CfnOutput(
        self,
        "kibanaUrl",
        value=f"https://{es_log_search.attr_domain_endpoint}/_plugin/kibana/",
        description="Access Kibana via this URL.")
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    """Octember bizcard pipeline stack.

    End-to-end flow: API Gateway (S3 proxy) uploads card images to S3 ->
    S3 event triggers a Lambda that enqueues work on a Kinesis stream ->
    Textract Lambda extracts text and publishes to a second stream ->
    consumers upsert into Elasticsearch (search API + Redis cache) and
    Neptune (PYMK recommendation API + Redis cache), with Firehose
    archiving the raw text to S3 and a SageMaker notebook as a Neptune
    workbench.

    FIX vs. original: the SageMaker notebook instance was given a security
    group *name* where ``security_group_ids`` requires the group *ID*
    (see the last resource); regex selection patterns are now raw strings.
    """
    super().__init__(scope, id, **kwargs)

    # Two-AZ VPC with gateway endpoints so Lambdas in private subnets can
    # reach S3/DynamoDB without NAT traffic.
    vpc = aws_ec2.Vpc(
        self,
        "OctemberVPC",
        max_azs=2,
        gateway_endpoints={
            "S3": aws_ec2.GatewayVpcEndpointOptions(
                service=aws_ec2.GatewayVpcEndpointAwsService.S3)
        })
    dynamo_db_endpoint = vpc.add_gateway_endpoint(
        "DynamoDbEndpoint",
        service=aws_ec2.GatewayVpcEndpointAwsService.DYNAMODB)

    # Bucket for raw images and Firehose output; name made unique per
    # region/account since S3 names are global.
    s3_bucket = s3.Bucket(
        self,
        "s3bucket",
        bucket_name="octember-bizcard-{region}-{account}".format(
            region=core.Aws.REGION, account=core.Aws.ACCOUNT_ID))

    # REST API acting as a direct S3 proxy for image upload/download.
    api = apigw.RestApi(
        self,
        "BizcardImageUploader",
        rest_api_name="BizcardImageUploader",
        description="This service serves uploading bizcard images into s3.",
        endpoint_types=[apigw.EndpointType.REGIONAL],
        binary_media_types=["image/png", "image/jpg"],
        deploy=True,
        deploy_options=apigw.StageOptions(stage_name="v1"))

    # Role API Gateway assumes to call S3 on the caller's behalf.
    rest_api_role = aws_iam.Role(
        self,
        "ApiGatewayRoleForS3",
        role_name="ApiGatewayRoleForS3FullAccess",
        assumed_by=aws_iam.ServicePrincipal("apigateway.amazonaws.com"),
        managed_policies=[
            aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                "AmazonS3FullAccess")
        ])

    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_apigateway/IntegrationResponse.html#aws_cdk.aws_apigateway.IntegrationResponse.response_parameters
    # The response parameters from the backend response that API Gateway sends to the method response.
    # Use the destination as the key and the source as the value:
    #  - The destination must be an existing response parameter in the MethodResponse property.
    #  - The source must be an existing method request parameter or a static value.
    list_objects_responses = [
        apigw.IntegrationResponse(
            status_code="200",
            response_parameters={
                'method.response.header.Timestamp':
                'integration.response.header.Date',
                'method.response.header.Content-Length':
                'integration.response.header.Content-Length',
                'method.response.header.Content-Type':
                'integration.response.header.Content-Type'
            }),
        # Raw strings: "\d" in a plain literal is an invalid escape
        # (SyntaxWarning on modern Python); bytes are identical.
        apigw.IntegrationResponse(status_code="400",
                                  selection_pattern=r"4\d{2}"),
        apigw.IntegrationResponse(status_code="500",
                                  selection_pattern=r"5\d{2}")
    ]

    list_objects_integration_options = apigw.IntegrationOptions(
        credentials_role=rest_api_role,
        integration_responses=list_objects_responses)

    # GET / -> S3 ListBuckets.
    get_s3_integration = apigw.AwsIntegration(
        service="s3",
        integration_http_method="GET",
        path='/',
        options=list_objects_integration_options)
    api.root.add_method(
        "GET",
        get_s3_integration,
        authorization_type=apigw.AuthorizationType.IAM,
        api_key_required=False,
        method_responses=[
            apigw.MethodResponse(
                status_code="200",
                response_parameters={
                    'method.response.header.Timestamp': False,
                    'method.response.header.Content-Length': False,
                    'method.response.header.Content-Type': False
                },
                response_models={'application/json': apigw.EmptyModel()}),
            apigw.MethodResponse(status_code="400"),
            apigw.MethodResponse(status_code="500")
        ],
        request_parameters={'method.request.header.Content-Type': False})

    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_apigateway/IntegrationOptions.html#aws_cdk.aws_apigateway.IntegrationOptions.request_parameters
    # Specify request parameters as key-value pairs (string-to-string mappings),
    # with a destination as the key and a source as the value.
    # The source must be an existing method request parameter or a static value.
    get_s3_folder_integration_options = apigw.IntegrationOptions(
        credentials_role=rest_api_role,
        integration_responses=list_objects_responses,
        request_parameters={
            "integration.request.path.bucket": "method.request.path.folder"
        })

    # GET /{folder} -> S3 ListObjects on the bucket named by the path.
    get_s3_folder_integration = apigw.AwsIntegration(
        service="s3",
        integration_http_method="GET",
        path="{bucket}",
        options=get_s3_folder_integration_options)
    s3_folder = api.root.add_resource('{folder}')
    s3_folder.add_method(
        "GET",
        get_s3_folder_integration,
        authorization_type=apigw.AuthorizationType.IAM,
        api_key_required=False,
        method_responses=[
            apigw.MethodResponse(
                status_code="200",
                response_parameters={
                    'method.response.header.Timestamp': False,
                    'method.response.header.Content-Length': False,
                    'method.response.header.Content-Type': False
                },
                response_models={'application/json': apigw.EmptyModel()}),
            apigw.MethodResponse(status_code="400"),
            apigw.MethodResponse(status_code="500")
        ],
        request_parameters={
            'method.request.header.Content-Type': False,
            'method.request.path.folder': True
        })

    # GET /{folder}/{item} -> S3 GetObject.
    get_s3_item_integration_options = apigw.IntegrationOptions(
        credentials_role=rest_api_role,
        integration_responses=list_objects_responses,
        request_parameters={
            "integration.request.path.bucket": "method.request.path.folder",
            "integration.request.path.object": "method.request.path.item"
        })
    get_s3_item_integration = apigw.AwsIntegration(
        service="s3",
        integration_http_method="GET",
        path="{bucket}/{object}",
        options=get_s3_item_integration_options)
    s3_item = s3_folder.add_resource('{item}')
    s3_item.add_method(
        "GET",
        get_s3_item_integration,
        authorization_type=apigw.AuthorizationType.IAM,
        api_key_required=False,
        method_responses=[
            apigw.MethodResponse(
                status_code="200",
                response_parameters={
                    'method.response.header.Timestamp': False,
                    'method.response.header.Content-Length': False,
                    'method.response.header.Content-Type': False
                },
                response_models={'application/json': apigw.EmptyModel()}),
            apigw.MethodResponse(status_code="400"),
            apigw.MethodResponse(status_code="500")
        ],
        request_parameters={
            'method.request.header.Content-Type': False,
            'method.request.path.folder': True,
            'method.request.path.item': True
        })

    # PUT /{folder}/{item} -> S3 PutObject (image upload path).
    put_s3_item_integration_options = apigw.IntegrationOptions(
        credentials_role=rest_api_role,
        integration_responses=[
            apigw.IntegrationResponse(status_code="200"),
            apigw.IntegrationResponse(status_code="400",
                                      selection_pattern=r"4\d{2}"),
            apigw.IntegrationResponse(status_code="500",
                                      selection_pattern=r"5\d{2}")
        ],
        request_parameters={
            "integration.request.header.Content-Type":
            "method.request.header.Content-Type",
            "integration.request.path.bucket": "method.request.path.folder",
            "integration.request.path.object": "method.request.path.item"
        })
    put_s3_item_integration = apigw.AwsIntegration(
        service="s3",
        integration_http_method="PUT",
        path="{bucket}/{object}",
        options=put_s3_item_integration_options)
    s3_item.add_method(
        "PUT",
        put_s3_item_integration,
        authorization_type=apigw.AuthorizationType.IAM,
        api_key_required=False,
        method_responses=[
            apigw.MethodResponse(
                status_code="200",
                response_parameters={
                    'method.response.header.Content-Type': False
                },
                response_models={'application/json': apigw.EmptyModel()}),
            apigw.MethodResponse(status_code="400"),
            apigw.MethodResponse(status_code="500")
        ],
        request_parameters={
            'method.request.header.Content-Type': False,
            'method.request.path.folder': True,
            'method.request.path.item': True
        })

    # Metadata table keyed by image id; shared by both text-extraction Lambdas.
    ddb_table = dynamodb.Table(
        self,
        "BizcardImageMetaInfoDdbTable",
        table_name="OctemberBizcardImgMeta",
        partition_key=dynamodb.Attribute(
            name="image_id", type=dynamodb.AttributeType.STRING),
        billing_mode=dynamodb.BillingMode.PROVISIONED,
        read_capacity=15,
        write_capacity=5)

    # Stream of newly-uploaded image paths.
    img_kinesis_stream = kinesis.Stream(
        self, "BizcardImagePath", stream_name="octember-bizcard-image")

    # Lambda fired on S3 upload; records metadata and enqueues the image path.
    trigger_textract_lambda_fn = _lambda.Function(
        self,
        "TriggerTextExtractorFromImage",
        runtime=_lambda.Runtime.PYTHON_3_7,
        function_name="TriggerTextExtractorFromImage",
        handler="trigger_text_extract_from_s3_image.lambda_handler",
        description="Trigger to extract text from an image in S3",
        code=_lambda.Code.asset(
            "./src/main/python/TriggerTextExtractFromS3Image"),
        environment={
            'REGION_NAME': core.Aws.REGION,
            'DDB_TABLE_NAME': ddb_table.table_name,
            'KINESIS_STREAM_NAME': img_kinesis_stream.stream_name
        },
        timeout=core.Duration.minutes(5))

    # Shared read/write policy for the metadata table (incl. DAX actions).
    ddb_table_rw_policy_statement = aws_iam.PolicyStatement(
        effect=aws_iam.Effect.ALLOW,
        resources=[ddb_table.table_arn],
        actions=[
            "dynamodb:BatchGetItem", "dynamodb:Describe*", "dynamodb:List*",
            "dynamodb:GetItem", "dynamodb:Query", "dynamodb:Scan",
            "dynamodb:BatchWriteItem", "dynamodb:DeleteItem",
            "dynamodb:PutItem", "dynamodb:UpdateItem", "dax:Describe*",
            "dax:List*", "dax:GetItem", "dax:BatchGetItem", "dax:Query",
            "dax:Scan", "dax:BatchWriteItem", "dax:DeleteItem", "dax:PutItem",
            "dax:UpdateItem"
        ])
    trigger_textract_lambda_fn.add_to_role_policy(
        ddb_table_rw_policy_statement)
    trigger_textract_lambda_fn.add_to_role_policy(
        aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                resources=[img_kinesis_stream.stream_arn],
                                actions=[
                                    "kinesis:Get*", "kinesis:List*",
                                    "kinesis:Describe*", "kinesis:PutRecord",
                                    "kinesis:PutRecords"
                                ]))

    # Fire only for .jpg uploads under the raw-image prefix.
    s3_event_filter = s3.NotificationKeyFilter(prefix="bizcard-raw-img/",
                                               suffix=".jpg")
    s3_event_source = S3EventSource(s3_bucket,
                                    events=[s3.EventType.OBJECT_CREATED],
                                    filters=[s3_event_filter])
    trigger_textract_lambda_fn.add_event_source(s3_event_source)

    #XXX: https://github.com/aws/aws-cdk/issues/2240
    # To avoid to create extra Lambda Functions with names like LogRetentionaae0aa3c5b4d4f87b02d85b201efdd8a
    # if log_retention=aws_logs.RetentionDays.THREE_DAYS is added to the constructor props
    log_group = aws_logs.LogGroup(
        self,
        "TriggerTextractLogGroup",
        log_group_name="/aws/lambda/TriggerTextExtractorFromImage",
        retention=aws_logs.RetentionDays.THREE_DAYS)
    log_group.grant_write(trigger_textract_lambda_fn)

    # Stream of extracted text records.
    text_kinesis_stream = kinesis.Stream(
        self, "BizcardTextData", stream_name="octember-bizcard-txt")

    # Lambda that runs Textract on queued images and publishes the text.
    textract_lambda_fn = _lambda.Function(
        self,
        "GetTextFromImage",
        runtime=_lambda.Runtime.PYTHON_3_7,
        function_name="GetTextFromImage",
        handler="get_text_from_s3_image.lambda_handler",
        description="extract text from an image in S3",
        code=_lambda.Code.asset("./src/main/python/GetTextFromS3Image"),
        environment={
            'REGION_NAME': core.Aws.REGION,
            'DDB_TABLE_NAME': ddb_table.table_name,
            'KINESIS_STREAM_NAME': text_kinesis_stream.stream_name
        },
        timeout=core.Duration.minutes(5))
    textract_lambda_fn.add_to_role_policy(ddb_table_rw_policy_statement)
    textract_lambda_fn.add_to_role_policy(
        aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                resources=[text_kinesis_stream.stream_arn],
                                actions=[
                                    "kinesis:Get*", "kinesis:List*",
                                    "kinesis:Describe*", "kinesis:PutRecord",
                                    "kinesis:PutRecords"
                                ]))
    textract_lambda_fn.add_to_role_policy(
        aws_iam.PolicyStatement(
            **{
                "effect": aws_iam.Effect.ALLOW,
                "resources": [
                    s3_bucket.bucket_arn,
                    "{}/*".format(s3_bucket.bucket_arn)
                ],
                "actions": [
                    "s3:AbortMultipartUpload", "s3:GetBucketLocation",
                    "s3:GetObject", "s3:ListBucket",
                    "s3:ListBucketMultipartUploads", "s3:PutObject"
                ]
            }))
    textract_lambda_fn.add_to_role_policy(
        aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                resources=["*"],
                                actions=["textract:*"]))
    img_kinesis_event_source = KinesisEventSource(
        img_kinesis_stream,
        batch_size=100,
        starting_position=_lambda.StartingPosition.LATEST)
    textract_lambda_fn.add_event_source(img_kinesis_event_source)

    log_group = aws_logs.LogGroup(
        self,
        "GetTextFromImageLogGroup",
        log_group_name="/aws/lambda/GetTextFromImage",
        retention=aws_logs.RetentionDays.THREE_DAYS)
    log_group.grant_write(textract_lambda_fn)

    # Security groups: "use-*" is attached to clients, the other guards the
    # service; ingress is granted from the client group to the service group.
    sg_use_bizcard_es = aws_ec2.SecurityGroup(
        self,
        "BizcardSearchClientSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for octember bizcard elasticsearch client',
        security_group_name='use-octember-bizcard-es')
    core.Tags.of(sg_use_bizcard_es).add('Name', 'use-octember-bizcard-es')

    sg_bizcard_es = aws_ec2.SecurityGroup(
        self,
        "BizcardSearchSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for octember bizcard elasticsearch',
        security_group_name='octember-bizcard-es')
    core.Tags.of(sg_bizcard_es).add('Name', 'octember-bizcard-es')

    sg_bizcard_es.add_ingress_rule(peer=sg_bizcard_es,
                                   connection=aws_ec2.Port.all_tcp(),
                                   description='octember-bizcard-es')
    sg_bizcard_es.add_ingress_rule(peer=sg_use_bizcard_es,
                                   connection=aws_ec2.Port.all_tcp(),
                                   description='use-octember-bizcard-es')

    # Bastion host for operators; SSH open to the world by design here.
    sg_ssh_access = aws_ec2.SecurityGroup(
        self,
        "BastionHostSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for bastion host',
        security_group_name='octember-bastion-host-sg')
    core.Tags.of(sg_ssh_access).add('Name', 'octember-bastion-host')
    sg_ssh_access.add_ingress_rule(peer=aws_ec2.Peer.any_ipv4(),
                                   connection=aws_ec2.Port.tcp(22),
                                   description='ssh access')

    bastion_host = aws_ec2.BastionHostLinux(
        self,
        "BastionHost",
        vpc=vpc,
        instance_type=aws_ec2.InstanceType('t3.nano'),
        security_group=sg_ssh_access,
        subnet_selection=aws_ec2.SubnetSelection(
            subnet_type=aws_ec2.SubnetType.PUBLIC))
    bastion_host.instance.add_security_group(sg_use_bizcard_es)

    #XXX: aws cdk elastsearch example - https://github.com/aws/aws-cdk/issues/2873
    es_cfn_domain = aws_elasticsearch.CfnDomain(
        self,
        'BizcardSearch',
        elasticsearch_cluster_config={
            "dedicatedMasterCount": 3,
            "dedicatedMasterEnabled": True,
            "dedicatedMasterType": "t2.medium.elasticsearch",
            "instanceCount": 2,
            "instanceType": "t2.medium.elasticsearch",
            "zoneAwarenessEnabled": True
        },
        ebs_options={
            "ebsEnabled": True,
            "volumeSize": 10,
            "volumeType": "gp2"
        },
        domain_name="octember-bizcard",
        elasticsearch_version="7.9",
        encryption_at_rest_options={"enabled": False},
        # Principal "*" is acceptable only because the domain lives in
        # private subnets behind its security group (vpc_options below).
        access_policies={
            "Version": "2012-10-17",
            "Statement": [{
                "Effect": "Allow",
                "Principal": {
                    "AWS": "*"
                },
                "Action":
                ["es:Describe*", "es:List*", "es:Get*", "es:ESHttp*"],
                "Resource":
                self.format_arn(service="es",
                                resource="domain",
                                resource_name="octember-bizcard/*")
            }]
        },
        snapshot_options={"automatedSnapshotStartHour": 17},
        vpc_options={
            "securityGroupIds": [sg_bizcard_es.security_group_id],
            "subnetIds":
            vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids
        })
    core.Tags.of(es_cfn_domain).add('Name', 'octember-bizcard-es')

    # Shared bucket holding pre-built Lambda layer archives; name comes
    # from cdk context ("lib_bucket_name").
    s3_lib_bucket_name = self.node.try_get_context("lib_bucket_name")
    #XXX: https://github.com/aws/aws-cdk/issues/1342
    s3_lib_bucket = s3.Bucket.from_bucket_name(self, id, s3_lib_bucket_name)
    es_lib_layer = _lambda.LayerVersion(
        self,
        "ESLib",
        layer_version_name="es-lib",
        compatible_runtimes=[_lambda.Runtime.PYTHON_3_7],
        code=_lambda.Code.from_bucket(s3_lib_bucket,
                                      "var/octember-es-lib.zip"))
    redis_lib_layer = _lambda.LayerVersion(
        self,
        "RedisLib",
        layer_version_name="redis-lib",
        compatible_runtimes=[_lambda.Runtime.PYTHON_3_7],
        code=_lambda.Code.from_bucket(s3_lib_bucket,
                                      "var/octember-redis-lib.zip"))

    #XXX: Deploy lambda in VPC - https://github.com/aws/aws-cdk/issues/1342
    upsert_to_es_lambda_fn = _lambda.Function(
        self,
        "UpsertBizcardToES",
        runtime=_lambda.Runtime.PYTHON_3_7,
        function_name="UpsertBizcardToElasticSearch",
        handler="upsert_bizcard_to_es.lambda_handler",
        description="Upsert bizcard text into elasticsearch",
        code=_lambda.Code.asset("./src/main/python/UpsertBizcardToES"),
        environment={
            'ES_HOST': es_cfn_domain.attr_domain_endpoint,
            'ES_INDEX': 'octember_bizcard',
            'ES_TYPE': 'bizcard'
        },
        timeout=core.Duration.minutes(5),
        layers=[es_lib_layer],
        security_groups=[sg_use_bizcard_es],
        vpc=vpc)

    # Shared by the ES upserter and (below) the Neptune upserter.
    text_kinesis_event_source = KinesisEventSource(
        text_kinesis_stream,
        batch_size=99,
        starting_position=_lambda.StartingPosition.LATEST)
    upsert_to_es_lambda_fn.add_event_source(text_kinesis_event_source)

    log_group = aws_logs.LogGroup(
        self,
        "UpsertBizcardToESLogGroup",
        log_group_name="/aws/lambda/UpsertBizcardToElasticSearch",
        retention=aws_logs.RetentionDays.THREE_DAYS)
    log_group.grant_write(upsert_to_es_lambda_fn)

    # Firehose delivery role: S3 write, Glue table read, Kinesis read,
    # CloudWatch Logs write.
    firehose_role_policy_doc = aws_iam.PolicyDocument()
    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            **{
                "effect": aws_iam.Effect.ALLOW,
                "resources": [
                    s3_bucket.bucket_arn,
                    "{}/*".format(s3_bucket.bucket_arn)
                ],
                "actions": [
                    "s3:AbortMultipartUpload", "s3:GetBucketLocation",
                    "s3:GetObject", "s3:ListBucket",
                    "s3:ListBucketMultipartUploads", "s3:PutObject"
                ]
            }))
    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                resources=["*"],
                                actions=[
                                    "glue:GetTable", "glue:GetTableVersion",
                                    "glue:GetTableVersions"
                                ]))
    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                resources=[text_kinesis_stream.stream_arn],
                                actions=[
                                    "kinesis:DescribeStream",
                                    "kinesis:GetShardIterator",
                                    "kinesis:GetRecords"
                                ]))

    firehose_log_group_name = "/aws/kinesisfirehose/octember-bizcard-txt-to-s3"
    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            #XXX: The ARN will be formatted as follows:
            # arn:{partition}:{service}:{region}:{account}:{resource}{sep}}{resource-name}
            resources=[
                self.format_arn(service="logs",
                                resource="log-group",
                                resource_name="{}:log-stream:*".format(
                                    firehose_log_group_name),
                                sep=":")
            ],
            actions=["logs:PutLogEvents"]))

    firehose_role = aws_iam.Role(
        self,
        "FirehoseDeliveryRole",
        role_name="FirehoseDeliveryRole",
        assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"),
        #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
        inline_policies={"firehose_role_policy": firehose_role_policy_doc})

    # Archive the text stream to S3 (gzip, 1 MB / 60 s buffering).
    bizcard_text_to_s3_delivery_stream = aws_kinesisfirehose.CfnDeliveryStream(
        self,
        "BizcardTextToS3",
        delivery_stream_name="octember-bizcard-txt-to-s3",
        delivery_stream_type="KinesisStreamAsSource",
        kinesis_stream_source_configuration={
            "kinesisStreamArn": text_kinesis_stream.stream_arn,
            "roleArn": firehose_role.role_arn
        },
        extended_s3_destination_configuration={
            "bucketArn": s3_bucket.bucket_arn,
            "bufferingHints": {
                "intervalInSeconds": 60,
                "sizeInMBs": 1
            },
            "cloudWatchLoggingOptions": {
                "enabled": True,
                "logGroupName": firehose_log_group_name,
                "logStreamName": "S3Delivery"
            },
            "compressionFormat": "GZIP",
            "prefix": "bizcard-text/",
            "roleArn": firehose_role.role_arn
        })

    # Redis cache for search query results.
    sg_use_bizcard_es_cache = aws_ec2.SecurityGroup(
        self,
        "BizcardSearchCacheClientSG",
        vpc=vpc,
        allow_all_outbound=True,
        description=
        'security group for octember bizcard search query cache client',
        security_group_name='use-octember-bizcard-es-cache')
    core.Tags.of(sg_use_bizcard_es_cache).add(
        'Name', 'use-octember-bizcard-es-cache')

    sg_bizcard_es_cache = aws_ec2.SecurityGroup(
        self,
        "BizcardSearchCacheSG",
        vpc=vpc,
        allow_all_outbound=True,
        description=
        'security group for octember bizcard search query cache',
        security_group_name='octember-bizcard-es-cache')
    core.Tags.of(sg_bizcard_es_cache).add('Name', 'octember-bizcard-es-cache')

    sg_bizcard_es_cache.add_ingress_rule(
        peer=sg_use_bizcard_es_cache,
        connection=aws_ec2.Port.tcp(6379),
        description='use-octember-bizcard-es-cache')

    #XXX: If you're going to launch your cluster in an Amazon VPC, you need
    # to create a subnet group before you start creating a cluster.
    # https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-elasticache-cache-cluster.html#cfn-elasticache-cachecluster-cachesubnetgroupname
    es_query_cache_subnet_group = aws_elasticache.CfnSubnetGroup(
        self,
        "QueryCacheSubnetGroup",
        description="subnet group for octember-bizcard-es-cache",
        subnet_ids=vpc.select_subnets(
            subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids,
        cache_subnet_group_name='octember-bizcard-es-cache')

    es_query_cache = aws_elasticache.CfnCacheCluster(
        self,
        "BizcardSearchQueryCache",
        cache_node_type="cache.t3.small",
        num_cache_nodes=1,
        engine="redis",
        engine_version="5.0.5",
        auto_minor_version_upgrade=False,
        cluster_name="octember-bizcard-es-cache",
        snapshot_retention_limit=3,
        snapshot_window="17:00-19:00",
        preferred_maintenance_window="mon:19:00-mon:20:30",
        #XXX: Do not use referece for "cache_subnet_group_name" - https://github.com/aws/aws-cdk/issues/3098
        #cache_subnet_group_name=es_query_cache_subnet_group.cache_subnet_group_name, # Redis cluster goes to wrong VPC
        cache_subnet_group_name='octember-bizcard-es-cache',
        vpc_security_group_ids=[sg_bizcard_es_cache.security_group_id])
    es_query_cache.add_depends_on(es_query_cache_subnet_group)

    #XXX: add more than 2 security groups
    # https://github.com/aws/aws-cdk/blob/ea10f0d141a48819ec0000cd7905feda993870a9/packages/%40aws-cdk/aws-lambda/lib/function.ts#L387
    # https://github.com/aws/aws-cdk/issues/1555
    # https://github.com/aws/aws-cdk/pull/5049
    bizcard_search_lambda_fn = _lambda.Function(
        self,
        "BizcardSearchServer",
        runtime=_lambda.Runtime.PYTHON_3_7,
        function_name="BizcardSearchProxy",
        handler="es_search_bizcard.lambda_handler",
        description="Proxy server to search bizcard text",
        code=_lambda.Code.asset("./src/main/python/SearchBizcard"),
        environment={
            'ES_HOST': es_cfn_domain.attr_domain_endpoint,
            'ES_INDEX': 'octember_bizcard',
            'ES_TYPE': 'bizcard',
            'ELASTICACHE_HOST': es_query_cache.attr_redis_endpoint_address
        },
        timeout=core.Duration.minutes(1),
        layers=[es_lib_layer, redis_lib_layer],
        security_groups=[sg_use_bizcard_es, sg_use_bizcard_es_cache],
        vpc=vpc)

    #XXX: create API Gateway + LambdaProxy
    search_api = apigw.LambdaRestApi(
        self,
        "BizcardSearchAPI",
        handler=bizcard_search_lambda_fn,
        proxy=False,
        rest_api_name="BizcardSearch",
        description="This service serves searching bizcard text.",
        endpoint_types=[apigw.EndpointType.REGIONAL],
        deploy=True,
        deploy_options=apigw.StageOptions(stage_name="v1"))
    bizcard_search = search_api.root.add_resource('search')
    bizcard_search.add_method(
        "GET",
        method_responses=[
            apigw.MethodResponse(
                status_code="200",
                response_models={'application/json': apigw.EmptyModel()}),
            apigw.MethodResponse(status_code="400"),
            apigw.MethodResponse(status_code="500")
        ])

    # Neptune graph database for the PYMK recommender.
    sg_use_bizcard_graph_db = aws_ec2.SecurityGroup(
        self,
        "BizcardGraphDbClientSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for octember bizcard graph db client',
        security_group_name='use-octember-bizcard-neptune')
    core.Tags.of(sg_use_bizcard_graph_db).add(
        'Name', 'use-octember-bizcard-neptune')

    sg_bizcard_graph_db = aws_ec2.SecurityGroup(
        self,
        "BizcardGraphDbSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for octember bizcard graph db',
        security_group_name='octember-bizcard-neptune')
    core.Tags.of(sg_bizcard_graph_db).add('Name', 'octember-bizcard-neptune')

    sg_bizcard_graph_db.add_ingress_rule(
        peer=sg_bizcard_graph_db,
        connection=aws_ec2.Port.tcp(8182),
        description='octember-bizcard-neptune')
    sg_bizcard_graph_db.add_ingress_rule(
        peer=sg_use_bizcard_graph_db,
        connection=aws_ec2.Port.tcp(8182),
        description='use-octember-bizcard-neptune')

    bizcard_graph_db_subnet_group = aws_neptune.CfnDBSubnetGroup(
        self,
        "NeptuneSubnetGroup",
        db_subnet_group_description=
        "subnet group for octember-bizcard-neptune",
        subnet_ids=vpc.select_subnets(
            subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids,
        db_subnet_group_name='octember-bizcard-neptune')

    bizcard_graph_db = aws_neptune.CfnDBCluster(
        self,
        "BizcardGraphDB",
        availability_zones=vpc.availability_zones,
        db_subnet_group_name=bizcard_graph_db_subnet_group.
        db_subnet_group_name,
        db_cluster_identifier="octember-bizcard",
        backup_retention_period=1,
        preferred_backup_window="08:45-09:15",
        preferred_maintenance_window="sun:18:00-sun:18:30",
        vpc_security_group_ids=[sg_bizcard_graph_db.security_group_id])
    bizcard_graph_db.add_depends_on(bizcard_graph_db_subnet_group)

    # Writer in the first AZ, replica in the last; replica depends on both
    # the cluster and the writer so creation order is deterministic.
    bizcard_graph_db_instance = aws_neptune.CfnDBInstance(
        self,
        "BizcardGraphDBInstance",
        db_instance_class="db.r5.large",
        allow_major_version_upgrade=False,
        auto_minor_version_upgrade=False,
        availability_zone=vpc.availability_zones[0],
        db_cluster_identifier=bizcard_graph_db.db_cluster_identifier,
        db_instance_identifier="octember-bizcard",
        preferred_maintenance_window="sun:18:00-sun:18:30")
    bizcard_graph_db_instance.add_depends_on(bizcard_graph_db)

    bizcard_graph_db_replica_instance = aws_neptune.CfnDBInstance(
        self,
        "BizcardGraphDBReplicaInstance",
        db_instance_class="db.r5.large",
        allow_major_version_upgrade=False,
        auto_minor_version_upgrade=False,
        availability_zone=vpc.availability_zones[-1],
        db_cluster_identifier=bizcard_graph_db.db_cluster_identifier,
        db_instance_identifier="octember-bizcard-replica",
        preferred_maintenance_window="sun:18:00-sun:18:30")
    bizcard_graph_db_replica_instance.add_depends_on(bizcard_graph_db)
    bizcard_graph_db_replica_instance.add_depends_on(
        bizcard_graph_db_instance)

    gremlinpython_lib_layer = _lambda.LayerVersion(
        self,
        "GremlinPythonLib",
        layer_version_name="gremlinpython-lib",
        compatible_runtimes=[_lambda.Runtime.PYTHON_3_7],
        code=_lambda.Code.from_bucket(
            s3_lib_bucket, "var/octember-gremlinpython-lib.zip"))

    #XXX: https://github.com/aws/aws-cdk/issues/1342
    upsert_to_neptune_lambda_fn = _lambda.Function(
        self,
        "UpsertBizcardToGraphDB",
        runtime=_lambda.Runtime.PYTHON_3_7,
        function_name="UpsertBizcardToNeptune",
        handler="upsert_bizcard_to_graph_db.lambda_handler",
        description="Upsert bizcard into neptune",
        code=_lambda.Code.asset(
            "./src/main/python/UpsertBizcardToGraphDB"),
        environment={
            'REGION_NAME': core.Aws.REGION,
            'NEPTUNE_ENDPOINT': bizcard_graph_db.attr_endpoint,
            'NEPTUNE_PORT': bizcard_graph_db.attr_port
        },
        timeout=core.Duration.minutes(5),
        layers=[gremlinpython_lib_layer],
        security_groups=[sg_use_bizcard_graph_db],
        vpc=vpc)
    upsert_to_neptune_lambda_fn.add_event_source(text_kinesis_event_source)

    log_group = aws_logs.LogGroup(
        self,
        "UpsertBizcardToGraphDBLogGroup",
        log_group_name="/aws/lambda/UpsertBizcardToNeptune",
        retention=aws_logs.RetentionDays.THREE_DAYS)
    log_group.grant_write(upsert_to_neptune_lambda_fn)

    # Redis cache for recommendation query results.
    sg_use_bizcard_neptune_cache = aws_ec2.SecurityGroup(
        self,
        "BizcardNeptuneCacheClientSG",
        vpc=vpc,
        allow_all_outbound=True,
        description=
        'security group for octember bizcard recommendation query cache client',
        security_group_name='use-octember-bizcard-neptune-cache')
    # NOTE(review): tag value reuses the es-cache name; looks like a
    # copy-paste slip but is tag-only, so left as-is.
    core.Tags.of(sg_use_bizcard_neptune_cache).add(
        'Name', 'use-octember-bizcard-es-cache')

    sg_bizcard_neptune_cache = aws_ec2.SecurityGroup(
        self,
        "BizcardNeptuneCacheSG",
        vpc=vpc,
        allow_all_outbound=True,
        description=
        'security group for octember bizcard recommendation query cache',
        security_group_name='octember-bizcard-neptune-cache')
    core.Tags.of(sg_bizcard_neptune_cache).add(
        'Name', 'octember-bizcard-neptune-cache')

    sg_bizcard_neptune_cache.add_ingress_rule(
        peer=sg_use_bizcard_neptune_cache,
        connection=aws_ec2.Port.tcp(6379),
        description='use-octember-bizcard-neptune-cache')

    recomm_query_cache_subnet_group = aws_elasticache.CfnSubnetGroup(
        self,
        "RecommQueryCacheSubnetGroup",
        description="subnet group for octember-bizcard-neptune-cache",
        subnet_ids=vpc.select_subnets(
            subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids,
        cache_subnet_group_name='octember-bizcard-neptune-cache')

    recomm_query_cache = aws_elasticache.CfnCacheCluster(
        self,
        "BizcardRecommQueryCache",
        cache_node_type="cache.t3.small",
        num_cache_nodes=1,
        engine="redis",
        engine_version="5.0.5",
        auto_minor_version_upgrade=False,
        cluster_name="octember-bizcard-neptune-cache",
        snapshot_retention_limit=3,
        snapshot_window="17:00-19:00",
        preferred_maintenance_window="mon:19:00-mon:20:30",
        #XXX: Do not use referece for "cache_subnet_group_name" - https://github.com/aws/aws-cdk/issues/3098
        #cache_subnet_group_name=recomm_query_cache_subnet_group.cache_subnet_group_name, # Redis cluster goes to wrong VPC
        cache_subnet_group_name='octember-bizcard-neptune-cache',
        vpc_security_group_ids=[
            sg_bizcard_neptune_cache.security_group_id
        ])
    recomm_query_cache.add_depends_on(recomm_query_cache_subnet_group)

    bizcard_recomm_lambda_fn = _lambda.Function(
        self,
        "BizcardRecommender",
        runtime=_lambda.Runtime.PYTHON_3_7,
        function_name="BizcardRecommender",
        handler="neptune_recommend_bizcard.lambda_handler",
        description="This service serves PYMK(People You May Know).",
        code=_lambda.Code.asset("./src/main/python/RecommendBizcard"),
        environment={
            'REGION_NAME': core.Aws.REGION,
            # Reads go to the replica-backed reader endpoint.
            'NEPTUNE_ENDPOINT': bizcard_graph_db.attr_read_endpoint,
            'NEPTUNE_PORT': bizcard_graph_db.attr_port,
            'ELASTICACHE_HOST':
            recomm_query_cache.attr_redis_endpoint_address
        },
        timeout=core.Duration.minutes(1),
        layers=[gremlinpython_lib_layer, redis_lib_layer],
        security_groups=[
            sg_use_bizcard_graph_db, sg_use_bizcard_neptune_cache
        ],
        vpc=vpc)

    #XXX: create API Gateway + LambdaProxy
    recomm_api = apigw.LambdaRestApi(
        self,
        "BizcardRecommendAPI",
        handler=bizcard_recomm_lambda_fn,
        proxy=False,
        rest_api_name="BizcardRecommend",
        description="This service serves PYMK(People You May Know).",
        endpoint_types=[apigw.EndpointType.REGIONAL],
        deploy=True,
        deploy_options=apigw.StageOptions(stage_name="v1"))
    bizcard_recomm = recomm_api.root.add_resource('pymk')
    bizcard_recomm.add_method(
        "GET",
        method_responses=[
            apigw.MethodResponse(
                status_code="200",
                response_models={'application/json': apigw.EmptyModel()}),
            apigw.MethodResponse(status_code="400"),
            apigw.MethodResponse(status_code="500")
        ])

    # SageMaker notebook (Neptune workbench): may fetch the graph-notebook
    # package from the public bucket and connect to this Neptune cluster.
    sagemaker_notebook_role_policy_doc = aws_iam.PolicyDocument()
    sagemaker_notebook_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            **{
                "effect": aws_iam.Effect.ALLOW,
                "resources": [
                    "arn:aws:s3:::aws-neptune-notebook",
                    "arn:aws:s3:::aws-neptune-notebook/*"
                ],
                "actions": ["s3:GetObject", "s3:ListBucket"]
            }))
    sagemaker_notebook_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            **{
                "effect": aws_iam.Effect.ALLOW,
                "resources": [
                    "arn:aws:neptune-db:{region}:{account}:{cluster_id}/*".
                    format(region=core.Aws.REGION,
                           account=core.Aws.ACCOUNT_ID,
                           cluster_id=bizcard_graph_db.
                           attr_cluster_resource_id)
                ],
                "actions": ["neptune-db:connect"]
            }))

    sagemaker_notebook_role = aws_iam.Role(
        self,
        'SageMakerNotebookForNeptuneWorkbenchRole',
        role_name='AWSNeptuneNotebookRole-OctemberBizcard',
        assumed_by=aws_iam.ServicePrincipal('sagemaker.amazonaws.com'),
        #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
        inline_policies={
            'AWSNeptuneNotebook': sagemaker_notebook_role_policy_doc
        })

    # On-start script: point graph-notebook at this cluster and install it.
    neptune_wb_lifecycle_content = '''#!/bin/bash
sudo -u ec2-user -i <<'EOF'
echo "export GRAPH_NOTEBOOK_AUTH_MODE=DEFAULT" >> ~/.bashrc
echo "export GRAPH_NOTEBOOK_HOST={NeptuneClusterEndpoint}" >> ~/.bashrc
echo "export GRAPH_NOTEBOOK_PORT={NeptuneClusterPort}" >> ~/.bashrc
echo "export NEPTUNE_LOAD_FROM_S3_ROLE_ARN=''" >> ~/.bashrc
echo "export AWS_REGION={AWS_Region}" >> ~/.bashrc
aws s3 cp s3://aws-neptune-notebook/graph_notebook.tar.gz /tmp/graph_notebook.tar.gz
rm -rf /tmp/graph_notebook
tar -zxvf /tmp/graph_notebook.tar.gz -C /tmp
/tmp/graph_notebook/install.sh
EOF
'''.format(NeptuneClusterEndpoint=bizcard_graph_db.attr_endpoint,
           NeptuneClusterPort=bizcard_graph_db.attr_port,
           AWS_Region=core.Aws.REGION)

    neptune_wb_lifecycle_config_prop = aws_sagemaker.CfnNotebookInstanceLifecycleConfig.NotebookInstanceLifecycleHookProperty(
        content=core.Fn.base64(neptune_wb_lifecycle_content))

    neptune_wb_lifecycle_config = aws_sagemaker.CfnNotebookInstanceLifecycleConfig(
        self,
        'NpetuneWorkbenchLifeCycleConfig',
        notebook_instance_lifecycle_config_name=
        'AWSNeptuneWorkbenchOctemberBizcardLCConfig',
        on_start=[neptune_wb_lifecycle_config_prop])

    neptune_workbench = aws_sagemaker.CfnNotebookInstance(
        self,
        'NeptuneWorkbench',
        instance_type='ml.t2.medium',
        role_arn=sagemaker_notebook_role.role_arn,
        lifecycle_config_name=neptune_wb_lifecycle_config.
        notebook_instance_lifecycle_config_name,
        notebook_instance_name='OctemberBizcard-NeptuneWorkbench',
        root_access='Disabled',
        # FIX: SecurityGroupIds requires security group IDs; the original
        # passed .security_group_name, which CloudFormation rejects.
        security_group_ids=[sg_use_bizcard_graph_db.security_group_id],
        subnet_id=bizcard_graph_db_subnet_group.subnet_ids[0])
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    """Provision the web-log analytics demo stack.

    Resources created, in order: an S3 bucket for web logs, a two-AZ VPC,
    an EC2 instance profile with S3 + network permissions, an Elasticsearch
    domain in the private subnets, an SNS topic with an email subscription,
    a scheduled Lambda, an internet-facing ALB, a bastion host, a one-host
    autoscaling group behind the ALB, and a set of CfnOutputs with
    copy-paste operational commands.
    """
    super().__init__(scope, id, **kwargs)

    # S3 bucket that stores the web logs; a short random suffix keeps the
    # bucket name globally unique.
    s3_bucket_name = "{}-s3-{}".format(Constant.PROJECT_NAME,
                                       self._get_UUID(4))
    _s3.Bucket(
        self,
        id=s3_bucket_name,
        bucket_name=s3_bucket_name,
        removal_policy=core.RemovalPolicy.
        DESTROY,  #TODO: destroy for test
        # removal_policy=core.RemovalPolicy.RETAIN
    )

    # step 1. VPC
    # To build inside an existing VPC instead, look it up by vpc_id:
    # vpc = ec2.Vpc.from_lookup(self, "VPC", vpc_id='')
    vpc = ec2.Vpc(
        self,
        "VPC",
        max_azs=2,  # two AZs; each subnet group gets one subnet per AZ
        cidr="10.10.0.0/16",
        # 2 active groups in 2 AZs = 4 subnets (6 if the ISOLATED "DB"
        # group below is re-enabled).
        subnet_configuration=[
            ec2.SubnetConfiguration(subnet_type=ec2.SubnetType.PUBLIC,
                                    name="Public",
                                    cidr_mask=24),
            ec2.SubnetConfiguration(subnet_type=ec2.SubnetType.PRIVATE,
                                    name="Private",
                                    cidr_mask=24),
            # ec2.SubnetConfiguration(
            #     subnet_type=ec2.SubnetType.ISOLATED,
            #     name="DB",
            #     cidr_mask=24
            # )
        ],
        # nat_gateway_provider=ec2.NatProvider.gateway(),
        # nat_gateways=2,
    )

    # The ES domain must be deployed into the private subnets.
    selection = vpc.select_subnets(subnet_type=ec2.SubnetType.PRIVATE)

    # step 2. IAM instance profile the EC2 hosts need to access S3 + the
    # ES cluster. Build-up chain:
    # action -> statement -> policy -> role -> instance profile -> attach ec2
    actions = [
        "ec2:CreateNetworkInterface", "ec2:DeleteNetworkInterface",
        "ec2:DescribeNetworkInterfaces",
        "ec2:ModifyNetworkInterfaceAttribute",
        "ec2:DescribeSecurityGroups", "ec2:DescribeSubnets",
        "ec2:DescribeVpcs", "s3:*"
    ]
    policyStatement = PolicyStatement(actions=actions, effect=Effect.ALLOW)
    policyStatement.add_all_resources()
    policyStatement.sid = "Stmt1480452973134"
    policy_name = "{}-ec2-es-policy".format(Constant.PROJECT_NAME)
    ec2_policy = Policy(self, policy_name, policy_name=policy_name)
    ec2_policy.add_statements(policyStatement)
    role_name = "{}-ec2-es-role".format(Constant.PROJECT_NAME)
    # NOTE(review): 'ec2.amazonaws.com.cn' is the China-partition service
    # principal; deployments in other partitions need 'ec2.amazonaws.com'.
    access_es_role = Role(
        self,
        role_name,
        role_name=role_name,
        assumed_by=ServicePrincipal('ec2.amazonaws.com.cn'))
    ec2_policy.attach_to_role(access_es_role)
    profile_name = "{}-ec2-es-profile".format(Constant.PROJECT_NAME)
    instance_profile = CfnInstanceProfile(
        self,
        profile_name,
        instance_profile_name=profile_name,
        roles=[access_es_role.role_name])

    # step 4. ES
    # For production, restrict the security group to 443 from inside the
    # VPC only (which is what the single ingress rule below does).
    sg_es_cluster_name = "{}-sg-es".format(Constant.PROJECT_NAME)
    sg_es_cluster = ec2.SecurityGroup(
        self,
        id=sg_es_cluster_name,
        vpc=vpc,
        security_group_name=sg_es_cluster_name)
    sg_es_cluster.add_ingress_rule(peer=ec2.Peer.ipv4(vpc.vpc_cidr_block),
                                   connection=ec2.Port.tcp(443))

    es_name = Constant.PROJECT_NAME
    es_arn = self.format_arn(service="es",
                             resource="domain",
                             sep="/",
                             resource_name=es_name)
    es = elasticsearch.CfnDomain(
        self,
        es_name,
        elasticsearch_version='7.1',
        domain_name=es_name,
        node_to_node_encryption_options={"enabled": False},
        vpc_options={
            "securityGroupIds": [sg_es_cluster.security_group_id],
            # Multiple nodes would require multiple subnets; this test
            # setup runs a single ES node, so only one subnet is used.
            "subnetIds": selection.subnet_ids[:1]
        },
        ebs_options={
            "ebsEnabled": True,
            "volumeSize": 10,
            "volumeType": "gp2"
        },
        elasticsearch_cluster_config={
            # Production should run three dedicated master nodes:
            # "dedicatedMasterCount": 3,
            # "dedicatedMasterEnabled": True,
            # "dedicatedMasterType": 'm4.large.elasticsearch',
            "instanceCount": 1,
            "instanceType": 'm4.large.elasticsearch',
            "zoneAwarenessEnabled": False
        })
    # Any principal may call the domain; network reachability is already
    # limited to the VPC by the security group above.
    es.access_policies = {
        "Version": "2012-10-17",
        "Statement": [{
            "Effect": "Allow",
            "Principal": {
                "AWS": "*"
            },
            "Action": "es:*",
            "Resource": "{}/*".format(es_arn)
        }]
    }

    # step 5. SNS
    topic = sns.Topic(self, "topic")
    topic.add_subscription(subs.EmailSubscription(Constant.EMAIL_ADDRESS))

    # SNS interface endpoint so the Lambda can publish from inside the VPC
    # (the private subnets have no direct internet route here).
    vpc.add_interface_endpoint(
        "SNSEndpoint", service=ec2.InterfaceVpcEndpointAwsService.SNS)

    # step 6. Lambda
    lambdaFn = lambda_.Function(self,
                                "Singleton",
                                code=lambda_.Code.asset('lambda'),
                                handler='hello.handler',
                                vpc=vpc,
                                vpc_subnets=ec2.SubnetSelection(
                                    subnet_type=ec2.SubnetType.PRIVATE),
                                timeout=core.Duration.seconds(300),
                                runtime=lambda_.Runtime.PYTHON_3_7,
                                environment={
                                    'SNS_TOPIC_ARN': topic.topic_arn,
                                    'ES_ENDPOINT': es.attr_domain_endpoint,
                                    'ES_INDEX_NAME': Constant.ES_INDEX_NAME
                                })

    # step 7. CloudWatch event rule: run the Lambda every 5 minutes.
    rule = events.Rule(
        self,
        "Rule",
        schedule=events.Schedule.cron(minute='0/5',
                                      hour='*',
                                      month='*',
                                      week_day='*',
                                      year='*'),
    )
    rule.add_target(targets.LambdaFunction(lambdaFn))

    # Grant the Lambda permission to publish to the SNS topic.
    topic.grant_publish(lambdaFn)

    # Create ALB
    alb_name = "{}-alb".format(Constant.PROJECT_NAME)
    alb = elb.ApplicationLoadBalancer(self,
                                      alb_name,
                                      vpc=vpc,
                                      internet_facing=True,
                                      load_balancer_name=alb_name)
    alb.connections.allow_from_any_ipv4(ec2.Port.tcp(80),
                                        "Internet access ALB 80")
    listener = alb.add_listener("my80", port=80, open=True)

    # Create Autoscaling Group with fixed 2*EC2 hosts
    user_data = user_data_content.format(es.attr_domain_endpoint,
                                         Constant.REGION_NAME,
                                         Constant.ES_LOG_PATH,
                                         Constant.ES_INDEX_NAME,
                                         s3_bucket_name)

    # step 3. Bastion host.
    bastion_name = "{}-bastion".format(Constant.PROJECT_NAME)
    bastion = ec2.BastionHostLinux(
        self,
        bastion_name,
        vpc=vpc,
        subnet_selection=ec2.SubnetSelection(
            subnet_type=ec2.SubnetType.PUBLIC),
        instance_name=bastion_name,
        instance_type=ec2.InstanceType(
            instance_type_identifier="m4.large"))
    bastion.instance.instance.add_property_override(
        "KeyName", Constant.EC2_KEY_NAME)
    # Production should restrict allow_from to a known IP range.
    bastion.connections.allow_from_any_ipv4(
        ec2.Port.tcp(22), "Internet access SSH")
    bastion.connections.allow_from_any_ipv4(
        ec2.Port.tcp(8080), "Internet access HTTP")  # needed for testing
    # bastion.connections.allow_from_any_ipv4(ec2.Port.tcp(443), "Internet access HTTPS")  # needed for testing
    # Attach the instance profile built in step 2 — effectively giving the
    # bastion the access_es_role (set via the L1 resource).
    bastion.instance.instance.iam_instance_profile = instance_profile.instance_profile_name
    # Pin the AMI ID for the current region.
    bastion.instance.instance.image_id = ami_map.get(Constant.REGION_NAME)

    # The bastion's user_data runs only once at first boot; to run it on
    # every boot, see
    # https://amazonaws-china.com/premiumsupport/knowledge-center/execute-user-data-ec2/?nc1=h_ls
    bastion_user_data = "/home/ec2-user/start.sh {} {} '{}' {} {}".format(
        es.attr_domain_endpoint, Constant.REGION_NAME,
        Constant.ES_LOG_PATH, Constant.ES_INDEX_NAME, s3_bucket_name)
    bastion.instance.add_user_data(
        "date >> /home/ec2-user/root.txt")  # marker to check the startup script ran
    bastion.instance.add_user_data(bastion_user_data)

    asg_name = "{}-asg".format(Constant.PROJECT_NAME)
    asg = autoscaling.AutoScalingGroup(
        self,
        asg_name,
        vpc=vpc,
        vpc_subnets=ec2.SubnetSelection(
            subnet_type=ec2.SubnetType.PUBLIC),  # PUBLIC for debug
        instance_type=ec2.InstanceType(
            instance_type_identifier="m4.large"),
        machine_image=my_ami,
        key_name=Constant.EC2_KEY_NAME,
        user_data=ec2.UserData.custom(user_data),
        desired_capacity=1,
        min_capacity=1,
        max_capacity=1,
        role=access_es_role)

    asg.connections.allow_from(
        alb, ec2.Port.tcp(8080),
        "ALB access 80 port of EC2 in Autoscaling Group")
    # asg.connections.allow_from_any_ipv4(ec2.Port.tcp(8080), "Internet access HTTP for test")  # for testing
    asg.connections.allow_from_any_ipv4(ec2.Port.tcp(22),
                                        "Internet access SSH")  # for debug
    listener.add_targets("addTargetGroup", port=8080, targets=[asg])

    core.CfnOutput(self,
                   "s3_bucket_name",
                   value=s3_bucket_name,
                   description='S3 bucket: store web log')
    core.CfnOutput(self,
                   "ElasticSearchEndpointUrl",
                   value=es.attr_domain_endpoint,
                   description='Elastic Search Url')
    # Count log documents in ES — handy to run on the bastion for a quick
    # sanity check of the pipeline.
    core.CfnOutput(self,
                   "CmdGetCountIndex",
                   value='curl https://{}/{}/_count'.format(
                       es.attr_domain_endpoint, Constant.ES_INDEX_NAME),
                   description='Count search result. ')
    # Copy-paste-ready SSH login command for the bastion.
    core.CfnOutput(self,
                   "CmdSshToBastion",
                   value='ssh -i ~/{}.pem ec2-user@{}'.format(
                       Constant.EC2_KEY_NAME,
                       bastion.instance_public_dns_name),
                   description='cmd ssh to bastion')
    # After the bastion reboots, run this command to restart the web
    # service that ships logs to ES.
    core.CfnOutput(
        self,
        "CmdSshBastionStartWeb",
        value='sudo {}'.format(bastion_user_data),
        description="Cmd to start web+logstash+filebeat service")
    # ALB access URL.
    core.CfnOutput(self,
                   "UrlLoad_Balancer",
                   value='http://{}'.format(alb.load_balancer_dns_name),
                   description='ALB url ')
    # Bastion web URL; for debugging convenience the bastion uses the same
    # AMI as the ASG hosts.
    core.CfnOutput(self,
                   "UrlBastion",
                   value='http://{}:8080'.format(
                       bastion.instance_public_dns_name),
                   description="Bastion server web url ")
    # SSH tunnel through the bastion to Elasticsearch so Kibana can be
    # reached from localhost.
    core.CfnOutput(
        self,
        "CmdSshProxyToKibana",
        value='ssh -i ~/{}.pem ec2-user@{} -N -L 9200:{}:443'.format(
            Constant.EC2_KEY_NAME, bastion.instance_public_dns_name,
            es.attr_domain_endpoint),
        description="cmd: access kibana from bastion ssh. ")
    # After starting the tunnel above, open this URL in a browser.
    core.CfnOutput(self,
                   "UrlKibana",
                   value='https://localhost:9200/_plugin/kibana/',
                   description="kibana url ")
def __init__(self, scope: core.Construct, id: str, application_prefix: str,
             suffix: str, kda_role: Role, **kwargs):
    """Provision a Cognito-authenticated Elasticsearch domain.

    Creates a Cognito user pool + identity pool, master/limited IAM roles,
    an encrypted Elasticsearch 7.9 domain with fine-grained access control,
    and two bootstrap Lambdas run as custom resources: one registers index
    templates on the cluster, the other locks down the identity-pool role
    mapping so users must be placed in groups explicitly.

    :param application_prefix: prefix used for pool/domain names.
    :param suffix: unique suffix for the Cognito user-pool domain.
    :param kda_role: Kinesis Data Analytics role granted cluster access.
    """
    super().__init__(scope, id, **kwargs)
    stack = Stack.of(self)
    region = stack.region

    # Create Cognito User Pool (admin-only sign-up, email as username).
    self.__user_pool = CfnUserPool(
        scope=self,
        id='UserPool',
        admin_create_user_config={'allowAdminCreateUserOnly': True},
        policies={'passwordPolicy': {
            'minimumLength': 8
        }},
        username_attributes=['email'],
        auto_verified_attributes=['email'],
        user_pool_name=application_prefix + '_user_pool')

    # Create a Cognito User Pool Domain using the newly created User Pool.
    CfnUserPoolDomain(scope=self,
                      id='CognitoDomain',
                      domain=application_prefix + '-' + suffix,
                      user_pool_id=self.user_pool.ref)

    # Create Cognito Identity Pool (authenticated identities only).
    self.__id_pool = CfnIdentityPool(
        scope=self,
        id='IdentityPool',
        allow_unauthenticated_identities=False,
        cognito_identity_providers=[],
        identity_pool_name=application_prefix + '_identity_pool')

    # Trust policy: only authenticated identities from this identity pool
    # may assume the roles below, via web-identity federation.
    trust_relationship = FederatedPrincipal(
        federated='cognito-identity.amazonaws.com',
        conditions={
            'StringEquals': {
                'cognito-identity.amazonaws.com:aud': self.id_pool.ref
            },
            'ForAnyValue:StringLike': {
                'cognito-identity.amazonaws.com:amr': 'authenticated'
            }
        },
        assume_role_action='sts:AssumeRoleWithWebIdentity')
    # IAM role for the master (admin) user.
    master_auth_role = Role(scope=self,
                            id='MasterAuthRole',
                            assumed_by=trust_relationship)
    # Role for ordinary authenticated users.
    limited_auth_role = Role(scope=self,
                             id='LimitedAuthRole',
                             assumed_by=trust_relationship)
    # Attach the limited role as the default authenticated role.
    CfnIdentityPoolRoleAttachment(
        scope=self,
        id='userPoolRoleAttachment',
        identity_pool_id=self.id_pool.ref,
        roles={'authenticated': limited_auth_role.role_arn})
    # Create master-user-group mapped to the master role.
    CfnUserPoolGroup(scope=self,
                     id='AdminsGroup',
                     user_pool_id=self.user_pool.ref,
                     group_name='master-user-group',
                     role_arn=master_auth_role.role_arn)
    # Create limited-user-group mapped to the limited role.
    CfnUserPoolGroup(scope=self,
                     id='UsersGroup',
                     user_pool_id=self.user_pool.ref,
                     group_name='limited-user-group',
                     role_arn=limited_auth_role.role_arn)
    # Role the Elasticsearch service assumes to manage Cognito auth.
    es_role = Role(scope=self,
                   id='EsRole',
                   assumed_by=ServicePrincipal(service='es.amazonaws.com'),
                   managed_policies=[
                       ManagedPolicy.from_aws_managed_policy_name(
                           'AmazonESCognitoAccess')
                   ])

    # Use the following command line to generate the python dependencies layer content
    # pip3 install -t lambda-layer/python/lib/python3.8/site-packages -r lambda/requirements.txt
    # Build the lambda layer assets.
    # NOTE(review): this shells out to pip at *synth* time; requires pip on
    # PATH and network access wherever `cdk synth` runs.
    subprocess.call([
        'pip', 'install', '-t',
        'streaming/streaming_cdk/lambda-layer/python/lib/python3.8/site-packages',
        '-r', 'streaming/streaming_cdk/bootstrap-lambda/requirements.txt',
        '--upgrade'
    ])

    requirements_layer = _lambda.LayerVersion(
        scope=self,
        id='PythonRequirementsTemplate',
        code=_lambda.Code.from_asset(
            'streaming/streaming_cdk/lambda-layer'),
        compatible_runtimes=[_lambda.Runtime.PYTHON_3_8])

    # This lambda function will bootstrap the Elasticsearch cluster.
    bootstrap_function_name = 'AESBootstrap'
    register_template_lambda = _lambda.Function(
        scope=self,
        id='RegisterTemplate',
        runtime=_lambda.Runtime.PYTHON_3_8,
        code=_lambda.Code.from_asset(
            'streaming/streaming_cdk/bootstrap-lambda'),
        handler='es-bootstrap.lambda_handler',
        environment={
            'REGION': region,
            'KDA_ROLE_ARN': kda_role.role_arn,
            'MASTER_ROLE_ARN': master_auth_role.role_arn
        },
        layers=[requirements_layer],
        timeout=Duration.minutes(15),
        function_name=bootstrap_function_name)

    # Minimal CloudWatch Logs permissions for the bootstrap lambda.
    lambda_role = register_template_lambda.role
    lambda_role.add_to_policy(
        PolicyStatement(
            actions=['logs:CreateLogGroup'],
            resources=[stack.format_arn(service='logs', resource='*')]))
    lambda_role.add_to_policy(
        PolicyStatement(
            actions=['logs:CreateLogStream', 'logs:PutLogEvents'],
            resources=[
                stack.format_arn(service='logs',
                                 resource='log_group',
                                 resource_name='/aws/lambda/' +
                                 bootstrap_function_name + ':*')
            ]))

    # Let the lambda assume the master role so that actions can be executed
    # on the cluster:
    # https://aws.amazon.com/premiumsupport/knowledge-center/lambda-function-assume-iam-role/
    lambda_role.add_to_policy(
        PolicyStatement(actions=['sts:AssumeRole'],
                        resources=[master_auth_role.role_arn]))

    master_auth_role.assume_role_policy.add_statements(
        PolicyStatement(actions=['sts:AssumeRole'],
                        principals=[lambda_role]))

    # List all the roles that are allowed to access the Elasticsearch
    # cluster.
    roles = [
        ArnPrincipal(limited_auth_role.role_arn),
        ArnPrincipal(master_auth_role.role_arn),
        ArnPrincipal(kda_role.role_arn)
    ]  # The users
    if register_template_lambda and register_template_lambda.role:
        roles.append(ArnPrincipal(
            lambda_role.role_arn))  # The lambda used to bootstrap

    # KMS key for encryption-at-rest on the domain.
    kms_key = Key(scope=self,
                  id='kms-es',
                  alias='custom/es',
                  description='KMS key for Elasticsearch domain',
                  enable_key_rotation=True)

    # AES Log Groups (created but not yet wired to log_publishing_options;
    # see the commented block below).
    es_app_log_group = logs.LogGroup(scope=self,
                                     id='EsAppLogGroup',
                                     retention=logs.RetentionDays.ONE_WEEK,
                                     removal_policy=RemovalPolicy.RETAIN)

    # Create the Elasticsearch domain.
    es_domain_arn = stack.format_arn(service='es',
                                     resource='domain',
                                     resource_name=application_prefix +
                                     '/*')

    es_access_policy = PolicyDocument(statements=[
        PolicyStatement(principals=roles,
                        actions=[
                            'es:ESHttpGet', 'es:ESHttpPut',
                            'es:ESHttpPost', 'es:ESHttpDelete'
                        ],
                        resources=[es_domain_arn])
    ])
    self.__es_domain = es.CfnDomain(
        scope=self,
        id='searchDomain',
        elasticsearch_cluster_config={
            'instanceType': 'r5.large.elasticsearch',
            'instanceCount': 2,
            'dedicatedMasterEnabled': True,
            'dedicatedMasterCount': 3,
            'dedicatedMasterType': 'r5.large.elasticsearch',
            'zoneAwarenessEnabled': True,
            'zoneAwarenessConfig': {
                'AvailabilityZoneCount': '2'
            },
        },
        encryption_at_rest_options={
            'enabled': True,
            'kmsKeyId': kms_key.key_id
        },
        node_to_node_encryption_options={'enabled': True},
        ebs_options={
            'volumeSize': 10,
            'ebsEnabled': True
        },
        elasticsearch_version='7.9',
        domain_name=application_prefix,
        access_policies=es_access_policy,
        cognito_options={
            'enabled': True,
            'identityPoolId': self.id_pool.ref,
            'roleArn': es_role.role_arn,
            'userPoolId': self.user_pool.ref
        },
        # Fine-grained access control with the master role as admin.
        advanced_security_options={
            'enabled': True,
            'internalUserDatabaseEnabled': False,
            'masterUserOptions': {
                'masterUserArn': master_auth_role.role_arn
            }
        },
        domain_endpoint_options={
            'enforceHttps': True,
            'tlsSecurityPolicy': 'Policy-Min-TLS-1-2-2019-07'
        },
        # log_publishing_options={
        #     # 'ES_APPLICATION_LOGS': {
        #     #     'enabled': True,
        #     #     'cloud_watch_logs_log_group_arn': es_app_log_group.log_group_arn
        #     # },
        #     # 'AUDIT_LOGS': {
        #     #     'enabled': True,
        #     #     'cloud_watch_logs_log_group_arn': ''
        #     # },
        #     # 'SEARCH_SLOW_LOGS': {
        #     #     'enabled': True,
        #     #     'cloud_watch_logs_log_group_arn': ''
        #     # },
        #     # 'INDEX_SLOW_LOGS': {
        #     #     'enabled': True,
        #     #     'cloud_watch_logs_log_group_arn': ''
        #     # }
        # }
    )

    # UltraWarm is not yet on the CloudFormation roadmap...
    # See https://github.com/aws-cloudformation/aws-cloudformation-coverage-roadmap/issues/283
    # self.es_domain.add_property_override('ElasticsearchClusterConfig.WarmEnabled', True)
    # self.es_domain.add_property_override('ElasticsearchClusterConfig.WarmCount', 2)
    # self.es_domain.add_property_override('ElasticsearchClusterConfig.WarmType', 'ultrawarm1.large.elasticsearch')

    # Deny all roles from the authentication provider - users must be added
    # to groups. This lambda function adjusts the identity pool role
    # mapping accordingly.
    # NOTE(review): this block reads `scope.region` / `scope.account` while
    # the code above uses `stack.region` — works only when `scope` is a
    # Stack; confirm, or switch to the `stack`/`region` locals.
    cognito_function_name = 'CognitoFix'
    cognito_template_lambda = _lambda.Function(
        scope=self,
        id='CognitoFixLambda',
        runtime=_lambda.Runtime.PYTHON_3_8,
        code=_lambda.Code.from_asset(
            'streaming/streaming_cdk/cognito-lambda'),
        handler='handler.handler',
        environment={
            'REGION': scope.region,
            'USER_POOL_ID': self.__user_pool.ref,
            'IDENTITY_POOL_ID': self.__id_pool.ref,
            'LIMITED_ROLE_ARN': limited_auth_role.role_arn
        },
        timeout=Duration.minutes(15),
        function_name=cognito_function_name)

    # Minimal CloudWatch Logs + Cognito permissions for the fix-up lambda.
    lambda_role = cognito_template_lambda.role
    lambda_role.add_to_policy(
        PolicyStatement(
            actions=['logs:CreateLogGroup'],
            resources=[stack.format_arn(service='logs', resource='*')]))
    lambda_role.add_to_policy(
        PolicyStatement(
            actions=['logs:CreateLogStream', 'logs:PutLogEvents'],
            resources=[
                stack.format_arn(service='logs',
                                 resource='log_group',
                                 resource_name='/aws/lambda/' +
                                 cognito_function_name + ':*')
            ]))
    lambda_role.add_to_policy(
        PolicyStatement(actions=['cognito-idp:ListUserPoolClients'],
                        resources=[self.user_pool.attr_arn]))
    lambda_role.add_to_policy(
        PolicyStatement(actions=['iam:PassRole'],
                        resources=[limited_auth_role.role_arn]))

    cognito_id_res = Fn.join(':', [
        'arn:aws:cognito-identity', scope.region, scope.account,
        Fn.join('/', ['identitypool', self.__id_pool.ref])
    ])

    lambda_role.add_to_policy(
        PolicyStatement(actions=['cognito-identity:SetIdentityPoolRoles'],
                        resources=[cognito_id_res]))

    # Get the Domain Endpoint and register it with the lambda as
    # environment variable.
    register_template_lambda.add_environment(
        'DOMAIN', self.__es_domain.attr_domain_endpoint)

    CfnOutput(scope=self,
              id='createUserUrl',
              description="Create a new user in the user pool here.",
              value="https://" + scope.region +
              ".console.aws.amazon.com/cognito/users?region=" +
              scope.region + "#/pool/" + self.user_pool.ref + "/users")
    CfnOutput(scope=self,
              id='kibanaUrl',
              description="Access Kibana via this URL.",
              value="https://" + self.__es_domain.attr_domain_endpoint +
              "/_plugin/kibana/")

    # Run the bootstrap lambda once as a custom resource.
    bootstrap_lambda_provider = Provider(
        scope=self,
        id='BootstrapLambdaProvider',
        on_event_handler=register_template_lambda)
    CustomResource(scope=self,
                   id='ExecuteRegisterTemplate',
                   service_token=bootstrap_lambda_provider.service_token,
                   properties={'Timeout': 900})

    # Run the Cognito fix-up lambda once as a custom resource, after the
    # domain exists.
    cognito_lambda_provider = Provider(
        scope=self,
        id='CognitoFixLambdaProvider',
        on_event_handler=cognito_template_lambda)
    cognito_fix_resource = CustomResource(
        scope=self,
        id='ExecuteCognitoFix',
        service_token=cognito_lambda_provider.service_token)
    cognito_fix_resource.node.add_dependency(self.__es_domain)
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    """Provision the EKK (EC2/Kinesis Firehose/Elasticsearch-Kibana) stack.

    Creates a two-AZ VPC with an S3 gateway endpoint, a bastion host,
    security groups for ES clients and the cluster, an in-VPC
    Elasticsearch 7.10 domain, an S3 backup bucket, and a Kinesis Data
    Firehose delivery stream that writes into the ES domain (failed
    documents go to S3). Domain and index names come from the CDK context
    keys 'es_domain_name' and 'es_index_name'.
    """
    super().__init__(scope, construct_id, **kwargs)

    # To reuse an existing VPC instead, look it up from context:
    # vpc_name = self.node.try_get_context("vpc_name")
    # vpc = aws_ec2.Vpc.from_lookup(self, "ExistingVPC",
    #   is_default=True,
    #   vpc_name=vpc_name)
    vpc = aws_ec2.Vpc(
        self,
        "EKKStackVPC",
        max_azs=2,
        # Gateway endpoint so private subnets reach S3 without a NAT hop.
        gateway_endpoints={
            "S3":
            aws_ec2.GatewayVpcEndpointOptions(
                service=aws_ec2.GatewayVpcEndpointAwsService.S3)
        })

    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceClass.html
    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceSize.html#aws_cdk.aws_ec2.InstanceSize
    ec2_instance_type = aws_ec2.InstanceType.of(
        aws_ec2.InstanceClass.BURSTABLE3, aws_ec2.InstanceSize.MEDIUM)

    sg_bastion_host = aws_ec2.SecurityGroup(
        self,
        "BastionHostSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for an bastion host',
        security_group_name='bastion-host-sg')
    cdk.Tags.of(sg_bastion_host).add('Name', 'bastion-host-sg')

    #XXX: As there are no SSH public keys deployed on this machine,
    # you need to use EC2 Instance Connect with the command
    # 'aws ec2-instance-connect send-ssh-public-key' to provide your SSH public key.
    # https://aws.amazon.com/de/blogs/compute/new-using-amazon-ec2-instance-connect-for-ssh-access-to-your-ec2-instances/
    bastion_host = aws_ec2.BastionHostLinux(
        self,
        "BastionHost",
        vpc=vpc,
        instance_type=ec2_instance_type,
        subnet_selection=aws_ec2.SubnetSelection(
            subnet_type=aws_ec2.SubnetType.PUBLIC),
        security_group=sg_bastion_host)

    #TODO: SHOULD restrict IP range allowed to ssh access
    bastion_host.allow_ssh_access_from(aws_ec2.Peer.ipv4("0.0.0.0/0"))

    # Security group attached to anything that talks TO the ES cluster
    # (Firehose ENIs use this below).
    sg_use_es = aws_ec2.SecurityGroup(
        self,
        "ElasticSearchClientSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for an elasticsearch client',
        security_group_name='use-es-cluster-sg')
    cdk.Tags.of(sg_use_es).add('Name', 'use-es-cluster-sg')

    # Security group on the ES domain itself.
    sg_es = aws_ec2.SecurityGroup(
        self,
        "ElasticSearchSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for an elasticsearch cluster',
        security_group_name='es-cluster-sg')
    cdk.Tags.of(sg_es).add('Name', 'es-cluster-sg')

    # Allow node-to-node, client, and bastion traffic into the cluster.
    sg_es.add_ingress_rule(peer=sg_es,
                           connection=aws_ec2.Port.all_tcp(),
                           description='es-cluster-sg')
    sg_es.add_ingress_rule(peer=sg_use_es,
                           connection=aws_ec2.Port.all_tcp(),
                           description='use-es-cluster-sg')
    sg_es.add_ingress_rule(peer=sg_bastion_host,
                           connection=aws_ec2.Port.all_tcp(),
                           description='bastion-host-sg')

    #XXX: aws cdk elastsearch example - https://github.com/aws/aws-cdk/issues/2873
    ES_DOMAIN_NAME = self.node.try_get_context("es_domain_name")
    # NOTE(review): encryption at rest is disabled here — fine for a demo,
    # confirm before any production use.
    es_cfn_domain = aws_elasticsearch.CfnDomain(
        self,
        "ElasticSearch",
        elasticsearch_cluster_config={
            "dedicatedMasterCount": 3,
            "dedicatedMasterEnabled": True,
            "dedicatedMasterType": "t2.medium.elasticsearch",
            "instanceCount": 2,
            "instanceType": "t2.medium.elasticsearch",
            "zoneAwarenessEnabled": True
        },
        ebs_options={
            "ebsEnabled": True,
            "volumeSize": 10,
            "volumeType": "gp2"
        },
        domain_name=ES_DOMAIN_NAME,
        elasticsearch_version="7.10",
        encryption_at_rest_options={"enabled": False},
        access_policies={
            "Version": "2012-10-17",
            "Statement": [{
                "Effect": "Allow",
                "Principal": {
                    "AWS": "*"
                },
                "Action":
                ["es:Describe*", "es:List*", "es:Get*", "es:ESHttp*"],
                "Resource":
                self.format_arn(service="es",
                                resource="domain",
                                resource_name="{}/*".format(ES_DOMAIN_NAME))
            }]
        },
        snapshot_options={"automatedSnapshotStartHour": 17},
        vpc_options={
            "securityGroupIds": [sg_es.security_group_id],
            "subnetIds":
            vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).subnet_ids
        })
    cdk.Tags.of(es_cfn_domain).add('Name', ES_DOMAIN_NAME)

    # Random suffix keeps the backup bucket name globally unique.
    S3_BUCKET_SUFFIX = ''.join(
        random.sample((string.ascii_lowercase + string.digits), k=7))
    s3_bucket = s3.Bucket(
        self,
        "s3bucket",
        removal_policy=cdk.RemovalPolicy.
        DESTROY,  #XXX: Default: cdk.RemovalPolicy.RETAIN - The bucket will be orphaned
        bucket_name="ekk-stack-{region}-{suffix}".format(
            region=cdk.Aws.REGION, suffix=S3_BUCKET_SUFFIX))

    # Firehose service-role policy: S3 backup access, ENI management for
    # VPC delivery, and ES describe/write access.
    firehose_role_policy_doc = aws_iam.PolicyDocument()
    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            **{
                "effect":
                aws_iam.Effect.ALLOW,
                "resources": [
                    s3_bucket.bucket_arn, "{}/*".format(
                        s3_bucket.bucket_arn)
                ],
                "actions": [
                    "s3:AbortMultipartUpload", "s3:GetBucketLocation",
                    "s3:GetObject", "s3:ListBucket",
                    "s3:ListBucketMultipartUploads", "s3:PutObject"
                ]
            }))

    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=["*"],
            actions=[
                "ec2:DescribeVpcs", "ec2:DescribeVpcAttribute",
                "ec2:DescribeSubnets", "ec2:DescribeSecurityGroups",
                "ec2:DescribeNetworkInterfaces",
                "ec2:CreateNetworkInterface",
                "ec2:CreateNetworkInterfacePermission",
                "ec2:DeleteNetworkInterface"
            ]))

    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=[
                es_cfn_domain.attr_arn, "{}/*".format(
                    es_cfn_domain.attr_arn)
            ],
            actions=[
                "es:DescribeElasticsearchDomain",
                "es:DescribeElasticsearchDomains",
                "es:DescribeElasticsearchDomainConfig", "es:ESHttpPost",
                "es:ESHttpPut"
            ]))

    ES_INDEX_NAME = self.node.try_get_context("es_index_name")

    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=[
                es_cfn_domain.attr_arn, "{}/*".format(
                    es_cfn_domain.attr_arn)
            ],
            # Narrower per-path alternative kept for reference:
            # resources=[
            #     "{aes_arn}/_all/_settings".format(aes_arn=es_cfn_domain.attr_arn),
            #     "{aes_arn}/_cluster/stats".format(aes_arn=es_cfn_domain.attr_arn),
            #     "{aes_arn}/{es_index_name}*/_mapping".format(aes_arn=es_cfn_domain.attr_arn, es_index_name=ES_INDEX_NAME),
            #     "{aes_arn}/_nodes".format(aes_arn=es_cfn_domain.attr_arn),
            #     "{aes_arn}/_nodes/*/stats".format(aes_arn=es_cfn_domain.attr_arn),
            #     "{aes_arn}/_stats".format(aes_arn=es_cfn_domain.attr_arn),
            #     "{aes_arn}/{es_index_name}*/_stats".format(aes_arn=es_cfn_domain.attr_arn, es_index_name=ES_INDEX_NAME)
            # ],
            actions=["es:ESHttpGet"]))

    firehose_log_group_name = "/aws/kinesisfirehose/{}".format(
        ES_INDEX_NAME)
    firehose_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            #XXX: The ARN will be formatted as follows:
            # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
            resources=[
                self.format_arn(
                    service="logs",
                    resource="log-group",
                    resource_name="{}:log-stream:*".format(
                        firehose_log_group_name),
                    arn_format=cdk.ArnFormat.COLON_RESOURCE_NAME)
            ],
            actions=["logs:PutLogEvents"]))

    firehose_role = aws_iam.Role(
        self,
        "KinesisFirehoseServiceRole",
        role_name="KinesisFirehoseServiceRole-{es_index}-{region}".format(
            es_index=ES_INDEX_NAME, region=cdk.Aws.REGION),
        assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"),
        #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
        inline_policies={"firehose_role_policy": firehose_role_policy_doc})

    # Firehose delivers into the VPC via ENIs using the ES-client SG.
    es_dest_vpc_config = aws_kinesisfirehose.CfnDeliveryStream.VpcConfigurationProperty(
        role_arn=firehose_role.role_arn,
        security_group_ids=[sg_use_es.security_group_id],
        subnet_ids=vpc.select_subnets(
            subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).subnet_ids)

    es_dest_config = aws_kinesisfirehose.CfnDeliveryStream.ElasticsearchDestinationConfigurationProperty(
        index_name=ES_INDEX_NAME,
        role_arn=firehose_role.role_arn,
        s3_configuration={
            "bucketArn": s3_bucket.bucket_arn,
            "bufferingHints": {
                "intervalInSeconds": 60,
                "sizeInMBs": 1
            },
            "cloudWatchLoggingOptions": {
                "enabled": True,
                "logGroupName": firehose_log_group_name,
                "logStreamName": "S3Backup"
            },
            "compressionFormat":
            "UNCOMPRESSED",  # [GZIP | HADOOP_SNAPPY | Snappy | UNCOMPRESSED | ZIP]
            # Kinesis Data Firehose automatically appends the "YYYY/MM/dd/HH/"
            # UTC prefix to delivered S3 files. You can also specify an extra
            # prefix in front of the time format and add "/" to the end to
            # have it appear as a folder in the S3 console.
            "prefix": "{}/".format(ES_INDEX_NAME),
            "roleArn": firehose_role.role_arn
        },
        buffering_hints={
            "intervalInSeconds": 60,
            "sizeInMBs": 1
        },
        cloud_watch_logging_options={
            "enabled": True,
            "logGroupName": firehose_log_group_name,
            "logStreamName": "ElasticsearchDelivery"
        },
        domain_arn=es_cfn_domain.attr_arn,
        index_rotation_period=
        "NoRotation",  # [NoRotation | OneDay | OneHour | OneMonth | OneWeek]
        retry_options={"durationInSeconds": 60},
        s3_backup_mode=
        "FailedDocumentsOnly",  # [AllDocuments | FailedDocumentsOnly]
        vpc_configuration=es_dest_vpc_config)

    firehose_to_es_delivery_stream = aws_kinesisfirehose.CfnDeliveryStream(
        self,
        "KinesisFirehoseToES",
        delivery_stream_name=ES_INDEX_NAME,
        delivery_stream_type="DirectPut",
        elasticsearch_destination_configuration=es_dest_config,
        tags=[{
            "key": "Name",
            "value": ES_DOMAIN_NAME
        }])