Example No. 1
    def __init__(self, scope: core.Construct, construct_id: str,
                 **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)
        s3bucket = s3.Bucket(self, 'vika-yy')
        kds = data_stream.Stream(self, 'data_stream', shard_count=1)

        delivery_stream_role = iam.Role(
            self,
            'kdfdelivery_stream_role_role',
            assumed_by=iam.ServicePrincipal('firehose.amazonaws.com'))
        delivery_stream_role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'AmazonKinesisFullAccess'))
        delivery_stream_role.add_to_policy(
            iam.PolicyStatement(effect=iam.Effect.ALLOW,
                                resources=[s3bucket.bucket_arn,
                                           s3bucket.arn_for_objects("*")],
                                actions=["s3:*"]))

        #s3bucket = s3.Bucket(self, 'vika-yy',bucket_name='yellowtaxicdk-input')
        s3_dest_config = delivery_stream.CfnDeliveryStream.ExtendedS3DestinationConfigurationProperty(
            bucket_arn=s3bucket.bucket_arn,
            buffering_hints=delivery_stream.CfnDeliveryStream.
            BufferingHintsProperty(interval_in_seconds=60, size_in_m_bs=128),
            role_arn=delivery_stream_role.role_arn,
            compression_format='UNCOMPRESSED',
            s3_backup_mode='Disabled')

        stream_source_config = delivery_stream.CfnDeliveryStream.KinesisStreamSourceConfigurationProperty(
            kinesis_stream_arn=kds.stream_arn,
            role_arn=delivery_stream_role.role_arn)

        kfirehose = delivery_stream.CfnDeliveryStream(
            self,
            'kfirehose',
            delivery_stream_name='deliverystream',
            delivery_stream_type='KinesisStreamAsSource',
            extended_s3_destination_configuration=s3_dest_config,
            kinesis_stream_source_configuration=stream_source_config)
        glue_role = iam.Role(
            self,
            'glue_role',
            assumed_by=iam.ServicePrincipal('glue.amazonaws.com'))
        glue_role.add_to_policy(
            iam.PolicyStatement(effect=iam.Effect.ALLOW,
                                resources=[s3bucket.bucket_arn,
                                           s3bucket.arn_for_objects("*")],
                                actions=["s3:*"]))
        glue_role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AWSGlueServiceRole'))

        bucket_name = s3bucket.bucket_name
        glue_crawler = glue.CfnCrawler(
            self,
            'glue_crawler',
            database_name='yellow-taxis',
            role=glue_role.role_arn,
            #targets={"s3Targets": [{"path": f'{BUCKET}/input/'}]}
            targets={"s3Targets": [{
                "path": f'{bucket_name}/input/'
            }]})
Example No. 2
    def __init__(self, scope: core.Construct, common: Common,
                 data_lake: DataLake, **kwargs) -> None:
        self.env = common.env
        super().__init__(scope,
                         id=f'{self.env}-data-lake-raw-ingestion',
                         **kwargs)
        name = f'firehose-{self.env}-raw-delivery-stream'
        raw_bucket = data_lake.data_lake_raw_bucket

        kinesis_role = RawKinesisRole(self,
                                      environment=common.env,
                                      raw_bucket=raw_bucket)

        s3_config = firehose.CfnDeliveryStream.ExtendedS3DestinationConfigurationProperty(
            bucket_arn=raw_bucket.bucket_arn,
            compression_format='ZIP',
            error_output_prefix='bad_records',
            prefix='atomic_events/year=!{timestamp:yyyy}/'
            'month=!{timestamp:MM}/day=!{timestamp:dd}/hour=!{timestamp:HH}/',
            buffering_hints=firehose.CfnDeliveryStream.BufferingHintsProperty(
                interval_in_seconds=60, size_in_m_bs=1),
            role_arn=kinesis_role.role_arn)

        self.atomic_events = firehose.CfnDeliveryStream(
            self,
            id=name,
            delivery_stream_name=name,
            delivery_stream_type='DirectPut',
            extended_s3_destination_configuration=s3_config)

        self.dms_replication_task = OrdersDMS(self, common, data_lake)
Example No. 3
    def event_streams(self, bucket, event_recorder, event_sources):
        stream_role = _iam.Role(
            self,
            "StreamRole",
            assumed_by=_iam.ServicePrincipal('firehose.amazonaws.com'))
        bucket.grant_write(stream_role)

        event_streams = []
        for source in event_sources:
            event_streams.append(
                _kfh.CfnDeliveryStream(
                    self,
                    "{}Stream".format(source.capitalize()),
                    delivery_stream_name=source,
                    delivery_stream_type='DirectPut',
                    extended_s3_destination_configuration=_kfh.
                    CfnDeliveryStream.
                    ExtendedS3DestinationConfigurationProperty(
                        bucket_arn=bucket.bucket_arn,
                        buffering_hints=_kfh.CfnDeliveryStream.
                        BufferingHintsProperty(interval_in_seconds=60,
                                               size_in_m_bs=10),
                        compression_format='GZIP',
                        role_arn=stream_role.role_arn,
                        prefix="{}/".format(source))))
        return event_streams
    def create_firehose(
            self, delivery_bucket,
            firehose_role_arn) -> aws_kinesisfirehose.CfnDeliveryStream:
        """
        Creates a Firehose DeliveryStream configured to deliver to the S3 Bucket `delivery_bucket`,
        and log errors to a log stream named 'S3Delivery' in `log_group`. Firehose will adopt the
        role specified in `firehose_role_arn`.

        :param delivery_bucket: The delivery destination bucket for the Firehose
        :param firehose_role_arn: The role to adopt
        :return: a CfnDeliveryStream
        """
        firehose = aws_kinesisfirehose.CfnDeliveryStream(
            self,
            "integ_test_firehose",
            extended_s3_destination_configuration={
                "bucketArn": delivery_bucket.bucket_arn,
                "bufferingHints": {
                    "intervalInSeconds": 60,
                    "sizeInMBs": 50
                },
                "compressionFormat": "ZIP",
                "roleArn": firehose_role_arn,
                "cloudWatchLoggingOptions": {
                    "enabled": True,
                    "logGroupName": FirehoseStack.LOG_GROUP_NAME,
                    "logStreamName": FirehoseStack.LOG_STREAM_NAME,
                },
            },
        )
        return firehose
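
The docstring above refers to CloudWatch error logging, but the snippet does not show the log group or log stream being created. A minimal sketch of statements that might run before create_firehose, assuming aws_logs is imported and that LOG_GROUP_NAME / LOG_STREAM_NAME are constants on FirehoseStack:

        # Hypothetical provisioning of the log group/stream referenced by the
        # cloudWatchLoggingOptions above; assumes aws_logs is imported and that
        # LOG_GROUP_NAME / LOG_STREAM_NAME are constants on FirehoseStack.
        log_group = aws_logs.LogGroup(
            self,
            "firehose_log_group",
            log_group_name=FirehoseStack.LOG_GROUP_NAME)
        log_group.add_stream(
            "firehose_log_stream",
            log_stream_name=FirehoseStack.LOG_STREAM_NAME)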
Example No. 5
    def __init__(self, scope: cdk.Construct, construct_id: str,
                 **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        log_bucket_name = cdk.Fn.import_value('sime-log-bucket-name')
        role_name_cwl_to_kdf = cdk.Fn.import_value('siem-cwl-to-kdf-role-name')
        role_name_kdf_to_s3 = cdk.Fn.import_value('siem-kdf-to-s3-role-name')

        kdf_ad_name = cdk.CfnParameter(
            self,
            'KdfAdName',
            description='Kinesis Data Firehose Name to deliver AD event',
            default='siem-ad-event-to-s3')
        kdf_buffer_size = cdk.CfnParameter(
            self,
            'KdfBufferSize',
            type='Number',
            description='Enter a buffer size between 1 - 128 (MiB)',
            default=1,
            min_value=1,
            max_value=128)
        kdf_buffer_interval = cdk.CfnParameter(
            self,
            'KdfBufferInterval',
            type='Number',
            description='Enter a buffer interval between 60 - 900 (seconds.)',
            default=60,
            min_value=60,
            max_value=900)
        cwl_ad_name = cdk.CfnParameter(
            self,
            'CwlAdName',
            description='CloudWatch Logs group name',
            default='/aws/directoryservice/d-XXXXXXXXXXXXXXXXX')

        kdf_to_s3 = aws_kinesisfirehose.CfnDeliveryStream(
            self,
            "KDFForAdEventLog",
            delivery_stream_name=kdf_ad_name.value_as_string,
            s3_destination_configuration=CDS.
            S3DestinationConfigurationProperty(
                bucket_arn=f'arn:aws:s3:::{log_bucket_name}',
                prefix=
                f'AWSLogs/{cdk.Aws.ACCOUNT_ID}/DirectoryService/MicrosoftAD/',
                buffering_hints=CDS.BufferingHintsProperty(
                    interval_in_seconds=kdf_buffer_interval.value_as_number,
                    size_in_m_bs=kdf_buffer_size.value_as_number),
                compression_format='UNCOMPRESSED',
                role_arn=(f'arn:aws:iam::{cdk.Aws.ACCOUNT_ID}:role/'
                          f'service-role/{role_name_kdf_to_s3}')))

        aws_logs.CfnSubscriptionFilter(
            self,
            'KinesisSubscription',
            destination_arn=kdf_to_s3.attr_arn,
            filter_pattern='',
            log_group_name=cwl_ad_name.value_as_string,
            role_arn=(f'arn:aws:iam::{cdk.Aws.ACCOUNT_ID}:role/'
                      f'{role_name_cwl_to_kdf}'))
Example No. 6
    def __init__(self,
                 scope: cdk.Construct,
                 construct_id: str,
                 default_firehose_name='siem-XXXXXXXXXXX-to-s3',
                 firehose_compression_format='UNCOMPRESSED',
                 **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        log_bucket_name = cdk.Fn.import_value('sime-log-bucket-name')
        role_name_kdf_to_s3 = cdk.Fn.import_value('siem-kdf-to-s3-role-name')

        kdf_name = cdk.CfnParameter(
            self,
            'FirehoseName',
            description=('New Kinesis Data Firehose Name to deliver log. '
                         'modify XXXXXXXXX'),
            default=default_firehose_name)
        kdf_buffer_size = cdk.CfnParameter(
            self,
            'FirehoseBufferSize',
            type='Number',
            description='Enter a buffer size between 1 - 128 (MiB)',
            default=1,
            min_value=1,
            max_value=128)
        kdf_buffer_interval = cdk.CfnParameter(
            self,
            'FirehoseBufferInterval',
            type='Number',
            description='Enter a buffer interval between 60 - 900 (seconds.)',
            default=60,
            min_value=60,
            max_value=900)
        s3_destination_prefix = cdk.CfnParameter(
            self,
            'S3DestPrefix',
            description='S3 destination prefix',
            default='AWSLogs/YourAccountId/LogType/Region/')

        self.kdf_to_s3 = aws_kinesisfirehose.CfnDeliveryStream(
            self,
            "Kdf",
            delivery_stream_name=kdf_name.value_as_string,
            s3_destination_configuration=CDS.
            S3DestinationConfigurationProperty(
                bucket_arn=f'arn:aws:s3:::{log_bucket_name}',
                prefix=s3_destination_prefix.value_as_string,
                buffering_hints=CDS.BufferingHintsProperty(
                    interval_in_seconds=kdf_buffer_interval.value_as_number,
                    size_in_m_bs=kdf_buffer_size.value_as_number),
                compression_format=firehose_compression_format,
                role_arn=(f'arn:aws:iam::{cdk.Aws.ACCOUNT_ID}:role/'
                          f'service-role/{role_name_kdf_to_s3}')))
Example No. 7
    def __init__(self, scope: core.Construct, id: str, props: FirehoseProps,
                 **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # Firehose
        Stream = firehose.CfnDeliveryStream
        ExtendedS3DestConfProp = Stream.ExtendedS3DestinationConfigurationProperty
        FormatConversionProp = Stream.DataFormatConversionConfigurationProperty
        InputFormatConfProp = Stream.InputFormatConfigurationProperty
        OutputFormatConfProp = Stream.OutputFormatConfigurationProperty
        DeserializerProperty = Stream.DeserializerProperty
        SerializerProperty = Stream.SerializerProperty
        OpenXJsonSerDeProperty = Stream.OpenXJsonSerDeProperty
        ParquetSerDeProperty = Stream.ParquetSerDeProperty
        BufferingHintsProp = Stream.BufferingHintsProperty
        SchemaConfigProp = Stream.SchemaConfigurationProperty
        SourceStreamProp = Stream.KinesisStreamSourceConfigurationProperty

        iam_role_firehose_analytical = props.role

        self.delivery_stream = firehose.CfnDeliveryStream(
            scope,
            'deliveryClickstream',
            delivery_stream_name='deliveryClickStream',
            delivery_stream_type='KinesisStreamAsSource',
            kinesis_stream_source_configuration=SourceStreamProp(
                kinesis_stream_arn=props.stream.stream_arn,
                role_arn=iam_role_firehose_analytical.role_arn),
            extended_s3_destination_configuration=ExtendedS3DestConfProp(
                bucket_arn=props.bucket.bucket_arn,
                role_arn=iam_role_firehose_analytical.role_arn,
                buffering_hints=BufferingHintsProp(
                    interval_in_seconds=60,
                    size_in_m_bs=128,
                ),
                data_format_conversion_configuration=FormatConversionProp(
                    enabled=True,
                    input_format_configuration=InputFormatConfProp(
                        deserializer=DeserializerProperty(
                            open_x_json_ser_de=OpenXJsonSerDeProperty(), ), ),
                    output_format_configuration=OutputFormatConfProp(
                        serializer=SerializerProperty(
                            parquet_ser_de=ParquetSerDeProperty(
                                compression='UNCOMPRESSED',
                                enable_dictionary_compression=False,
                            ), )),
                    schema_configuration=SchemaConfigProp(
                        database_name=props.glue_db.database_name,
                        table_name=props.glue_table.table_name,
                        role_arn=iam_role_firehose_analytical.role_arn,
                    )),
                prefix='kinesis/'),
        )
Example No. 8
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        ### Code for FirehoseStack

        # get role arn value from ddbqiIam stack
        print("++++++++++++++++++++++++++++++++++++")
        #rolearn = core.Token.toString(core.Fn.import_value("rolearn"))
        rolearn = core.Fn.import_value("rolearn")
        esarn = core.Fn.import_value("esarn")
        print("++++++++++++++++++++++++++++++++++++")

        # creating s3 bucket for failed logs
        log_s3 = s3.Bucket(self, constants["S3_BUCKET_NAME"])
        s3_config = {
            "bucketArn": log_s3.bucket_arn,
            #"roleArn": firehose_role.role_arn
            "roleArn": rolearn
        }
        es_dest_config = {
            "domainArn": esarn,
            "indexName": constants["DDBES_INDEX_NAME"],
            "roleArn": rolearn,
            "s3Configuration": s3_config,
            "bufferingHints": {
                "intervalInSeconds": 60,
                "sizeInMBs": 1
            },
        }
        self.firehose_deliverySystem = afh.CfnDeliveryStream(
            self,
            "ddbqiStream",
            delivery_stream_name=constants["FH_DELIVERY_STREAM_NAME"],
            delivery_stream_type="DirectPut",
            elasticsearch_destination_configuration=es_dest_config)
        core.Tags.of(self.firehose_deliverySystem).add(
            "project", constants["PROJECT_TAG"])

        core.CfnOutput(
            self,
            'StreamName',
            export_name="streamName",
            value=constants["FH_DELIVERY_STREAM_NAME"],
            description="firehose stream name",
        )
Example No. 9
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # Create Bucket
        bucket = s3.Bucket(self, "cdk-firehose-bucket")

        # IAM Role for Firehose
        firehose_role = iam.Role(
            self,
            "FirehoseRole",
            assumed_by=iam.ServicePrincipal(service="firehose.amazonaws.com"))

        delivery_policy = iam.Policy(
            self,
            "FirehosePolicy",
            policy_name="FirehosePolicy",
            statements=[
                iam.PolicyStatement(
                    effect=iam.Effect.ALLOW,
                    actions=[
                        "s3:AbortMultipartUpload", "s3:GetBucketLocation",
                        "s3:GetObject", "s3:ListBucket",
                        "s3:ListBucketMultipartUploads", "s3:PutObject"
                    ],
                    resources=[bucket.bucket_arn, bucket.bucket_arn + "/*"])
            ])

        delivery_policy.attach_to_role(firehose_role)

        # Firehose stream
        delivery_stream = firehose.CfnDeliveryStream(
            self,
            "QueueingStream",
            delivery_stream_name="QueueingStream",
            s3_destination_configuration={
                "bucketArn": bucket.bucket_arn,
                "roleArn": firehose_role.role_arn
            })

        # delivery_stream.add_depends_on(firehose_role)

        # We assign the stream's arn and name to a local variable for the Object.
        self._delivery_stream_name = delivery_stream.delivery_stream_name
        self._delivery_stream_arn = delivery_stream.attr_arn
Example No. 10
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Network
        self.vpc = ec2.Vpc(self, 'logging-vpc')

        self.backup_bucket = s3.Bucket(self,
                                       'logging-backup',
                                       bucket_name='logging-backup-bucket')

        self.elastic_domain = es.CfnDomain(self, 'logging-es-cluster')

        # The destination arguments must be CfnDeliveryStream *property objects*,
        # not the CfnDomain/Bucket constructs themselves. The fix below assumes
        # aws_iam is imported as `iam`; the index name is illustrative, and in a
        # real deployment the role also needs es:ESHttp* permissions on the domain.
        stream_role = iam.Role(
            self,
            'logging-stream-role',
            assumed_by=iam.ServicePrincipal('firehose.amazonaws.com'))
        self.backup_bucket.grant_write(stream_role)

        self.stream = firehose.CfnDeliveryStream(
            self,
            'logging-stream',
            delivery_stream_name='logging-stream-firehose',
            delivery_stream_type='DirectPut',
            elasticsearch_destination_configuration=firehose.CfnDeliveryStream.
            ElasticsearchDestinationConfigurationProperty(
                domain_arn=self.elastic_domain.attr_arn,
                index_name='logs',
                role_arn=stream_role.role_arn,
                s3_configuration=firehose.CfnDeliveryStream.
                S3DestinationConfigurationProperty(
                    bucket_arn=self.backup_bucket.bucket_arn,
                    role_arn=stream_role.role_arn)))
    def __init__(
        self,
        scope: core.Construct,
        data_lake_raw_bucket: BaseDataLakeBucket,
        **kwargs,
    ) -> None:
        self.deploy_env = active_environment
        self.data_lake_raw_bucket = data_lake_raw_bucket
        super().__init__(scope,
                         id=f"{self.deploy_env.value}-kinesis-stack",
                         **kwargs)

        self.atomic_events = firehose.CfnDeliveryStream(
            self,
            id=f"firehose-{self.deploy_env.value}-raw-delivery-stream",
            delivery_stream_name=
            f"firehose-{self.deploy_env.value}-raw-delivery-stream",
            delivery_stream_type="DirectPut",
            extended_s3_destination_configuration=self.s3_config,
        )
Example No. 12
def base_kinesis_firehose_delivery_stream(construct, **kwargs):
    # TODO: ADD ROLES, BUCKETS, AND FIREHOSE MINIMUM SETTINGS
    """
    Function that generates a Kinesis Firehose Delivery Stream.
    :param construct: Custom construct that will use this function. From the external construct is usually 'self'.
    :param kwargs: Consist of required 'stream_name'.
    :return: Kinesis Stream Construct.
    """
    stream_name = construct.prefix + "_" + kwargs["stream_name"] + "_" + "stream" + "_" + construct.environment_
    destinations_config = firehose_destinations(kwargs["destinations"])
    firehose_stream = fh_stream.CfnDeliveryStream(
        construct,
        id=stream_name,
        delivery_stream_name=stream_name,
        elasticsearch_destination_configuration=destinations_config["elasticsearch_destination_configuration"],
        extended_s3_destination_configuration=destinations_config["extended_s3_destination_configuration"],
        redshift_destination_configuration=destinations_config["redshift_destination_configuration"],
        s3_destination_configuration=destinations_config["s3_destination_configuration"],
        splunk_destination_configuration=destinations_config["splunk_destination_configuration"],
    )

    return firehose_stream.attr_arn
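
A hedged usage sketch for the helper above; the shape of the 'destinations' payload depends on the firehose_destinations helper, which is not shown, so the dictionary below is only illustrative:

# Hypothetical call site, e.g. inside a stack's __init__ (so 'self' is a
# construct exposing the 'prefix' and 'environment_' attributes the helper
# expects). 'delivery_bucket' and 'delivery_role' are assumed to exist already.
stream_arn = base_kinesis_firehose_delivery_stream(
    self,
    stream_name="clicks",
    destinations={
        "s3": {
            "bucket_arn": delivery_bucket.bucket_arn,
            "role_arn": delivery_role.role_arn,
        }
    })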
    def __init__(self, scope: Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        #
        # Producer Lambda
        #
        event_producer_lambda = _lambda.Function(self, "eventProducerLambda",
                                                 runtime=_lambda.Runtime.PYTHON_3_8,
                                                 handler="event_producer_lambda.lambda_handler",
                                                 code=_lambda.Code.from_asset("lambda")
                                                 )

        event_policy = iam.PolicyStatement(effect=iam.Effect.ALLOW, resources=['*'], actions=['events:PutEvents'])

        event_producer_lambda.add_to_role_policy(event_policy)

        #
        # Approved Consumer1
        #
        event_consumer1_lambda = _lambda.Function(self, "eventConsumer1Lambda",
                                                  runtime=_lambda.Runtime.PYTHON_3_8,
                                                  handler="event_consumer_lambda.lambda_handler",
                                                  code=_lambda.Code.from_asset("lambda")
                                                  )

        event_consumer1_rule = events.Rule(self, 'eventConsumer1LambdaRule',
                                           description='Approved Transactions',
                                           event_pattern=events.EventPattern(source=['com.mycompany.myapp']
                                                                             ))

        event_consumer1_rule.add_target(targets.LambdaFunction(handler=event_consumer1_lambda))

        #
        # Approved Consumer2
        #
        event_consumer2_lambda = _lambda.Function(self, "eventConsumer2Lambda",
                                                  runtime=_lambda.Runtime.PYTHON_3_8,
                                                  handler="event_consumer_lambda.lambda_handler",
                                                  code=_lambda.Code.from_asset("lambda")
                                                  )

        event_consumer2_rule = events.Rule(self, 'eventConsumer2LambdaRule',
                                           description='Approved Transactions',
                                           event_pattern=events.EventPattern(source=['com.mycompany.myapp']
                                                                             ))
        event_consumer2_rule.add_target(targets.LambdaFunction(handler=event_consumer2_lambda))

        #
        # Approved Consumer3
        #

        # Create S3 bucket for KinesisFirehose destination
        ingest_bucket = s3.Bucket(self, 'test-ingest-bucket')

        # Create a Role for KinesisFirehose
        firehose_role = iam.Role(
            self, 'myRole',
            assumed_by=iam.ServicePrincipal('firehose.amazonaws.com'))

        # Create and attach a policy that gives permission to write into the S3 bucket.
        iam.Policy(
            self, 's3_attr',
            policy_name='s3kinesis',
            statements=[iam.PolicyStatement(
                actions=['s3:*'],
                resources=['arn:aws:s3:::' + ingest_bucket.bucket_name + '/*'])],
                # resources=['*'])],
            roles=[firehose_role],
        )

        event_consumer3_kinesisfirehose = _firehose.CfnDeliveryStream(self, "consumer3-firehose",
                                                                      s3_destination_configuration=_firehose.CfnDeliveryStream.S3DestinationConfigurationProperty(
                                                                          bucket_arn=ingest_bucket.bucket_arn,
                                                                          buffering_hints=_firehose.CfnDeliveryStream.BufferingHintsProperty(
                                                                              interval_in_seconds=60
                                                                          ),
                                                                          compression_format="UNCOMPRESSED",
                                                                          role_arn=firehose_role.role_arn
                                                                      ))

        event_consumer3_rule = events.Rule(self, 'eventConsumer3KinesisRule',
                                           description='Approved Transactions',
                                           event_pattern=events.EventPattern(source=['com.mycompany.myapp']
                                                                             ))
        event_consumer3_rule.add_target(targets.KinesisFirehoseStream(stream=event_consumer3_kinesisfirehose))

        # Defines an API Gateway REST API resource backed by our "event_producer_lambda" function.
        api = api_gw.LambdaRestApi(self, 'SampleAPI-EventBridge-Multi-Consumer',
                             handler=event_producer_lambda,
                             proxy=False
                             )
        items = api.root.add_resource("items")
        items.add_method("POST")  # POST /items
Example No. 14
    def __init__(self,
                 scope: cdk.Construct,
                 id: str,
                 name: str,
                 vpc_name: str,
                 security_group_name: str,
                 secrets_path: str = "/ibc/paper/",
                 trading_mode: str = "paper",
                 **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # TODO: Create Log Group

        # Create a cluster
        vpc = ec2.Vpc.from_lookup(self, "vpc", vpc_name=vpc_name)

        privateSubnets = vpc.private_subnets

        cluster = ecs.Cluster(self, "cluster", vpc=vpc)
        # TODO: check for namespace before adding below.  This is failing on stack updates.
        cluster.add_default_cloud_map_namespace(name="private")

        task = ecs.FargateTaskDefinition(self,
                                         "task",
                                         cpu="512",
                                         memory_mi_b="1024")

        # Add SSM Permissions to IAM Role
        SSM_ACTIONS = ["ssm:GetParametersByPath", "kms:Decrypt"]
        SSM_RESOURCES = [
            "arn:aws:kms:*:*:alias/aws/ssm",
            "arn:aws:ssm:*:*:parameter{}*".format(secrets_path),
        ]
        ssmPolicy = iam.PolicyStatement(iam.PolicyStatementEffect.Allow)
        for action in SSM_ACTIONS:
            ssmPolicy.add_action(action)
        for resource in SSM_RESOURCES:
            ssmPolicy.add_resource(resource)
        task.add_to_task_role_policy(ssmPolicy)

        ibcRepo = ecr.Repository.from_repository_name(self, "container_repo",
                                                      "ibc")

        ibcImage = ecs.ContainerImage.from_ecr_repository(ibcRepo, "latest")

        # TODO: Add to Existing Hierarchal Logger, add log_group argument with ref to it
        ibcLogger = ecs.AwsLogDriver(self, "logger", stream_prefix=name)

        connectionLossMetric = logs.MetricFilter(
            self,
            "connectionLossMetric",
            filter_pattern=logs.FilterPattern.literal("ERROR ?110 ?130"),
            log_group=ibcLogger.log_group,
            metric_name="ib_connection_loss",
            metric_namespace=name,
        )

        newContainerMetric = logs.MetricFilter(
            self,
            "newContainerMetric",
            filter_pattern=logs.FilterPattern.literal(
                "Starting virtual X frame buffer"),
            log_group=ibcLogger.log_group,
            metric_name="new_container",
            metric_namespace=name,
        )

        kinesisFirehoseBucketActions = [
            "s3:AbortMultipartUpload",
            "s3:GetBucketLocation",
            "s3:GetObject",
            "s3:ListBucket",
            "s3:ListBucketMultipartUploads",
        ]

        kinesisFirehoseBucket = s3.Bucket(self, "firehoseBucket")

        kinesisFirehoseBucketPolicy = iam.PolicyStatement(
            iam.PolicyStatementEffect.Allow)
        for action in kinesisFirehoseBucketActions:
            kinesisFirehoseBucketPolicy.add_action(action)
        for resource in [
                kinesisFirehoseBucket.bucket_arn,
                kinesisFirehoseBucket.bucket_arn + "/*",
        ]:
            kinesisFirehoseBucketPolicy.add_resource(resource)

        kinesisFirehoseBucketRole = iam.Role(
            self,
            "kinesisFirehoseBucketRole",
            assumed_by=iam.ServicePrincipal("firehose.amazonaws.com"),
            path="/service/" + name + "/",
        )
        kinesisFirehoseBucketRole.add_to_policy(kinesisFirehoseBucketPolicy)

        kinesisFirehose = firehose.CfnDeliveryStream(
            self,
            "firehose",
            delivery_stream_name=name,
            delivery_stream_type="DirectPut",
            s3_destination_configuration={
                "bucketArn": kinesisFirehoseBucket.bucket_arn,
                "bufferingHints": {
                    "intervalInSeconds": 10 * 60,
                    "sizeInMBs": 16
                },
                "compressionFormat": "GZIP",
                "roleArn": kinesisFirehoseBucketRole.role_arn,
            },
        )

        # Add Firehose Permissions to Task IAM Role
        FIREHOSE_ACTIONS = ["firehose:PutRecord", "firehose:PutRecordBatch"]
        firehosePolicy = iam.PolicyStatement(iam.PolicyStatementEffect.Allow)
        for action in FIREHOSE_ACTIONS:
            firehosePolicy.add_action(action)
        firehosePolicy.add_resource(kinesisFirehose.delivery_stream_arn)
        task.add_to_task_role_policy(firehosePolicy)

        environment = {
            "SECRETS_PATH": secrets_path,
            "TWS_LIVE_PAPER": trading_mode,
            "FIREHOSE_STREAM_NAME": kinesisFirehose.delivery_stream_name,
        }

        ibcContainer = ecs.ContainerDefinition(
            self,
            "container",
            task_definition=task,
            image=ibcImage,
            environment=environment,
            logging=ibcLogger,
            essential=True,
        )

        securityGroup = ec2.SecurityGroup.from_security_group_id(
            self, "task_security_group", security_group_id=security_group_name)

        ibcService = ecs.FargateService(
            self,
            "fargate_service",
            cluster=cluster,
            task_definition=task,
            assign_public_ip=False,
            desired_count=1,
            security_group=securityGroup,
            service_discovery_options=ecs.ServiceDiscoveryOptions(name=name),
            service_name=name,
            vpc_subnets=privateSubnets,
        )
    def __init__(self,
                 scope: core.Construct,
                 id: str,
                 is_qa_stack=False,
                 **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        def qa_maybe(id_str: str) -> str:
            return id_str if not is_qa_stack else id_str + '-qa'

        # Bucket used to deliver events
        delivery_bucket = aws_s3.Bucket(
            self,
            id=qa_maybe('my-event-storage-bucket'),
            bucket_name=qa_maybe('my-event-storage-bucket'),
            block_public_access=aws_s3.BlockPublicAccess.BLOCK_ALL)

        # ---- Below is firehose related code ----
        # Since Firehose is not yet fully covered by higher-level CDK constructs, we define the roles ourselves
        role = aws_iam.Role(
            self,
            id=qa_maybe('my-firehose-delivery-role'),
            assumed_by=aws_iam.ServicePrincipal('firehose.amazonaws.com'))
        delivery_bucket.grant_write(role)

        # Anything without a higher-level CDK construct is still available as a low-level Cfn (CloudFormation) resource
        firehose = aws_kinesisfirehose.CfnDeliveryStream(
            self,
            id=qa_maybe('my-pipeline-firehose'),
            delivery_stream_name=qa_maybe('my-pipeline-firehose'),
            delivery_stream_type='DirectPut',
            s3_destination_configuration={
                'bucketArn': delivery_bucket.bucket_arn,
                'bufferingHints': {
                    'intervalInSeconds':
                    900,  # The recommended setting is 900 (maximum for firehose)
                    'sizeInMBs': 5
                },
                'compressionFormat': 'UNCOMPRESSED',
                'prefix':
                'events/',  # This is the folder the events will end up in
                'errorOutputPrefix':
                'delivery_error/',  # Folder in case of delivery error
                'roleArn': role.role_arn
            })

        # Policy statement required for lambda to be able to put records on the firehose stream
        firehose_policy = aws_iam.PolicyStatement(
            actions=['firehose:DescribeDeliveryStream', 'firehose:PutRecord'],
            effect=aws_iam.Effect.ALLOW,
            resources=[firehose.attr_arn])

        # ---- API GW + Lambda code ----
        api_lambda = aws_lambda.Function(
            self,
            id=qa_maybe('my-api-gw-lambda'),
            runtime=aws_lambda.Runtime.PYTHON_3_8,
            code=aws_lambda.Code.asset('src/lambda_code/api_gw_lambda'),
            handler='main.handler',
            memory_size=128,
            timeout=core.Duration.seconds(5),
            environment={
                'region': self.region,
                'stream_name': firehose.delivery_stream_name
            })
        # Important: add the Firehose PutRecord policy to the Lambda, otherwise there will be access errors
        api_lambda.add_to_role_policy(firehose_policy)

        # Create the API Gateway that receives the data messages and forwards them to the Lambda
        api_gw = aws_apigateway.LambdaRestApi(
            self,
            id=qa_maybe('my-api-gw'),
            handler=api_lambda,
            proxy=False,
            deploy_options=aws_apigateway.StageOptions(
                stage_name='qa' if is_qa_stack else 'prod'))

        # Add API query method
        api_gw.root.add_resource('send_data').add_method('GET',
                                                         api_key_required=True)

        # Generate an API key and add it to a usage plan
        api_key = api_gw.add_api_key(qa_maybe('MyPipelinePublicKey'))
        usage_plan = api_gw.add_usage_plan(
            id=qa_maybe('my-pipeline-usage-plan'),
            name='standard',
            api_key=api_key,
            throttle=aws_apigateway.ThrottleSettings(rate_limit=10,
                                                     burst_limit=2))

        # Add the usage plan to the API GW
        usage_plan.add_api_stage(stage=api_gw.deployment_stage)
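
The handler at src/lambda_code/api_gw_lambda/main.py is not part of the snippet. Given the firehose:PutRecord permission and the region/stream_name environment variables wired in above, a minimal sketch of what main.handler might look like (the payload handling is an assumption):

# main.py (hypothetical sketch of the handler referenced above). Reads the
# delivery stream name from the environment set by the stack and puts one
# newline-terminated JSON record per invocation.
import json
import os

import boto3

firehose_client = boto3.client('firehose', region_name=os.environ['region'])


def handler(event, context):
    # The exact event shape depends on the API Gateway integration mapping;
    # here the whole event is forwarded as a single record.
    record = json.dumps(event) + '\n'
    firehose_client.put_record(
        DeliveryStreamName=os.environ['stream_name'],
        Record={'Data': record.encode('utf-8')})
    return {'status': 'ok'}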
Example No. 16
    def __init__(self, scope: cdk.Construct, construct_id: str,
                 **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        log_bucket_name = cdk.Fn.import_value('sime-log-bucket-name')
        service_role_kdf_to_s3 = cdk.Fn.import_value(
            'siem-kdf-to-s3-role-name')

        cwe_frequency = cdk.CfnParameter(
            self,
            'cweRulesFrequency',
            type='Number',
            description=(
                'How often do you get WorkSpaces Inventory? (every minutes)'),
            default=720)
        kdf_workspaces_name = cdk.CfnParameter(
            self,
            'KdfWorkSpacesName',
            description=(
                'Kinesis Data Firehose Name to deliver workspaces event'),
            default='siem-workspaces-event-to-s3',
        )
        kdf_buffer_size = cdk.CfnParameter(
            self,
            'KdfBufferSize',
            type='Number',
            description='Enter a buffer size between 1 - 128 (MiB)',
            default=1,
            min_value=1,
            max_value=128)
        kdf_buffer_interval = cdk.CfnParameter(
            self,
            'KdfBufferInterval',
            type='Number',
            description='Enter a buffer interval between 60 - 900 (seconds.)',
            default=60,
            min_value=60,
            max_value=900)

        role_get_workspaces_inventory = aws_iam.Role(
            self,
            'getWorkspacesInventoryRole',
            role_name='siem-get-workspaces-inventory-role',
            inline_policies={
                'describe-workspaces':
                aws_iam.PolicyDocument(statements=[
                    aws_iam.PolicyStatement(
                        actions=['workspaces:Describe*'],
                        resources=['*'],
                        sid='DescribeWorkSpacesPolicyGeneratedBySeimCfn')
                ]),
                'firehose-to-s3':
                aws_iam.PolicyDocument(statements=[
                    aws_iam.PolicyStatement(
                        actions=['s3:PutObject'],
                        resources=[f'arn:aws:s3:::{log_bucket_name}/*'],
                        sid='FirehoseToS3PolicyGeneratedBySeimCfn')
                ])
            },
            managed_policies=[
                aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AWSLambdaBasicExecutionRole'),
            ],
            assumed_by=aws_iam.ServicePrincipal('lambda.amazonaws.com'))

        # Lambda Functions to get workspaces inventory
        lambda_func = aws_lambda.Function(
            self,
            'lambdaGetWorkspacesInventory',
            runtime=aws_lambda.Runtime.PYTHON_3_8,
            code=aws_lambda.InlineCode(LAMBDA_GET_WORKSPACES_INVENTORY),
            function_name='siem-get-workspaces-inventory',
            description='SIEM: get workspaces inventory',
            handler='index.lambda_handler',
            timeout=cdk.Duration.seconds(300),
            role=role_get_workspaces_inventory,
            environment={'log_bucket_name': log_bucket_name})
        rule = aws_events.Rule(self,
                               'eventBridgeRuleWorkSpaceInventory',
                               rule_name='siem-workspaces-inventory-to-lambda',
                               schedule=aws_events.Schedule.rate(
                                   cdk.Duration.minutes(
                                       cwe_frequency.value_as_number)))
        rule.add_target(aws_events_targets.LambdaFunction(lambda_func))

        kdf_to_s3 = aws_kinesisfirehose.CfnDeliveryStream(
            self,
            "KDFForWorkSpacesEvent",
            delivery_stream_name=kdf_workspaces_name.value_as_string,
            s3_destination_configuration=CDS.
            S3DestinationConfigurationProperty(
                bucket_arn=f'arn:aws:s3:::{log_bucket_name}',
                prefix=f'AWSLogs/{cdk.Aws.ACCOUNT_ID}/WorkSpaces/Event/',
                compression_format='GZIP',
                buffering_hints=CDS.BufferingHintsProperty(
                    interval_in_seconds=kdf_buffer_interval.value_as_number,
                    size_in_m_bs=kdf_buffer_size.value_as_number),
                role_arn=(f'arn:aws:iam::{cdk.Aws.ACCOUNT_ID}:role/'
                          f'service-role/{service_role_kdf_to_s3}')))

        pattern = aws_events.EventPattern(detail_type=["WorkSpaces Access"],
                                          source=['aws.workspaces'])

        aws_events.Rule(
            self,
            'eventBridgeRuleWorkSpacesEvent',
            event_pattern=pattern,
            rule_name='siem-workspaces-event-to-kdf',
            targets=[aws_events_targets.KinesisFirehoseStream(kdf_to_s3)])
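
LAMBDA_GET_WORKSPACES_INVENTORY is supplied as an inline-code string that is not reproduced here. Based on the workspaces:Describe* and s3:PutObject permissions and the log_bucket_name environment variable, a hedged sketch of what that inventory function might look like (the object key layout is illustrative):

# Hypothetical body of LAMBDA_GET_WORKSPACES_INVENTORY (the inline code string
# is not reproduced in the example). Dumps the WorkSpaces inventory as JSON
# lines into the SIEM log bucket.
import datetime
import json
import os

import boto3


def lambda_handler(event, context):
    # Collect the full WorkSpaces inventory (DescribeWorkspaces is paginated).
    workspaces = []
    paginator = boto3.client('workspaces').get_paginator('describe_workspaces')
    for page in paginator.paginate():
        workspaces.extend(page['Workspaces'])

    # Write the inventory as JSON lines to the SIEM log bucket.
    now = datetime.datetime.utcnow()
    key = ('AWSLogs/WorkSpaces/Inventory/'
           f'{now:%Y/%m/%d}/workspaces-inventory-{now:%H%M%S}.json')
    body = '\n'.join(json.dumps(ws, default=str) for ws in workspaces)
    boto3.client('s3').put_object(Bucket=os.environ['log_bucket_name'],
                                  Key=key,
                                  Body=body.encode('utf-8'))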
Example No. 17
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        # vpc_name = self.node.try_get_context("vpc_name")
        # vpc = aws_ec2.Vpc.from_lookup(self, "ExistingVPC",
        #   is_default=True,
        #   vpc_name=vpc_name)
        vpc = aws_ec2.Vpc(
            self,
            "FirehoseToS3VPC",
            max_azs=2,
            gateway_endpoints={
                "S3":
                aws_ec2.GatewayVpcEndpointOptions(
                    service=aws_ec2.GatewayVpcEndpointAwsService.S3)
            })

        S3_BUCKET_SUFFIX = ''.join(
            random.sample((string.ascii_lowercase + string.digits), k=7))
        s3_bucket = s3.Bucket(
            self,
            "s3bucket",
            removal_policy=cdk.RemovalPolicy.
            DESTROY,  #XXX: Default: cdk.RemovalPolicy.RETAIN - The bucket will be orphaned
            bucket_name="firehose-to-s3-{region}-{suffix}".format(
                region=cdk.Aws.REGION, suffix=S3_BUCKET_SUFFIX))

        FIREHOSE_STREAM_NAME = cdk.CfnParameter(
            self,
            'FirehoseStreamName',
            type='String',
            description='kinesis data firehose stream name',
            default='PUT-S3-{}'.format(''.join(
                random.sample((string.ascii_letters), k=5))))

        FIREHOSE_BUFFER_SIZE = cdk.CfnParameter(
            self,
            'FirehoseBufferSize',
            type='Number',
            description='kinesis data firehose buffer size',
            min_value=1,
            max_value=128,
            default=128)

        FIREHOSE_BUFFER_INTERVAL = cdk.CfnParameter(
            self,
            'FirehoseBufferInterval',
            type='Number',
            description='kinesis data firehose buffer interval',
            min_value=60,
            max_value=300,
            default=60)

        FIREHOSE_LAMBDA_BUFFER_SIZE = cdk.CfnParameter(
            self,
            'FirehoseLambdaBufferSize',
            type='Number',
            description=
            'kinesis data firehose buffer size for AWS Lambda to transform records',
            min_value=1,
            max_value=3,
            default=3)

        FIREHOSE_LAMBDA_BUFFER_INTERVAL = cdk.CfnParameter(
            self,
            'FirehoseLambdaBufferInterval',
            type='Number',
            description=
            'kinesis data firehose buffer interval for AWS Lambda to transform records',
            min_value=60,
            max_value=900,
            default=300)

        FIREHOSE_LAMBDA_NUMBER_OF_RETRIES = cdk.CfnParameter(
            self,
            'FirehoseLambdaNumberOfRetries',
            type='Number',
            description=
            'Number of retries for AWS Lambda to transform records in kinesis data firehose',
            min_value=1,
            max_value=5,
            default=3)

        FIREHOSE_TO_S3_PREFIX = cdk.CfnParameter(
            self,
            'FirehosePrefix',
            type='String',
            description='kinesis data firehose S3 prefix')

        FIREHOSE_TO_S3_ERROR_OUTPUT_PREFIX = cdk.CfnParameter(
            self,
            'FirehoseErrorOutputPrefix',
            type='String',
            description='kinesis data firehose S3 error output prefix',
            default=
            'error/year=!{timestamp:yyyy}/month=!{timestamp:MM}/day=!{timestamp:dd}/hour=!{timestamp:HH}/!{firehose:error-output-type}'
        )

        METADATA_EXTRACT_LAMBDA_FN_NAME = "MetadataExtractor"
        metadata_extract_lambda_fn = aws_lambda.Function(
            self,
            "MetadataExtractor",
            runtime=aws_lambda.Runtime.PYTHON_3_7,
            function_name="MetadataExtractor",
            handler="metadata_extractor.lambda_handler",
            description="Extract partition keys from records",
            code=aws_lambda.Code.from_asset(
                os.path.join(os.path.dirname(__file__), 'src/main/python')),
            timeout=cdk.Duration.minutes(5))

        log_group = aws_logs.LogGroup(
            self,
            "MetadataExtractorLogGroup",
            #XXX: Circular dependency between resources occurs
            # if aws_lambda.Function.function_name is used
            # instead of literal name of lambda function such as "MetadataExtractor"
            log_group_name="/aws/lambda/{}".format(
                METADATA_EXTRACT_LAMBDA_FN_NAME),
            retention=aws_logs.RetentionDays.THREE_DAYS,
            removal_policy=cdk.RemovalPolicy.DESTROY)
        log_group.grant_write(metadata_extract_lambda_fn)

        firehose_role_policy_doc = aws_iam.PolicyDocument()

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "resources": [
                        s3_bucket.bucket_arn, "{}/*".format(
                            s3_bucket.bucket_arn)
                    ],
                    "actions": [
                        "s3:AbortMultipartUpload", "s3:GetBucketLocation",
                        "s3:GetObject", "s3:ListBucket",
                        "s3:ListBucketMultipartUploads", "s3:PutObject"
                    ]
                }))

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                resources=["*"],
                actions=[
                    "ec2:DescribeVpcs", "ec2:DescribeVpcAttribute",
                    "ec2:DescribeSubnets", "ec2:DescribeSecurityGroups",
                    "ec2:DescribeNetworkInterfaces",
                    "ec2:CreateNetworkInterface",
                    "ec2:CreateNetworkInterfacePermission",
                    "ec2:DeleteNetworkInterface"
                ]))

        #XXX: https://docs.aws.amazon.com/ko_kr/cdk/latest/guide/tokens.html
        # String-encoded tokens:
        #  Avoid manipulating the string in other ways. For example,
        #  taking a substring of a string is likely to break the string token.
        firehose_log_group_name = f"/aws/kinesisfirehose/{FIREHOSE_STREAM_NAME.value_as_string}"
        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                #XXX: The ARN will be formatted as follows:
                # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
                resources=[
                    self.format_arn(
                        service="logs",
                        resource="log-group",
                        resource_name="{}:log-stream:*".format(
                            firehose_log_group_name),
                        arn_format=cdk.ArnFormat.COLON_RESOURCE_NAME)
                ],
                actions=["logs:PutLogEvents"]))

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    #XXX: The ARN will be formatted as follows:
                    # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
                    "resources": [
                        self.format_arn(
                            partition="aws",
                            service="lambda",
                            region=cdk.Aws.REGION,
                            account=cdk.Aws.ACCOUNT_ID,
                            resource="function",
                            resource_name="{}:*".format(
                                metadata_extract_lambda_fn.function_name),
                            arn_format=cdk.ArnFormat.COLON_RESOURCE_NAME)
                    ],
                    "actions": [
                        "lambda:InvokeFunction",
                        "lambda:GetFunctionConfiguration"
                    ]
                }))

        firehose_role = aws_iam.Role(
            self,
            "KinesisFirehoseServiceRole",
            role_name="KinesisFirehoseServiceRole-{stream_name}-{region}".
            format(stream_name=FIREHOSE_STREAM_NAME.value_as_string,
                   region=cdk.Aws.REGION),
            assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"),
            path='/service-role/',
            #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
            inline_policies={"firehose_role_policy": firehose_role_policy_doc})

        lambda_proc = cfn.ProcessorProperty(
            type="Lambda",
            parameters=[
                cfn.ProcessorParameterProperty(
                    parameter_name="LambdaArn",
                    parameter_value='{}:{}'.format(
                        metadata_extract_lambda_fn.function_arn,
                        metadata_extract_lambda_fn.current_version.version)),
                cfn.ProcessorParameterProperty(
                    parameter_name="NumberOfRetries",
                    parameter_value=FIREHOSE_LAMBDA_NUMBER_OF_RETRIES.
                    value_as_string),
                cfn.ProcessorParameterProperty(
                    parameter_name="RoleArn",
                    parameter_value=firehose_role.role_arn),
                cfn.ProcessorParameterProperty(
                    parameter_name="BufferSizeInMBs",
                    parameter_value=FIREHOSE_LAMBDA_BUFFER_SIZE.value_as_string
                ),
                cfn.ProcessorParameterProperty(
                    parameter_name="BufferIntervalInSeconds",
                    parameter_value=FIREHOSE_LAMBDA_BUFFER_INTERVAL.
                    value_as_string)
            ])

        record_deaggregation_proc = cfn.ProcessorProperty(
            type="RecordDeAggregation",
            parameters=[
                cfn.ProcessorParameterProperty(parameter_name="SubRecordType",
                                               parameter_value="JSON")
            ])

        #XXX: Adding a new line delimiter when delivering data to S3
        # This is also particularly useful when dynamic partitioning is applied to aggregated data
        # because multirecord deaggregation (which must be applied to aggregated data
        # before it can be dynamically partitioned) removes new lines from records as part of the parsing process.
        # https://docs.aws.amazon.com/firehose/latest/dev/dynamic-partitioning.html#dynamic-partitioning-new-line-delimiter
        append_delim_to_record_proc = cfn.ProcessorProperty(
            type="AppendDelimiterToRecord", parameters=[])

        firehose_processing_config = cfn.ProcessingConfigurationProperty(
            enabled=True,
            processors=[
                record_deaggregation_proc, append_delim_to_record_proc,
                lambda_proc
            ])

        ext_s3_dest_config = cfn.ExtendedS3DestinationConfigurationProperty(
            bucket_arn=s3_bucket.bucket_arn,
            role_arn=firehose_role.role_arn,
            buffering_hints={
                "intervalInSeconds": FIREHOSE_BUFFER_INTERVAL.value_as_number,
                "sizeInMBs": FIREHOSE_BUFFER_SIZE.value_as_number
            },
            cloud_watch_logging_options={
                "enabled": True,
                "logGroupName": firehose_log_group_name,
                "logStreamName": "DestinationDelivery"
            },
            compression_format=
            "UNCOMPRESSED",  # [GZIP | HADOOP_SNAPPY | Snappy | UNCOMPRESSED | ZIP]
            data_format_conversion_configuration={"enabled": False},
            dynamic_partitioning_configuration={
                "enabled": True,
                "retryOptions": {
                    "durationInSeconds": 300
                }
            },
            error_output_prefix=FIREHOSE_TO_S3_ERROR_OUTPUT_PREFIX.
            value_as_string,
            prefix=FIREHOSE_TO_S3_PREFIX.value_as_string,
            processing_configuration=firehose_processing_config)

        firehose_to_s3_delivery_stream = aws_kinesisfirehose.CfnDeliveryStream(
            self,
            "FirehoseToS3",
            delivery_stream_name=FIREHOSE_STREAM_NAME.value_as_string,
            delivery_stream_type="DirectPut",
            extended_s3_destination_configuration=ext_s3_dest_config,
            tags=[{
                "key": "Name",
                "value": FIREHOSE_STREAM_NAME.value_as_string
            }])

        cdk.CfnOutput(self,
                      'StackName',
                      value=self.stack_name,
                      export_name='StackName')
        cdk.CfnOutput(self,
                      '{}_S3DestBucket'.format(self.stack_name),
                      value=s3_bucket.bucket_name,
                      export_name='S3DestBucket')
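
The transformation Lambda's source under src/main/python is not shown. Dynamic partitioning with a Lambda processor relies on the standard Firehose transformation contract: each output record echoes recordId, sets a result, returns the base64 data, and attaches partition keys under metadata.partitionKeys. A hedged sketch of such a metadata_extractor.lambda_handler, with illustrative partition-key names:

# metadata_extractor.py (hypothetical sketch; the real asset under
# src/main/python is not shown). Implements the Firehose transformation
# contract and attaches partition keys for dynamic partitioning.
import base64
import json


def lambda_handler(event, context):
    output = []
    for record in event['records']:
        payload = json.loads(base64.b64decode(record['data']))
        output.append({
            'recordId': record['recordId'],
            'result': 'Ok',
            'data': record['data'],
            # Key names must match the partitionKeyFromLambda namespace used in
            # the delivery stream's S3 prefix (a stack parameter in this example).
            'metadata': {
                'partitionKeys': {
                    'year': str(payload.get('year', 'unknown')),
                    'month': str(payload.get('month', 'unknown')),
                }
            }
        })
    return {'records': output}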
Example No. 18
    def __init__(self, scope: core.Stack, id: str, **kwargs):
        super().__init__(scope, id, **kwargs)

        self.output_bucket = aws_s3.Bucket(
            self, "BucketTwitterStreamOutput",
            bucket_name = self.stack_name,
        )

        self.bucket_url = self.output_bucket.bucket_regional_domain_name

        # Because the Kinesis Firehose bindings map directly to CloudFormation, we have to create the IAM policy/role and attach it on our own
        self.iam_role = aws_iam.Role(
            self, "IAMRoleTwitterStreamKinesisFHToS3",
            role_name="KinesisFirehoseToS3-{}".format(self.stack_name),
            assumed_by=aws_iam.ServicePrincipal(service='firehose.amazonaws.com'),
        )

        # S3 bucket actions
        self.s3_iam_policy_statement = aws_iam.PolicyStatement()
        actions = ["s3:GetBucketLocation", "s3:GetObject", "s3:ListBucket", "s3:ListBucketMultipartUploads", "s3:PutObject"]
        for action in actions:
            self.s3_iam_policy_statement.add_actions(action)
        self.s3_iam_policy_statement.add_resources(self.output_bucket.bucket_arn)
        self.s3_iam_policy_statement.add_resources(self.output_bucket.bucket_arn + "/*")

        # CW error log setup
        self.s3_error_logs_group = aws_logs.LogGroup(
            self, "S3ErrorLogsGroup",
            log_group_name="{}-s3-errors".format(self.stack_name)
        )

        self.s3_error_logs_stream = aws_logs.LogStream(
            self, "S3ErrorLogsStream",
            log_group=self.s3_error_logs_group,
            log_stream_name='s3Backup'
        )

        self.firehose = aws_kinesisfirehose.CfnDeliveryStream(
            self, "FirehoseTwitterStream",
            delivery_stream_name = "{}-raw".format(self.stack_name),
            delivery_stream_type = "DirectPut",
            s3_destination_configuration={
                'bucketArn': self.output_bucket.bucket_arn,
                'bufferingHints': {
                    'intervalInSeconds': 120,
                    'sizeInMBs': 10
                },
                'compressionFormat': 'UNCOMPRESSED',
                'roleArn': self.iam_role.role_arn,
                'cloudWatchLoggingOptions': {
                    'enabled': True,
                    'logGroupName': "{}-raw".format(self.stack_name),
                    'logStreamName': 's3BackupRaw'
                },
                'prefix': 'twitter-raw/'
            },
        )

        # TODO: Only attach what's needed for this policy, right now i'm lazy and attaching all policies
        self.iam_policy = aws_iam.Policy(
            self, "IAMPolicyTwitterStreamKinesisFHToS3",
            policy_name="KinesisFirehoseToS3-{}".format(self.stack_name),
            statements=[self.s3_iam_policy_statement],
        )

        self.iam_policy.attach_to_role(self.iam_role)

        # Because the Kinesis Firehose bindings map directly to CloudFormation, we have to create the IAM policy/role and attach it on our own
        self.curator_firehose = aws_kinesisfirehose.CfnDeliveryStream(
            self, "CuratorFirehoseStream",
            delivery_stream_name = "{}-curator".format(self.stack_name),
            delivery_stream_type = "DirectPut",
            s3_destination_configuration={
                'bucketArn': self.output_bucket.bucket_arn,
                'bufferingHints': {
                    'intervalInSeconds': 120,
                    'sizeInMBs': 10
                },
                'compressionFormat': 'UNCOMPRESSED',
                'roleArn': self.iam_role.role_arn,
                'cloudWatchLoggingOptions': {
                    'enabled': True,
                    'logGroupName': "{}-curator".format(self.stack_name),
                    'logStreamName': 's3BackupCurator'
                },
                'prefix': 'twitter-curated/'
            },
        )

        def zip_package():
            cwd = os.getcwd()
            file_name = 'curator-lambda.zip'
            zip_file = cwd + '/' + file_name

            os.chdir('src/')
            sh.zip('-r9', zip_file, '.')
            os.chdir(cwd)

            return file_name, zip_file

        _, zip_file = zip_package()

        self.twitter_stream_curator_lambda_function = aws_lambda.Function(
            self, "TwitterStreamCuratorLambdaFunction",
            function_name="{}-curator".format(self.stack_name),
            code=aws_lambda.AssetCode(zip_file),
            handler="sentiment_analysis.lambda_handler",
            runtime=aws_lambda.Runtime.PYTHON_3_7,
            tracing=aws_lambda.Tracing.ACTIVE,
            description="Triggers from S3 PUT events for twitter stream data and transforms it into clean JSON with sentiment analysis attached",
            environment={
                "STACK_NAME": self.stack_name,
                "FIREHOSE_STREAM": self.curator_firehose.delivery_stream_name
            },
            memory_size=128,
            timeout=core.Duration.seconds(120),
            log_retention=aws_logs.RetentionDays.ONE_WEEK,
        )

        # Permission to talk to comprehend for sentiment analysis
        self.comprehend_iam_policy_statement = aws_iam.PolicyStatement()
        self.comprehend_iam_policy_statement.add_actions('comprehend:*')
        self.comprehend_iam_policy_statement.add_all_resources()
        self.twitter_stream_curator_lambda_function.add_to_role_policy(self.comprehend_iam_policy_statement)

        # Permission to put in kinesis firehose
        self.curator_firehose_iam_policy_statement = aws_iam.PolicyStatement()
        self.curator_firehose_iam_policy_statement.add_actions('firehose:Put*')
        self.curator_firehose_iam_policy_statement.add_resources(self.curator_firehose.attr_arn)
        self.twitter_stream_curator_lambda_function.add_to_role_policy(self.curator_firehose_iam_policy_statement)

        # Grant the curator lambda read access to the raw objects in the output bucket
        self.output_bucket.grant_read(self.twitter_stream_curator_lambda_function)

        self.twitter_stream_curator_lambda_function.add_event_source(
            aws_lambda_event_sources.S3EventSource(
                bucket=self.output_bucket,
                events=[
                    aws_s3.EventType.OBJECT_CREATED
                ],
                filters=[
                    aws_s3.NotificationKeyFilter(
                        prefix="twitter-raw/"
                    )
                ]
            )
        )
Example no. 19
    def __init__(self, scope: core.Construct, construct_id: str,
                 **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        kda_src_bucket_name = core.CfnParameter(
            self,
            "kda_src_bucket_name",
            type="String",
            description=
            "The name of the Amazon S3 bucket where uploaded files will be stored."
        )

        kda_output_bucket_name = core.CfnParameter(
            self,
            "kda_output_bucket_name",
            type="String",
            description=
            "The name of the Amazon S3 bucket where KDA output delivered via Firehose will be stored."
        )

        sourceStreamName = core.CfnParameter(
            self,
            "sourceStreamName",
            type="String",
            description="The name of the Kinesis Data Stream.",
            default="BikeRideGenerator")

        deliveryStreamName = core.CfnParameter(
            self,
            "deliveryStreamName",
            type="String",
            description="The name of the Kinesis Firehose output stream.",
            default="BikeAnalyticsOutput")

        # Create S3 buckets
        kda_src_bucket = s3.Bucket(
            self,
            "kda_src_bucket",
            bucket_name=kda_src_bucket_name.value_as_string,
            versioned=False,
            removal_policy=core.RemovalPolicy.DESTROY)
        kda_output_bucket = s3.Bucket(
            self,
            "kda_output_bucket",
            bucket_name=kda_output_bucket_name.value_as_string,
            versioned=False,
            removal_policy=core.RemovalPolicy.DESTROY)

        # create Kinesis Source Stream
        sourceStream = kds.Stream(self,
                                  "sourceStream",
                                  stream_name=sourceStreamName.value_as_string,
                                  shard_count=10)

        # Firehose delivery role (aws_cdk.aws_iam.Role)
        fhIAMRole = iam.Role(
            self,
            "fhIAMRole",
            assumed_by=iam.ServicePrincipal('firehose.amazonaws.com'),
            role_name="BikeRideFirehoseDeliveryRole",
            description="FireHose Delivery S3 Role")

        fhIAMRole.add_to_policy(
            iam.PolicyStatement(resources=[
                kda_output_bucket.bucket_arn,
                kda_output_bucket.bucket_arn + "/*"
            ],
                                actions=['s3:*']))

        # create Firehose delivery stream
        fhS3Delivery = fh.CfnDeliveryStream.ExtendedS3DestinationConfigurationProperty(
            bucket_arn=kda_output_bucket.bucket_arn,
            role_arn=fhIAMRole.role_arn)

        deliveryStream = fh.CfnDeliveryStream(
            self,
            "deliveryStream",
            delivery_stream_name=deliveryStreamName.value_as_string,
            extended_s3_destination_configuration=fhS3Delivery)

        # ec2 instance
        # VPC
        vpc = ec2.Vpc(self,
                      "KDA-VPC",
                      nat_gateways=0,
                      subnet_configuration=[
                          ec2.SubnetConfiguration(
                              name="public", subnet_type=ec2.SubnetType.PUBLIC)
                      ])

        # AMI
        amzn_linux = ec2.MachineImage.latest_amazon_linux(
            generation=ec2.AmazonLinuxGeneration.AMAZON_LINUX_2,
            edition=ec2.AmazonLinuxEdition.STANDARD,
            virtualization=ec2.AmazonLinuxVirt.HVM,
            storage=ec2.AmazonLinuxStorage.GENERAL_PURPOSE)

        # Instance Role and SSM Managed Policy
        ec2role = iam.Role(
            self,
            "InstanceSSM",
            assumed_by=iam.ServicePrincipal("ec2.amazonaws.com"))

        ec2role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                "service-role/AmazonEC2RoleforSSM"))

        ec2role.add_to_policy(
            iam.PolicyStatement(resources=[sourceStream.stream_arn],
                                actions=['kinesis:*']))
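        # Build the instance user data: export the Kinesis stream name system-wide,
        # then append the bootstrap script. `user_data_file` is assumed to be defined
        # elsewhere in this module (e.g. read from a local shell script).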
        user_data = "#!/bin/bash\n"
        user_data += "echo export KINESIS_STREAM=" + sourceStreamName.value_as_string + " | sudo tee -a /etc/profile\n"
        user_data += "source /etc/profile\n"
        user_data += user_data_file

        # Instance
        instance = ec2.Instance(self,
                                "Instance",
                                instance_type=ec2.InstanceType("t3.small"),
                                machine_image=amzn_linux,
                                vpc=vpc,
                                role=ec2role,
                                user_data=ec2.UserData.custom(user_data))
Example no. 20
    def _build_firehose_delivery_stream(self, *, stack, vpc_db_instance):
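        # This builder assumes the surrounding class already defines
        # self.firehose_role, self.firehose_bucket, self.elastic_search,
        # self.kfh_security_group and self.lambda_transform_fn, and that
        # vpc_db_instance exposes the target VPC as vpc_db_instance.vpc.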

        self.kfh_log_group = logs.LogGroup(
            stack,
            "exampledeliverystreamloggroup",
            log_group_name="/aws/kinesisfirehose/exampledeliverystream")

        self.kfh_es_log_stream = logs.LogStream(stack,
                                                "deliverytoeslogstream",
                                                log_stream_name="deliverytoes",
                                                log_group=self.kfh_log_group)

        self.kfh_instance = kfh.CfnDeliveryStream(
            stack,
            'exampledeliverystream',
            delivery_stream_type='DirectPut',
            elasticsearch_destination_configuration={
                "indexName": "webappclickstream",
                "cloudWatchLoggingOptions": {
                    "enabled": True,
                    "logGroupName": "/aws/kinesisfirehose/exampledeliverystream",
                    "logStreamName": "deliverytoes"
                },
                "roleArn": self.firehose_role.role_arn,
                "s3Configuration": {
                    "bucketArn": self.firehose_bucket.bucket_arn,
                    "roleArn": self.firehose_role.role_arn
                },
                "domainArn": self.elastic_search.attr_arn,
                "vpcConfiguration": {
                    "roleArn":
                    self.firehose_role.role_arn,
                    "securityGroupIds":
                    [self.kfh_security_group.security_group_id],
                    "subnetIds": [
                        vpc_db_instance.vpc.select_subnets(
                            subnet_type=ec2.SubnetType.PRIVATE).subnet_ids[0]
                    ]
                },
                "bufferingHints": {
                    "intervalInSeconds": 60,
                    "sizeInMBs": 1
                },
                "processingConfiguration": {
                    "enabled": True,
                    "processors": [{
                        "type": "Lambda",
                        "parameters": [{
                            "parameterName": "LambdaArn",
                            "parameterValue":
                            self.lambda_transform_fn.function_arn
                        }]
                    }]
                }
            })
Example no. 21
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        vpc = aws_ec2.Vpc(
            self,
            "OctemberVPC",
            max_azs=2,
            #      subnet_configuration=[{
            #          "cidrMask": 24,
            #          "name": "Public",
            #          "subnetType": aws_ec2.SubnetType.PUBLIC,
            #        },
            #        {
            #          "cidrMask": 24,
            #          "name": "Private",
            #          "subnetType": aws_ec2.SubnetType.PRIVATE
            #        },
            #        {
            #          "cidrMask": 28,
            #          "name": "Isolated",
            #          "subnetType": aws_ec2.SubnetType.ISOLATED,
            #          "reserved": True
            #        }
            #      ],
            gateway_endpoints={
                "S3":
                aws_ec2.GatewayVpcEndpointOptions(
                    service=aws_ec2.GatewayVpcEndpointAwsService.S3)
            })

        dynamo_db_endpoint = vpc.add_gateway_endpoint(
            "DynamoDbEndpoint",
            service=aws_ec2.GatewayVpcEndpointAwsService.DYNAMODB)

        s3_bucket = s3.Bucket(
            self,
            "s3bucket",
            bucket_name="octember-bizcard-{region}-{account}".format(
                region=core.Aws.REGION, account=core.Aws.ACCOUNT_ID))

        api = apigw.RestApi(
            self,
            "BizcardImageUploader",
            rest_api_name="BizcardImageUploader",
            description="This service serves uploading bizcard images into s3.",
            endpoint_types=[apigw.EndpointType.REGIONAL],
            binary_media_types=["image/png", "image/jpg"],
            deploy=True,
            deploy_options=apigw.StageOptions(stage_name="v1"))

        rest_api_role = aws_iam.Role(
            self,
            "ApiGatewayRoleForS3",
            role_name="ApiGatewayRoleForS3FullAccess",
            assumed_by=aws_iam.ServicePrincipal("apigateway.amazonaws.com"),
            managed_policies=[
                aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                    "AmazonS3FullAccess")
            ])

        list_objects_responses = [
            apigw.IntegrationResponse(
                status_code="200",
                #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_apigateway/IntegrationResponse.html#aws_cdk.aws_apigateway.IntegrationResponse.response_parameters
                # The response parameters from the backend response that API Gateway sends to the method response.
                # Use the destination as the key and the source as the value:
                #  - The destination must be an existing response parameter in the MethodResponse property.
                #  - The source must be an existing method request parameter or a static value.
                response_parameters={
                    'method.response.header.Timestamp':
                    'integration.response.header.Date',
                    'method.response.header.Content-Length':
                    'integration.response.header.Content-Length',
                    'method.response.header.Content-Type':
                    'integration.response.header.Content-Type'
                }),
            apigw.IntegrationResponse(status_code="400",
                                      selection_pattern=r"4\d{2}"),
            apigw.IntegrationResponse(status_code="500",
                                      selection_pattern=r"5\d{2}")
        ]

        list_objects_integration_options = apigw.IntegrationOptions(
            credentials_role=rest_api_role,
            integration_responses=list_objects_responses)

        get_s3_integration = apigw.AwsIntegration(
            service="s3",
            integration_http_method="GET",
            path='/',
            options=list_objects_integration_options)

        api.root.add_method(
            "GET",
            get_s3_integration,
            authorization_type=apigw.AuthorizationType.IAM,
            api_key_required=False,
            method_responses=[
                apigw.MethodResponse(
                    status_code="200",
                    response_parameters={
                        'method.response.header.Timestamp': False,
                        'method.response.header.Content-Length': False,
                        'method.response.header.Content-Type': False
                    },
                    response_models={'application/json': apigw.EmptyModel()}),
                apigw.MethodResponse(status_code="400"),
                apigw.MethodResponse(status_code="500")
            ],
            request_parameters={'method.request.header.Content-Type': False})

        get_s3_folder_integration_options = apigw.IntegrationOptions(
            credentials_role=rest_api_role,
            integration_responses=list_objects_responses,
            #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_apigateway/IntegrationOptions.html#aws_cdk.aws_apigateway.IntegrationOptions.request_parameters
            # Specify request parameters as key-value pairs (string-to-string mappings), with a destination as the key and a source as the value.
            # The source must be an existing method request parameter or a static value.
            request_parameters={
                "integration.request.path.bucket": "method.request.path.folder"
            })

        get_s3_folder_integration = apigw.AwsIntegration(
            service="s3",
            integration_http_method="GET",
            path="{bucket}",
            options=get_s3_folder_integration_options)

        s3_folder = api.root.add_resource('{folder}')
        s3_folder.add_method(
            "GET",
            get_s3_folder_integration,
            authorization_type=apigw.AuthorizationType.IAM,
            api_key_required=False,
            method_responses=[
                apigw.MethodResponse(
                    status_code="200",
                    response_parameters={
                        'method.response.header.Timestamp': False,
                        'method.response.header.Content-Length': False,
                        'method.response.header.Content-Type': False
                    },
                    response_models={'application/json': apigw.EmptyModel()}),
                apigw.MethodResponse(status_code="400"),
                apigw.MethodResponse(status_code="500")
            ],
            request_parameters={
                'method.request.header.Content-Type': False,
                'method.request.path.folder': True
            })

        get_s3_item_integration_options = apigw.IntegrationOptions(
            credentials_role=rest_api_role,
            integration_responses=list_objects_responses,
            request_parameters={
                "integration.request.path.bucket":
                "method.request.path.folder",
                "integration.request.path.object": "method.request.path.item"
            })

        get_s3_item_integration = apigw.AwsIntegration(
            service="s3",
            integration_http_method="GET",
            path="{bucket}/{object}",
            options=get_s3_item_integration_options)

        s3_item = s3_folder.add_resource('{item}')
        s3_item.add_method(
            "GET",
            get_s3_item_integration,
            authorization_type=apigw.AuthorizationType.IAM,
            api_key_required=False,
            method_responses=[
                apigw.MethodResponse(
                    status_code="200",
                    response_parameters={
                        'method.response.header.Timestamp': False,
                        'method.response.header.Content-Length': False,
                        'method.response.header.Content-Type': False
                    },
                    response_models={'application/json': apigw.EmptyModel()}),
                apigw.MethodResponse(status_code="400"),
                apigw.MethodResponse(status_code="500")
            ],
            request_parameters={
                'method.request.header.Content-Type': False,
                'method.request.path.folder': True,
                'method.request.path.item': True
            })

        put_s3_item_integration_options = apigw.IntegrationOptions(
            credentials_role=rest_api_role,
            integration_responses=[
                apigw.IntegrationResponse(status_code="200"),
                apigw.IntegrationResponse(status_code="400",
                                          selection_pattern=r"4\d{2}"),
                apigw.IntegrationResponse(status_code="500",
                                          selection_pattern=r"5\d{2}")
            ],
            request_parameters={
                "integration.request.header.Content-Type":
                "method.request.header.Content-Type",
                "integration.request.path.bucket":
                "method.request.path.folder",
                "integration.request.path.object": "method.request.path.item"
            })

        put_s3_item_integration = apigw.AwsIntegration(
            service="s3",
            integration_http_method="PUT",
            path="{bucket}/{object}",
            options=put_s3_item_integration_options)

        s3_item.add_method(
            "PUT",
            put_s3_item_integration,
            authorization_type=apigw.AuthorizationType.IAM,
            api_key_required=False,
            method_responses=[
                apigw.MethodResponse(
                    status_code="200",
                    response_parameters={
                        'method.response.header.Content-Type': False
                    },
                    response_models={'application/json': apigw.EmptyModel()}),
                apigw.MethodResponse(status_code="400"),
                apigw.MethodResponse(status_code="500")
            ],
            request_parameters={
                'method.request.header.Content-Type': False,
                'method.request.path.folder': True,
                'method.request.path.item': True
            })

        ddb_table = dynamodb.Table(
            self,
            "BizcardImageMetaInfoDdbTable",
            table_name="OctemberBizcardImgMeta",
            partition_key=dynamodb.Attribute(
                name="image_id", type=dynamodb.AttributeType.STRING),
            billing_mode=dynamodb.BillingMode.PROVISIONED,
            read_capacity=15,
            write_capacity=5)

        img_kinesis_stream = kinesis.Stream(
            self, "BizcardImagePath", stream_name="octember-bizcard-image")

        # create lambda function
        trigger_textract_lambda_fn = _lambda.Function(
            self,
            "TriggerTextExtractorFromImage",
            runtime=_lambda.Runtime.PYTHON_3_7,
            function_name="TriggerTextExtractorFromImage",
            handler="trigger_text_extract_from_s3_image.lambda_handler",
            description="Trigger to extract text from an image in S3",
            code=_lambda.Code.asset(
                "./src/main/python/TriggerTextExtractFromS3Image"),
            environment={
                'REGION_NAME': core.Aws.REGION,
                'DDB_TABLE_NAME': ddb_table.table_name,
                'KINESIS_STREAM_NAME': img_kinesis_stream.stream_name
            },
            timeout=core.Duration.minutes(5))

        ddb_table_rw_policy_statement = aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=[ddb_table.table_arn],
            actions=[
                "dynamodb:BatchGetItem", "dynamodb:Describe*",
                "dynamodb:List*", "dynamodb:GetItem", "dynamodb:Query",
                "dynamodb:Scan", "dynamodb:BatchWriteItem",
                "dynamodb:DeleteItem", "dynamodb:PutItem",
                "dynamodb:UpdateItem", "dax:Describe*", "dax:List*",
                "dax:GetItem", "dax:BatchGetItem", "dax:Query", "dax:Scan",
                "dax:BatchWriteItem", "dax:DeleteItem", "dax:PutItem",
                "dax:UpdateItem"
            ])

        trigger_textract_lambda_fn.add_to_role_policy(
            ddb_table_rw_policy_statement)
        trigger_textract_lambda_fn.add_to_role_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=[img_kinesis_stream.stream_arn],
                                    actions=[
                                        "kinesis:Get*", "kinesis:List*",
                                        "kinesis:Describe*",
                                        "kinesis:PutRecord",
                                        "kinesis:PutRecords"
                                    ]))

        # assign notification for the s3 event type (ex: OBJECT_CREATED)
        s3_event_filter = s3.NotificationKeyFilter(prefix="bizcard-raw-img/",
                                                   suffix=".jpg")
        s3_event_source = S3EventSource(s3_bucket,
                                        events=[s3.EventType.OBJECT_CREATED],
                                        filters=[s3_event_filter])
        trigger_textract_lambda_fn.add_event_source(s3_event_source)

        #XXX: https://github.com/aws/aws-cdk/issues/2240
        # The log group is created explicitly (instead of passing
        # log_retention=aws_logs.RetentionDays.THREE_DAYS to the Function constructor)
        # to avoid CDK creating extra Lambda functions with names like
        # LogRetentionaae0aa3c5b4d4f87b02d85b201efdd8a
        log_group = aws_logs.LogGroup(
            self,
            "TriggerTextractLogGroup",
            log_group_name="/aws/lambda/TriggerTextExtractorFromImage",
            retention=aws_logs.RetentionDays.THREE_DAYS)
        log_group.grant_write(trigger_textract_lambda_fn)

        text_kinesis_stream = kinesis.Stream(
            self, "BizcardTextData", stream_name="octember-bizcard-txt")

        textract_lambda_fn = _lambda.Function(
            self,
            "GetTextFromImage",
            runtime=_lambda.Runtime.PYTHON_3_7,
            function_name="GetTextFromImage",
            handler="get_text_from_s3_image.lambda_handler",
            description="extract text from an image in S3",
            code=_lambda.Code.asset("./src/main/python/GetTextFromS3Image"),
            environment={
                'REGION_NAME': core.Aws.REGION,
                'DDB_TABLE_NAME': ddb_table.table_name,
                'KINESIS_STREAM_NAME': text_kinesis_stream.stream_name
            },
            timeout=core.Duration.minutes(5))

        textract_lambda_fn.add_to_role_policy(ddb_table_rw_policy_statement)
        textract_lambda_fn.add_to_role_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=[text_kinesis_stream.stream_arn],
                                    actions=[
                                        "kinesis:Get*", "kinesis:List*",
                                        "kinesis:Describe*",
                                        "kinesis:PutRecord",
                                        "kinesis:PutRecords"
                                    ]))

        textract_lambda_fn.add_to_role_policy(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "resources": [
                        s3_bucket.bucket_arn, "{}/*".format(
                            s3_bucket.bucket_arn)
                    ],
                    "actions": [
                        "s3:AbortMultipartUpload", "s3:GetBucketLocation",
                        "s3:GetObject", "s3:ListBucket",
                        "s3:ListBucketMultipartUploads", "s3:PutObject"
                    ]
                }))

        textract_lambda_fn.add_to_role_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["*"],
                                    actions=["textract:*"]))

        img_kinesis_event_source = KinesisEventSource(
            img_kinesis_stream,
            batch_size=100,
            starting_position=_lambda.StartingPosition.LATEST)
        textract_lambda_fn.add_event_source(img_kinesis_event_source)

        log_group = aws_logs.LogGroup(
            self,
            "GetTextFromImageLogGroup",
            log_group_name="/aws/lambda/GetTextFromImage",
            retention=aws_logs.RetentionDays.THREE_DAYS)
        log_group.grant_write(textract_lambda_fn)

        sg_use_bizcard_es = aws_ec2.SecurityGroup(
            self,
            "BizcardSearchClientSG",
            vpc=vpc,
            allow_all_outbound=True,
            description=
            'security group for octember bizcard elasticsearch client',
            security_group_name='use-octember-bizcard-es')
        core.Tags.of(sg_use_bizcard_es).add('Name', 'use-octember-bizcard-es')

        sg_bizcard_es = aws_ec2.SecurityGroup(
            self,
            "BizcardSearchSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for octember bizcard elasticsearch',
            security_group_name='octember-bizcard-es')
        core.Tags.of(sg_bizcard_es).add('Name', 'octember-bizcard-es')

        sg_bizcard_es.add_ingress_rule(peer=sg_bizcard_es,
                                       connection=aws_ec2.Port.all_tcp(),
                                       description='octember-bizcard-es')
        sg_bizcard_es.add_ingress_rule(peer=sg_use_bizcard_es,
                                       connection=aws_ec2.Port.all_tcp(),
                                       description='use-octember-bizcard-es')

        sg_ssh_access = aws_ec2.SecurityGroup(
            self,
            "BastionHostSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for bastion host',
            security_group_name='octember-bastion-host-sg')
        core.Tags.of(sg_ssh_access).add('Name', 'octember-bastion-host')
        sg_ssh_access.add_ingress_rule(peer=aws_ec2.Peer.any_ipv4(),
                                       connection=aws_ec2.Port.tcp(22),
                                       description='ssh access')

        bastion_host = aws_ec2.BastionHostLinux(
            self,
            "BastionHost",
            vpc=vpc,
            instance_type=aws_ec2.InstanceType('t3.nano'),
            security_group=sg_ssh_access,
            subnet_selection=aws_ec2.SubnetSelection(
                subnet_type=aws_ec2.SubnetType.PUBLIC))
        bastion_host.instance.add_security_group(sg_use_bizcard_es)

        #XXX: aws cdk elastsearch example - https://github.com/aws/aws-cdk/issues/2873
        es_cfn_domain = aws_elasticsearch.CfnDomain(
            self,
            'BizcardSearch',
            elasticsearch_cluster_config={
                "dedicatedMasterCount": 3,
                "dedicatedMasterEnabled": True,
                "dedicatedMasterType": "t2.medium.elasticsearch",
                "instanceCount": 2,
                "instanceType": "t2.medium.elasticsearch",
                "zoneAwarenessEnabled": True
            },
            ebs_options={
                "ebsEnabled": True,
                "volumeSize": 10,
                "volumeType": "gp2"
            },
            domain_name="octember-bizcard",
            elasticsearch_version="7.9",
            encryption_at_rest_options={"enabled": False},
            access_policies={
                "Version":
                "2012-10-17",
                "Statement": [{
                    "Effect":
                    "Allow",
                    "Principal": {
                        "AWS": "*"
                    },
                    "Action":
                    ["es:Describe*", "es:List*", "es:Get*", "es:ESHttp*"],
                    "Resource":
                    self.format_arn(service="es",
                                    resource="domain",
                                    resource_name="octember-bizcard/*")
                }]
            },
            snapshot_options={"automatedSnapshotStartHour": 17},
            vpc_options={
                "securityGroupIds": [sg_bizcard_es.security_group_id],
                "subnetIds":
                vpc.select_subnets(
                    subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids
            })
        core.Tags.of(es_cfn_domain).add('Name', 'octember-bizcard-es')

        s3_lib_bucket_name = self.node.try_get_context("lib_bucket_name")
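        # The bucket name is supplied via CDK context (cdk.json or
        # `cdk deploy -c lib_bucket_name=...`); the bucket holds the pre-built
        # Lambda layer archives referenced below.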

        #XXX: https://github.com/aws/aws-cdk/issues/1342
        s3_lib_bucket = s3.Bucket.from_bucket_name(self, id,
                                                   s3_lib_bucket_name)
        es_lib_layer = _lambda.LayerVersion(
            self,
            "ESLib",
            layer_version_name="es-lib",
            compatible_runtimes=[_lambda.Runtime.PYTHON_3_7],
            code=_lambda.Code.from_bucket(s3_lib_bucket,
                                          "var/octember-es-lib.zip"))

        redis_lib_layer = _lambda.LayerVersion(
            self,
            "RedisLib",
            layer_version_name="redis-lib",
            compatible_runtimes=[_lambda.Runtime.PYTHON_3_7],
            code=_lambda.Code.from_bucket(s3_lib_bucket,
                                          "var/octember-redis-lib.zip"))

        #XXX: Deploy lambda in VPC - https://github.com/aws/aws-cdk/issues/1342
        upsert_to_es_lambda_fn = _lambda.Function(
            self,
            "UpsertBizcardToES",
            runtime=_lambda.Runtime.PYTHON_3_7,
            function_name="UpsertBizcardToElasticSearch",
            handler="upsert_bizcard_to_es.lambda_handler",
            description="Upsert bizcard text into elasticsearch",
            code=_lambda.Code.asset("./src/main/python/UpsertBizcardToES"),
            environment={
                'ES_HOST': es_cfn_domain.attr_domain_endpoint,
                'ES_INDEX': 'octember_bizcard',
                'ES_TYPE': 'bizcard'
            },
            timeout=core.Duration.minutes(5),
            layers=[es_lib_layer],
            security_groups=[sg_use_bizcard_es],
            vpc=vpc)

        text_kinesis_event_source = KinesisEventSource(
            text_kinesis_stream,
            batch_size=99,
            starting_position=_lambda.StartingPosition.LATEST)
        upsert_to_es_lambda_fn.add_event_source(text_kinesis_event_source)

        log_group = aws_logs.LogGroup(
            self,
            "UpsertBizcardToESLogGroup",
            log_group_name="/aws/lambda/UpsertBizcardToElasticSearch",
            retention=aws_logs.RetentionDays.THREE_DAYS)
        log_group.grant_write(upsert_to_es_lambda_fn)

        firehose_role_policy_doc = aws_iam.PolicyDocument()
        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "resources": [
                        s3_bucket.bucket_arn, "{}/*".format(
                            s3_bucket.bucket_arn)
                    ],
                    "actions": [
                        "s3:AbortMultipartUpload", "s3:GetBucketLocation",
                        "s3:GetObject", "s3:ListBucket",
                        "s3:ListBucketMultipartUploads", "s3:PutObject"
                    ]
                }))

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["*"],
                                    actions=[
                                        "glue:GetTable",
                                        "glue:GetTableVersion",
                                        "glue:GetTableVersions"
                                    ]))

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=[text_kinesis_stream.stream_arn],
                                    actions=[
                                        "kinesis:DescribeStream",
                                        "kinesis:GetShardIterator",
                                        "kinesis:GetRecords"
                                    ]))

        firehose_log_group_name = "/aws/kinesisfirehose/octember-bizcard-txt-to-s3"
        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                #XXX: The ARN will be formatted as follows:
                # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
                resources=[
                    self.format_arn(service="logs",
                                    resource="log-group",
                                    resource_name="{}:log-stream:*".format(
                                        firehose_log_group_name),
                                    sep=":")
                ],
                actions=["logs:PutLogEvents"]))

        firehose_role = aws_iam.Role(
            self,
            "FirehoseDeliveryRole",
            role_name="FirehoseDeliveryRole",
            assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"),
            #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
            inline_policies={"firehose_role_policy": firehose_role_policy_doc})

        bizcard_text_to_s3_delivery_stream = aws_kinesisfirehose.CfnDeliveryStream(
            self,
            "BizcardTextToS3",
            delivery_stream_name="octember-bizcard-txt-to-s3",
            delivery_stream_type="KinesisStreamAsSource",
            kinesis_stream_source_configuration={
                "kinesisStreamArn": text_kinesis_stream.stream_arn,
                "roleArn": firehose_role.role_arn
            },
            extended_s3_destination_configuration={
                "bucketArn": s3_bucket.bucket_arn,
                "bufferingHints": {
                    "intervalInSeconds": 60,
                    "sizeInMBs": 1
                },
                "cloudWatchLoggingOptions": {
                    "enabled": True,
                    "logGroupName": firehose_log_group_name,
                    "logStreamName": "S3Delivery"
                },
                "compressionFormat": "GZIP",
                "prefix": "bizcard-text/",
                "roleArn": firehose_role.role_arn
            })

        sg_use_bizcard_es_cache = aws_ec2.SecurityGroup(
            self,
            "BizcardSearchCacheClientSG",
            vpc=vpc,
            allow_all_outbound=True,
            description=
            'security group for octember bizcard search query cache client',
            security_group_name='use-octember-bizcard-es-cache')
        core.Tags.of(sg_use_bizcard_es_cache).add(
            'Name', 'use-octember-bizcard-es-cache')

        sg_bizcard_es_cache = aws_ec2.SecurityGroup(
            self,
            "BizcardSearchCacheSG",
            vpc=vpc,
            allow_all_outbound=True,
            description=
            'security group for octember bizcard search query cache',
            security_group_name='octember-bizcard-es-cache')
        core.Tags.of(sg_bizcard_es_cache).add('Name',
                                              'octember-bizcard-es-cache')

        sg_bizcard_es_cache.add_ingress_rule(
            peer=sg_use_bizcard_es_cache,
            connection=aws_ec2.Port.tcp(6379),
            description='use-octember-bizcard-es-cache')

        es_query_cache_subnet_group = aws_elasticache.CfnSubnetGroup(
            self,
            "QueryCacheSubnetGroup",
            description="subnet group for octember-bizcard-es-cache",
            subnet_ids=vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids,
            cache_subnet_group_name='octember-bizcard-es-cache')

        es_query_cache = aws_elasticache.CfnCacheCluster(
            self,
            "BizcardSearchQueryCache",
            cache_node_type="cache.t3.small",
            num_cache_nodes=1,
            engine="redis",
            engine_version="5.0.5",
            auto_minor_version_upgrade=False,
            cluster_name="octember-bizcard-es-cache",
            snapshot_retention_limit=3,
            snapshot_window="17:00-19:00",
            preferred_maintenance_window="mon:19:00-mon:20:30",
            #XXX: Do not use a reference for "cache_subnet_group_name" - https://github.com/aws/aws-cdk/issues/3098
            #cache_subnet_group_name=es_query_cache_subnet_group.cache_subnet_group_name, # Redis cluster goes to wrong VPC
            cache_subnet_group_name='octember-bizcard-es-cache',
            vpc_security_group_ids=[sg_bizcard_es_cache.security_group_id])

        #XXX: If you're going to launch your cluster in an Amazon VPC, you need to create a subnet group before you start creating a cluster.
        # https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-elasticache-cache-cluster.html#cfn-elasticache-cachecluster-cachesubnetgroupname
        es_query_cache.add_depends_on(es_query_cache_subnet_group)

        #XXX: add more than 2 security groups
        # https://github.com/aws/aws-cdk/blob/ea10f0d141a48819ec0000cd7905feda993870a9/packages/%40aws-cdk/aws-lambda/lib/function.ts#L387
        # https://github.com/aws/aws-cdk/issues/1555
        # https://github.com/aws/aws-cdk/pull/5049
        bizcard_search_lambda_fn = _lambda.Function(
            self,
            "BizcardSearchServer",
            runtime=_lambda.Runtime.PYTHON_3_7,
            function_name="BizcardSearchProxy",
            handler="es_search_bizcard.lambda_handler",
            description="Proxy server to search bizcard text",
            code=_lambda.Code.asset("./src/main/python/SearchBizcard"),
            environment={
                'ES_HOST': es_cfn_domain.attr_domain_endpoint,
                'ES_INDEX': 'octember_bizcard',
                'ES_TYPE': 'bizcard',
                'ELASTICACHE_HOST': es_query_cache.attr_redis_endpoint_address
            },
            timeout=core.Duration.minutes(1),
            layers=[es_lib_layer, redis_lib_layer],
            security_groups=[sg_use_bizcard_es, sg_use_bizcard_es_cache],
            vpc=vpc)

        #XXX: create API Gateway + LambdaProxy
        search_api = apigw.LambdaRestApi(
            self,
            "BizcardSearchAPI",
            handler=bizcard_search_lambda_fn,
            proxy=False,
            rest_api_name="BizcardSearch",
            description="This service serves searching bizcard text.",
            endpoint_types=[apigw.EndpointType.REGIONAL],
            deploy=True,
            deploy_options=apigw.StageOptions(stage_name="v1"))

        bizcard_search = search_api.root.add_resource('search')
        bizcard_search.add_method(
            "GET",
            method_responses=[
                apigw.MethodResponse(
                    status_code="200",
                    response_models={'application/json': apigw.EmptyModel()}),
                apigw.MethodResponse(status_code="400"),
                apigw.MethodResponse(status_code="500")
            ])

        sg_use_bizcard_graph_db = aws_ec2.SecurityGroup(
            self,
            "BizcardGraphDbClientSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for octember bizcard graph db client',
            security_group_name='use-octember-bizcard-neptune')
        core.Tags.of(sg_use_bizcard_graph_db).add(
            'Name', 'use-octember-bizcard-neptune')

        sg_bizcard_graph_db = aws_ec2.SecurityGroup(
            self,
            "BizcardGraphDbSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for octember bizcard graph db',
            security_group_name='octember-bizcard-neptune')
        core.Tags.of(sg_bizcard_graph_db).add('Name',
                                              'octember-bizcard-neptune')

        sg_bizcard_graph_db.add_ingress_rule(
            peer=sg_bizcard_graph_db,
            connection=aws_ec2.Port.tcp(8182),
            description='octember-bizcard-neptune')
        sg_bizcard_graph_db.add_ingress_rule(
            peer=sg_use_bizcard_graph_db,
            connection=aws_ec2.Port.tcp(8182),
            description='use-octember-bizcard-neptune')

        bizcard_graph_db_subnet_group = aws_neptune.CfnDBSubnetGroup(
            self,
            "NeptuneSubnetGroup",
            db_subnet_group_description=
            "subnet group for octember-bizcard-neptune",
            subnet_ids=vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids,
            db_subnet_group_name='octember-bizcard-neptune')

        bizcard_graph_db = aws_neptune.CfnDBCluster(
            self,
            "BizcardGraphDB",
            availability_zones=vpc.availability_zones,
            db_subnet_group_name=bizcard_graph_db_subnet_group.
            db_subnet_group_name,
            db_cluster_identifier="octember-bizcard",
            backup_retention_period=1,
            preferred_backup_window="08:45-09:15",
            preferred_maintenance_window="sun:18:00-sun:18:30",
            vpc_security_group_ids=[sg_bizcard_graph_db.security_group_id])
        bizcard_graph_db.add_depends_on(bizcard_graph_db_subnet_group)

        bizcard_graph_db_instance = aws_neptune.CfnDBInstance(
            self,
            "BizcardGraphDBInstance",
            db_instance_class="db.r5.large",
            allow_major_version_upgrade=False,
            auto_minor_version_upgrade=False,
            availability_zone=vpc.availability_zones[0],
            db_cluster_identifier=bizcard_graph_db.db_cluster_identifier,
            db_instance_identifier="octember-bizcard",
            preferred_maintenance_window="sun:18:00-sun:18:30")
        bizcard_graph_db_instance.add_depends_on(bizcard_graph_db)

        bizcard_graph_db_replica_instance = aws_neptune.CfnDBInstance(
            self,
            "BizcardGraphDBReplicaInstance",
            db_instance_class="db.r5.large",
            allow_major_version_upgrade=False,
            auto_minor_version_upgrade=False,
            availability_zone=vpc.availability_zones[-1],
            db_cluster_identifier=bizcard_graph_db.db_cluster_identifier,
            db_instance_identifier="octember-bizcard-replica",
            preferred_maintenance_window="sun:18:00-sun:18:30")
        bizcard_graph_db_replica_instance.add_depends_on(bizcard_graph_db)
        bizcard_graph_db_replica_instance.add_depends_on(
            bizcard_graph_db_instance)

        gremlinpython_lib_layer = _lambda.LayerVersion(
            self,
            "GremlinPythonLib",
            layer_version_name="gremlinpython-lib",
            compatible_runtimes=[_lambda.Runtime.PYTHON_3_7],
            code=_lambda.Code.from_bucket(
                s3_lib_bucket, "var/octember-gremlinpython-lib.zip"))

        #XXX: https://github.com/aws/aws-cdk/issues/1342
        upsert_to_neptune_lambda_fn = _lambda.Function(
            self,
            "UpsertBizcardToGraphDB",
            runtime=_lambda.Runtime.PYTHON_3_7,
            function_name="UpsertBizcardToNeptune",
            handler="upsert_bizcard_to_graph_db.lambda_handler",
            description="Upsert bizcard into neptune",
            code=_lambda.Code.asset(
                "./src/main/python/UpsertBizcardToGraphDB"),
            environment={
                'REGION_NAME': core.Aws.REGION,
                'NEPTUNE_ENDPOINT': bizcard_graph_db.attr_endpoint,
                'NEPTUNE_PORT': bizcard_graph_db.attr_port
            },
            timeout=core.Duration.minutes(5),
            layers=[gremlinpython_lib_layer],
            security_groups=[sg_use_bizcard_graph_db],
            vpc=vpc)

        upsert_to_neptune_lambda_fn.add_event_source(text_kinesis_event_source)

        log_group = aws_logs.LogGroup(
            self,
            "UpsertBizcardToGraphDBLogGroup",
            log_group_name="/aws/lambda/UpsertBizcardToNeptune",
            retention=aws_logs.RetentionDays.THREE_DAYS)
        log_group.grant_write(upsert_to_neptune_lambda_fn)

        sg_use_bizcard_neptune_cache = aws_ec2.SecurityGroup(
            self,
            "BizcardNeptuneCacheClientSG",
            vpc=vpc,
            allow_all_outbound=True,
            description=
            'security group for octember bizcard recommendation query cache client',
            security_group_name='use-octember-bizcard-neptune-cache')
        core.Tags.of(sg_use_bizcard_neptune_cache).add(
            'Name', 'use-octember-bizcard-neptune-cache')

        sg_bizcard_neptune_cache = aws_ec2.SecurityGroup(
            self,
            "BizcardNeptuneCacheSG",
            vpc=vpc,
            allow_all_outbound=True,
            description=
            'security group for octember bizcard recommendation query cache',
            security_group_name='octember-bizcard-neptune-cache')
        core.Tags.of(sg_bizcard_neptune_cache).add(
            'Name', 'octember-bizcard-neptune-cache')

        sg_bizcard_neptune_cache.add_ingress_rule(
            peer=sg_use_bizcard_neptune_cache,
            connection=aws_ec2.Port.tcp(6379),
            description='use-octember-bizcard-neptune-cache')

        recomm_query_cache_subnet_group = aws_elasticache.CfnSubnetGroup(
            self,
            "RecommQueryCacheSubnetGroup",
            description="subnet group for octember-bizcard-neptune-cache",
            subnet_ids=vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids,
            cache_subnet_group_name='octember-bizcard-neptune-cache')

        recomm_query_cache = aws_elasticache.CfnCacheCluster(
            self,
            "BizcardRecommQueryCache",
            cache_node_type="cache.t3.small",
            num_cache_nodes=1,
            engine="redis",
            engine_version="5.0.5",
            auto_minor_version_upgrade=False,
            cluster_name="octember-bizcard-neptune-cache",
            snapshot_retention_limit=3,
            snapshot_window="17:00-19:00",
            preferred_maintenance_window="mon:19:00-mon:20:30",
            #XXX: Do not use a reference for "cache_subnet_group_name" - https://github.com/aws/aws-cdk/issues/3098
            #cache_subnet_group_name=recomm_query_cache_subnet_group.cache_subnet_group_name, # Redis cluster goes to wrong VPC
            cache_subnet_group_name='octember-bizcard-neptune-cache',
            vpc_security_group_ids=[
                sg_bizcard_neptune_cache.security_group_id
            ])

        recomm_query_cache.add_depends_on(recomm_query_cache_subnet_group)

        bizcard_recomm_lambda_fn = _lambda.Function(
            self,
            "BizcardRecommender",
            runtime=_lambda.Runtime.PYTHON_3_7,
            function_name="BizcardRecommender",
            handler="neptune_recommend_bizcard.lambda_handler",
            description="This service serves PYMK(People You May Know).",
            code=_lambda.Code.asset("./src/main/python/RecommendBizcard"),
            environment={
                'REGION_NAME': core.Aws.REGION,
                'NEPTUNE_ENDPOINT': bizcard_graph_db.attr_read_endpoint,
                'NEPTUNE_PORT': bizcard_graph_db.attr_port,
                'ELASTICACHE_HOST':
                recomm_query_cache.attr_redis_endpoint_address
            },
            timeout=core.Duration.minutes(1),
            layers=[gremlinpython_lib_layer, redis_lib_layer],
            security_groups=[
                sg_use_bizcard_graph_db, sg_use_bizcard_neptune_cache
            ],
            vpc=vpc)

        #XXX: create API Gateway + LambdaProxy
        recomm_api = apigw.LambdaRestApi(
            self,
            "BizcardRecommendAPI",
            handler=bizcard_recomm_lambda_fn,
            proxy=False,
            rest_api_name="BizcardRecommend",
            description="This service serves PYMK(People You May Know).",
            endpoint_types=[apigw.EndpointType.REGIONAL],
            deploy=True,
            deploy_options=apigw.StageOptions(stage_name="v1"))

        bizcard_recomm = recomm_api.root.add_resource('pymk')
        bizcard_recomm.add_method(
            "GET",
            method_responses=[
                apigw.MethodResponse(
                    status_code="200",
                    response_models={'application/json': apigw.EmptyModel()}),
                apigw.MethodResponse(status_code="400"),
                apigw.MethodResponse(status_code="500")
            ])

        sagemaker_notebook_role_policy_doc = aws_iam.PolicyDocument()
        sagemaker_notebook_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "resources": [
                        "arn:aws:s3:::aws-neptune-notebook",
                        "arn:aws:s3:::aws-neptune-notebook/*"
                    ],
                    "actions": ["s3:GetObject", "s3:ListBucket"]
                }))

        sagemaker_notebook_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "resources": [
                        "arn:aws:neptune-db:{region}:{account}:{cluster_id}/*".
                        format(region=core.Aws.REGION,
                               account=core.Aws.ACCOUNT_ID,
                               cluster_id=bizcard_graph_db.
                               attr_cluster_resource_id)
                    ],
                    "actions": ["neptune-db:connect"]
                }))

        sagemaker_notebook_role = aws_iam.Role(
            self,
            'SageMakerNotebookForNeptuneWorkbenchRole',
            role_name='AWSNeptuneNotebookRole-OctemberBizcard',
            assumed_by=aws_iam.ServicePrincipal('sagemaker.amazonaws.com'),
            #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
            inline_policies={
                'AWSNeptuneNotebook': sagemaker_notebook_role_policy_doc
            })

        neptune_wb_lifecycle_content = '''#!/bin/bash
sudo -u ec2-user -i <<'EOF'
echo "export GRAPH_NOTEBOOK_AUTH_MODE=DEFAULT" >> ~/.bashrc
echo "export GRAPH_NOTEBOOK_HOST={NeptuneClusterEndpoint}" >> ~/.bashrc
echo "export GRAPH_NOTEBOOK_PORT={NeptuneClusterPort}" >> ~/.bashrc
echo "export NEPTUNE_LOAD_FROM_S3_ROLE_ARN=''" >> ~/.bashrc
echo "export AWS_REGION={AWS_Region}" >> ~/.bashrc
aws s3 cp s3://aws-neptune-notebook/graph_notebook.tar.gz /tmp/graph_notebook.tar.gz
rm -rf /tmp/graph_notebook
tar -zxvf /tmp/graph_notebook.tar.gz -C /tmp
/tmp/graph_notebook/install.sh
EOF
'''.format(NeptuneClusterEndpoint=bizcard_graph_db.attr_endpoint,
           NeptuneClusterPort=bizcard_graph_db.attr_port,
           AWS_Region=core.Aws.REGION)

        neptune_wb_lifecycle_config_prop = aws_sagemaker.CfnNotebookInstanceLifecycleConfig.NotebookInstanceLifecycleHookProperty(
            content=core.Fn.base64(neptune_wb_lifecycle_content))

        neptune_wb_lifecycle_config = aws_sagemaker.CfnNotebookInstanceLifecycleConfig(
            self,
            'NeptuneWorkbenchLifeCycleConfig',
            notebook_instance_lifecycle_config_name=
            'AWSNeptuneWorkbenchOctemberBizcardLCConfig',
            on_start=[neptune_wb_lifecycle_config_prop])

        neptune_workbench = aws_sagemaker.CfnNotebookInstance(
            self,
            'NeptuneWorkbench',
            instance_type='ml.t2.medium',
            role_arn=sagemaker_notebook_role.role_arn,
            lifecycle_config_name=neptune_wb_lifecycle_config.
            notebook_instance_lifecycle_config_name,
            notebook_instance_name='OctemberBizcard-NeptuneWorkbench',
            root_access='Disabled',
            security_group_ids=[sg_use_bizcard_graph_db.security_group_id],
            subnet_id=bizcard_graph_db_subnet_group.subnet_ids[0])
Example no. 22
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # The code that defines your stack goes here
        table = aws_dynamodb.Table(self, "DashboardModel",
            partition_key=aws_dynamodb.Attribute(name="Pk", type=aws_dynamodb.AttributeType.STRING),
            sort_key=aws_dynamodb.Attribute(name="Sk", type=aws_dynamodb.AttributeType.STRING),
            billing_mode=aws_dynamodb.BillingMode.PAY_PER_REQUEST)
        
        kds_input_stream = aws_kinesis.Stream(self, "kds_dashboard_input_stream",
            shard_count=1, 
            stream_name="kds_dashboard_input_stream")
        
        kds_output_stream = aws_kinesis.Stream(self, "kds_dashboard_output_stream",
            shard_count=1, 
            stream_name="kds_dashboard_output_stream")

        # Create an ingest bucket for this stack
        ingest_bucket = aws_s3.Bucket(self,'dreis_dboard_ingest_bucket')

        kfh_service_role = aws_iam.Role(self, 'KFH_Dashboard_Role',
            assumed_by=aws_iam.ServicePrincipal('firehose.amazonaws.com')
        )

        kfh_policy_stmt = aws_iam.PolicyStatement(
            actions=["*"],
            resources=["*"]
        )

        kfh_service_role.add_to_policy(kfh_policy_stmt)

        # Create the Firehose delivery stream for this stack
        kfh_source = aws_kinesisfirehose.CfnDeliveryStream.KinesisStreamSourceConfigurationProperty(
            kinesis_stream_arn=kds_input_stream.stream_arn,
            role_arn=kfh_service_role.role_arn
        )

        kfh_datalake = aws_kinesisfirehose.CfnDeliveryStream(self, "kfh_datalake",
            s3_destination_configuration=aws_kinesisfirehose.CfnDeliveryStream.S3DestinationConfigurationProperty(
                bucket_arn=ingest_bucket.bucket_arn,
                buffering_hints=aws_kinesisfirehose.CfnDeliveryStream.BufferingHintsProperty(
                    interval_in_seconds=60,
                    size_in_m_bs=5),
                compression_format="UNCOMPRESSED",
                role_arn=kfh_service_role.role_arn
                ),
            delivery_stream_type="KinesisStreamAsSource",
            kinesis_stream_source_configuration=kfh_source
        )

        kda_service_role = aws_iam.Role(self, 'KDA_Dashboard_Role',
            assumed_by=aws_iam.ServicePrincipal('kinesisanalytics.amazonaws.com')
        )

        kda_policy_stmt = aws_iam.PolicyStatement(
            actions=["*"],
            resources=["*"]
        )

        kda_service_role.add_to_policy(kda_policy_stmt)

        # Kinesis Analytics doesn't allow dashes in column names, so the dash-named
        # JSON fields below are mapped to underscore column names
        col1 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty(
            name="state",
            sql_type="VARCHAR(2)",
            mapping="$.state"
        )

        col2 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty(
            name="event_time",
            sql_type="TIMESTAMP",
            mapping="$.event-time"
        )
        
        col3 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty(
            name="region",  
            sql_type="VARCHAR(12)",
            mapping="$.region"
        )

        col4 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty(
            name="store_id",
            sql_type="INTEGER",
            mapping="$.store-id"
        )

        col5 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty(
            name="kpi_1",
            sql_type="INTEGER",
            mapping="$.kpi-1"
        )
        
        col6 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty(
            name="kpi_2",
            sql_type="INTEGER",
            mapping="$.kpi-2"
        )

        col7 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty(
            name="kpi_3",
            sql_type="INTEGER",
            mapping="$.kpi-3"
        )

        col8 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty(
            name="kpi_4",
            sql_type="INTEGER",
            mapping="$.kpi-4"
        )

        col9 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty(
            name="kpi_5",
            sql_type="INTEGER",
            mapping="$.kpi-5"
        )

        schema = aws_kinesisanalytics.CfnApplication.InputSchemaProperty(
            record_columns=[col2, col1, col3, col4, col5, col6, col7, col8, col9],
            record_encoding="UTF-8",
            record_format=aws_kinesisanalytics.CfnApplication.RecordFormatProperty(
                record_format_type="JSON",
                mapping_parameters=aws_kinesisanalytics.CfnApplication.MappingParametersProperty(
                    json_mapping_parameters=aws_kinesisanalytics.CfnApplication.JSONMappingParametersProperty(
                        record_row_path="$"
                    )
                )
            )
        )

        kda_is = aws_kinesisanalytics.CfnApplication.KinesisStreamsInputProperty(
            resource_arn=kds_input_stream.stream_arn,
            role_arn=kda_service_role.role_arn
        )

        ip = aws_kinesisanalytics.CfnApplication.InputProperty(
            name_prefix="SOURCE_SQL_STREAM",
            input_schema=schema,
            kinesis_streams_input=kda_is
        )

        # Kinesis Analytics SQL: fan the source stream out into per-store, per-state and per-region aggregates
        application_code = "CREATE OR REPLACE STREAM \"DESTINATION_SQL_STREAM_BY_STORE\" (\"region\" VARCHAR(10), \"state\" VARCHAR(2), \"store-id\" INTEGER, kpi_1_sum INTEGER, kpi_2_sum INTEGER, ingest_time TIMESTAMP);" + \
            "CREATE OR REPLACE STREAM \"DESTINATION_SQL_STREAM_BY_STATE\" (\"region\" VARCHAR(10), \"state\" VARCHAR(2), kpi_1_sum INTEGER, kpi_2_sum INTEGER, ingest_time TIMESTAMP);" + \
            "CREATE OR REPLACE STREAM \"DESTINATION_SQL_STREAM_BY_REGION\" (\"region\" VARCHAR(10), kpi_1_sum INTEGER, kpi_2_sum INTEGER, ingest_time TIMESTAMP);" + \
            "CREATE OR REPLACE PUMP \"STREAM_PUMP_BY_STORE\" AS INSERT INTO \"DESTINATION_SQL_STREAM_BY_STORE\" " + \
            "SELECT STREAM \"region\", \"state\", \"store-id\", SUM(\"kpi-1\") AS kpi_1_sum, SUM(\"kpi-2\") AS kpi_2_sum, FLOOR(\"SOURCE_SQL_STREAM_001\".APPROXIMATE_ARRIVAL_TIME TO MINUTE) AS ingest_time " + \
            "FROM \"SOURCE_SQL_STREAM_001\" " + \
            "GROUP BY \"region\", \"state\", \"store-id\", FLOOR(\"SOURCE_SQL_STREAM_001\".APPROXIMATE_ARRIVAL_TIME TO MINUTE), FLOOR((\"SOURCE_SQL_STREAM_001\".ROWTIME - TIMESTAMP '1970-01-01 00:00:00') SECOND / 10 TO SECOND);" + \
            "CREATE OR REPLACE PUMP \"STREAM_PUMP_BY_STATE\" AS INSERT INTO \"DESTINATION_SQL_STREAM_BY_STATE\" " + \
            "SELECT STREAM \"region\", \"state\", SUM(\"kpi-1\") AS kpi_1_sum, SUM(\"kpi-2\") AS kpi_2_sum, FLOOR(\"SOURCE_SQL_STREAM_001\".APPROXIMATE_ARRIVAL_TIME TO MINUTE) AS ingest_time " + \
            "FROM \"SOURCE_SQL_STREAM_001\" " + \
            "GROUP BY \"region\", \"state\", FLOOR(\"SOURCE_SQL_STREAM_001\".APPROXIMATE_ARRIVAL_TIME TO MINUTE), FLOOR((\"SOURCE_SQL_STREAM_001\".ROWTIME - TIMESTAMP '1970-01-01 00:00:00') SECOND / 10 TO SECOND);" + \
            "CREATE OR REPLACE PUMP \"STREAM_PUMP_BY_REGION\" AS INSERT INTO \"DESTINATION_SQL_STREAM_BY_REGION\" " + \
            "SELECT STREAM \"region\", SUM(\"kpi-1\") AS kpi_1_sum, SUM(\"kpi-2\") AS kpi_2_sum, FLOOR(\"SOURCE_SQL_STREAM_001\".APPROXIMATE_ARRIVAL_TIME TO MINUTE) AS ingest_time " + \
            "FROM \"SOURCE_SQL_STREAM_001\" " + \
            "GROUP BY \"region\", FLOOR(\"SOURCE_SQL_STREAM_001\".APPROXIMATE_ARRIVAL_TIME TO MINUTE), FLOOR((\"SOURCE_SQL_STREAM_001\".ROWTIME - TIMESTAMP '1970-01-01 00:00:00') SECOND / 10 TO SECOND);"

        kda_app = aws_kinesisanalytics.CfnApplication(self, "kda_agg",
            inputs=[ip],
            application_code=application_code, 
            application_description="Aggregating data", 
            application_name="DashboardMetricsAggregator"
        )

        kda_output_prop = aws_kinesisanalytics.CfnApplicationOutput.KinesisStreamsOutputProperty(
            resource_arn=kds_output_stream.stream_arn,
            role_arn=kda_service_role.role_arn
        )

        kda_dest_schema = aws_kinesisanalytics.CfnApplicationOutput.DestinationSchemaProperty(
            record_format_type="JSON"
        )

        kda_output_prop_by_store = aws_kinesisanalytics.CfnApplicationOutput.OutputProperty(
            destination_schema=kda_dest_schema,
            kinesis_streams_output=kda_output_prop,
            name="DESTINATION_SQL_STREAM_BY_STORE"
        )

        kda_output_prop_by_state = aws_kinesisanalytics.CfnApplicationOutput.OutputProperty(
            destination_schema=kda_dest_schema,
            kinesis_streams_output=kda_output_prop,
            name="DESTINATION_SQL_STREAM_BY_STATE"
        )

        kda_output_prop_by_region = aws_kinesisanalytics.CfnApplicationOutput.OutputProperty(
            destination_schema=kda_dest_schema,
            kinesis_streams_output=kda_output_prop,
            name="DESTINATION_SQL_STREAM_BY_REGION"
        )

        # The outputs reference the application by name, so make the dependency on the application explicit
        kda_app_output_store = aws_kinesisanalytics.CfnApplicationOutput(self, "kda_agg_output_store",
            application_name="DashboardMetricsAggregator",
            output=kda_output_prop_by_store
        )
        kda_app_output_store.add_depends_on(kda_app)

        kda_app_output_state = aws_kinesisanalytics.CfnApplicationOutput(self, "kda_agg_output_state",
            application_name="DashboardMetricsAggregator",
            output=kda_output_prop_by_state
        )
        kda_app_output_state.add_depends_on(kda_app)

        kda_app_output_region = aws_kinesisanalytics.CfnApplicationOutput(self, "kda_agg_output_region",
            application_name="DashboardMetricsAggregator",
            output=kda_output_prop_by_region
        )
        kda_app_output_region.add_depends_on(kda_app)

        lambda_agg_function = aws_lambda.Function(self, "AggDataLambda",
            runtime=aws_lambda.Runtime.PYTHON_3_7,
            handler="lambda_function.lambda_handler",
            code=aws_lambda.Code.asset("../models/dashboard/lambdas/aggregate_data_lambda"),
            timeout=Duration.minutes(5))

        lambda_agg_function.add_environment("DDB_TABLE_DASHBOARD", table.table_name)

        lambda_agg_function.add_to_role_policy(aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            actions=[
                "kinesis:*"
            ],
            resources=["*"]
        ))

        table.grant_read_write_data(lambda_agg_function)

        kes = aws_lambda_event_sources.KinesisEventSource(kds_output_stream,
            starting_position=aws_lambda.StartingPosition.TRIM_HORIZON,
            batch_size=50, 
            #max_batching_window=100
        )

        lambda_agg_function.add_event_source(kes)

        core.CfnOutput(
            self, "TableName_Dashboard",
            description="Table name for Dashboard",
            value=table.table_name
        )

        core.CfnOutput(
            self, "BucketName_Dashboard",
            description="Bucket name",
            value=ingest_bucket.bucket_name
        )

        core.CfnOutput(
            self, "KinesisInputStream_Dashboard",
            description="Kinesis input for Dashboard",
            value=kds_input_stream.stream_name
        )

        core.CfnOutput(
            self, "KinesisOutputStream_Dashboard",
            description="Kinesis output for Dashboard",
            value=kds_output_stream.stream_name
        )
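
A minimal test producer for the stack above can be sketched with boto3: it writes one JSON record to the input stream so that the Kinesis Analytics mappings ("$.state", "$.event-time", "$.store-id", "$.kpi-1", ...) resolve. The stream name and field values are taken from the example; credentials and region are assumed to come from the environment, and this is an illustration rather than part of the stack.

import datetime
import json

import boto3


def put_sample_record(stream_name: str = "kds_dashboard_input_stream") -> None:
    # One record shaped like the KDA input schema defined above
    record = {
        "state": "WA",
        "event-time": datetime.datetime.utcnow().isoformat(sep=" ", timespec="seconds"),
        "region": "us-west",
        "store-id": 42,
        "kpi-1": 7,
        "kpi-2": 3,
        "kpi-3": 0,
        "kpi-4": 0,
        "kpi-5": 1,
    }
    boto3.client("kinesis").put_record(
        StreamName=stream_name,
        Data=json.dumps(record).encode("utf-8"),
        PartitionKey=str(record["store-id"]),
    )


if __name__ == "__main__":
    put_sample_record()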
        
Example No. 23
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        API_ARN = self.node.try_get_context("api_arn")
        RATE = self.node.try_get_context("rate")

        if not API_ARN or not RATE:
            logger.error(
                f"Required context variables for {id} were not provided!")
        else:
            # Create the WAF IPSets
            doslist = wafv2.CfnIPSet(
                self,
                "Ext06DosIpSet",
                addresses=[],
                ip_address_version="IPV4",
                scope="REGIONAL",
                name="Ext06DosIpSet",
            )

            suslist = wafv2.CfnIPSet(
                self,
                "Ext06SusIpSet",
                addresses=[],
                ip_address_version="IPV4",
                scope="REGIONAL",
                name="Ext06SusIpSet",
            )

            # Create a WAF
            waf = wafv2.CfnWebACL(
                self,
                id="Ext06_WAF",
                name="Ext06-WAF",
                default_action=wafv2.CfnWebACL.DefaultActionProperty(allow={}),
                scope="REGIONAL",
                visibility_config=wafv2.CfnWebACL.VisibilityConfigProperty(
                    cloud_watch_metrics_enabled=True,
                    metric_name="EXT06_WAF",
                    sampled_requests_enabled=True),
                rules=[],
            )

            # Create Susunban lambda
            lambda_dir_path = os.path.join(os.getcwd(), "ir_cdk_stacks",
                                           "ext_06")
            susunban_lambda = _lambda.Function(
                self,
                "Ext06ResponseSusUnbanFunction",
                runtime=_lambda.Runtime.PYTHON_3_8,
                handler="susunban_lambda.lambda_handler",
                code=_lambda.Code.from_asset(lambda_dir_path),
                environment={
                    "ipset_id": suslist.attr_id,
                    "ipset_name": suslist.name,
                    "ipset_scope": suslist.scope,
                })
            # Assign WAF permissions to lambda
            susunban_lambda.add_to_role_policy(
                iam.PolicyStatement(
                    actions=["wafv2:GetIPSet", "wafv2:UpdateIPSet"],
                    effect=iam.Effect.ALLOW,
                    resources=[suslist.attr_arn],
                ))

            # Create Dosunban lambda
            lambda_dir_path = os.path.join(os.getcwd(), "ir_cdk_stacks",
                                           "ext_06")
            dosunban_lambda = _lambda.Function(
                self,
                "Ext06ResponseDosUnbanFunction",
                runtime=_lambda.Runtime.PYTHON_3_8,
                handler="dosunban_lambda.lambda_handler",
                code=_lambda.Code.from_asset(lambda_dir_path),
                environment={
                    "ipset_id": doslist.attr_id,
                    "ipset_name": doslist.name,
                    "ipset_scope": doslist.scope,
                })
            # Assign WAF permissions to lambda
            dosunban_lambda.add_to_role_policy(
                iam.PolicyStatement(
                    actions=["wafv2:GetIPSet", "wafv2:UpdateIPSet"],
                    effect=iam.Effect.ALLOW,
                    resources=[doslist.attr_arn],
                ))

            # Create the Step Functions state machines that unban the blacklisted IPs after 1 hour
            doswait_step = sfn.Wait(
                self,
                "Ext06ResponseStepDosWait",
                time=sfn.WaitTime.duration(core.Duration.hours(1)),
            )
            suswait_step = sfn.Wait(
                self,
                "Ext06ResponseStepSusWait",
                time=sfn.WaitTime.duration(core.Duration.hours(1)),
            )
            dosunban_step = sfn.Task(
                self,
                "Ext06ResponseStepDosUnban",
                task=tasks.RunLambdaTask(
                    dosunban_lambda,
                    integration_pattern=sfn.ServiceIntegrationPattern.
                    FIRE_AND_FORGET,
                    payload={"Input.$": "$"},
                ),
            )
            susunban_step = sfn.Task(
                self,
                "Ext06ResponseStepSosUnban",
                task=tasks.RunLambdaTask(
                    susunban_lambda,
                    integration_pattern=sfn.ServiceIntegrationPattern.
                    FIRE_AND_FORGET,
                    payload={"Input.$": "$"},
                ),
            )
            dos_statemachine = sfn.StateMachine(
                self,
                "Ext06ResponseDosUnbanStateMachine",
                definition=doswait_step.next(dosunban_step),
                timeout=core.Duration.hours(1.5),
            )

            sus_statemachine = sfn.StateMachine(
                self,
                "Ext06ResponseSusUnbanStateMachine",
                definition=suswait_step.next(susunban_step),
                timeout=core.Duration.hours(1.5),
            )
            # Create lambda function
            lambda_func = _lambda.Function(
                self,
                "Ext06ResponseFunction",
                runtime=_lambda.Runtime.PYTHON_3_8,
                handler="response_lambda.lambda_handler",
                code=_lambda.Code.from_asset(lambda_dir_path),
                environment={
                    "suslist_id": suslist.attr_id,
                    "suslist_name": suslist.name,
                    "suslist_scope": suslist.scope,
                    "doslist_id": doslist.attr_id,
                    "doslist_name": doslist.name,
                    "doslist_scope": doslist.scope,
                    "dos_arn": dos_statemachine.state_machine_arn,
                    "sus_arn": sus_statemachine.state_machine_arn,
                },
            )

            kinesis_log = s3.Bucket(
                self,
                id='dos_logs',
                access_control=s3.BucketAccessControl.PUBLIC_READ_WRITE,
            )

            # Assign permissions to response lambda
            lambda_func.add_to_role_policy(
                iam.PolicyStatement(
                    actions=[
                        "wafv2:GetIPSet",
                        "wafv2:UpdateIPSet",
                        "states:StartExecution",
                        "s3:GetObject",
                    ],
                    effect=iam.Effect.ALLOW,
                    resources=[
                        doslist.attr_arn, suslist.attr_arn,
                        sus_statemachine.state_machine_arn,
                        dos_statemachine.state_machine_arn,
                        kinesis_log.bucket_arn,
                        kinesis_log.bucket_arn + "/*"
                    ],
                ))

            # Create an IAM role for the Firehose log delivery stream
            stream_role = iam.Role(
                self,
                id="waf-kinesis-log-role",
                assumed_by=iam.ServicePrincipal(
                    service="firehose.amazonaws.com", ),
            )

            stream_permissions = iam.Policy(
                self,
                id="Ext-06-kinesis-permissions",
                statements=[
                    iam.PolicyStatement(
                        actions=[
                            "s3:AbortMultipartUpload",
                            "s3:GetBucketLocation",
                            "s3:GetObject",
                            "s3:ListBucket",
                            "s3:ListBucketMultipartUploads",
                            "s3:PutObject",
                        ],
                        effect=iam.Effect.ALLOW,
                        resources=[
                            kinesis_log.bucket_arn,
                            kinesis_log.bucket_arn + "/*"
                        ],
                    )
                ])

            stream_role.attach_inline_policy(stream_permissions)

            log_stream = firehose.CfnDeliveryStream(
                self,
                id="aws-waf-logs-ext06",
                delivery_stream_type="DirectPut",
                delivery_stream_name="aws-waf-logs-ext06",
                s3_destination_configuration=firehose.CfnDeliveryStream.
                S3DestinationConfigurationProperty(
                    bucket_arn=kinesis_log.bucket_arn,
                    buffering_hints=firehose.CfnDeliveryStream.
                    BufferingHintsProperty(interval_in_seconds=300,
                                           size_in_m_bs=5),
                    compression_format="UNCOMPRESSED",
                    role_arn=stream_role.role_arn),
            )
            kinesis_log.add_event_notification(
                s3.EventType.OBJECT_CREATED,
                s3_notifications.LambdaDestination(lambda_func))
            utc_time = datetime.now(tz=timezone.utc)
            utc_time = utc_time + timedelta(minutes=5)
            cron_string = "cron(" + str(utc_time.minute) + " " + str(
                utc_time.hour) + " " + str(utc_time.day) + " " + str(
                    utc_time.month) + " ? " + str(utc_time.year) + ")"
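            # The resulting expression uses the 6-field EventBridge cron form:
            # cron(minutes hours day-of-month month day-of-week year), i.e. a
            # one-shot schedule roughly five minutes after deployment time.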
            trigger = events.Rule(
                self,
                id="ext-06 setup",
                rule_name="Ext06-trigger",
                schedule=events.Schedule.expression(cron_string))

            setup_dir_path = os.path.join(os.getcwd(), "ir_cdk_stacks",
                                          "ext_06")
            setup_func = _lambda.Function(
                self,
                id="Ext06Setup",
                runtime=_lambda.Runtime.PYTHON_3_8,
                handler="setup.lambda_handler",
                code=_lambda.Code.from_asset(setup_dir_path),
                environment={
                    "waf_arn": waf.attr_arn,
                    "waf_id": waf.attr_id,
                    "waf_scope": waf.scope,
                    "waf_name": waf.name,
                    "firehose_arn": log_stream.attr_arn,
                    "rule_name": "Ext06-trigger",
                    "doslist_arn": doslist.attr_arn,
                    "rate": str(RATE),
                },
            )

            # Assign permissions to setup lambda
            setup_func.add_to_role_policy(
                iam.PolicyStatement(
                    actions=[
                        "wafv2:PutLoggingConfiguration", "wafv2:GetWebACL",
                        "wafv2:UpdateWebACL"
                    ],
                    effect=iam.Effect.ALLOW,
                    resources=[waf.attr_arn, doslist.attr_arn],
                ))

            setup = targets.LambdaFunction(handler=setup_func)

            # add_target() binds the target to the rule, so no explicit bind() call is needed
            trigger.add_target(target=setup)

            wafv2.CfnWebACLAssociation(
                self,
                id="API gateway association",
                resource_arn=API_ARN,
                web_acl_arn=waf.attr_arn,
            )
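
The scheduled setup Lambda itself is not shown in this stack. A minimal sketch of what it might do with the environment variables passed in (only the WAF-to-Firehose logging hookup, using boto3's wafv2 client) could look like the following; it is an illustration under those assumptions, not the project's actual handler.

import os

import boto3


def lambda_handler(event, context):
    # Attach the "aws-waf-logs-ext06" delivery stream as the web ACL's log destination
    boto3.client("wafv2").put_logging_configuration(
        LoggingConfiguration={
            "ResourceArn": os.environ["waf_arn"],
            "LogDestinationConfigs": [os.environ["firehose_arn"]],
        }
    )
    return {"status": "logging configuration applied"}
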
Example No. 24
    def __init__(
        self,
        scope: core.Construct,
        construct_id: str,
        **kwargs,
    ) -> None:
        super().__init__(scope, construct_id, **kwargs)

        # Get some context properties
        log_level = self.node.try_get_context("log_level")
        api_name = self.node.try_get_context("api_name")
        stage_name = self.node.try_get_context("stage_name")
        endpoint_filter = self.node.try_get_context("endpoint_filter")
        api_lambda_memory = self.node.try_get_context("api_lambda_memory")
        api_lambda_timeout = self.node.try_get_context("api_lambda_timeout")
        metrics_lambda_memory = self.node.try_get_context("metrics_lambda_memory")
        metrics_lambda_timeout = self.node.try_get_context("metrics_lambda_timeout")
        dynamodb_read_capacity = self.node.try_get_context("dynamodb_read_capacity")
        dynamodb_write_capacity = self.node.try_get_context("dynamodb_write_capacity")
        delivery_sync = self.node.try_get_context("delivery_sync")
        firehose_interval = self.node.try_get_context("firehose_interval")
        firehose_mb_size = self.node.try_get_context("firehose_mb_size")

        # Create dynamodb tables and kinesis stream per project
        assignment_table_name = f"{api_name}-assignment-{stage_name}"
        metrics_table_name = f"{api_name}-metrics-{stage_name}"
        delivery_stream_name = f"{api_name}-events-{stage_name}"
        log_stream_name = "ApiEvents"

        assignment_table = aws_dynamodb.Table(
            self,
            "AssignmentTable",
            table_name=assignment_table_name,
            partition_key=aws_dynamodb.Attribute(
                name="user_id",
                type=aws_dynamodb.AttributeType.STRING,
            ),
            sort_key=aws_dynamodb.Attribute(
                name="endpoint_name",
                type=aws_dynamodb.AttributeType.STRING,
            ),
            read_capacity=dynamodb_read_capacity,
            write_capacity=dynamodb_write_capacity,
            removal_policy=core.RemovalPolicy.DESTROY,
            time_to_live_attribute="ttl",
        )

        metrics_table = aws_dynamodb.Table(
            self,
            "MetricsTable",
            table_name=metrics_table_name,
            partition_key=aws_dynamodb.Attribute(
                name="endpoint_name", type=aws_dynamodb.AttributeType.STRING
            ),
            read_capacity=dynamodb_read_capacity,
            write_capacity=dynamodb_write_capacity,
            removal_policy=core.RemovalPolicy.DESTROY,
        )

        # Create lambda layer for "aws-xray-sdk" and latest "boto3"
        xray_layer = aws_lambda.LayerVersion(
            self,
            "XRayLayer",
            code=aws_lambda.AssetCode.from_asset("layers"),
            compatible_runtimes=[aws_lambda.Runtime.PYTHON_3_7],
            description="A layer containing AWS X-Ray SDK for Python",
        )

        # Create Lambda function to read from assignment and metrics table, log metrics
        # At 2048 MB the cost is only ~3% higher than at 768 MB, but the function runs ~2.5x faster:
        # https://aws.amazon.com/blogs/aws/new-for-aws-lambda-functions-with-up-to-10-gb-of-memory-and-6-vcpus/
        lambda_invoke = aws_lambda.Function(
            self,
            "ApiFunction",
            code=aws_lambda.AssetCode.from_asset("lambda/api"),
            handler="lambda_invoke.lambda_handler",
            runtime=aws_lambda.Runtime.PYTHON_3_7,
            timeout=core.Duration.seconds(api_lambda_timeout),
            memory_size=api_lambda_memory,
            environment={
                "ASSIGNMENT_TABLE": assignment_table.table_name,
                "METRICS_TABLE": metrics_table.table_name,
                "DELIVERY_STREAM_NAME": delivery_stream_name,
                "DELIVERY_SYNC": "true" if delivery_sync else "false",
                "LOG_LEVEL": log_level,
            },
            layers=[xray_layer],
            tracing=aws_lambda.Tracing.ACTIVE,
        )

        # Grant read/write permissions to assignment and metrics tables
        assignment_table.grant_read_data(lambda_invoke)
        assignment_table.grant_write_data(lambda_invoke)
        metrics_table.grant_read_data(lambda_invoke)

        # Add sagemaker invoke
        lambda_invoke.add_to_role_policy(
            aws_iam.PolicyStatement(
                actions=[
                    "sagemaker:InvokeEndpoint",
                ],
                resources=[
                    "arn:aws:sagemaker:{}:{}:endpoint/{}".format(
                        self.region, self.account, endpoint_filter
                    ),
                ],
            )
        )

        # Create API Gateway for api lambda, which will create an output
        aws_apigateway.LambdaRestApi(
            self,
            "Api",
            rest_api_name=api_name,
            deploy_options=aws_apigateway.StageOptions(stage_name=stage_name),
            proxy=True,
            handler=lambda_invoke,
        )

        # Create lambda function for processing metrics
        lambda_register = aws_lambda.Function(
            self,
            "RegisterFunction",
            code=aws_lambda.AssetCode.from_asset("lambda/api"),
            handler="lambda_register.lambda_handler",
            runtime=aws_lambda.Runtime.PYTHON_3_7,
            timeout=core.Duration.seconds(metrics_lambda_timeout),
            memory_size=metrics_lambda_memory,
            environment={
                "METRICS_TABLE": metrics_table.table_name,
                "DELIVERY_STREAM_NAME": delivery_stream_name,
                "DELIVERY_SYNC": "true" if delivery_sync else "false",
                "LOG_LEVEL": log_level,
            },
            layers=[xray_layer],
            tracing=aws_lambda.Tracing.ACTIVE,
        )

        # Add write metrics
        metrics_table.grant_write_data(lambda_register)

        # Add sagemaker invoke
        lambda_register.add_to_role_policy(
            aws_iam.PolicyStatement(
                actions=[
                    "sagemaker:DescribeEndpoint",
                ],
                resources=[
                    "arn:aws:sagemaker:{}:{}:endpoint/{}".format(
                        self.region, self.account, endpoint_filter
                    ),
                ],
            )
        )

        # Grant permissions to the service catalog use role
        service_catalog_role = aws_iam.Role.from_role_arn(
            self,
            "RegisterRole",
            f"arn:{self.partition}:iam::{self.account}:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole",
        )
        lambda_register.grant_invoke(service_catalog_role)

        # Return the register lambda function as output
        core.CfnOutput(self, "RegisterLambda", value=lambda_register.function_name)

        # CloudWatch PutMetricData policy statement
        cloudwatch_metric_policy = aws_iam.PolicyStatement(
            actions=["cloudwatch:PutMetricData"], resources=["*"]
        )

        # If we are only using sync delivery, don't require firehose or s3 buckets
        if delivery_sync:
            metrics_table.grant_write_data(lambda_invoke)
            lambda_invoke.add_to_role_policy(cloudwatch_metric_policy)
            print("# No Firehose")
            return

        # Add kinesis stream logging
        lambda_invoke.add_to_role_policy(
            aws_iam.PolicyStatement(
                actions=[
                    "firehose:PutRecord",
                ],
                resources=[
                    "arn:aws:firehose:{}:{}:deliverystream/{}".format(
                        self.region, self.account, delivery_stream_name
                    ),
                ],
            )
        )

        # Create s3 bucket for event logging (name must be < 63 chars)
        s3_logs = aws_s3.Bucket(
            self,
            "S3Logs",
            removal_policy=core.RemovalPolicy.DESTROY,
        )

        firehose_role = aws_iam.Role(
            self,
            "KinesisFirehoseRole",
            assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"),
        )

        firehose_role.add_to_policy(
            aws_iam.PolicyStatement(
                actions=[
                    "s3:AbortMultipartUpload",
                    "s3:GetBucketLocation",
                    "s3:GetObject",
                    "s3:ListBucket",
                    "s3:ListBucketMultipartUploads",
                    "s3:PutObject",
                ],
                resources=[s3_logs.bucket_arn, f"{s3_logs.bucket_arn}/*"],
            )
        )

        # Create LogGroup and Stream, and add permissions to role
        firehose_log_group = aws_logs.LogGroup(self, "FirehoseLogGroup")
        firehose_log_stream = firehose_log_group.add_stream(log_stream_name)

        firehose_role.add_to_policy(
            aws_iam.PolicyStatement(
                actions=[
                    "logs:PutLogEvents",
                ],
                resources=[
                    f"arn:{self.partition}:logs:{self.region}:{self.account}:log-group:{firehose_log_group.log_group_name}:log-stream:{firehose_log_stream.log_stream_name}",
                ],
            )
        )

        # Create the Firehose delivery stream with S3 destination
        aws_kinesisfirehose.CfnDeliveryStream(
            self,
            "KensisLogs",
            delivery_stream_name=delivery_stream_name,
            s3_destination_configuration=aws_kinesisfirehose.CfnDeliveryStream.S3DestinationConfigurationProperty(
                bucket_arn=s3_logs.bucket_arn,
                compression_format="GZIP",
                role_arn=firehose_role.role_arn,
                prefix=f"{stage_name}/",
                cloud_watch_logging_options=aws_kinesisfirehose.CfnDeliveryStream.CloudWatchLoggingOptionsProperty(
                    enabled=True,
                    log_group_name=firehose_log_group.log_group_name,
                    log_stream_name=firehose_log_stream.log_stream_name,
                ),
                buffering_hints=aws_kinesisfirehose.CfnDeliveryStream.BufferingHintsProperty(
                    interval_in_seconds=firehose_interval,
                    size_in_m_bs=firehose_mb_size,
                ),
            ),
        )

        # Create lambda function for processing metrics
        lambda_metrics = aws_lambda.Function(
            self,
            "MetricsFunction",
            code=aws_lambda.AssetCode.from_asset("lambda/api"),
            handler="lambda_metrics.lambda_handler",
            runtime=aws_lambda.Runtime.PYTHON_3_7,
            timeout=core.Duration.seconds(metrics_lambda_timeout),
            memory_size=metrics_lambda_memory,
            environment={
                "METRICS_TABLE": metrics_table.table_name,
                "DELIVERY_STREAM_NAME": delivery_stream_name,
                "LOG_LEVEL": log_level,
            },
            layers=[xray_layer],
            tracing=aws_lambda.Tracing.ACTIVE,
        )

        # Add write metrics for dynamodb table
        metrics_table.grant_write_data(lambda_metrics)

        # Add put metrics for cloudwatch
        lambda_metrics.add_to_role_policy(cloudwatch_metric_policy)

        # Allow metrics to read from S3 and write to DynamoDB
        s3_logs.grant_read(lambda_metrics)

        # Create S3 logs notification for processing lambda
        notification = aws_s3_notifications.LambdaDestination(lambda_metrics)
        s3_logs.add_event_notification(aws_s3.EventType.OBJECT_CREATED, notification)
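
For reference, a hedged sketch of how the API Lambda above could write an event to the delivery stream named in DELIVERY_STREAM_NAME (the stack only grants it firehose:PutRecord); newline-delimited JSON keeps the GZIP-compressed S3 objects easy to split later. This is illustrative, not the repository's lambda/api code.

import json
import os

import boto3

firehose = boto3.client("firehose")


def log_event(payload: dict) -> None:
    # Firehose buffers these records and flushes them to the S3Logs bucket as GZIP objects
    firehose.put_record(
        DeliveryStreamName=os.environ["DELIVERY_STREAM_NAME"],
        Record={"Data": (json.dumps(payload) + "\n").encode("utf-8")},
    )
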
Example No. 25
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # The code that defines your stack goes here

        role01 = iam.CfnRole(
            self,
            id="firehose01_role",
            assume_role_policy_document={
                "Statement": [{
                    "Action": "sts:AssumeRole",
                    "Effect": "Allow",
                    "Principal": {
                        "Service": "lambda.amazonaws.com"
                    }
                }],
                "Version":
                "2012-10-17"
            },
            managed_policy_arns=[
                "arn:aws:iam::aws:policy/service-role/AWSLambdaKinesisExecutionRole"
            ])

        policy01 = iam.CfnPolicy(self,
                                 id="firehose01_policy",
                                 policy_name="firehose01_policy",
                                 policy_document={
                                     'Version':
                                     "2012-10-17",
                                     'Statement': [{
                                         "Action": [
                                             's3:AbortMultipartUpload',
                                             's3:GetBucketLocation',
                                             's3:GetObject', 's3:ListBucket',
                                             's3:ListBucketMultipartUploads',
                                             's3:PutObject'
                                         ],
                                         "Resource": ['*'],
                                         "Effect":
                                         "Allow"
                                     }]
                                 },
                                 roles=[role01.ref])

        delivery_stream = kinesisfirehose.CfnDeliveryStream(
            self,
            id="firehose01",
            delivery_stream_name="firehose01",
            extended_s3_destination_configuration={
                # S3 bucket that receives the delivered records
                'bucket_arn': 'arn:aws:s3:::fluent-bit-s3',

                # Compression: the old approach used GZIP; the new one is TBD
                'compression_format': 'GZIP',
                # Format conversion (to ORC/Parquet) and server-side encryption
                # are left at their defaults (disabled / none), so they are omitted
                # error output prefix: not configured
                'buffering_hints': {
                    'interval_in_seconds': 600,
                    'size_in_m_bs': 128
                },
                # Pre-delivery record transformation via a Lambda processor
                'processing_configuration': {
                    'enabled': True,
                    'processors': [{
                        'type': 'Lambda',
                        'parameters': [{
                            'parameter_name': 'BufferIntervalInSeconds',
                            'parameter_value': '60'
                        }, {
                            'parameter_name': 'BufferSizeInMBs',
                            'parameter_value': '3'
                        }, {
                            'parameter_name': 'LambdaArn',
                            'parameter_value': 'arn:aws:lambda:ap-southeast-1:596030579944:function:firehose-test'
                        }]
                    }]
                },
                'role_arn':
                'arn:aws:iam::596030579944:role/avalon_lambda_kinesis_role',
                # Keep a GZIP-compressed backup copy of the source records
                's3_backup_mode': 'Enabled',
                's3_backup_configuration': {
                    'bucket_arn': 'arn:aws:s3:::fluent-bit-s3',
                    'buffering_hints': {
                        'interval_in_seconds': 600,
                        'size_in_m_bs': 128
                    },
                    'compression_format': 'GZIP',
                    'prefix': '/backup',
                    'role_arn':
                    'arn:aws:iam::596030579944:role/avalon_lambda_kinesis_role'
                }
            },
        )
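
The raw-dict form above is easy to get wrong; for comparison, a sketch of the same destination expressed with the typed L1 property classes (same sample ARNs, CDK v1 module path assumed):

from aws_cdk import aws_kinesisfirehose as kinesisfirehose


def extended_s3_destination(bucket_arn: str, role_arn: str):
    # Equivalent typed construction of the destination used by "firehose01"
    return kinesisfirehose.CfnDeliveryStream.ExtendedS3DestinationConfigurationProperty(
        bucket_arn=bucket_arn,
        role_arn=role_arn,
        compression_format="GZIP",
        buffering_hints=kinesisfirehose.CfnDeliveryStream.BufferingHintsProperty(
            interval_in_seconds=600, size_in_m_bs=128),
    )
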
    def __init__(self, scope: core.Construct, construct_id: str,
                 **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)
        # create db for glue schema
        glue_db = glue.Database(
            self,
            'GlueDB',
            database_name='reddit_data',
        )

        # data schema
        glue_table = glue.Table(
            self,
            'GlueTable',
            table_name='sentiment',
            columns=[
                glue.Column(name='@timestamp', type=glue.Schema.TIMESTAMP),
                glue.Column(name='id', type=glue.Schema.STRING),
                glue.Column(name='subreddit', type=glue.Schema.STRING),
                glue.Column(name='body', type=glue.Schema.STRING),
                glue.Column(name='is_submitter', type=glue.Schema.BOOLEAN),
                glue.Column(name='polarity', type=glue.Schema.FLOAT),
                glue.Column(name='subjectivity', type=glue.Schema.FLOAT),
                glue.Column(name='author', type=glue.Schema.STRING),
            ],
            database=glue_db,
            data_format=glue.DataFormat.PARQUET,
            bucket=s3.Bucket.from_bucket_arn(self, 'DataBucket', BUCKET_ARN),
            s3_prefix='reddit/',
        )

        # role assumed by firehose
        stream_role = iam.Role(
            self,
            'FirehoseRole',
            assumed_by=iam.ServicePrincipal('firehose.amazonaws.com'),
            description='role used by Firehose to access s3 bucket',
        )

        # add s3 statement
        stream_role.add_to_policy(
            iam.PolicyStatement(
                resources=[BUCKET_ARN, f'{BUCKET_ARN}/*'],
                actions=[
                    's3:AbortMultipartUpload',
                    's3:GetBucketLocation',
                    's3:GetObject',
                    's3:ListBucket',
                    's3:ListBucketMultipartUploads',
                    's3:PutObject',
                ],
            ))

        # add glue statement
        stream_role.add_to_policy(
            iam.PolicyStatement(
                resources=[
                    glue_table.table_arn,
                    glue_db.database_arn,
                    glue_db.catalog_arn,
                ],
                actions=[
                    'glue:GetTable',
                    'glue:GetTableVersion',
                    'glue:GetTableVersions',
                ],
            ))

        # cloudwatch statement
        stream_role.add_to_policy(
            iam.PolicyStatement(
                resources=['*'],
                actions=[
                    'logs:PutLogEvents',
                ],
            ))

        data_format_conversion_configuration = kf.CfnDeliveryStream.DataFormatConversionConfigurationProperty(
            enabled=True,
            input_format_configuration=kf.CfnDeliveryStream.
            InputFormatConfigurationProperty(
                deserializer=kf.CfnDeliveryStream.DeserializerProperty(
                    hive_json_ser_de=kf.CfnDeliveryStream.
                    HiveJsonSerDeProperty(), ), ),
            output_format_configuration=kf.CfnDeliveryStream.
            OutputFormatConfigurationProperty(
                serializer=kf.CfnDeliveryStream.SerializerProperty(
                    parquet_ser_de=kf.CfnDeliveryStream.ParquetSerDeProperty(),
                ), ),
            schema_configuration=kf.CfnDeliveryStream.
            SchemaConfigurationProperty(
                database_name=glue_db.database_name,
                table_name=glue_table.table_name,
                role_arn=stream_role.role_arn,
                region='us-east-2',
            ),
        )

        s3_config = kf.CfnDeliveryStream.ExtendedS3DestinationConfigurationProperty(
            bucket_arn=BUCKET_ARN,  # temporary, will replace with env variable
            role_arn=stream_role.role_arn,
            data_format_conversion_configuration=
            data_format_conversion_configuration,
            prefix='reddit/',
            buffering_hints=kf.CfnDeliveryStream.BufferingHintsProperty(
                size_in_m_bs=64, ),
        )

        firehose = kf.CfnDeliveryStream(
            self,
            'FirehoseStream',
            delivery_stream_name='RedditDataStream',
            extended_s3_destination_configuration=s3_config,
        )

        # add role dependency
        firehose.node.add_dependency(stream_role)

        # add ECS Fargate instance
        app_role = iam.Role(
            self,
            'RedditStreamingAppRole',
            assumed_by=iam.ServicePrincipal('ecs-tasks.amazonaws.com'),
            description=
            'Role used by the Reddit Streaming Application Fargate Task',
        )

        # add firehose permissions
        app_role.add_to_policy(
            iam.PolicyStatement(
                resources=[firehose.attr_arn],
                actions=[
                    'firehose:DeleteDeliveryStream',
                    'firehose:PutRecord',
                    'firehose:PutRecordBatch',
                    'firehose:UpdateDestination',
                ],
            ))

        # add ecs and cloudwatch permissions
        app_role.add_to_policy(
            iam.PolicyStatement(
                resources=['*'],
                actions=[
                    'ecr:GetAuthorizationToken',
                    'ecr:BatchCheckLayerAvailability',
                    'ecr:GetDownloadUrlForLayer',
                    'ecr:BatchGetImage',
                    'logs:CreateLogStream',
                    'logs:PutLogEvents',
                ],
            ))

        vpc = ec2.Vpc(self, 'RedditVpc', max_azs=3)

        cluster = ecs.Cluster(self, 'RedditCluster', vpc=vpc)

        task_definition = ecs.FargateTaskDefinition(
            self,
            'TaskDefinition',
            memory_limit_mib=512,
            cpu=256,
            task_role=app_role,
        )

        task_definition.add_container(
            id='RedditStreamingApp',
            image=ecs.ContainerImage.from_asset('./sentiment_analysis'),
            command=['all'],
            environment={
                'FIREHOSE_STREAM_NAME': firehose.delivery_stream_name,
                'PRAW_CLIENT_SECRET': os.environ['PRAW_CLIENT_SECRET'],
                'PRAW_CLIENT_ID': os.environ['PRAW_CLIENT_ID'],
                'PRAW_USER_AGENT': os.environ['PRAW_USER_AGENT'],
            },
            logging=ecs.LogDriver.aws_logs(stream_prefix='reddit'),
        )

        container = ecs.FargateService(
            self,
            'StreamingApplication',
            desired_count=1,
            task_definition=task_definition,
            cluster=cluster,
            assign_public_ip=True,
        )
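
The ./sentiment_analysis image itself is not part of this stack. A minimal sketch of the Firehose side of that container, assuming it receives the stream name via FIREHOSE_STREAM_NAME as configured above, might be:

import json
import os
from typing import Dict, List

import boto3


def send_batch(records: List[Dict]) -> None:
    # Ship up to 500 analysed comments per call; the delivery stream converts them
    # to Parquet using the Glue schema defined in the stack above
    boto3.client("firehose").put_record_batch(
        DeliveryStreamName=os.environ["FIREHOSE_STREAM_NAME"],
        Records=[{"Data": (json.dumps(r) + "\n").encode("utf-8")} for r in records],
    )
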
    def __init__(self, scope: core.Construct, id: str,
                 props: KinesisFirehoseStackProps, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        lambda_repository = aws_codecommit.Repository(
            self,
            "ClicksProcessingLambdaRepository",
            repository_name="MythicalMysfits-ClicksProcessingLambdaRepository",
        )

        core.CfnOutput(
            self,
            "kinesisRepositoryCloneUrlHttp",
            value=lambda_repository.repository_clone_url_http,
            description="Clicks Processing Lambda Repository Clone URL HTTP",
        )

        core.CfnOutput(
            self,
            "kinesisRepositoryCloneUrlSsh",
            value=lambda_repository.repository_clone_url_ssh,
            description="Clicks Processing Lambda Repository Clone URL SSH",
        )

        clicks_destination_bucket = aws_s3.Bucket(self,
                                                  "Bucket",
                                                  versioned=True)

        lambda_function_policy = aws_iam.PolicyStatement()
        lambda_function_policy.add_actions("dynamodb:GetItem")
        lambda_function_policy.add_resources(props.table.table_arn)

        mysfits_clicks_processor = aws_lambda.Function(
            self,
            "Function",
            handler="streamProcessor.processRecord",
            runtime=aws_lambda.Runtime.PYTHON_3_7,
            description=
            "An Amazon Kinesis Firehose stream processor that enriches click records to not just include a mysfitId, but also other attributes that can be analyzed later.",
            memory_size=128,
            code=aws_lambda.Code.asset("../../lambda-streaming-processor"),
            timeout=core.Duration.seconds(30),
            initial_policy=[lambda_function_policy],
            environment={
                # TODO: this seems better than having the user copy/paste it in, but is it the best way?
                "MYSFITS_API_URL":
                "https://{}.execute-api.{}.amazonaws.com/prod/".format(
                    props.api_gateway.ref, core.Aws.REGION)
            },
        )

        firehose_delivery_role = aws_iam.Role(
            self,
            "FirehoseDeliveryRole",
            role_name="FirehoseDeliveryRole",
            assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"),
            external_id=core.Aws.ACCOUNT_ID,
        )

        firehose_delivery_policy_s3_statement = aws_iam.PolicyStatement()
        firehose_delivery_policy_s3_statement.add_actions(
            "s3:AbortMultipartUpload",
            "s3:GetBucketLocation",
            "s3:GetObject",
            "s3:ListBucket",
            "s3:ListBucketMultipartUploads",
            "s3:PutObject",
        )
        firehose_delivery_policy_s3_statement.add_resources(
            clicks_destination_bucket.bucket_arn)
        firehose_delivery_policy_s3_statement.add_resources(
            clicks_destination_bucket.arn_for_objects("*"))

        firehose_delivery_policy_lambda_statement = aws_iam.PolicyStatement()
        firehose_delivery_policy_lambda_statement.add_actions(
            "lambda:InvokeFunction")
        firehose_delivery_policy_lambda_statement.add_resources(
            mysfits_clicks_processor.function_arn)

        firehose_delivery_role.add_to_policy(
            firehose_delivery_policy_s3_statement)
        firehose_delivery_role.add_to_policy(
            firehose_delivery_policy_lambda_statement)

        mysfits_firehose_to_s3 = aws_kinesisfirehose.CfnDeliveryStream(
            self,
            "DeliveryStream",
            extended_s3_destination_configuration=aws_kinesisfirehose.
            CfnDeliveryStream.ExtendedS3DestinationConfigurationProperty(
                bucket_arn=clicks_destination_bucket.bucket_arn,
                buffering_hints=aws_kinesisfirehose.CfnDeliveryStream.
                BufferingHintsProperty(interval_in_seconds=60,
                                       size_in_m_bs=50),
                compression_format="UNCOMPRESSED",
                prefix="firehose/",
                role_arn=firehose_delivery_role.role_arn,
                processing_configuration=aws_kinesisfirehose.CfnDeliveryStream.
                ProcessingConfigurationProperty(
                    enabled=True,
                    processors=[
                        aws_kinesisfirehose.CfnDeliveryStream.
                        ProcessorProperty(
                            parameters=[
                                aws_kinesisfirehose.CfnDeliveryStream.
                                ProcessorParameterProperty(
                                    parameter_name="LambdaArn",
                                    parameter_value=mysfits_clicks_processor.
                                    function_arn,
                                )
                            ],
                            type="Lambda",
                        )
                    ],
                ),
            ),
        )

        aws_lambda.CfnPermission(
            self,
            "Permission",
            action="lambda:InvokeFunction",
            function_name=mysfits_clicks_processor.function_arn,
            principal="firehose.amazonaws.com",
            source_account=core.Aws.ACCOUNT_ID,
            source_arn=mysfits_firehose_to_s3.attr_arn,
        )

        click_processing_api_role = aws_iam.Role(
            self,
            "ClickProcessingApiRole",
            assumed_by=aws_iam.ServicePrincipal("apigateway.amazonaws.com"),
        )

        api_policy = aws_iam.PolicyStatement()
        api_policy.add_actions("firehose:PutRecord")
        api_policy.add_resources(mysfits_firehose_to_s3.attr_arn)
        aws_iam.Policy(
            self,
            "ClickProcessingApiPolicy",
            policy_name="api_gateway_firehose_proxy_role",
            statements=[api_policy],
            roles=[click_processing_api_role],
        )

        api = aws_apigateway.RestApi(
            self,
            "APIEndpoint",
            rest_api_name="ClickProcessing API Service",
            endpoint_types=[aws_apigateway.EndpointType.REGIONAL],
        )

        clicks = api.root.add_resource("clicks")

        clicks.add_method(
            "PUT",
            aws_apigateway.AwsIntegration(
                service="firehose",
                integration_http_method="POST",
                action="PutRecord",
                options=aws_apigateway.IntegrationOptions(
                    connection_type=aws_apigateway.ConnectionType.INTERNET,
                    credentials_role=click_processing_api_role,
                    integration_responses=[
                        aws_apigateway.IntegrationResponse(
                            status_code="200",
                            response_templates={
                                "application/json": '{"status": "OK"}'
                            },
                            response_parameters={
                                "method.response.header.Access-Control-Allow-Headers":
                                "'Content-Type'",
                                "method.response.header.Access-Control-Allow-Methods":
                                "'OPTIONS,PUT'",
                                "method.response.header.Access-Control-Allow-Origin":
                                "'*'",
                            },
                        )
                    ],
                    request_parameters={
                        "integration.request.header.Content-Type":
                        "'application/x-amz-json-1.1'"
                    },
                    request_templates={
                        "application/json":
                        """{ "DeliveryStreamName": "%s", "Record": { "Data": "$util.base64Encode($input.json('$'))" }}"""
                        % mysfits_firehose_to_s3.ref
                    },
                ),
            ),
            method_responses=[
                aws_apigateway.MethodResponse(
                    status_code="200",
                    response_parameters={
                        "method.response.header.Access-Control-Allow-Headers":
                        True,
                        "method.response.header.Access-Control-Allow-Methods":
                        True,
                        "method.response.header.Access-Control-Allow-Origin":
                        True,
                    },
                )
            ],
        )

        clicks.add_method(
            "OPTIONS",
            aws_apigateway.MockIntegration(
                integration_responses=[
                    aws_apigateway.IntegrationResponse(
                        status_code="200",
                        response_parameters={
                            "method.response.header.Access-Control-Allow-Headers":
                            "'Content-Type,X-Amz-Date,Authorization,X-Api-Key,X-Amz-Security-Token,X-Amz-User-Agent'",
                            "method.response.header.Access-Control-Allow-Origin":
                            "'*'",
                            "method.response.header.Access-Control-Allow-Credentials":
                            "'false'",
                            "method.response.header.Access-Control-Allow-Methods":
                            "'OPTIONS,GET,PUT,POST,DELETE'",
                        },
                    )
                ],
                passthrough_behavior=aws_apigateway.PassthroughBehavior.NEVER,
                request_templates={"application/json": '{"statusCode": 200}'},
            ),
            method_responses=[
                aws_apigateway.MethodResponse(
                    status_code="200",
                    response_parameters={
                        "method.response.header.Access-Control-Allow-Headers":
                        True,
                        "method.response.header.Access-Control-Allow-Methods":
                        True,
                        "method.response.header.Access-Control-Allow-Credentials":
                        True,
                        "method.response.header.Access-Control-Allow-Origin":
                        True,
                    },
                )
            ],
        )
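
A quick smoke test of the PUT /clicks method can be done with any HTTP client; the sketch below uses requests with a placeholder invoke URL (the real one comes from the deployed RestApi) and an arbitrary mysfitId.

import requests

# Placeholder: substitute the deployed API's invoke URL
API_URL = "https://<rest-api-id>.execute-api.<region>.amazonaws.com/prod/clicks"

response = requests.put(API_URL, json={"mysfitId": "example-mysfit"})
print(response.status_code, response.json())  # the 200 mapping returns {"status": "OK"}
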
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # The code that defines your stack goes here

        ###########################################################################
        # AWS SECRETS MANAGER - Templated secret
        ###########################################################################
        # templated_secret = aws_secretsmanager.Secret(self, "TemplatedSecret",
        #     generate_secret_string=aws_secretsmanager.SecretStringGenerator(
        #         secret_string_template= "{\"username\":\"cleanbox\"}",
        #         generate_string_key="password"
        #     )
        # )
        ###########################################################################
        # CUSTOM CLOUDFORMATION RESOURCE
        ###########################################################################
        # customlambda = aws_lambda.Function(self,'customconfig',
        # handler='customconfig.on_event',
        # runtime=aws_lambda.Runtime.PYTHON_3_7,
        # code=aws_lambda.Code.asset('customconfig'),
        # )

        # customlambda_statement = aws_iam.PolicyStatement(actions=["events:PutRule"], conditions=None, effect=None, not_actions=None, not_principals=None, not_resources=None, principals=None, resources=["*"], sid=None)
        # customlambda.add_to_role_policy(statement=customlambda_statement)

        # my_provider = cr.Provider(self, "MyProvider",
        #     on_event_handler=customlambda,
        #     # is_complete_handler=is_complete, # optional async "waiter"
        #     log_retention=logs.RetentionDays.SIX_MONTHS
        # )

        # CustomResource(self, 'customconfigresource', service_token=my_provider.service_token)

        ###########################################################################
        # AWS LAMBDA FUNCTIONS
        ###########################################################################
        sqs_to_elastic_cloud = aws_lambda.Function(
            self,
            'sqs_to_elastic_cloud',
            handler='sqs_to_elastic_cloud.lambda_handler',
            runtime=aws_lambda.Runtime.PYTHON_3_7,
            code=aws_lambda.Code.asset('sqs_to_elastic_cloud'),
            memory_size=4096,
            timeout=core.Duration.seconds(300),
            log_retention=logs.RetentionDays.ONE_DAY)

        sqs_to_elasticsearch_service = aws_lambda.Function(
            self,
            'sqs_to_elasticsearch_service',
            handler='sqs_to_elasticsearch_service.lambda_handler',
            runtime=aws_lambda.Runtime.PYTHON_3_7,
            code=aws_lambda.Code.asset('sqs_to_elasticsearch_service'),
            memory_size=4096,
            timeout=core.Duration.seconds(300),
            log_retention=logs.RetentionDays.ONE_DAY)

        # sqs_to_elasticsearch_service.add_environment("kinesis_firehose_name", "-")
        # sqs_to_elastic_cloud.add_environment("index_name", "-")

        ###########################################################################
        # AWS LAMBDA FUNCTIONS
        ###########################################################################
        # sqs_to_elasticsearch_service_permission = aws_lambda.Permission(*, principal, action=None, event_source_token=None, scope=None, source_account=None, source_arn=None)

        ###########################################################################
        # AMAZON S3 BUCKETS
        ###########################################################################
        access_log_bucket = aws_s3.Bucket(self, "access_log_bucket")
        kinesis_log_bucket = aws_s3.Bucket(self, "kinesis_log_bucket")

        ###########################################################################
        # LAMBDA SUPPLEMENTAL POLICIES
        ###########################################################################
        lambda_supplemental_policy_statement = aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            actions=["s3:Get*", "s3:Head*", "s3:List*", "firehose:*"],
            resources=["*"])

        sqs_to_elastic_cloud.add_to_role_policy(
            lambda_supplemental_policy_statement)
        sqs_to_elasticsearch_service.add_to_role_policy(
            lambda_supplemental_policy_statement)
        ###########################################################################
        # AWS SNS TOPICS
        ###########################################################################
        access_log_topic = aws_sns.Topic(self, "access_log_topic")

        ###########################################################################
        # ADD AMAZON S3 BUCKET NOTIFICATIONS
        ###########################################################################
        access_log_bucket.add_event_notification(
            aws_s3.EventType.OBJECT_CREATED,
            aws_s3_notifications.SnsDestination(access_log_topic))
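        # Every OBJECT_CREATED event in the access log bucket is published to the SNS
        # topic, which fans the notification out to both SQS queues below so that both
        # Lambda consumers receive every access-log object event.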

        ###########################################################################
        # AWS SQS QUEUES
        ###########################################################################
        sqs_to_elasticsearch_service_queue_iqueue = aws_sqs.Queue(
            self, "sqs_to_elasticsearch_service_queue_dlq")
        sqs_to_elasticsearch_service_queue_dlq = aws_sqs.DeadLetterQueue(
            max_receive_count=10,
            queue=sqs_to_elasticsearch_service_queue_iqueue)
        sqs_to_elasticsearch_service_queue = aws_sqs.Queue(
            self,
            "sqs_to_elasticsearch_service_queue",
            visibility_timeout=core.Duration.seconds(301),
            dead_letter_queue=sqs_to_elasticsearch_service_queue_dlq)

        sqs_to_elastic_cloud_queue_iqueue = aws_sqs.Queue(
            self, "sqs_to_elastic_cloud_queue_dlq")
        sqs_to_elastic_cloud_queue_dlq = aws_sqs.DeadLetterQueue(
            max_receive_count=10, queue=sqs_to_elastic_cloud_queue_iqueue)
        sqs_to_elastic_cloud_queue = aws_sqs.Queue(
            self,
            "sqs_to_elastic_cloud_queue",
            visibility_timeout=core.Duration.seconds(301),
            dead_letter_queue=sqs_to_elastic_cloud_queue_dlq)
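        # NOTE: each queue's visibility timeout (301s) is set just above the consuming
        # Lambda's timeout (300s); Lambda requires an SQS event source queue to have a
        # visibility timeout at least as long as the function timeout so that messages
        # are not redelivered while a batch is still being processed.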

        ###########################################################################
        # AWS SNS TOPIC SUBSCRIPTIONS
        ###########################################################################
        access_log_topic.add_subscription(
            aws_sns_subscriptions.SqsSubscription(sqs_to_elastic_cloud_queue))
        access_log_topic.add_subscription(
            aws_sns_subscriptions.SqsSubscription(
                sqs_to_elasticsearch_service_queue))

        ###########################################################################
        # AWS LAMBDA SQS EVENT SOURCE
        ###########################################################################
        sqs_to_elastic_cloud.add_event_source(
            SqsEventSource(sqs_to_elastic_cloud_queue, batch_size=10))
        sqs_to_elasticsearch_service.add_event_source(
            SqsEventSource(sqs_to_elasticsearch_service_queue, batch_size=10))

        ###########################################################################
        # AWS ELASTICSEARCH DOMAIN
        ###########################################################################

        ###########################################################################
        # AWS ELASTICSEARCH DOMAIN ACCESS POLICY
        ###########################################################################
        this_aws_account = aws_iam.AccountPrincipal(account_id="012345678912")
        # s3_to_elasticsearch_access_logs_domain_access_policy_statement = aws_iam.PolicyStatement(
        #     principals=[this_aws_account],
        #     effect=aws_iam.Effect.ALLOW,
        #     actions=["es:*"],
        #     resources=["*"]
        #     )
        # s3_to_elasticsearch_access_logs_domain_access_policy_statement_list=[]
        # s3_to_elasticsearch_access_logs_domain_access_policy_statement_list.append(s3_to_elasticsearch_access_logs_domain_access_policy_statement)

        s3_to_elasticsearch_access_logs_domain = aws_elasticsearch.Domain(
            self,
            "s3-to-elasticsearch-access-logs-domain",
            # access_policies=s3_to_elasticsearch_access_logs_domain_access_policy_statement_list,
            version=aws_elasticsearch.ElasticsearchVersion.V7_1,
            capacity={
                "master_nodes": 3,
                "data_nodes": 4
            },
            ebs={"volume_size": 100},
            zone_awareness={"availability_zone_count": 2},
            logging={
                "slow_search_log_enabled": True,
                "app_log_enabled": True,
                "slow_index_log_enabled": True
            })
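        # NOTE: with zone awareness enabled across 2 availability zones, the data node
        # count (4 here) is kept a multiple of the AZ count, which zone awareness expects.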

        ###########################################################################
        # AMAZON COGNITO USER POOL
        ###########################################################################
        s3_to_elasticsearch_user_pool = aws_cognito.UserPool(
            self,
            "s3-to-elasticsearch-access-logs-pool",
            sign_in_aliases=aws_cognito.SignInAliases(email=True,
                                                      username=True))
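        # The user pool above is not yet attached to the Elasticsearch domain. A sketch of
        # how it could be wired up (assuming a Cognito identity pool and an authenticated
        # role `es_cognito_role` are created separately; both names are illustrative) is to
        # pass Cognito options when constructing the Domain:
        #
        # cognito_kibana_auth=aws_elasticsearch.CognitoOptions(
        #     identity_pool_id=identity_pool.ref,
        #     user_pool_id=s3_to_elasticsearch_user_pool.user_pool_id,
        #     role=es_cognito_role)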

        ###########################################################################
        # AMAZON KINESIS FIREHOSE STREAM
        ###########################################################################
        # kinesis_policy_statement = aws_iam.PolicyStatement(
        #     effect=aws_iam.Effect.ALLOW,
        #     # actions=["es:*", "s3:*", "kms:*", "kinesis:*", "lambda:*"],
        #     actions=["*"],
        #     resources=["*"]
        #     )

        # kinesis_policy_document = aws_iam.PolicyDocument()
        # kinesis_policy_document.add_statements(kinesis_policy_statement)

        kinesis_firehose_stream_role = aws_iam.Role(
            self,
            "BaseVPCIAMLogRole",
            assumed_by=aws_iam.ServicePrincipal('firehose.amazonaws.com'),
            role_name=None,
            inline_policies={
                "AllowLogAccess":
                aws_iam.PolicyDocument(
                    assign_sids=False,
                    statements=[
                        # NOTE: the wildcard '*' action makes the named actions redundant
                        # and grants far more than log access (it also covers the S3
                        # backup writes Firehose performs); scope this down for production.
                        aws_iam.PolicyStatement(actions=[
                            '*', 'es:*', 'logs:PutLogEvents',
                            'logs:DescribeLogGroups',
                            'logs:DescribeLogStreams'
                        ],
                                                effect=aws_iam.Effect.ALLOW,
                                                resources=['*'])
                    ])
            })

        retry_options = aws_kinesisfirehose.CfnDeliveryStream.ElasticsearchRetryOptionsProperty(
            duration_in_seconds=300)
        s3_configuration = aws_kinesisfirehose.CfnDeliveryStream.S3DestinationConfigurationProperty(
            bucket_arn=kinesis_log_bucket.bucket_arn,
            role_arn=kinesis_firehose_stream_role.role_arn)

        elasticsearch_destination_config = aws_kinesisfirehose.CfnDeliveryStream.ElasticsearchDestinationConfigurationProperty(
            # "BufferingHints" : ElasticsearchBufferingHints,
            # "CloudWatchLoggingOptions" : CloudWatchLoggingOptions,
            # "ClusterEndpoint" : String,
            domain_arn=s3_to_elasticsearch_access_logs_domain.domain_arn,
            index_name="s3-to-elasticsearch-accesslogs",
            index_rotation_period="OneDay",
            # "ProcessingConfiguration" : ProcessingConfiguration,
            retry_options=retry_options,
            role_arn=kinesis_firehose_stream_role.role_arn,
            # "S3BackupMode" : String,
            s3_configuration=s3_configuration
            # "TypeName" : String
            # "VpcConfiguration" : VpcConfiguration
        )

        kinesis_firehose_stream = aws_kinesisfirehose.CfnDeliveryStream(
            self,
            "kinesis_firehose_stream",
            elasticsearch_destination_configuration=
            elasticsearch_destination_config)

        sqs_to_elasticsearch_service.add_environment(
            "FIREHOSE_NAME", kinesis_firehose_stream.ref)
        sqs_to_elasticsearch_service.add_environment(
            "QUEUEURL", sqs_to_elasticsearch_service_queue.queue_url)
        sqs_to_elasticsearch_service.add_environment("DEBUG", "False")

        sqs_to_elastic_cloud.add_environment("ELASTICCLOUD_SECRET_NAME", "-")
        sqs_to_elastic_cloud.add_environment("ELASTIC_CLOUD_ID", "-")
        sqs_to_elastic_cloud.add_environment("ELASTIC_CLOUD_PASSWORD", "-")
        sqs_to_elastic_cloud.add_environment("ELASTIC_CLOUD_USERNAME", "-")
        sqs_to_elastic_cloud.add_environment(
            "QUEUEURL", sqs_to_elastic_cloud_queue.queue_url)
        sqs_to_elastic_cloud.add_environment("DEBUG", "False")
Exemplo n.º 29
0
    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        OPENSEARCH_DOMAIN_NAME = cdk.CfnParameter(
            self,
            'OpenSearchDomainName',
            type='String',
            description='Amazon OpenSearch Service domain name',
            # OpenSearch domain names must be lowercase, so sample from
            # ascii_lowercase rather than the full ascii_letters set.
            default='opensearch-{}'.format(''.join(
                random.sample(string.ascii_lowercase, k=5))),
            allowed_pattern=r'[a-z]+[a-z0-9\-]+')

        OPENSEARCH_INDEX_NAME = cdk.CfnParameter(
            self,
            'SearchIndexName',
            type='String',
            description='Amazon OpenSearch Service index name')

        EC2_KEY_PAIR_NAME = cdk.CfnParameter(
            self,
            'EC2KeyPairName',
            type='String',
            description='Amazon EC2 Instance KeyPair name')

        # vpc_name = self.node.try_get_context("vpc_name")
        # vpc = aws_ec2.Vpc.from_lookup(self, "ExistingVPC",
        #   is_default=True,
        #   vpc_name=vpc_name)
        #
        vpc = aws_ec2.Vpc(
            self,
            "EKKStackVPC",
            max_azs=3,
            gateway_endpoints={
                "S3":
                aws_ec2.GatewayVpcEndpointOptions(
                    service=aws_ec2.GatewayVpcEndpointAwsService.S3)
            })

        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceClass.html
        #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceSize.html#aws_cdk.aws_ec2.InstanceSize
        ec2_instance_type = aws_ec2.InstanceType.of(
            aws_ec2.InstanceClass.BURSTABLE3, aws_ec2.InstanceSize.MEDIUM)

        sg_bastion_host = aws_ec2.SecurityGroup(
            self,
            "BastionHostSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for a bastion host',
            security_group_name='bastion-host-sg')
        cdk.Tags.of(sg_bastion_host).add('Name', 'bastion-host-sg')

        #TODO: SHOULD restrict the IP range allowed for SSH access
        sg_bastion_host.add_ingress_rule(peer=aws_ec2.Peer.ipv4("0.0.0.0/0"),
                                         connection=aws_ec2.Port.tcp(22),
                                         description='SSH access')

        bastion_host = aws_ec2.Instance(
            self,
            "BastionHost",
            vpc=vpc,
            instance_type=ec2_instance_type,
            machine_image=aws_ec2.MachineImage.latest_amazon_linux(),
            vpc_subnets=aws_ec2.SubnetSelection(
                subnet_type=aws_ec2.SubnetType.PUBLIC),
            security_group=sg_bastion_host,
            key_name=EC2_KEY_PAIR_NAME.value_as_string)
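        # Once deployed, the bastion host can be reached over SSH using the key pair
        # named in the EC2KeyPairName parameter, e.g.:
        #   ssh -i <key-pair>.pem ec2-user@<bastion public IP>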

        sg_use_opensearch = aws_ec2.SecurityGroup(
            self,
            "OpenSearchClientSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for an opensearch client',
            security_group_name='use-opensearch-cluster-sg')
        cdk.Tags.of(sg_use_opensearch).add('Name', 'use-opensearch-cluster-sg')

        sg_opensearch_cluster = aws_ec2.SecurityGroup(
            self,
            "OpenSearchSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for an opensearch cluster',
            security_group_name='opensearch-cluster-sg')
        cdk.Tags.of(sg_opensearch_cluster).add('Name', 'opensearch-cluster-sg')

        sg_opensearch_cluster.add_ingress_rule(
            peer=sg_opensearch_cluster,
            connection=aws_ec2.Port.all_tcp(),
            description='opensearch-cluster-sg')

        sg_opensearch_cluster.add_ingress_rule(
            peer=sg_use_opensearch,
            connection=aws_ec2.Port.tcp(443),
            description='use-opensearch-cluster-sg')
        sg_opensearch_cluster.add_ingress_rule(
            peer=sg_use_opensearch,
            connection=aws_ec2.Port.tcp_range(9200, 9300),
            description='use-opensearch-cluster-sg')

        sg_opensearch_cluster.add_ingress_rule(
            peer=sg_bastion_host,
            connection=aws_ec2.Port.tcp(443),
            description='bastion-host-sg')
        sg_opensearch_cluster.add_ingress_rule(
            peer=sg_bastion_host,
            connection=aws_ec2.Port.tcp_range(9200, 9300),
            description='bastion-host-sg')

        master_user_secret = aws_secretsmanager.Secret(
            self,
            "OpenSearchMasterUserSecret",
            generate_secret_string=aws_secretsmanager.SecretStringGenerator(
                secret_string_template=json.dumps({"username": "******"}),
                generate_string_key="password",
                # Master password must be at least 8 characters long and contain at least one uppercase letter,
                # one lowercase letter, one number, and one special character.
                password_length=8))
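        # After deployment the generated master credentials can be read back with the
        # AWS CLI, e.g.:
        #   aws secretsmanager get-secret-value --secret-id <MasterUserSecretId output>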

        #XXX: aws cdk elasticsearch example - https://github.com/aws/aws-cdk/issues/2873
        # You should camelCase the property names instead of PascalCase
        opensearch_domain = aws_opensearchservice.Domain(
            self,
            "OpenSearch",
            domain_name=OPENSEARCH_DOMAIN_NAME.value_as_string,
            version=aws_opensearchservice.EngineVersion.OPENSEARCH_1_0,
            #XXX: You cannot use graviton instances with non-graviton instances.
            # Use graviton instances as data nodes or use non-graviton instances as master nodes.
            capacity={
                "master_nodes": 3,
                "master_node_instance_type": "r6g.large.search",
                "data_nodes": 3,
                "data_node_instance_type": "r6g.large.search"
            },
            ebs={
                "volume_size": 10,
                "volume_type": aws_ec2.EbsDeviceVolumeType.GP2
            },
            #XXX: az_count must be equal to vpc subnets count.
            zone_awareness={"availability_zone_count": 3},
            logging={
                "slow_search_log_enabled": True,
                "app_log_enabled": True,
                "slow_index_log_enabled": True
            },
            fine_grained_access_control=aws_opensearchservice.
            AdvancedSecurityOptions(
                master_user_name=master_user_secret.secret_value_from_json(
                    "username").to_string(),
                master_user_password=master_user_secret.secret_value_from_json(
                    "password")),
            # Enforce HTTPS is required when fine-grained access control is enabled.
            enforce_https=True,
            # Node-to-node encryption is required when fine-grained access control is enabled
            node_to_node_encryption=True,
            # Encryption-at-rest is required when fine-grained access control is enabled.
            encryption_at_rest={"enabled": True},
            use_unsigned_basic_auth=True,
            security_groups=[sg_opensearch_cluster],
            automated_snapshot_start_hour=17,  # 2 AM (GMT+9)
            vpc=vpc,
            vpc_subnets=[
                aws_ec2.SubnetSelection(
                    one_per_az=True,
                    subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT)
            ],
            removal_policy=cdk.RemovalPolicy.
            DESTROY  # default: cdk.RemovalPolicy.RETAIN
        )
        cdk.Tags.of(opensearch_domain).add(
            'Name', f'{OPENSEARCH_DOMAIN_NAME.value_as_string}')

        S3_BUCKET_SUFFIX = ''.join(
            random.sample((string.ascii_lowercase + string.digits), k=7))
        s3_bucket = s3.Bucket(
            self,
            "s3bucket",
            removal_policy=cdk.RemovalPolicy.
            DESTROY,  #XXX: Default: core.RemovalPolicy.RETAIN - The bucket will be orphaned
            bucket_name="opskk-stack-{region}-{suffix}".format(
                region=cdk.Aws.REGION, suffix=S3_BUCKET_SUFFIX))

        firehose_role_policy_doc = aws_iam.PolicyDocument()
        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "resources": [
                        s3_bucket.bucket_arn, "{}/*".format(
                            s3_bucket.bucket_arn)
                    ],
                    "actions": [
                        "s3:AbortMultipartUpload", "s3:GetBucketLocation",
                        "s3:GetObject", "s3:ListBucket",
                        "s3:ListBucketMultipartUploads", "s3:PutObject"
                    ]
                }))

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                resources=["*"],
                actions=[
                    "ec2:DescribeVpcs", "ec2:DescribeVpcAttribute",
                    "ec2:DescribeSubnets", "ec2:DescribeSecurityGroups",
                    "ec2:DescribeNetworkInterfaces",
                    "ec2:CreateNetworkInterface",
                    "ec2:CreateNetworkInterfacePermission",
                    "ec2:DeleteNetworkInterface"
                ]))

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                resources=[
                    opensearch_domain.domain_arn,
                    "{}/*".format(opensearch_domain.domain_arn)
                ],
                actions=[
                    "es:DescribeElasticsearchDomain",
                    "es:DescribeElasticsearchDomains",
                    "es:DescribeElasticsearchDomainConfig", "es:ESHttpPost",
                    "es:ESHttpPut"
                ]))

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                #XXX: https://aws.amazon.com/premiumsupport/knowledge-center/kinesis-data-firehose-delivery-failure/
                resources=[
                    opensearch_domain.domain_arn,
                    f"{opensearch_domain.domain_arn}/_all/_settings",
                    f"{opensearch_domain.domain_arn}/_cluster/stats",
                    f"{opensearch_domain.domain_arn}/{OPENSEARCH_INDEX_NAME.value_as_string}*/_mapping/%FIREHOSE_POLICY_TEMPLATE_PLACEHOLDER%",
                    f"{opensearch_domain.domain_arn}/_nodes",
                    f"{opensearch_domain.domain_arn}/_nodes/stats",
                    f"{opensearch_domain.domain_arn}/_nodes/*/stats",
                    f"{opensearch_domain.domain_arn}/_stats",
                    f"{opensearch_domain.domain_arn}/{OPENSEARCH_INDEX_NAME.value_as_string}*/_stats"
                ],
                actions=["es:ESHttpGet"]))

        firehose_log_group_name = f"/aws/kinesisfirehose/{OPENSEARCH_INDEX_NAME.value_as_string}"
        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                #XXX: The ARN will be formatted as follows:
                # arn:{partition}:{service}:{region}:{account}:{resource}{sep}}{resource-name}
                resources=[
                    self.format_arn(
                        service="logs",
                        resource="log-group",
                        resource_name="{}:log-stream:*".format(
                            firehose_log_group_name),
                        arn_format=cdk.ArnFormat.COLON_RESOURCE_NAME)
                ],
                actions=["logs:PutLogEvents"]))

        firehose_role = aws_iam.Role(
            self,
            "KinesisFirehoseServiceRole",
            role_name=
            f"KinesisFirehoseServiceRole-{OPENSEARCH_INDEX_NAME.value_as_string}-{cdk.Aws.REGION}",
            assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"),
            #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
            inline_policies={"firehose_role_policy": firehose_role_policy_doc})

        opensearch_dest_vpc_config = aws_kinesisfirehose.CfnDeliveryStream.VpcConfigurationProperty(
            role_arn=firehose_role.role_arn,
            security_group_ids=[sg_use_opensearch.security_group_id],
            subnet_ids=vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).subnet_ids)

        opensearch_dest_config = aws_kinesisfirehose.CfnDeliveryStream.ElasticsearchDestinationConfigurationProperty(
            index_name=OPENSEARCH_INDEX_NAME.value_as_string,
            role_arn=firehose_role.role_arn,
            s3_configuration={
                "bucketArn": s3_bucket.bucket_arn,
                "bufferingHints": {
                    "intervalInSeconds": 60,
                    "sizeInMBs": 1
                },
                "cloudWatchLoggingOptions": {
                    "enabled": True,
                    "logGroupName": firehose_log_group_name,
                    "logStreamName": "S3Backup"
                },
                "compressionFormat":
                "UNCOMPRESSED",  # [GZIP | HADOOP_SNAPPY | Snappy | UNCOMPRESSED | ZIP]
                # Kinesis Data Firehose automatically appends the “YYYY/MM/dd/HH/” UTC prefix to delivered S3 files. You can also specify
                # an extra prefix in front of the time format and add "/" to the end to have it appear as a folder in the S3 console.
                "prefix": f"{OPENSEARCH_INDEX_NAME.value_as_string}/",
                "roleArn": firehose_role.role_arn
            },
            buffering_hints={
                "intervalInSeconds": 60,
                "sizeInMBs": 1
            },
            cloud_watch_logging_options={
                "enabled": True,
                "logGroupName": firehose_log_group_name,
                "logStreamName": "ElasticsearchDelivery"
            },
            domain_arn=opensearch_domain.domain_arn,
            index_rotation_period=
            "NoRotation",  # [NoRotation | OneDay | OneHour | OneMonth | OneWeek]
            retry_options={"durationInSeconds": 60},
            s3_backup_mode=
            "FailedDocumentsOnly",  # [AllDocuments | FailedDocumentsOnly]
            vpc_configuration=opensearch_dest_vpc_config)

        firehose_to_ops_delivery_stream = aws_kinesisfirehose.CfnDeliveryStream(
            self,
            "KinesisFirehoseToES",
            delivery_stream_name=OPENSEARCH_INDEX_NAME.value_as_string,
            delivery_stream_type="DirectPut",
            elasticsearch_destination_configuration=opensearch_dest_config,
            tags=[{
                "key": "Name",
                "value": OPENSEARCH_INDEX_NAME.value_as_string
            }])
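        # A test record can be pushed into the delivery stream once deployed (the stream
        # name equals the SearchIndexName parameter), e.g.:
        #   aws firehose put-record --delivery-stream-name <index-name> \
        #       --record '{"Data":"<base64-encoded payload>"}'
        # (AWS CLI v2 expects the Data blob to be base64-encoded.)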

        cdk.CfnOutput(self,
                      'BastionHostId',
                      value=bastion_host.instance_id,
                      export_name='BastionHostId')
        cdk.CfnOutput(self,
                      'OpenSearchDomainEndpoint',
                      value=opensearch_domain.domain_endpoint,
                      export_name='OpenSearchDomainEndpoint')
        cdk.CfnOutput(
            self,
            'OpenSearchDashboardsURL',
            value=f"{opensearch_domain.domain_endpoint}/_dashboards/",
            export_name='OpenSearchDashboardsURL')
        cdk.CfnOutput(self,
                      'MasterUserSecretId',
                      value=master_user_secret.secret_name,
                      export_name='MasterUserSecretId')
        cdk.CfnOutput(self,
                      '{}_S3DestBucket'.format(self.stack_name),
                      value=s3_bucket.bucket_name,
                      export_name='S3DestBucket')
        cdk.CfnOutput(self,
                      'FirehoseRoleArn',
                      value=firehose_role.role_arn,
                      export_name='FirehoseRoleArn')
    def __init__(self, scope: core.Construct, construct_id: str,
                 stack_log_level: str, src_stream, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        # The code that defines your stack goes here
        # Create an S3 Bucket for storing streaming data events from firehose
        fh_data_store = _s3.Bucket(self,
                                   "fhDataStore",
                                   removal_policy=core.RemovalPolicy.DESTROY,
                                   auto_delete_objects=False)

        firehose_delivery_stream_name = f"phi_data_filter"

        # Firehose Lambda Transformer
        # Read Lambda Code
        try:
            with open(
                    "sensitive_data_filter_instream/stacks/back_end/firehose_transformation_stack/lambda_src/kinesis_firehose_transformer.py",
                    encoding="utf-8",
                    mode="r") as f:
                fh_transformer_fn_code = f.read()
        except OSError:
            print("Unable to read Lambda Function Code")
            raise

        fh_transformer_fn = _lambda.Function(
            self,
            "fhDataTransformerFn",
            function_name=f"fh_data_transformer",
            description="Transform incoming data events with newline character",
            runtime=_lambda.Runtime.PYTHON_3_7,
            code=_lambda.InlineCode(fh_transformer_fn_code),
            handler="index.lambda_handler",
            timeout=core.Duration.seconds(60),
            reserved_concurrent_executions=1,
            environment={
                "LOG_LEVEL": "INFO",
                "APP_ENV": "Production",
            })
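        # The transformer source is read from kinesis_firehose_transformer.py above and is
        # not reproduced here. A minimal sketch of a typical Firehose transformation
        # handler of this kind (illustrative only, assuming the transformation is just
        # appending a newline to each record):
        #
        # import base64
        #
        # def lambda_handler(event, context):
        #     output = []
        #     for record in event["records"]:
        #         payload = base64.b64decode(record["data"]).decode("utf-8")
        #         transformed = payload + "\n"
        #         output.append({
        #             "recordId": record["recordId"],
        #             "result": "Ok",
        #             "data": base64.b64encode(transformed.encode("utf-8")).decode("utf-8")
        #         })
        #     return {"records": output}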

        # Create Custom Loggroup for Producer
        fh_transformer_fn_lg = _logs.LogGroup(
            self,
            "fhDataTransformerFnLogGroup",
            log_group_name=f"/aws/lambda/{fh_transformer_fn.function_name}",
            removal_policy=core.RemovalPolicy.DESTROY,
            retention=_logs.RetentionDays.ONE_DAY)

        fh_delivery_role = _iam.Role(
            self,
            "fhDeliveryRole",
            # role_name="FirehoseDeliveryRole",
            assumed_by=_iam.ServicePrincipal("firehose.amazonaws.com"),
            external_id=core.Aws.ACCOUNT_ID,
        )

        # Add permissions to allow Kinesis Firehose to write to S3
        roleStmt1 = _iam.PolicyStatement(effect=_iam.Effect.ALLOW,
                                         resources=[
                                             f"{fh_data_store.bucket_arn}",
                                             f"{fh_data_store.bucket_arn}/*"
                                         ],
                                         actions=[
                                             "s3:AbortMultipartUpload",
                                             "s3:GetBucketLocation",
                                             "s3:GetObject", "s3:ListBucket",
                                             "s3:ListBucketMultipartUploads",
                                             "s3:PutObject"
                                         ])
        # roleStmt1.add_resources(
        #     fh_data_store.arn_for_objects("*")
        # )
        roleStmt1.sid = "AllowKinesisToWriteToS3"
        fh_delivery_role.add_to_policy(roleStmt1)

        # Add permissions to allow Kinesis Firehose to write to CloudWatch Logs
        roleStmt2 = _iam.PolicyStatement(
            effect=_iam.Effect.ALLOW,
            resources=[
                f"arn:aws:logs:{core.Aws.REGION}:{core.Aws.ACCOUNT_ID}:log-group:/aws/kinesisfirehose/{firehose_delivery_stream_name}:log-stream:*"
            ],
            actions=["logs:PutLogEvents"])
        roleStmt2.sid = "AllowKinesisToWriteToCloudWatch"
        fh_delivery_role.add_to_policy(roleStmt2)

        # Add permissions to allow Kinesis Firehose to invoke the Lambda transformer
        roleStmt3 = _iam.PolicyStatement(
            effect=_iam.Effect.ALLOW,
            resources=[f"{fh_transformer_fn.function_arn}"],
            actions=["lambda:InvokeFunction"])
        roleStmt3.sid = "AllowKinesisToInvokeLambda"
        fh_delivery_role.add_to_policy(roleStmt3)

        # Add permissions to allow Kinesis Firehose to read from the Kinesis data stream
        policy_to_allow_fh_to_read_stream = _iam.Policy(
            self,
            "allowKinesisFhToReadKinesisDataStream",
            roles=[fh_delivery_role],
            statements=[
                _iam.PolicyStatement(
                    effect=_iam.Effect.ALLOW,
                    resources=[f"{src_stream.stream_arn}"],
                    sid="AllowKinesisFhToReadKinesisDataStream",
                    actions=[
                        "kinesis:DescribeStream", "kinesis:GetShardIterator",
                        "kinesis:GetRecords", "kinesis:ListShards"
                    ])
            ])

        self.fh_to_s3 = _kinesis_fh.CfnDeliveryStream(
            self,
            "fhDeliveryStream",
            delivery_stream_name=f"{firehose_delivery_stream_name}",
            delivery_stream_type=f"KinesisStreamAsSource",
            kinesis_stream_source_configuration=_kinesis_fh.CfnDeliveryStream.
            KinesisStreamSourceConfigurationProperty(
                kinesis_stream_arn=f"{src_stream.stream_arn}",
                role_arn=f"{fh_delivery_role.role_arn}"),
            extended_s3_destination_configuration=_kinesis_fh.
            CfnDeliveryStream.ExtendedS3DestinationConfigurationProperty(
                bucket_arn=fh_data_store.bucket_arn,
                buffering_hints=_kinesis_fh.CfnDeliveryStream.
                BufferingHintsProperty(interval_in_seconds=60, size_in_m_bs=1),
                compression_format="UNCOMPRESSED",
                prefix=f"phi-data/",
                # prefix="phi-data/date=!{timestamp:yyyy}-!{timestamp:MM}-!{timestamp:dd}/",
                role_arn=fh_delivery_role.role_arn,
                processing_configuration=_kinesis_fh.CfnDeliveryStream.
                ProcessingConfigurationProperty(
                    enabled=True,
                    processors=[
                        _kinesis_fh.CfnDeliveryStream.ProcessorProperty(
                            parameters=[
                                _kinesis_fh.CfnDeliveryStream.
                                ProcessorParameterProperty(
                                    parameter_name="LambdaArn",
                                    parameter_value=fh_transformer_fn.
                                    function_arn,
                                )
                            ],
                            type="Lambda",
                        )
                    ]),
            ),
        )

        self.fh_to_s3.add_depends_on(
            policy_to_allow_fh_to_read_stream.node.default_child)
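        # The explicit dependency makes sure the IAM policy that lets Firehose read the
        # Kinesis data stream exists before the delivery stream is created; otherwise the
        # delivery stream can fail its source-stream permission check during deployment.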

        # Restrict Transformer Lambda to be invoked by Firehose only from the stack owner account
        _lambda.CfnPermission(
            self,
            "restrictLambdaInvocationToFhInOwnAccount",
            action="lambda:InvokeFunction",
            function_name=fh_transformer_fn.function_arn,
            principal="firehose.amazonaws.com",
            source_account=core.Aws.ACCOUNT_ID,
            source_arn=self.fh_to_s3.attr_arn,
        )

        ###########################################
        ################# OUTPUTS #################
        ###########################################
        output_0 = core.CfnOutput(
            self,
            "AutomationFrom",
            value=f"{GlobalArgs.SOURCE_INFO}",
            description=
            "To know more about this automation stack, check out our github page."
        )

        output_1 = core.CfnOutput(
            self,
            "FirehoseArn",
            value=
            f"https://console.aws.amazon.com/firehose/home?region={core.Aws.REGION}#/details/{self.fh_to_s3.delivery_stream_name}",
            description=
            "Console URL of the Kinesis Firehose delivery stream.")
        output_2 = core.CfnOutput(
            self,
            "FirehoseDataStore",
            value=
            f"https://console.aws.amazon.com/s3/buckets/{fh_data_store.bucket_name}",
            description="The Firehose data store S3 bucket.")

        output_3 = core.CfnOutput(
            self,
            "SensitiveDataFilter",
            value=
            f"https://console.aws.amazon.com/lambda/home?region={core.Aws.REGION}#/functions/{fh_transformer_fn.function_name}",
            description="Lambda function that filters sensitive data from events.")