Example No. 1
    def __configure_base(self) -> None:
        """
    Setup base dependencies
    """
        self.__quotes_stream = k.Stream(self,
                                        'QuoteStream',
                                        encryption=k.StreamEncryption.MANAGED,
                                        retention_period=core.Duration.days(7),
                                        shard_count=1,
                                        stream_name='finsurf-incoming-quotes')

        self.__fundamental_stream = k.Stream(
            self,
            'FundamentalStream',
            encryption=k.StreamEncryption.MANAGED,
            retention_period=core.Duration.days(7),
            shard_count=1,
            stream_name='finsurf-incoming-fundamentals')

        self.pm_compute_cluster = ecs.Cluster(self, 'Cluster', vpc=self.vpc)
        self.security_group = ec2.SecurityGroup(
            self,
            'CollectorComponents',
            vpc=self.vpc,
            allow_all_outbound=True,
            description='Security Group for the CollectorLayer')
Example No. 2
    def __configure_ingestion(self) -> None:
        self.__updates_stream = k.Stream(
            self,
            'PortfolioUpdates',
            encryption=k.StreamEncryption.MANAGED,
            retention_period=core.Duration.days(7),
            shard_count=1,
            stream_name='portfolio-updates')

        self.__updates_handler = PythonLambda(
            self,
            'UpdatesHandler',
            build_prefix='artifacts/FinSurf-PortfolioMgmt-UpdatesHandler',
            handler='updates_handler.lambda_handler',
            subnet_group_name='PortfolioMgmt',
            context=self.context,
            securityGroups=[self.security_group]).function

        self.updates_handler.add_event_source(source=evt.KinesisEventSource(
            stream=self.updates_stream,
            starting_position=lambda_.StartingPosition.LATEST))

        # Configure writing to Neptune
        self.updates_handler.add_environment(
            key='NEPTUNE_ENDPOINT', value=self.neptune_cluster.attr_endpoint)
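
The stack above only wires the event source and the NEPTUNE_ENDPOINT variable; the handler code itself lives elsewhere. A minimal sketch of what updates_handler.lambda_handler might look like is shown below (only the handler name and environment variable come from the stack; the record shape and the Neptune write are assumptions):

# updates_handler.py -- hypothetical sketch, not part of the stack above.
import base64
import json
import os

NEPTUNE_ENDPOINT = os.environ.get("NEPTUNE_ENDPOINT", "")


def lambda_handler(event, context):
    """Decode the Kinesis records delivered by the event source mapping."""
    records = event.get("Records", [])
    for record in records:
        # Kinesis payloads arrive base64-encoded inside the event envelope
        payload = base64.b64decode(record["kinesis"]["data"])
        update = json.loads(payload)
        # A real handler would open a Gremlin connection to NEPTUNE_ENDPOINT
        # here and upsert the portfolio update into the graph.
        print(f"portfolio update bound for {NEPTUNE_ENDPOINT}: {update}")
    return {"processed": len(records)}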
Example No. 3
    def __init__(self, scope: core.Construct, construct_id: str,
                 **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)
        s3bucket = s3.Bucket(self, 'vika-yy')
        kds = data_stream.Stream(self, 'data_stream', shard_count=1)

        delivery_stream_role = iam.Role(
            self,
            'kdfdelivery_stream_role_role',
            assumed_by=iam.ServicePrincipal('firehose.amazonaws.com'))
        delivery_stream_role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'AmazonKinesisFullAccess'))
        delivery_stream_role.add_to_policy(
            iam.PolicyStatement(effect=iam.Effect.ALLOW,
                                resources=[s3bucket.bucket_arn],
                                actions=["s3:*"]))

        #s3bucket = s3.Bucket(self, 'vika-yy',bucket_name='yellowtaxicdk-input')
        s3_dest_config = delivery_stream.CfnDeliveryStream.ExtendedS3DestinationConfigurationProperty(
            bucket_arn=s3bucket.bucket_arn,
            buffering_hints=delivery_stream.CfnDeliveryStream.
            BufferingHintsProperty(interval_in_seconds=60, size_in_m_bs=128),
            role_arn=delivery_stream_role.role_arn,
            compression_format='UNCOMPRESSED',
            s3_backup_mode='Disabled')

        stream_source_config = delivery_stream.CfnDeliveryStream.KinesisStreamSourceConfigurationProperty(
            kinesis_stream_arn=kds.stream_arn,
            role_arn=delivery_stream_role.role_arn)

        kfirehose = delivery_stream.CfnDeliveryStream(
            self,
            'kfirehose',
            delivery_stream_name='deliverystream',
            delivery_stream_type='KinesisStreamAsSource',
            extended_s3_destination_configuration=s3_dest_config,
            kinesis_stream_source_configuration=stream_source_config)
        glue_role = iam.Role(
            self,
            'glue_role',
            assumed_by=iam.ServicePrincipal('glue.amazonaws.com'))
        glue_role.add_to_policy(
            iam.PolicyStatement(effect=iam.Effect.ALLOW,
                                resources=[s3bucket.bucket_arn],
                                actions=["s3:*"]))
        glue_role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AWSGlueServiceRole'))

        bucket_name = s3bucket.bucket_name
        glue_crawler = glue.CfnCrawler(
            self,
            'glue_crawler',
            database_name='yellow-taxis',
            role=glue_role.role_arn,
            #targets={"s3Targets": [{"path": f'{BUCKET}/input/'}]}
            targets={"s3Targets": [{
                "path": f'{bucket_name}/input/'
            }]})
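
Once deployed, records pushed into the data stream flow through Firehose into the bucket that the Glue crawler then catalogs. A hedged sketch of a boto3 test producer follows (the stream name is a placeholder: the Stream above is created without an explicit stream_name, so CloudFormation generates one):

# send_test_records.py -- hypothetical helper for pushing sample records.
import json

import boto3

kinesis = boto3.client("kinesis")


def send_taxi_records(stream_name: str, rows: list) -> None:
    """Write each row as one Kinesis record; Firehose buffers and delivers to S3."""
    for row in rows:
        kinesis.put_record(
            StreamName=stream_name,
            Data=json.dumps(row).encode("utf-8"),
            PartitionKey=str(row.get("vendor_id", "0")),
        )


if __name__ == "__main__":
    # Replace with the generated physical name of the 'data_stream' construct.
    send_taxi_records("REPLACE_WITH_STREAM_NAME",
                      [{"vendor_id": 1, "fare_amount": 12.5}])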
Example No. 4
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        my_table = _dynamodb.Table(self,
                                   id='dynamoTable',
                                   table_name='testcdktable',
                                   partition_key=_dynamodb.Attribute(
                                       name='lastname',
                                       type=_dynamodb.AttributeType.STRING))

        my_stream = _kinesis.Stream(self,
                                    id='kinesistream',
                                    stream_name='cdkkinesisstream')

        my_bucket = _s3.Bucket(self, id='s3bucket', bucket_name='rajcdkbucket')

        my_lambda = _lambda.Function(self,
                                     id='lambdafunction',
                                     runtime=_lambda.Runtime.PYTHON_3_7,
                                     handler='hello.handler',
                                     code=_lambda.Code.asset('lambdacode'))

        my_api = _apigateway.LambdaRestApi(self,
                                           id='lambdaapi',
                                           rest_api_name='cdkapi',
                                           handler=my_lambda)
Example No. 5
    def __init__(self, scope: core.Construct, id: str, shard_count: int,
                 **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # The code that defines your stack goes here
        ##############################################
        #######   CDK STABILITY EXPERIMENTAL   #######
        ##############################################

        self.kinesis_data_pipe = _kinesis.Stream(
            self,
            "dataPipe",
            # retention_period_hours=24,
            # retention_period=core.Duration.minutes(300),
            shard_count=1,
            stream_name="data_pipe")

        self.data_pipe_ssm_param = _ssm.StringParameter(
            self,
            "dataPipeParameter",
            description="Kinesis Stream Name",
            parameter_name=
            f"/{global_args.REPO_NAME}/streams/data_pipe/stream_name",
            string_value=f"{self.kinesis_data_pipe.stream_name}")

        output_0 = core.CfnOutput(
            self,
            "AutomationFrom",
            value=f"{global_args.SOURCE_INFO}",
            description=
            "To know more about this automation stack, check out our github page."
        )
Example No. 6
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        kms_policy = iam.PolicyDocument()
        kms_policy.add_statements(
            iam.PolicyStatement(effect=iam.Effect.ALLOW,
                                actions=["kms:*"],
                                resources=['*'],
                                principals=[iam.AccountPrincipal(account_id=self.account),
                                            iam.ServicePrincipal(service="lambda.amazonaws.com")]
                                )
        )

        self._kinesis_key = kms.Key(self,
                               "volumeKey",
                               enable_key_rotation=True,
                               policy=kms_policy,
                               removal_policy=core.RemovalPolicy.RETAIN
                               )

        self._kinesis_stream = kinesis.Stream(self,
                                              id,
                                              encryption_key=self.kinesis_key,
                                              retention_period=core.Duration.hours(24),
                                              shard_count=1,
                                              stream_name="PaymentStream"
                                              )
Example No. 7
    def create_kinesis_stream(self, stream_name):
        kinesis_stream = kinesis.Stream(
            self,
            "stock_stream",
            stream_name=stream_name
            )

        return kinesis_stream
Example No. 8
    def __init__(self, scope: core.Construct, construct_id: str,
                 **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        stream = kds.Stream(self,
                            "InputStream",
                            shard_count=1,
                            retention_period=Duration.hours(24))

        self._stream = stream
Example No. 9
    def __init__(self, scope: core.Construct, _id: str, **kwargs) -> None:

        new_kwargs = {'env': kwargs['env']}

        super().__init__(scope, _id, **new_kwargs)

        # Create staging Kinesis Data Stream, set to 1 shard in this sample code
        kinesis_stream = kinesis.Stream(self,
                                        STREAM_NAME,
                                        stream_name=STREAM_NAME,
                                        shard_count=1)
        # Create replicator lambda function that consumes the Kinesis stream and writes to target DDB table
        target_table_name = kwargs['target_table_name']
        dlq_sqs = sqs.Queue(self, 'replicator_failure_Q')
        replicator_lambda = lambda_.Function(
            self,
            'replicator_kinesis',
            code=lambda_.Code.asset("../lambda_replicator"),
            runtime=lambda_.Runtime.PYTHON_3_7,
            handler='replicator_kinesis.lambda_handler',
            timeout=core.Duration.seconds(60),
            environment={'TARGET_TABLE': target_table_name})
        kinesis_stream.grant_read(replicator_lambda)
        replicator_lambda.add_event_source(
            KinesisEventSource(
                stream=kinesis_stream,
                starting_position=lambda_.StartingPosition.LATEST,
                batch_size=500,
                retry_attempts=100,
                parallelization_factor=10,
                on_failure=SqsDlq(dlq_sqs)))
        target_table = ddb.Table.from_table_name(self, target_table_name,
                                                 target_table_name)
        target_table.grant_read_write_data(replicator_lambda)

        # The replicator Lambda publishes metrics to CloudWatch
        put_metrics_policy = PolicyStatement(
            actions=['cloudwatch:PutMetricData'],
            effect=Effect.ALLOW,
            resources=['*'])
        replicator_lambda.add_to_role_policy(put_metrics_policy)

        # Create replicator-stats table for statistics of replicator
        replicator_stats_table = ddb.Table(
            self,
            REPLICATOR_STATS_TABLE_NAME,
            table_name=REPLICATOR_STATS_TABLE_NAME,
            partition_key=ddb.Attribute(name="PK",
                                        type=ddb.AttributeType.STRING),
            billing_mode=ddb.BillingMode.PAY_PER_REQUEST,
            removal_policy=RemovalPolicy.DESTROY)
        replicator_stats_table.grant_read_write_data(replicator_lambda)
        core.CfnOutput(self,
                       "replicator_stats_table",
                       value=replicator_stats_table.table_name)
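
The replicator function body is loaded from ../lambda_replicator and is not shown here. A rough sketch of what replicator_kinesis.lambda_handler could look like, assuming the Kinesis payloads are JSON documents already shaped like items for the target table (only TARGET_TABLE, the handler name, and the PutMetricData permission come from the stack):

# replicator_kinesis.py -- hypothetical sketch of the replicator handler.
import base64
import json
import os

import boto3

dynamodb = boto3.resource("dynamodb")
cloudwatch = boto3.client("cloudwatch")
TARGET_TABLE = os.environ["TARGET_TABLE"]


def lambda_handler(event, context):
    table = dynamodb.Table(TARGET_TABLE)
    items = [
        json.loads(base64.b64decode(record["kinesis"]["data"]))
        for record in event.get("Records", [])
    ]
    # Batch-write the decoded items into the target table
    with table.batch_writer() as batch:
        for item in items:
            batch.put_item(Item=item)

    # The stack grants cloudwatch:PutMetricData for replication statistics
    cloudwatch.put_metric_data(
        Namespace="DDBReplicator",
        MetricData=[{
            "MetricName": "ReplicatedRecords",
            "Value": len(items),
            "Unit": "Count",
        }],
    )
    return {"replicated": len(items)}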
Example No. 10
    def __init__(self, scope: core.Construct, construct_id: str,
                 **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        # TO-DO: Resolve how to surface this through a property rather than directly.
        self.kinesis_stream = kinesis.Stream(
            self,
            "kinesisStream",
            retention_period=core.Duration.days(3),
            stream_name=core.PhysicalName.GENERATE_IF_NEEDED,
        )
        core.CfnOutput(
            self,
            "kinesisDataStreamArn",
            value=self.kinesis_stream.stream_arn,
        )

        bucket = s3.Bucket.from_bucket_name(self,
                                            "s3Bucket",
                                            bucket_name=OUTPUT_BUCKET)

        kf_role = iam.Role(
            self,
            "kinesisFirehoseRole",
            assumed_by=iam.ServicePrincipal("firehose.amazonaws.com"),
        )

        kf_props = kf.CfnDeliveryStreamProps(
            extended_s3_destination_configuration=kf.CfnDeliveryStream.
            ExtendedS3DestinationConfigurationProperty(
                role_arn=kf_role.role_arn,
                bucket_arn=bucket.bucket_arn,
                prefix="twitter_data_",
                error_output_prefix="failures",
            ))

        data_pipeline = (
            kinesis_data_pipeline.KinesisStreamsToKinesisFirehoseToS3(
                self,
                "dataPipeline",
                existing_stream_obj=self.kinesis_stream,
                existing_bucket_obj=bucket,
                kinesis_firehose_props=kf_props,
            ))

        self.kinesis_firehose = data_pipeline.kinesis_firehose

        bucket.grant_write(kf_role)
Example No. 11
    def __init__(self, scope: core.Construct, id: str, ctx: object,
                 **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # Kinesis Data Stream for telemetry buffer
        self.kinesis_stream = ks.Stream(
            scope=self,
            id="logstash_queue",
            shard_count=ctx.queue.kinesis_shard_count)
        self.state_table = ddb.Table(
            scope=self,
            id="logstash_state",
            partition_key=ddb.Attribute(name="leaseKey",
                                        type=ddb.AttributeType.STRING),
            billing_mode=ddb.BillingMode.PAY_PER_REQUEST,
            removal_policy=core.RemovalPolicy.DESTROY)
Example No. 12
    def __init__(self, app: core.App, id: str) -> None:
        super().__init__(app, id)

        with open("lambda-handler.py", encoding="utf8") as fp:
            handler_code = fp.read()

        Kstream = kinesis_.Stream(self,
                                  "KinesisSagemakerInference",
                                  encryption=None,
                                  encryption_key=None,
                                  retention_period_hours=24,
                                  shard_count=1)

        lambdaFn = lambda_.Function(
            self,
            "KinesisSMLambda",
            code=lambda_.InlineCode(handler_code),
            handler="index.lambda_handler",
            timeout=core.Duration.seconds(300),
            runtime=lambda_.Runtime.PYTHON_3_7,
            environment={
                "endpoint_name":
                endpoint_name,  # CHANGE TO YOUR ENDPOINT NAME!!
                "content_type": "text/csv",
                "input_data": input_data,
                "bucket": bucket,
                "key": key
            })

        lambdaFn.add_to_role_policy(
            aws_iam.PolicyStatement(
                actions=[
                    'sagemaker:InvokeEndpoint',
                ],
                resources=[
                    'arn:aws:sagemaker:{}:{}:endpoint/{}'.format(
                        my_region, my_acc_id, endpoint_name),
                ]))

        # Add the Kinesis stream as Lambda source
        lambdaFn.add_event_source(
            aws_lambda_event_sources.KinesisEventSource(
                Kstream, starting_position=lambda_.StartingPosition.LATEST))

        # Add stream read permissions
        Kstream.grant_read(lambdaFn.role)
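
Since the Lambda body is read from lambda-handler.py, it is not part of the stack code above. A hedged sketch of such a handler, assuming each Kinesis record carries one CSV row to score (only the endpoint_name / content_type environment variables and the sagemaker:InvokeEndpoint permission come from the stack):

# lambda-handler.py -- hypothetical sketch of the inference handler.
import base64
import os

import boto3

sagemaker_runtime = boto3.client("sagemaker-runtime")


def lambda_handler(event, context):
    endpoint_name = os.environ["endpoint_name"]
    content_type = os.environ.get("content_type", "text/csv")
    predictions = []
    for record in event.get("Records", []):
        # Each record is assumed to be a single CSV row
        csv_row = base64.b64decode(record["kinesis"]["data"]).decode("utf-8")
        response = sagemaker_runtime.invoke_endpoint(
            EndpointName=endpoint_name,
            ContentType=content_type,
            Body=csv_row,
        )
        predictions.append(response["Body"].read().decode("utf-8"))
    return {"predictions": predictions}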
Example No. 13
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        ZachStreamName = "ZachWang"
        ksStream = ks.Stream(
            self,
            id=ZachStreamName,
            stream_name=ZachStreamName,
            encryption=ks.StreamEncryption.KMS,
            encryption_key=self.GenerateKmsKey(ZachStreamName),
            shard_count=2,
            retention_period_hours=168)
        core.CfnOutput(self,
                       ZachStreamName + "Stream-ARN",
                       value=ksStream.stream_arn)
        core.CfnOutput(self,
                       ZachStreamName + "Stream-Name",
                       value=ksStream.stream_name)
Example No. 14
def base_kinesis_stream(construct, **kwargs):
    """
    Function that generates a Kinesis Data Stream.
    :param construct: Custom construct that will use this function. From the external construct is usually 'self'.
    :param kwargs: Consist of required 'stream_name'.
    :return: Kinesis Stream Construct.
    """
    stream_name = construct.prefix + "_" + kwargs[
        "stream_name"] + "_" + "stream" + "_" + construct.environment_
    stream_retention_period = Duration.hours(
        kwargs["retention_period"]) if kwargs.get(
            "retention_period") is not None else None
    kinesis_stream = stream.Stream(
        construct,
        id=stream_name,
        stream_name=stream_name,
        shard_count=kwargs["shard_count"],
        retention_period=stream_retention_period,
    )

    return kinesis_stream
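
A brief usage sketch, assuming the helper above is importable and that the calling stack defines the prefix and environment_ attributes it relies on (the stack and stream names here are placeholders):

from aws_cdk import core

# from my_constructs.streams import base_kinesis_stream  # wherever the helper lives


class EventsStack(core.Stack):
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)
        # Attributes the helper expects on the calling construct
        self.prefix = "myapp"
        self.environment_ = "dev"

        # Resulting physical stream name: "myapp_orders_stream_dev"
        self.orders_stream = base_kinesis_stream(
            self,
            stream_name="orders",
            shard_count=1,
            retention_period=48,  # hours; omit to use the service default
        )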
Example No. 15
    def __init__(self, scope, id, **kwargs):
        super().__init__(scope, id, **kwargs)

        self.prefix = MY_PREFIX

        # Destination Bucket on S3
        self.s3_bucket = _s3.Bucket(
            self,
            "storage",
            bucket_name=f"{self.prefix}-data-store",
        )

        # Event Stream
        self.stream = _kns.Stream(self,
                                  "stream",
                                  stream_name=f"{self.prefix}-stream",
                                  shard_count=1)

        # API Endpoint
        self.create_api_endpoint()

        # Event Processing
        self.create_stream_processor()
Example No. 16
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        my_table = _dynamodb.Table(self,
                                   id='dynamoTable',
                                   table_name='testcdktable',
                                   partition_key=_dynamodb.Attribute(
                                       name='lastname',
                                       type=_dynamodb.AttributeType.STRING))

        my_stream = _kinesis.Stream(self,
                                    id='kinesistream',
                                    stream_name='cdkkinesisstream')

        my_bucket = _s3.Bucket(self, id='s3bucket', bucket_name='rajcdkbucket')

        my_lambda = _lambda.Function(self,
                                     id='lambdafunction',
                                     runtime=_lambda.Runtime.PYTHON_3_7,
                                     handler='hello.handler',
                                     code=_lambda.Code.asset('lambdacode'))

        my_api = _apigateway.LambdaRestApi(self,
                                           id='lambdaapi',
                                           rest_api_name='cdkapi',
                                           handler=my_lambda)

        api_with_method = _apigateway.RestApi(self,
                                              id='restapi',
                                              rest_api_name='cdkrestapi_music')
        #music = api_with_method.root.addResource('music')
        #music.addMethod('GET')
        music = api_with_method.root.add_resource('music')
        music.add_method('GET')
        music.add_method("DELETE",
                         _apigateway.HttpIntegration("http://aws.amazon.com"))
Example No. 17
    def __init__(self, scope: core.Construct, construct_id: str,
                 **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        #kinesis data stream
        kinesis_stream = aws_kinesis.Stream(
            self,
            "kinesisStream",
            retention_period=core.Duration.hours(24),
            shard_count=1,
            stream_name="kinesis_test_data_pipe")

        #s3 for store stream data events
        kinesis_s3_bucket = aws_s3.Bucket(
            self, "kinesisS3Bucket", removal_policy=core.RemovalPolicy.DESTROY)

        #Lambda functions

        #import function code - data consumer
        try:
            with open("deployments/functions/stream_data_get.py",
                      mode="r") as file:
                function_body_get = file.read()
        except OSError:
            print('File could not be read')

        #consume function
        stream_get_function = aws_lambda.Function(
            self,
            "consumeFunction",
            function_name="StreamConsumeFunction",
            description="Process Data Streams and store to s3",
            runtime=aws_lambda.Runtime.PYTHON_3_6,
            handler="index.lambda_handler",
            code=aws_lambda.InlineCode(function_body_get),
            timeout=core.Duration.seconds(5),
            reserved_concurrent_executions=1,
            environment={
                "LOG_LEVEL": "INFO",
                "BUCKET_NAME": f"{kinesis_s3_bucket.bucket_name}"
            })

        #permission for the Lambda to read the stream
        kinesis_stream.grant_read(stream_get_function)

        #s3 permission
        lambda_s3_permission = aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=[f"{kinesis_s3_bucket.bucket_arn}/*"],
            actions=["s3:PutObject"])
        lambda_s3_permission.sid = "S3WritePermissionToLambda"
        stream_get_function.add_to_role_policy(lambda_s3_permission)

        #logs
        stream_consume_logs = aws_logs.LogGroup(
            self,
            "StreamConsumeLogs",
            log_group_name=f"/aws/lambda/{stream_get_function.function_name}",
            removal_policy=core.RemovalPolicy.DESTROY,
            retention=aws_logs.RetentionDays.ONE_DAY)

        #kinesis event source
        kinesis_event_sources = aws_lambda_es.KinesisEventSource(
            stream=kinesis_stream,
            starting_position=aws_lambda.StartingPosition.LATEST,
            batch_size=1)

        #Attach the Kinesis event source to the Lambda
        stream_get_function.add_event_source(kinesis_event_sources)

        #Stream generator Lambda function

        #import function code - data generator
        try:
            with open("deployments/functions/stream_data_gen.py",
                      mode="r") as file:
                function_body_gen = file.read()
        except OSError:
            print('File could not be read')

        #stream generate function
        stream_gen_function = aws_lambda.Function(
            self,
            "GenarateFunction",
            function_name="StreamGenerateFunction",
            description="Generate Data Streams",
            runtime=aws_lambda.Runtime.PYTHON_3_6,
            handler="index.lambda_handler",
            code=aws_lambda.InlineCode(function_body_gen),
            timeout=core.Duration.seconds(60),
            reserved_concurrent_executions=1,
            environment={
                "LOG_LEVEL": "INFO",
                "STREAM_NAME": f"{kinesis_stream.stream_name}"
            })

        #permission for the Lambda to write to Kinesis
        kinesis_stream.grant_read_write(stream_gen_function)

        #logs
        stream_generate_logs = aws_logs.LogGroup(
            self,
            "StreamGenerateLogs",
            log_group_name=f"/aws/lambda/{stream_gen_function.function_name}",
            removal_policy=core.RemovalPolicy.DESTROY,
            retention=aws_logs.RetentionDays.ONE_DAY)
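
The two function bodies are read from files under deployments/functions/ and are not shown. A hedged sketch of what the consumer, stream_data_get.py, might do with each batch (only BUCKET_NAME, the handler name, and the s3:PutObject permission come from the stack; the key layout is an assumption):

# deployments/functions/stream_data_get.py -- hypothetical consumer sketch.
import base64
import os
import uuid

import boto3

s3 = boto3.client("s3")
BUCKET_NAME = os.environ["BUCKET_NAME"]


def lambda_handler(event, context):
    records = event.get("Records", [])
    for record in records:
        payload = base64.b64decode(record["kinesis"]["data"])
        # One object per record; the stack only grants s3:PutObject
        s3.put_object(
            Bucket=BUCKET_NAME,
            Key=f"stream-data/{uuid.uuid4()}.json",
            Body=payload,
        )
    return {"stored": len(records)}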
Example No. 18
    def __init__(self, scope: core.Construct, construct_id: str,
                 **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        kda_src_bucket_name = core.CfnParameter(
            self,
            "kda_src_bucket_name",
            type="String",
            description=
            "The name of the Amazon S3 bucket where uploaded files will be stored."
        )

        kda_output_bucket_name = core.CfnParameter(
            self,
            "kda_output_bucket_name",
            type="String",
            description=
            "The name of the Amazon S3 bucket KDA output via Firehose will be stored."
        )

        sourceStreamName = core.CfnParameter(
            self,
            "sourceStreamName",
            type="String",
            description="The name of the Kinesis Data Stream.",
            default="BikeRideGenerator")

        deliveryStreamName = core.CfnParameter(
            self,
            "deliveryStreamName",
            type="String",
            description="The name of the Kinesis Firehose output stream.",
            default="BikeAnalyticsOutput")

        # Create S3 buckets
        kda_src_bucket = s3.Bucket(
            self,
            "kda_src_bucket",
            bucket_name=kda_src_bucket_name.value_as_string,
            versioned=False,
            removal_policy=core.RemovalPolicy.DESTROY)
        kda_output_bucket = s3.Bucket(
            self,
            "kda_output_bucket",
            bucket_name=kda_output_bucket_name.value_as_string,
            versioned=False,
            removal_policy=core.RemovalPolicy.DESTROY)

        # create Kinesis Source Stream
        sourceStream = kds.Stream(self,
                                  "sourceStream",
                                  stream_name=sourceStreamName.value_as_string,
                                  shard_count=10)

        # Firehose delivery role (aws_cdk.aws_iam.Role)
        fhIAMRole = iam.Role(
            self,
            "fhIAMRole",
            assumed_by=iam.ServicePrincipal('firehose.amazonaws.com'),
            role_name="BikeRideFirehoseDeliveryRole",
            description="FireHose Delivery S3 Role")

        fhIAMRole.add_to_policy(
            iam.PolicyStatement(resources=[kda_output_bucket.bucket_arn],
                                actions=['s3:*']))

        # create Firehose delivery stream
        fhS3Delivery = fh.CfnDeliveryStream.ExtendedS3DestinationConfigurationProperty(
            bucket_arn=kda_output_bucket.bucket_arn,
            role_arn=fhIAMRole.role_arn)

        deliveryStream = fh.CfnDeliveryStream(
            self,
            "deliveryStream",
            delivery_stream_name=deliveryStreamName.value_as_string,
            extended_s3_destination_configuration=fhS3Delivery)

        # ec2 instance
        # VPC
        vpc = ec2.Vpc(self,
                      "KDA-VPC",
                      nat_gateways=0,
                      subnet_configuration=[
                          ec2.SubnetConfiguration(
                              name="public", subnet_type=ec2.SubnetType.PUBLIC)
                      ])

        # AMI
        amzn_linux = ec2.MachineImage.latest_amazon_linux(
            generation=ec2.AmazonLinuxGeneration.AMAZON_LINUX_2,
            edition=ec2.AmazonLinuxEdition.STANDARD,
            virtualization=ec2.AmazonLinuxVirt.HVM,
            storage=ec2.AmazonLinuxStorage.GENERAL_PURPOSE)

        # Instance Role and SSM Managed Policy
        ec2role = iam.Role(
            self,
            "InstanceSSM",
            assumed_by=iam.ServicePrincipal("ec2.amazonaws.com"))

        ec2role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                "service-role/AmazonEC2RoleforSSM"))

        ec2role.add_to_policy(
            iam.PolicyStatement(resources=[sourceStream.stream_arn],
                                actions=['kinesis:*']))
        user_data = "#!/bin/bash\n"
        user_data += "echo export KINESIS_STREAM=" + sourceStreamName.value_as_string + " | sudo tee -a /etc/profile\n"
        user_data += "source /etc/profile\n"
        user_data += user_data_file

        # Instance
        instance = ec2.Instance(self,
                                "Instance",
                                instance_type=ec2.InstanceType("t3.small"),
                                machine_image=amzn_linux,
                                vpc=vpc,
                                role=ec2role,
                                user_data=ec2.UserData.custom(user_data))
Example No. 19
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # The code that defines your stack goes here
        self._wellness_kns_stream = kinesis.Stream(
            self, 'WellnessKnsStream', retention_period=core.Duration.hours(24), shard_count=1, stream_name='event.member.appointment.devInfo')
Example No. 20
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        self._table = ddb.Table(self,
                                'AirTicketOrder',
                                partition_key={
                                    'name': 'customer_id',
                                    'type': ddb.AttributeType.STRING
                                },
                                stream=ddb.StreamViewType.NEW_AND_OLD_IMAGES,
                                removal_policy=core.RemovalPolicy.DESTROY)

        self.lambda_cmd = _lambda.Function(
            self,
            'CommandDDBSaver',
            runtime=_lambda.Runtime.PYTHON_3_7,
            code=_lambda.Code.asset("./lambda/cmd/"),
            handler="cmd.lambda_handler",
            environment={
                "ORDER_TABLE_NAME": self._table.table_name,
            })

        self._table.grant_read_write_data(self.lambda_cmd)

        # Allow Command lambda to invoke other lambda
        self.lambda_cmd.add_to_role_policy(
            iam.PolicyStatement(effect=iam.Effect.ALLOW,
                                resources=["*"],
                                actions=["lambda:InvokeFunction"]))

        api = apigw.LambdaRestApi(
            self,
            "CommandEndPoint",
            handler=self.lambda_cmd,
        )

        # TODO: With 2 AZs we could declare just one public and one private subnet; across the 2 AZs that still yields 2 public + 2 private subnets
        # Lambda access RDS Aurora MySQL requires VPC for security and perf
        vpc = ec2.Vpc(
            self,
            'air-ticket',
            cidr="10.125.0.0/16",
            max_azs=2,
            nat_gateways=1,
            subnet_configuration=[
                ec2.SubnetConfiguration(name="public1",
                                        cidr_mask=24,
                                        subnet_type=ec2.SubnetType.PUBLIC),
                ec2.SubnetConfiguration(name="public2",
                                        cidr_mask=24,
                                        subnet_type=ec2.SubnetType.PUBLIC),
                ec2.SubnetConfiguration(name="private1",
                                        cidr_mask=24,
                                        subnet_type=ec2.SubnetType.PRIVATE),
                ec2.SubnetConfiguration(name="private2",
                                        cidr_mask=24,
                                        subnet_type=ec2.SubnetType.PRIVATE)
            ])

        query_lambda_sg = ec2.SecurityGroup(
            self,
            'Query-Lambda-SG',
            vpc=vpc,
            description="Allows DB connections from Query Lambda SG",
        )

        sink_lambda_sg = ec2.SecurityGroup(
            self,
            'RDS-Sink-Lambda-SG',
            vpc=vpc,
            description="Allows DB connections from Sink Lambda SG",
        )

        db_name = "Demo"
        db_user_name = 'admin'
        db_user_password = 'password'

        parameter_group = rds.ParameterGroup(self,
                                             "ParameterGroup",
                                             family="mysql5.7",
                                             parameters={})
        aurora_db = rds.DatabaseInstance(
            self,
            "air-ticket-db",
            master_user_password=core.SecretValue.ssm_secure(
                'AirTicket.AdminPass', version='1'),
            master_username=db_user_name,
            engine=rds.DatabaseInstanceEngine.MYSQL,
            engine_version="5.7",
            parameter_group=parameter_group,
            vpc=vpc,
            # Disable deletion protection for auto deletion
            deletion_protection=False,
            instance_class=ec2.InstanceType.of(ec2.InstanceClass.MEMORY5,
                                               ec2.InstanceSize.XLARGE),
            removal_policy=core.RemovalPolicy.DESTROY)

        self._query_handler = _lambda.Function(
            self,
            'QueryHandler',
            runtime=_lambda.Runtime.PYTHON_3_7,
            code=_lambda.Code.asset("./lambda/query/"),
            handler="query.lambda_handler",
            timeout=core.Duration.seconds(60),
            vpc=vpc,
            security_group=query_lambda_sg,
            environment={
                "AuroraEndpoint": aurora_db.db_instance_endpoint_address,
                "dbName": db_name,
                "dbPassword": db_user_passowrd,
                "dbUser": db_user_name
            })

        query_api = apigw.LambdaRestApi(
            self,
            "Query",
            handler=self._query_handler,
        )

        # Init DB Lambda
        self.lambda_init = _lambda.Function(
            self,
            'InitDBHandler',
            runtime=_lambda.Runtime.PYTHON_3_7,
            code=_lambda.Code.asset("./lambda/initdb/"),
            handler="init.lambda_handler",
            timeout=core.Duration.seconds(60),
            vpc=vpc,
            security_group=query_lambda_sg,
            environment={
                "AuroraEndpoint": aurora_db.db_instance_endpoint_address,
                "dbName": db_name,
                "dbPassword": db_user_passowrd,
                "dbUser": db_user_name
            })

        self.lambda_cmd.add_environment('INITDB_LAMBDA_NAME',
                                        self.lambda_init.function_name)

        # Create stream for fan-out
        stream_name = 'kinesis-stream-for-fanout'

        # Sync DDB stream delta to RDS Lambda
        self.lambda_sync = _lambda.Function(
            self,
            'SyncHandler',
            runtime=_lambda.Runtime.PYTHON_3_7,
            code=_lambda.Code.asset("./lambda/sync/"),
            handler="sync.lambda_handler",
            timeout=core.Duration.seconds(60),
            vpc=vpc,
            security_group=query_lambda_sg,
            environment={"streamName": stream_name})

        # Add DDB stream trigger to sync lambda
        self.lambda_sync.add_event_source(
            event_sources.DynamoEventSource(
                self._table,
                starting_position=_lambda.StartingPosition.TRIM_HORIZON))

        self._table.grant_stream_read(self.lambda_sync)

        # Allow the init/sync Lambdas to access MySQL
        aurora_db.connections.allow_from(
            query_lambda_sg,
            ec2.Port.tcp(3306),
            "Allow MySQL access from Query Lambda (because Aurora actually exposes PostgreSQL/MySQL on port 3306)",
        )

        aurora_db.connections.allow_from(
            sink_lambda_sg,
            ec2.Port.tcp(3306),
            "Allow MySQL access from Sink Lambda (because Aurora actually exposes PostgreSQL/MySQL on port 3306)",
        )

        strm = kinesis.Stream(self,
                              'kinesis-stream-for-fanout',
                              stream_name=stream_name)

        # Create RDS Sink Lambda
        self.lambda_rds_sink = _lambda.Function(
            self,
            'RDS_SINK_1',
            handler='rds_sinker.lambda_handler',
            code=_lambda.Code.asset("./lambda/sink/"),
            runtime=_lambda.Runtime.PYTHON_3_7,
            timeout=core.Duration.seconds(300),
            vpc=vpc,
            security_group=sink_lambda_sg,
            environment={
                "AuroraEndpoint": aurora_db.db_instance_endpoint_address,
                "dbName": db_name,
                "dbPassword": db_user_passowrd,
                "dbUser": db_user_name
            })

        # Update Lambda Permissions To Use Stream
        strm.grant_read_write(self.lambda_sync)
        strm.grant_read(self.lambda_rds_sink)

        stream_consumer = kinesis.CfnStreamConsumer(
            self,
            'lambda-efo-consumer-id',
            consumer_name='lambda-efo-consumer',
            stream_arn=strm.stream_arn)

        e_s_mapping = _lambda.EventSourceMapping(
            self,
            'lambda-efo-consumer-event-source-mapping',
            target=self.lambda_rds_sink,
            # Use the consumer ARN (not the stream ARN) so the mapping reads
            # through the enhanced fan-out (EFO) consumer registered above
            event_source_arn=stream_consumer.attr_consumer_arn,
            batch_size=1,
            starting_position=_lambda.StartingPosition.TRIM_HORIZON,
        )

        # self.lambda_rds_sink.add_event_source_mapping(e_s_mapping)

        # The CDK below would register the Lambda as a standard Kinesis
        # consumer instead of an EFO consumer
        #
        # # Create New Kinesis Event Source
        # kinesis_stream_event_source = event_sources.KinesisEventSource(
        #     stream=strm,
        #     starting_position=_lambda.StartingPosition.TRIM_HORIZON,
        #     batch_size=1
        # )

        # # Attach New Event Source To Lambda
        # self.lambda_rds_sink.add_event_source(kinesis_stream_event_source)

        # Create dead letter queue and grant send permission to sync/sink lambda
        self._queue = sqs.Queue(
            self,
            "DeadLetterQueue",

            #Amazon SQS sets a visibility timeout, a period of time during which Amazon
            # SQS prevents other consumers from receiving and processing the message.
            # The default visibility timeout for a message is 30 seconds.
            # The minimum is 0 seconds. The maximum is 12 hours.
            visibility_timeout=core.Duration.seconds(300),
        )

        self._queue.grant_send_messages(self.lambda_sync)
        self._queue.grant_send_messages(self.lambda_rds_sink)

        self.lambda_sync.add_environment("DLQ_NAME", self._queue.queue_name)
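
The sync Lambda body lives under ./lambda/sync/ and is not shown. A rough sketch of how sync.lambda_handler might fan DynamoDB stream records out to the Kinesis stream named in streamName (the record handling and partition key choice are assumptions):

# sync.py -- hypothetical sketch of the DDB-stream -> Kinesis fan-out handler.
import json
import os

import boto3

kinesis = boto3.client("kinesis")
STREAM_NAME = os.environ["streamName"]


def lambda_handler(event, context):
    records = event.get("Records", [])
    for record in records:
        # Forward the raw DynamoDB stream record to Kinesis for fan-out
        kinesis.put_record(
            StreamName=STREAM_NAME,
            Data=json.dumps(record["dynamodb"], default=str).encode("utf-8"),
            PartitionKey=record["eventID"],
        )
    return {"forwarded": len(records)}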
Example No. 21
    def __init__(self, scope: cdk.Construct, construct_id: str,
                 stack_log_level: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        # Add your stack resources below
        # Create Kinesis Data Stream
        self.data_pipe_stream = _kinesis.Stream(
            self,
            "dataPipeStream",
            retention_period=cdk.Duration.hours(24),
            shard_count=1,
            stream_name=f"data_pipe_{construct_id}",
        )

        ########################################
        #######                          #######
        #######   Stream Data Producer   #######
        #######                          #######
        ########################################

        # Read Lambda Code
        try:
            with open(
                    "stacks/back_end/serverless_kinesis_producer_stack/lambda_src/stream_data_producer.py",
                    encoding="utf-8",
                    mode="r",
            ) as f:
                data_producer_fn_code = f.read()
        except OSError:
            print("Unable to read Lambda Function Code")
            raise

        data_producer_fn = _lambda.Function(
            self,
            "streamDataProducerFn",
            function_name=f"data_producer_{construct_id}",
            description=
            "Produce streaming data events and push to Kinesis stream",
            runtime=_lambda.Runtime.PYTHON_3_7,
            code=_lambda.InlineCode(data_producer_fn_code),
            handler="index.lambda_handler",
            timeout=cdk.Duration.seconds(2),
            reserved_concurrent_executions=1,
            environment={
                "LOG_LEVEL": "INFO",
                "APP_ENV": "Production",
                "MAX_MSGS_TO_PRODUCE": "5",
                "STREAM_NAME": f"{self.data_pipe_stream.stream_name}",
                "STREAM_AWS_REGION": f"{cdk.Aws.REGION}",
            },
        )

        # Grant our Lambda Producer privileges to write to Kinesis Data Stream
        self.data_pipe_stream.grant_read_write(data_producer_fn)

        data_producer_fn_version = data_producer_fn.latest_version
        data_producer_fn_version_alias = _lambda.Alias(
            self,
            "streamDataProducerFnAlias",
            alias_name="MystiqueAutomation",
            version=data_producer_fn_version,
        )

        # Create Custom Loggroup for Producer
        data_producer_lg = _logs.LogGroup(
            self,
            "streamDataProducerFnLogGroup",
            log_group_name=f"/aws/lambda/{data_producer_fn.function_name}",
            removal_policy=cdk.RemovalPolicy.DESTROY,
            retention=_logs.RetentionDays.ONE_DAY,
        )

        # Restrict the Producer Lambda to be invoked only from the stack owner account
        data_producer_fn.add_permission(
            "restrictLambdaInvocationToOwnAccount",
            principal=_iam.AccountRootPrincipal(),
            action="lambda:InvokeFunction",
            source_account=cdk.Aws.ACCOUNT_ID,
        )

        ###########################################
        ################# OUTPUTS #################
        ###########################################
        output_0 = cdk.CfnOutput(
            self,
            "AutomationFrom",
            value=f"{GlobalArgs.SOURCE_INFO}",
            description=
            "To know more about this automation stack, check out our github page.",
        )

        output_1 = cdk.CfnOutput(
            self,
            "StoreOrdersEventsProducer",
            value=
            f"https://console.aws.amazon.com/lambda/home?region={cdk.Aws.REGION}#/functions/{data_producer_fn.function_name}",
            description=
            "Produce streaming data events and push to Kinesis stream.",
        )
Example No. 22
    def __init__(self, scope: core.Construct, id: str, vpc: ec2.IVpc,
                 runnerrole: iam.IRole, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # The code that defines your stack goes here
        clusterAdmin = iam.Role(self,
                                "AdminRole",
                                assumed_by=iam.AccountRootPrincipal())

        cluster = eks.Cluster(self, 'ekscdkdemo', vpc=vpc, default_capacity=0)

        asg_worker_nodes = cluster.add_capacity(
            'eksspot-cdkdemo',
            spot_price="0.0544",
            instance_type=ec2.InstanceType('t3.medium'),
            desired_capacity=2,
            bootstrap_options=eks.BootstrapOptions(
                docker_config_json=read_docker_daemon_resource(
                    'eksbaseresource/docker-daemon.json')))

        alb_rbac = eks.KubernetesResource(
            self,
            'alb-rbac',
            cluster=cluster,
            manifest=read_k8s_resource('eksbaseresource/alb-rbac.yml'))

        asg_worker_nodes.add_to_role_policy(iampolicy)
        cluster.aws_auth.add_masters_role(clusterAdmin)
        cluster.aws_auth.add_masters_role(runnerrole)

        service_account = cluster.add_service_account("external-dns-sa",
                                                      name='external-dns-sa')

        wellnessuser_irsa = cluster.add_service_account("wellnessuser",
                                                        name='wellnessuser')

        service_account.add_to_principal_policy(dnspolicy)

        deployment = {
            "apiVersion": "apps/v1",
            "kind": "Deployment",
            "metadata": {
                "labels": {
                    "app.kubernetes.io/name": "alb-ingress-controller"
                },
                "name": "alb-ingress-controller",
                "namespace": "kube-system"
            },
            "spec": {
                "selector": {
                    "matchLabels": {
                        "app.kubernetes.io/name": "alb-ingress-controller"
                    }
                },
                "template": {
                    "metadata": {
                        "labels": {
                            "app.kubernetes.io/name": "alb-ingress-controller"
                        }
                    },
                    "spec": {
                        "containers": [{
                            "name":
                            "alb-ingress-controller",
                            "args": [
                                "--ingress-class=alb",
                                "--cluster-name=" + cluster.cluster_name
                            ],
                            "image":
                            "docker.io/amazon/aws-alb-ingress-controller:v1.1.8"
                        }],
                        "serviceAccountName":
                        "alb-ingress-controller"
                    }
                }
            }
        }
        alb_service = cluster.add_resource('alb-ingress-controller',
                                           deployment)
        external_dns = eks.KubernetesResource(
            self,
            'external-dns',
            cluster=cluster,
            manifest=read_k8s_resource('eksbaseresource/external-dns.yml'))
        alb_service.node.add_dependency(alb_rbac)
        external_dns.node.add_dependency(service_account)
        core.CfnOutput(self,
                       'ClusterAdmin_Role_ARN',
                       value=clusterAdmin.role_arn)
        core.CfnOutput(
            self,
            'Getupdateeks',
            value="aws eks update-kubeconfig --name " + cluster.cluster_name +
            " --region ap-northeast-1 --role-arn " + clusterAdmin.role_arn)

        wellness_kns_stream = kinesis.Stream(
            self,
            'WellnessKnsStream',
            retention_period=core.Duration.hours(24),
            shard_count=1,
            stream_name='event.member.appointment.devInfo')

        wellness_kns_stream.grant_read_write(wellnessuser_irsa)

        core.CfnOutput(self,
                       'kinesis_stream_arn',
                       value=wellness_kns_stream.stream_arn)

        core.CfnOutput(self,
                       'kinesis_stream_name',
                       value=wellness_kns_stream.stream_name)
Example No. 23
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        output_bucket = s3.Bucket(self,
                                  'outputBucket',
                                  removal_policy=core.RemovalPolicy.DESTROY)

        input_data_stream = kinesis.Stream(
            self,
            'inputDataStream',
            stream_name='anomaly-detection-data-streams-input-data-stream')

        delivery_stream_role = iam.Role(
            self,
            'deliveryStreamRole',
            assumed_by=iam.ServicePrincipal('firehose.amazonaws.com'),
            inline_policies=[
                iam.PolicyDocument(statements=[
                    iam.PolicyStatement(
                        effect=iam.Effect.ALLOW,
                        actions=['kinesis:DescribeStream'],
                        resources=[input_data_stream.stream_arn])
                ])
            ])
        input_data_stream.grant_read(delivery_stream_role)

        output_bucket.grant_write(delivery_stream_role)

        anomaly_topic = sns.Topic(self, 'anomalyDetectionTopic')

        data_processing_function = _lambda.Function(
            self,
            "dataProcessingFunction",
            runtime=_lambda.Runtime.PYTHON_3_7,
            handler="lambda-handler.main",
            code=_lambda.Code.asset("./dataProcessingFunction"),
            environment={'TOPIC_ARN': anomaly_topic.topic_arn})

        delivery_stream = firehose.CfnDeliveryStream(
            self,
            'deliveryStream',
            s3_destination_configuration=firehose.CfnDeliveryStream.
            S3DestinationConfigurationProperty(
                bucket_arn=output_bucket.bucket_arn,
                role_arn=delivery_stream_role.role_arn),
            kinesis_stream_source_configuration=firehose.CfnDeliveryStream.
            KinesisStreamSourceConfigurationProperty(
                kinesis_stream_arn=input_data_stream.stream_arn,
                role_arn=delivery_stream_role.role_arn),
            delivery_stream_type='KinesisStreamAsSource')

        anomaly_topic.grant_publish(data_processing_function)

        anomalie_detection_app_role = iam.Role(
            self,
            'anomalieDetectionAppRole',
            assumed_by=iam.ServicePrincipal('kinesisanalytics.amazonaws.com'),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'AmazonKinesisReadOnlyAccess')
            ],
            inline_policies=[
                iam.PolicyDocument(statements=[
                    iam.PolicyStatement(
                        effect=iam.Effect.ALLOW,
                        resources=[
                            data_processing_function.function_arn,
                        ],
                        actions=[
                            "lambda:GetFunctionConfiguration",
                            "lambda:InvokeFunction",
                        ])
                ])
            ])

        # Load Application Code
        with open('anomalie_detection_sql/application.sql', 'r') as file:
            app_code = file.read()

        anomalie_detection_app = kinesis_analytics.CfnApplication(
            self,
            'anomalieDetectionApp',
            inputs=[
                kinesis_analytics.CfnApplication.InputProperty(
                    name_prefix='SOURCE_SQL_STREAM',
                    kinesis_streams_input=kinesis_analytics.CfnApplication.
                    KinesisStreamsInputProperty(
                        resource_arn=input_data_stream.stream_arn,
                        role_arn=anomalie_detection_app_role.role_arn),
                    input_schema=kinesis_analytics.CfnApplication.
                    InputSchemaProperty(
                        record_columns=[
                            kinesis_analytics.CfnApplication.
                            RecordColumnProperty(name="sensor_id",
                                                 sql_type="CHAR(30)",
                                                 mapping="$.sensor_id"),
                            kinesis_analytics.CfnApplication.
                            RecordColumnProperty(name="temperature",
                                                 sql_type="DOUBLE",
                                                 mapping="$.temperature"),
                            kinesis_analytics.CfnApplication.
                            RecordColumnProperty(name="rpm",
                                                 sql_type="DOUBLE",
                                                 mapping="$.rpm"),
                            kinesis_analytics.CfnApplication.
                            RecordColumnProperty(name="in_service",
                                                 sql_type="BOOLEAN",
                                                 mapping="$.in_service"),
                        ],
                        record_format=kinesis_analytics.CfnApplication.
                        RecordFormatProperty(
                            record_format_type='JSON',
                            mapping_parameters=kinesis_analytics.
                            CfnApplication.MappingParametersProperty(
                                json_mapping_parameters=kinesis_analytics.
                                CfnApplication.JSONMappingParametersProperty(
                                    record_row_path='$')))))
            ],
            application_code=app_code)

        anomalie_detection_app_output_lambda = kinesis_analytics.CfnApplicationOutput(
            self,
            'anomalieDetectionAppOutputLambda',
            application_name=core.Fn.ref(anomalie_detection_app.logical_id),
            output=kinesis_analytics.CfnApplicationOutput.OutputProperty(
                lambda_output=kinesis_analytics.CfnApplicationOutput.
                LambdaOutputProperty(
                    resource_arn=data_processing_function.function_arn,
                    role_arn=anomalie_detection_app_role.role_arn),
                destination_schema=kinesis_analytics.CfnApplicationOutput.
                DestinationSchemaProperty(record_format_type='JSON'),
                name='PROCESS_STREAM'))
        vpc = ec2.Vpc(
            self,
            "VPC",
            nat_gateways=1,
            subnet_configuration=[
                ec2.SubnetConfiguration(name="private",
                                        subnet_type=ec2.SubnetType.PRIVATE),
                ec2.SubnetConfiguration(name="public",
                                        subnet_type=ec2.SubnetType.PUBLIC),
            ])

        # AMI
        amzn_linux = ec2.MachineImage.latest_amazon_linux(
            generation=ec2.AmazonLinuxGeneration.AMAZON_LINUX_2,
            edition=ec2.AmazonLinuxEdition.STANDARD,
            virtualization=ec2.AmazonLinuxVirt.HVM,
            storage=ec2.AmazonLinuxStorage.GENERAL_PURPOSE)
        producer_role = iam.Role(
            self,
            "producerInstanceRole",
            assumed_by=iam.ServicePrincipal("ec2.amazonaws.com"))

        producer_role.add_managed_policy(
            iam.ManagedPolicy.from_aws_managed_policy_name(
                "AmazonSSMManagedInstanceCore"))
        input_data_stream.grant_write(producer_role)

        user_data = '#!/bin/bash\n yum update -y\n yum install python3 -y\n sudo yum install -y jq\n pip3 install boto3 --user\n pip3 install numpy --user\n curl https://bigdatainsider-anomalydetection-article-fra.s3.eu-central-1.amazonaws.com/producer/producer.py -o /tmp/producer.py'

        producer_instance = ec2.Instance(
            self,
            'producerInstance',
            instance_type=ec2.InstanceType('t2.micro'),
            machine_image=amzn_linux,
            vpc=vpc,
            role=producer_role,
            user_data=ec2.UserData.custom(user_data))
Example No. 24
    def __init__(self, scope: core.Construct, construct_id: str,
                 stack_log_level: str, **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        # Add your stack resources below
        # Create Kinesis Data Stream
        self.data_pipe_stream = _kinesis.Stream(
            self,
            "dataPipeStream",
            retention_period=core.Duration.hours(24),
            shard_count=1,
            stream_name="data_pipe")

        ########################################
        #######                          #######
        #######   Stream Data Producer   #######
        #######                          #######
        ########################################

        # Read Lambda Code
        try:
            with open(
                    "tumbling_window_stream_analytics/stacks/back_end/lambda_src/stream_data_producer.py",
                    encoding="utf-8",
                    mode="r") as f:
                data_producer_fn_code = f.read()
        except OSError:
            print("Unable to read Lambda Function Code")
            raise

        data_producer_fn = _lambda.Function(
            self,
            "streamDataProducerFn",
            function_name=f"data_producer_fn",
            description=
            "Produce streaming data events and push to Kinesis stream",
            runtime=_lambda.Runtime.PYTHON_3_7,
            code=_lambda.InlineCode(data_producer_fn_code),
            handler="index.lambda_handler",
            timeout=core.Duration.seconds(60),
            reserved_concurrent_executions=1,
            environment={
                "LOG_LEVEL": "INFO",
                "STREAM_NAME": f"{self.data_pipe_stream.stream_name}",
                "APP_ENV": "Production",
                "STREAM_AWS_REGION": f"{core.Aws.REGION}"
            })

        # Grant our Lambda Producer privileges to write to Kinesis Data Stream
        self.data_pipe_stream.grant_read_write(data_producer_fn)

        # Create Custom Loggroup for Producer
        data_producer_lg = _logs.LogGroup(
            self,
            "dataProducerLogGroup",
            log_group_name=f"/aws/lambda/{data_producer_fn.function_name}",
            removal_policy=core.RemovalPolicy.DESTROY,
            retention=_logs.RetentionDays.ONE_DAY)

        ###########################################
        ################# OUTPUTS #################
        ###########################################
        output_0 = core.CfnOutput(
            self,
            "AutomationFrom",
            value=f"{GlobalArgs.SOURCE_INFO}",
            description=
            "To learn more about this automation stack, check out our GitHub page."
        )

        output_1 = core.CfnOutput(
            self,
            "streamDataProducer",
            value=
            f"https://console.aws.amazon.com/lambda/home?region={core.Aws.REGION}#/functions/{data_producer_fn.function_name}?tab=code",
            description=
            "Produce streaming data events and push to Kinesis stream.")
Exemplo n.º 25
0
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # TODO: add resources for Part 2, 3 of blog post.

        # Kinesis Data Streams
        kds = kinesis.Stream(self,
                             "KinesisTweets",
                             stream_name="kinesis-tweets",
                             shard_count=5,
                             retention_period=Duration.hours(48))

        # Fargate Task Role
        task_role = iam.Role(
            self,
            'task_role',
            assumed_by=iam.ServicePrincipal('ecs-tasks.amazonaws.com'))
        # Policy to allow the task to put records into Kinesis
        task_role.add_to_policy(
            iam.PolicyStatement(effect=iam.Effect.ALLOW,
                                actions=[
                                    'kinesis:PutRecord', 'kinesis:PutRecords',
                                    'kinesis:DescribeStream'
                                ],
                                resources=[kds.stream_arn]))
        # Policy to get secret from SecretsManager
        task_role.add_to_policy(
            iam.PolicyStatement(effect=iam.Effect.ALLOW,
                                actions=[
                                    'secretsmanager:GetResourcePolicy',
                                    'secretsmanager:GetSecretValue',
                                    'secretsmanager:DescribeSecret',
                                    'secretsmanager:ListSecretVersionIds'
                                ],
                                resources=['*']))

        # VPC
        vpc = ec2.Vpc(
            self,
            'FargateVPC',
            max_azs=2  # Default is all AZs in the region
        )

        # ECS Cluster
        cluster = ecs.Cluster(self, 'EcsCluster', vpc=vpc)

        # Fargate Task Definition
        task_definition = ecs.FargateTaskDefinition(self,
                                                    'ServiceTaskDefinition',
                                                    cpu=256,
                                                    memory_limit_mib=512,
                                                    task_role=task_role)

        # Fargate log driver
        fargate_logger = ecs.AwsLogDriver(stream_prefix='fargate_twitter_logs')

        # Container
        task_definition.add_container(
            'ServiceContainer',
            image=ecs.ContainerImage.from_asset('./ECSContainerFiles'),
            environment={
                'KINESIS_STREAM_NAME': kds.stream_name,
                'REGION_NAME': self.region,
                'KEYWORD': 'trump',
                'SECRETS_NAME': 'TwitterAPISecrets'
            },
            logging=fargate_logger)

        # Fargate Service
        service = ecs.FargateService(self,
                                     'ServiceFargateService',
                                     task_definition=task_definition,
                                     assign_public_ip=True,
                                     cluster=cluster)
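The Fargate task builds its image from ./ECSContainerFiles, which is not included in this listing. Below is a minimal sketch of the container entrypoint under stated assumptions: the secret named by SECRETS_NAME is a JSON document holding the Twitter API keys, and iter_tweets() is a placeholder standing in for whichever Twitter client the real image uses.

import json
import os

import boto3

REGION = os.environ['REGION_NAME']
STREAM_NAME = os.environ['KINESIS_STREAM_NAME']
SECRETS_NAME = os.environ['SECRETS_NAME']
KEYWORD = os.environ['KEYWORD']

kinesis = boto3.client('kinesis', region_name=REGION)
secrets = boto3.client('secretsmanager', region_name=REGION)


def get_twitter_credentials() -> dict:
    """Fetch the Twitter API keys stored as a JSON secret."""
    value = secrets.get_secret_value(SecretId=SECRETS_NAME)
    return json.loads(value['SecretString'])


def iter_tweets(creds: dict, keyword: str):
    """Placeholder for the real Twitter client; yields dummy tweets matching the keyword."""
    yield {'user_id': 1, 'text': 'sample tweet about %s' % keyword}


def publish_tweet(tweet: dict) -> None:
    """Forward one tweet to the Kinesis stream, partitioned by author id."""
    kinesis.put_record(
        StreamName=STREAM_NAME,
        Data=json.dumps(tweet).encode('utf-8'),
        PartitionKey=str(tweet.get('user_id', 'unknown')))


if __name__ == '__main__':
    for tweet in iter_tweets(get_twitter_credentials(), KEYWORD):
        publish_tweet(tweet)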
Exemplo n.º 26
0
    def __init__(self, scope: core.Construct, construct_id: str,
                 **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        # The code that defines your stack goes here

        # Look up the existing VPC
        #vpc = ec2.Vpc.from_lookup(self, 'default',is_default=True,vpc_name='default')
        vpc = ec2.Vpc.from_lookup(self,
                                  'dms-vpc',
                                  vpc_id='vpc-08b56fb6053ca2c75')

        # Create the RDS parameter group
        db_parameter = rds.ParameterGroup(
            self,
            'dms-param-mysql5.7',
            engine=rds.DatabaseInstanceEngine.mysql(
                version=rds.MysqlEngineVersion.VER_5_7),
            parameters={"binlog_format": "ROW"})

        # sourceDB = rds.DatabaseInstanceFromSnapshot(
        #     self,'dms-rds-soruce',
        #     snapshot_identifier= 'tickets-mysql57',
        #     engine=rds.DatabaseInstanceEngine.MYSQL,
        #     instance_type=ec2.InstanceType.of(ec2.InstanceClass.BURSTABLE3,ec2.InstanceSize.MEDIUM),
        #     vpc=vpc,
        #     parameter_group=db_parameter
        #     )

        # sourceDB = rds.DatabaseInstance(
        #     self,'dms-rds-soruce',
        #     #instance_identifier='dms-rds-soruce',
        #     engine=rds.DatabaseInstanceEngine.mysql(
        #         version=rds.MysqlEngineVersion.VER_5_7
        #     ),
        #     instance_type=ec2.InstanceType.of(ec2.InstanceClass.BURSTABLE3,ec2.InstanceSize.MEDIUM),
        #     vpc=vpc,
        #     parameter_group=db_parameter,
        #     #credentials=rdsPasswordSecret
        #     )

        # sourceDB.connections.allow_default_port_internally()

        dms_rep = dms.CfnReplicationInstance(
            self,
            'dms-replication',
            replication_instance_class='dms.c5.large',
            engine_version='3.4.0')

        stream = kinesis.Stream(self, 'dms-stream')

        streamWriteRole = iam.Role(
            self,
            'dms-stream-role',
            assumed_by=iam.ServicePrincipal('dms.amazonaws.com'))

        streamWriteRole.add_to_policy(
            iam.PolicyStatement(resources=[stream.stream_arn],
                                actions=[
                                    'kinesis:DescribeStream',
                                    'kinesis:PutRecord', 'kinesis:PutRecords'
                                ]))

        source = dms.CfnEndpoint(
            self,
            'dms-source',
            endpoint_type='source',
            engine_name='mysql',
            username='******',
            password='******',
            server_name=
            "dms-rdssource.c7iucbqgd2xo.us-east-1.rds.amazonaws.com",
            port=3306)

        target = dms.CfnEndpoint(self,
                                 'dms-target',
                                 endpoint_type='target',
                                 engine_name='kinesis',
                                 kinesis_settings={
                                     "messageFormat":
                                     "JSON",
                                     'streamArn':
                                     stream.stream_arn,
                                     "serviceAccessRoleArn":
                                     streamWriteRole.role_arn
                                 })

        dmsTableMappings = {
            "rules": [{
                "rule-type": "selection",
                "rule-id": "1",
                "rule-name": "1",
                "object-locator": {
                    "schema-name": "dms_sample",
                    "table-name": "t_log_levelup"
                },
                "rule-action": "include"
            }]
        }

        dms.CfnReplicationTask(self,
                               'dms-stream-repTask',
                               replication_instance_arn=dms_rep.ref,
                               migration_type='full-load-and-cdc',
                               source_endpoint_arn=source.ref,
                               target_endpoint_arn=target.ref,
                               table_mappings=json.dumps(dmsTableMappings))

        analyticsRole = iam.Role(
            self,
            'KinesisAnalyticsRole',
            assumed_by=iam.ServicePrincipal('kinesisanalytics.amazonaws.com'))

        kinesisanalytics.CfnApplicationV2(
            self,
            'KinesisAnalytics',
            application_name='dms-stream-analytics',
            service_execution_role=analyticsRole.role_arn,
            runtime_environment='SQL-1_0',
            application_configuration={
                'sqlApplicationConfiguration': {
                    'inputs': [{
                        'namePrefix': "exampleNamePrefix",
                        'inputSchema': {
                            'recordColumns': [{
                                'name': "example",
                                'sqlType': "VARCHAR(16)",
                                'mapping': "$.example"
                            }],
                            'recordFormat': {
                                'recordFormatType': "JSON",
                                'mappingParameters': {
                                    'jsonMappingParameters': {
                                        'recordRowPath': "$"
                                    }
                                }
                            }
                        },
                        'kinesisStreamsInput': {
                            'resourceArn': stream.stream_arn
                        }
                    }]
                },
                'applicationCodeConfiguration': {
                    'codeContent': {
                        'textContent': "Example Application Code"
                    },
                    'codeContentType': "PLAINTEXT"
                }
            })
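CfnReplicationTask only creates the task; it does not start it. A minimal sketch of kicking off the full-load-and-CDC task after cdk deploy, assuming you pass the task ARN (visible in the DMS console) on the command line.

import sys

import boto3

dms = boto3.client('dms')


def start_task(task_arn: str) -> None:
    """Start the full-load-and-cdc replication task created by the stack above."""
    resp = dms.start_replication_task(
        ReplicationTaskArn=task_arn,
        StartReplicationTaskType='start-replication')
    print(resp['ReplicationTask']['Status'])


if __name__ == '__main__':
    start_task(sys.argv[1])  # task ARN from the DMS console or a stack output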
Exemplo n.º 27
0
    def __init__(self, scope: core.Construct, construct_id: str,
                 **kwargs) -> None:
        super().__init__(scope, construct_id, **kwargs)

        test_queue = sqs.Queue(self, 'test-queue', queue_name='test1')

        test_topic = sns.Topic(self, 'test-topic')

        sns.Subscription(self,
                         'test-subscription',
                         topic=test_topic,
                         endpoint=test_queue.queue_arn,
                         protocol=sns.SubscriptionProtocol.SQS)

        kinesis.Stream(self,
                       'test-stream',
                       stream_name='donut-sales',
                       shard_count=2)

        create_order = step.Pass(self,
                                 'create-order',
                                 result=step.Result.from_object({
                                     "Order": {
                                         "Customer": "Alice",
                                         "Product": "Coffee",
                                         "Billing": {
                                             "Price": 10.0,
                                             "Quantity": 4.0
                                         }
                                     }
                                 }))
        calculate_amount = step.Pass(self,
                                     'calculate-amount',
                                     result=step.Result.from_number(40.0),
                                     result_path='$.Order.Billing.Amount',
                                     output_path='$.Order.Billing')
        order_definition = create_order.next(calculate_amount)
        step.StateMachine(self,
                          'test-state-machine',
                          state_machine_name='order-machine',
                          definition=order_definition)

        make_tea = step.Choice(
            self, 'make-tea', comment='Input should look like {"tea":"green"}')
        green = step.Pass(self,
                          'green',
                          result=step.Result.from_string('Green tea'))
        make_tea.when(step.Condition.string_equals('$.tea', 'green'), green)
        black = step.Pass(self,
                          'black',
                          result=step.Result.from_string('Black tea'))
        make_tea.when(step.Condition.string_equals('$.tea', 'black'), black)
        orange = step.Pass(self,
                           'orange',
                           result=step.Result.from_string('Black tea'))
        make_tea.when(step.Condition.string_equals('$.tea', 'orange'), orange)
        error = step.Pass(self,
                          'error',
                          result=step.Result.from_string('Bad input'))
        make_tea.otherwise(error)
        step.StateMachine(self,
                          'test-state-machine-2',
                          state_machine_name='tea-machine',
                          definition=make_tea)
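A quick way to exercise the tea-machine above is to start an execution with a test payload and poll for the result; the state machine ARN below is a placeholder for the real one shown in the console or a stack output.

import json
import time

import boto3

sfn = boto3.client('stepfunctions')

# Placeholder ARN -- substitute the deployed 'tea-machine' ARN.
STATE_MACHINE_ARN = 'arn:aws:states:us-east-1:123456789012:stateMachine:tea-machine'

execution = sfn.start_execution(
    stateMachineArn=STATE_MACHINE_ARN,
    input=json.dumps({'tea': 'green'}))

# Poll until the Choice/Pass chain finishes, then print the result.
while True:
    desc = sfn.describe_execution(executionArn=execution['executionArn'])
    if desc['status'] != 'RUNNING':
        break
    time.sleep(1)

print(desc['status'], desc.get('output'))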
Exemplo n.º 28
0
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        self.kinesis = ks.Stream(self,
                                 'MailStream',
                                 encryption=ks.StreamEncryption.MANAGED)
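The stream is exposed as self.kinesis so other stacks in the same app can consume it. Below is a minimal sketch of a hypothetical companion stack that wires a small Lambda consumer onto the shared stream; instantiate it with something like MailConsumerStack(app, 'MailConsumer', stream=mail_stack.kinesis).

from aws_cdk import aws_kinesis as ks
from aws_cdk import aws_lambda as _lambda
from aws_cdk import aws_lambda_event_sources as evt
from aws_cdk import core


class MailConsumerStack(core.Stack):
    """Hypothetical companion stack consuming the MailStream created above."""

    def __init__(self, scope: core.Construct, id: str, stream: ks.IStream,
                 **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        processor_fn = _lambda.Function(
            self,
            'MailProcessor',
            runtime=_lambda.Runtime.PYTHON_3_7,
            handler='index.lambda_handler',
            code=_lambda.Code.from_inline(
                'def lambda_handler(event, context):\n'
                '    return {"records": len(event["Records"])}\n'))

        # Read permissions plus the event-source mapping on the shared stream.
        stream.grant_read(processor_fn)
        processor_fn.add_event_source(
            evt.KinesisEventSource(
                stream=stream,
                starting_position=_lambda.StartingPosition.TRIM_HORIZON,
                batch_size=100))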
Exemplo n.º 29
0
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        vpc = aws_ec2.Vpc(
            self,
            "OctemberVPC",
            max_azs=2,
            #      subnet_configuration=[{
            #          "cidrMask": 24,
            #          "name": "Public",
            #          "subnetType": aws_ec2.SubnetType.PUBLIC,
            #        },
            #        {
            #          "cidrMask": 24,
            #          "name": "Private",
            #          "subnetType": aws_ec2.SubnetType.PRIVATE
            #        },
            #        {
            #          "cidrMask": 28,
            #          "name": "Isolated",
            #          "subnetType": aws_ec2.SubnetType.ISOLATED,
            #          "reserved": True
            #        }
            #      ],
            gateway_endpoints={
                "S3":
                aws_ec2.GatewayVpcEndpointOptions(
                    service=aws_ec2.GatewayVpcEndpointAwsService.S3)
            })

        dynamo_db_endpoint = vpc.add_gateway_endpoint(
            "DynamoDbEndpoint",
            service=aws_ec2.GatewayVpcEndpointAwsService.DYNAMODB)

        s3_bucket = s3.Bucket(
            self,
            "s3bucket",
            bucket_name="octember-bizcard-{region}-{account}".format(
                region=core.Aws.REGION, account=core.Aws.ACCOUNT_ID))

        api = apigw.RestApi(
            self,
            "BizcardImageUploader",
            rest_api_name="BizcardImageUploader",
            description="This service serves uploading bizcard images into s3.",
            endpoint_types=[apigw.EndpointType.REGIONAL],
            binary_media_types=["image/png", "image/jpg"],
            deploy=True,
            deploy_options=apigw.StageOptions(stage_name="v1"))

        rest_api_role = aws_iam.Role(
            self,
            "ApiGatewayRoleForS3",
            role_name="ApiGatewayRoleForS3FullAccess",
            assumed_by=aws_iam.ServicePrincipal("apigateway.amazonaws.com"),
            managed_policies=[
                aws_iam.ManagedPolicy.from_aws_managed_policy_name(
                    "AmazonS3FullAccess")
            ])

        list_objects_responses = [
            apigw.IntegrationResponse(
                status_code="200",
                #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_apigateway/IntegrationResponse.html#aws_cdk.aws_apigateway.IntegrationResponse.response_parameters
                # The response parameters from the backend response that API Gateway sends to the method response.
                # Use the destination as the key and the source as the value:
                #  - The destination must be an existing response parameter in the MethodResponse property.
                #  - The source must be an existing method request parameter or a static value.
                response_parameters={
                    'method.response.header.Timestamp':
                    'integration.response.header.Date',
                    'method.response.header.Content-Length':
                    'integration.response.header.Content-Length',
                    'method.response.header.Content-Type':
                    'integration.response.header.Content-Type'
                }),
            apigw.IntegrationResponse(status_code="400",
                                      selection_pattern="4\d{2}"),
            apigw.IntegrationResponse(status_code="500",
                                      selection_pattern="5\d{2}")
        ]

        list_objects_integration_options = apigw.IntegrationOptions(
            credentials_role=rest_api_role,
            integration_responses=list_objects_responses)

        get_s3_integration = apigw.AwsIntegration(
            service="s3",
            integration_http_method="GET",
            path='/',
            options=list_objects_integration_options)

        api.root.add_method(
            "GET",
            get_s3_integration,
            authorization_type=apigw.AuthorizationType.IAM,
            api_key_required=False,
            method_responses=[
                apigw.MethodResponse(
                    status_code="200",
                    response_parameters={
                        'method.response.header.Timestamp': False,
                        'method.response.header.Content-Length': False,
                        'method.response.header.Content-Type': False
                    },
                    response_models={'application/json': apigw.EmptyModel()}),
                apigw.MethodResponse(status_code="400"),
                apigw.MethodResponse(status_code="500")
            ],
            request_parameters={'method.request.header.Content-Type': False})

        get_s3_folder_integration_options = apigw.IntegrationOptions(
            credentials_role=rest_api_role,
            integration_responses=list_objects_responses,
            #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_apigateway/IntegrationOptions.html#aws_cdk.aws_apigateway.IntegrationOptions.request_parameters
            # Specify request parameters as key-value pairs (string-to-string mappings), with a destination as the key and a source as the value.
            # The source must be an existing method request parameter or a static value.
            request_parameters={
                "integration.request.path.bucket": "method.request.path.folder"
            })

        get_s3_folder_integration = apigw.AwsIntegration(
            service="s3",
            integration_http_method="GET",
            path="{bucket}",
            options=get_s3_folder_integration_options)

        s3_folder = api.root.add_resource('{folder}')
        s3_folder.add_method(
            "GET",
            get_s3_folder_integration,
            authorization_type=apigw.AuthorizationType.IAM,
            api_key_required=False,
            method_responses=[
                apigw.MethodResponse(
                    status_code="200",
                    response_parameters={
                        'method.response.header.Timestamp': False,
                        'method.response.header.Content-Length': False,
                        'method.response.header.Content-Type': False
                    },
                    response_models={'application/json': apigw.EmptyModel()}),
                apigw.MethodResponse(status_code="400"),
                apigw.MethodResponse(status_code="500")
            ],
            request_parameters={
                'method.request.header.Content-Type': False,
                'method.request.path.folder': True
            })

        get_s3_item_integration_options = apigw.IntegrationOptions(
            credentials_role=rest_api_role,
            integration_responses=list_objects_responses,
            request_parameters={
                "integration.request.path.bucket":
                "method.request.path.folder",
                "integration.request.path.object": "method.request.path.item"
            })

        get_s3_item_integration = apigw.AwsIntegration(
            service="s3",
            integration_http_method="GET",
            path="{bucket}/{object}",
            options=get_s3_item_integration_options)

        s3_item = s3_folder.add_resource('{item}')
        s3_item.add_method(
            "GET",
            get_s3_item_integration,
            authorization_type=apigw.AuthorizationType.IAM,
            api_key_required=False,
            method_responses=[
                apigw.MethodResponse(
                    status_code="200",
                    response_parameters={
                        'method.response.header.Timestamp': False,
                        'method.response.header.Content-Length': False,
                        'method.response.header.Content-Type': False
                    },
                    response_models={'application/json': apigw.EmptyModel()}),
                apigw.MethodResponse(status_code="400"),
                apigw.MethodResponse(status_code="500")
            ],
            request_parameters={
                'method.request.header.Content-Type': False,
                'method.request.path.folder': True,
                'method.request.path.item': True
            })

        put_s3_item_integration_options = apigw.IntegrationOptions(
            credentials_role=rest_api_role,
            integration_responses=[
                apigw.IntegrationResponse(status_code="200"),
                apigw.IntegrationResponse(status_code="400",
                                          selection_pattern="4\d{2}"),
                apigw.IntegrationResponse(status_code="500",
                                          selection_pattern="5\d{2}")
            ],
            request_parameters={
                "integration.request.header.Content-Type":
                "method.request.header.Content-Type",
                "integration.request.path.bucket":
                "method.request.path.folder",
                "integration.request.path.object": "method.request.path.item"
            })

        put_s3_item_integration = apigw.AwsIntegration(
            service="s3",
            integration_http_method="PUT",
            path="{bucket}/{object}",
            options=put_s3_item_integration_options)

        s3_item.add_method(
            "PUT",
            put_s3_item_integration,
            authorization_type=apigw.AuthorizationType.IAM,
            api_key_required=False,
            method_responses=[
                apigw.MethodResponse(
                    status_code="200",
                    response_parameters={
                        'method.response.header.Content-Type': False
                    },
                    response_models={'application/json': apigw.EmptyModel()}),
                apigw.MethodResponse(status_code="400"),
                apigw.MethodResponse(status_code="500")
            ],
            request_parameters={
                'method.request.header.Content-Type': False,
                'method.request.path.folder': True,
                'method.request.path.item': True
            })

        ddb_table = dynamodb.Table(
            self,
            "BizcardImageMetaInfoDdbTable",
            table_name="OctemberBizcardImgMeta",
            partition_key=dynamodb.Attribute(
                name="image_id", type=dynamodb.AttributeType.STRING),
            billing_mode=dynamodb.BillingMode.PROVISIONED,
            read_capacity=15,
            write_capacity=5)

        img_kinesis_stream = kinesis.Stream(
            self, "BizcardImagePath", stream_name="octember-bizcard-image")

        # create lambda function
        trigger_textract_lambda_fn = _lambda.Function(
            self,
            "TriggerTextExtractorFromImage",
            runtime=_lambda.Runtime.PYTHON_3_7,
            function_name="TriggerTextExtractorFromImage",
            handler="trigger_text_extract_from_s3_image.lambda_handler",
            description="Trigger to extract text from an image in S3",
            code=_lambda.Code.asset(
                "./src/main/python/TriggerTextExtractFromS3Image"),
            environment={
                'REGION_NAME': core.Aws.REGION,
                'DDB_TABLE_NAME': ddb_table.table_name,
                'KINESIS_STREAM_NAME': img_kinesis_stream.stream_name
            },
            timeout=core.Duration.minutes(5))

        ddb_table_rw_policy_statement = aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=[ddb_table.table_arn],
            actions=[
                "dynamodb:BatchGetItem", "dynamodb:Describe*",
                "dynamodb:List*", "dynamodb:GetItem", "dynamodb:Query",
                "dynamodb:Scan", "dynamodb:BatchWriteItem",
                "dynamodb:DeleteItem", "dynamodb:PutItem",
                "dynamodb:UpdateItem", "dax:Describe*", "dax:List*",
                "dax:GetItem", "dax:BatchGetItem", "dax:Query", "dax:Scan",
                "dax:BatchWriteItem", "dax:DeleteItem", "dax:PutItem",
                "dax:UpdateItem"
            ])

        trigger_textract_lambda_fn.add_to_role_policy(
            ddb_table_rw_policy_statement)
        trigger_textract_lambda_fn.add_to_role_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=[img_kinesis_stream.stream_arn],
                                    actions=[
                                        "kinesis:Get*", "kinesis:List*",
                                        "kinesis:Describe*",
                                        "kinesis:PutRecord",
                                        "kinesis:PutRecords"
                                    ]))

        # assign notification for the s3 event type (ex: OBJECT_CREATED)
        s3_event_filter = s3.NotificationKeyFilter(prefix="bizcard-raw-img/",
                                                   suffix=".jpg")
        s3_event_source = S3EventSource(s3_bucket,
                                        events=[s3.EventType.OBJECT_CREATED],
                                        filters=[s3_event_filter])
        trigger_textract_lambda_fn.add_event_source(s3_event_source)

        #XXX: https://github.com/aws/aws-cdk/issues/2240
        # Avoid creating an extra log-retention Lambda function (with a name like LogRetentionaae0aa3c5b4d4f87b02d85b201efdd8a)
        # that CDK would add if log_retention=aws_logs.RetentionDays.THREE_DAYS were passed to the Function constructor.
        log_group = aws_logs.LogGroup(
            self,
            "TriggerTextractLogGroup",
            log_group_name="/aws/lambda/TriggerTextExtractorFromImage",
            retention=aws_logs.RetentionDays.THREE_DAYS)
        log_group.grant_write(trigger_textract_lambda_fn)

        text_kinesis_stream = kinesis.Stream(
            self, "BizcardTextData", stream_name="octember-bizcard-txt")

        textract_lambda_fn = _lambda.Function(
            self,
            "GetTextFromImage",
            runtime=_lambda.Runtime.PYTHON_3_7,
            function_name="GetTextFromImage",
            handler="get_text_from_s3_image.lambda_handler",
            description="extract text from an image in S3",
            code=_lambda.Code.asset("./src/main/python/GetTextFromS3Image"),
            environment={
                'REGION_NAME': core.Aws.REGION,
                'DDB_TABLE_NAME': ddb_table.table_name,
                'KINESIS_STREAM_NAME': text_kinesis_stream.stream_name
            },
            timeout=core.Duration.minutes(5))

        textract_lambda_fn.add_to_role_policy(ddb_table_rw_policy_statement)
        textract_lambda_fn.add_to_role_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=[text_kinesis_stream.stream_arn],
                                    actions=[
                                        "kinesis:Get*", "kinesis:List*",
                                        "kinesis:Describe*",
                                        "kinesis:PutRecord",
                                        "kinesis:PutRecords"
                                    ]))

        textract_lambda_fn.add_to_role_policy(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "resources": [
                        s3_bucket.bucket_arn, "{}/*".format(
                            s3_bucket.bucket_arn)
                    ],
                    "actions": [
                        "s3:AbortMultipartUpload", "s3:GetBucketLocation",
                        "s3:GetObject", "s3:ListBucket",
                        "s3:ListBucketMultipartUploads", "s3:PutObject"
                    ]
                }))

        textract_lambda_fn.add_to_role_policy(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["*"],
                                    actions=["textract:*"]))

        img_kinesis_event_source = KinesisEventSource(
            img_kinesis_stream,
            batch_size=100,
            starting_position=_lambda.StartingPosition.LATEST)
        textract_lambda_fn.add_event_source(img_kinesis_event_source)

        log_group = aws_logs.LogGroup(
            self,
            "GetTextFromImageLogGroup",
            log_group_name="/aws/lambda/GetTextFromImage",
            retention=aws_logs.RetentionDays.THREE_DAYS)
        log_group.grant_write(textract_lambda_fn)

        sg_use_bizcard_es = aws_ec2.SecurityGroup(
            self,
            "BizcardSearchClientSG",
            vpc=vpc,
            allow_all_outbound=True,
            description=
            'security group for octember bizcard elasticsearch client',
            security_group_name='use-octember-bizcard-es')
        core.Tags.of(sg_use_bizcard_es).add('Name', 'use-octember-bizcard-es')

        sg_bizcard_es = aws_ec2.SecurityGroup(
            self,
            "BizcardSearchSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for octember bizcard elasticsearch',
            security_group_name='octember-bizcard-es')
        core.Tags.of(sg_bizcard_es).add('Name', 'octember-bizcard-es')

        sg_bizcard_es.add_ingress_rule(peer=sg_bizcard_es,
                                       connection=aws_ec2.Port.all_tcp(),
                                       description='octember-bizcard-es')
        sg_bizcard_es.add_ingress_rule(peer=sg_use_bizcard_es,
                                       connection=aws_ec2.Port.all_tcp(),
                                       description='use-octember-bizcard-es')

        sg_ssh_access = aws_ec2.SecurityGroup(
            self,
            "BastionHostSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for bastion host',
            security_group_name='octember-bastion-host-sg')
        core.Tags.of(sg_ssh_access).add('Name', 'octember-bastion-host')
        sg_ssh_access.add_ingress_rule(peer=aws_ec2.Peer.any_ipv4(),
                                       connection=aws_ec2.Port.tcp(22),
                                       description='ssh access')

        bastion_host = aws_ec2.BastionHostLinux(
            self,
            "BastionHost",
            vpc=vpc,
            instance_type=aws_ec2.InstanceType('t3.nano'),
            security_group=sg_ssh_access,
            subnet_selection=aws_ec2.SubnetSelection(
                subnet_type=aws_ec2.SubnetType.PUBLIC))
        bastion_host.instance.add_security_group(sg_use_bizcard_es)

        #XXX: aws cdk elastsearch example - https://github.com/aws/aws-cdk/issues/2873
        es_cfn_domain = aws_elasticsearch.CfnDomain(
            self,
            'BizcardSearch',
            elasticsearch_cluster_config={
                "dedicatedMasterCount": 3,
                "dedicatedMasterEnabled": True,
                "dedicatedMasterType": "t2.medium.elasticsearch",
                "instanceCount": 2,
                "instanceType": "t2.medium.elasticsearch",
                "zoneAwarenessEnabled": True
            },
            ebs_options={
                "ebsEnabled": True,
                "volumeSize": 10,
                "volumeType": "gp2"
            },
            domain_name="octember-bizcard",
            elasticsearch_version="7.9",
            encryption_at_rest_options={"enabled": False},
            access_policies={
                "Version":
                "2012-10-17",
                "Statement": [{
                    "Effect":
                    "Allow",
                    "Principal": {
                        "AWS": "*"
                    },
                    "Action":
                    ["es:Describe*", "es:List*", "es:Get*", "es:ESHttp*"],
                    "Resource":
                    self.format_arn(service="es",
                                    resource="domain",
                                    resource_name="octember-bizcard/*")
                }]
            },
            snapshot_options={"automatedSnapshotStartHour": 17},
            vpc_options={
                "securityGroupIds": [sg_bizcard_es.security_group_id],
                "subnetIds":
                vpc.select_subnets(
                    subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids
            })
        core.Tags.of(es_cfn_domain).add('Name', 'octember-bizcard-es')

        s3_lib_bucket_name = self.node.try_get_context("lib_bucket_name")

        #XXX: https://github.com/aws/aws-cdk/issues/1342
        s3_lib_bucket = s3.Bucket.from_bucket_name(self, id,
                                                   s3_lib_bucket_name)
        es_lib_layer = _lambda.LayerVersion(
            self,
            "ESLib",
            layer_version_name="es-lib",
            compatible_runtimes=[_lambda.Runtime.PYTHON_3_7],
            code=_lambda.Code.from_bucket(s3_lib_bucket,
                                          "var/octember-es-lib.zip"))

        redis_lib_layer = _lambda.LayerVersion(
            self,
            "RedisLib",
            layer_version_name="redis-lib",
            compatible_runtimes=[_lambda.Runtime.PYTHON_3_7],
            code=_lambda.Code.from_bucket(s3_lib_bucket,
                                          "var/octember-redis-lib.zip"))

        #XXX: Deploy lambda in VPC - https://github.com/aws/aws-cdk/issues/1342
        upsert_to_es_lambda_fn = _lambda.Function(
            self,
            "UpsertBizcardToES",
            runtime=_lambda.Runtime.PYTHON_3_7,
            function_name="UpsertBizcardToElasticSearch",
            handler="upsert_bizcard_to_es.lambda_handler",
            description="Upsert bizcard text into elasticsearch",
            code=_lambda.Code.asset("./src/main/python/UpsertBizcardToES"),
            environment={
                'ES_HOST': es_cfn_domain.attr_domain_endpoint,
                'ES_INDEX': 'octember_bizcard',
                'ES_TYPE': 'bizcard'
            },
            timeout=core.Duration.minutes(5),
            layers=[es_lib_layer],
            security_groups=[sg_use_bizcard_es],
            vpc=vpc)

        text_kinesis_event_source = KinesisEventSource(
            text_kinesis_stream,
            batch_size=99,
            starting_position=_lambda.StartingPosition.LATEST)
        upsert_to_es_lambda_fn.add_event_source(text_kinesis_event_source)

        log_group = aws_logs.LogGroup(
            self,
            "UpsertBizcardToESLogGroup",
            log_group_name="/aws/lambda/UpsertBizcardToElasticSearch",
            retention=aws_logs.RetentionDays.THREE_DAYS)
        log_group.grant_write(upsert_to_es_lambda_fn)

        firehose_role_policy_doc = aws_iam.PolicyDocument()
        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "resources": [
                        s3_bucket.bucket_arn, "{}/*".format(
                            s3_bucket.bucket_arn)
                    ],
                    "actions": [
                        "s3:AbortMultipartUpload", "s3:GetBucketLocation",
                        "s3:GetObject", "s3:ListBucket",
                        "s3:ListBucketMultipartUploads", "s3:PutObject"
                    ]
                }))

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=["*"],
                                    actions=[
                                        "glue:GetTable",
                                        "glue:GetTableVersion",
                                        "glue:GetTableVersions"
                                    ]))

        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW,
                                    resources=[text_kinesis_stream.stream_arn],
                                    actions=[
                                        "kinesis:DescribeStream",
                                        "kinesis:GetShardIterator",
                                        "kinesis:GetRecords"
                                    ]))

        firehose_log_group_name = "/aws/kinesisfirehose/octember-bizcard-txt-to-s3"
        firehose_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                effect=aws_iam.Effect.ALLOW,
                #XXX: The ARN will be formatted as follows:
                # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
                resources=[
                    self.format_arn(service="logs",
                                    resource="log-group",
                                    resource_name="{}:log-stream:*".format(
                                        firehose_log_group_name),
                                    sep=":")
                ],
                actions=["logs:PutLogEvents"]))

        firehose_role = aws_iam.Role(
            self,
            "FirehoseDeliveryRole",
            role_name="FirehoseDeliveryRole",
            assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"),
            #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
            inline_policies={"firehose_role_policy": firehose_role_policy_doc})

        bizcard_text_to_s3_delivery_stream = aws_kinesisfirehose.CfnDeliveryStream(
            self,
            "BizcardTextToS3",
            delivery_stream_name="octember-bizcard-txt-to-s3",
            delivery_stream_type="KinesisStreamAsSource",
            kinesis_stream_source_configuration={
                "kinesisStreamArn": text_kinesis_stream.stream_arn,
                "roleArn": firehose_role.role_arn
            },
            extended_s3_destination_configuration={
                "bucketArn": s3_bucket.bucket_arn,
                "bufferingHints": {
                    "intervalInSeconds": 60,
                    "sizeInMBs": 1
                },
                "cloudWatchLoggingOptions": {
                    "enabled": True,
                    "logGroupName": firehose_log_group_name,
                    "logStreamName": "S3Delivery"
                },
                "compressionFormat": "GZIP",
                "prefix": "bizcard-text/",
                "roleArn": firehose_role.role_arn
            })

        sg_use_bizcard_es_cache = aws_ec2.SecurityGroup(
            self,
            "BizcardSearchCacheClientSG",
            vpc=vpc,
            allow_all_outbound=True,
            description=
            'security group for octember bizcard search query cache client',
            security_group_name='use-octember-bizcard-es-cache')
        core.Tags.of(sg_use_bizcard_es_cache).add(
            'Name', 'use-octember-bizcard-es-cache')

        sg_bizcard_es_cache = aws_ec2.SecurityGroup(
            self,
            "BizcardSearchCacheSG",
            vpc=vpc,
            allow_all_outbound=True,
            description=
            'security group for octember bizcard search query cache',
            security_group_name='octember-bizcard-es-cache')
        core.Tags.of(sg_bizcard_es_cache).add('Name',
                                              'octember-bizcard-es-cache')

        sg_bizcard_es_cache.add_ingress_rule(
            peer=sg_use_bizcard_es_cache,
            connection=aws_ec2.Port.tcp(6379),
            description='use-octember-bizcard-es-cache')

        es_query_cache_subnet_group = aws_elasticache.CfnSubnetGroup(
            self,
            "QueryCacheSubnetGroup",
            description="subnet group for octember-bizcard-es-cache",
            subnet_ids=vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids,
            cache_subnet_group_name='octember-bizcard-es-cache')

        es_query_cache = aws_elasticache.CfnCacheCluster(
            self,
            "BizcardSearchQueryCache",
            cache_node_type="cache.t3.small",
            num_cache_nodes=1,
            engine="redis",
            engine_version="5.0.5",
            auto_minor_version_upgrade=False,
            cluster_name="octember-bizcard-es-cache",
            snapshot_retention_limit=3,
            snapshot_window="17:00-19:00",
            preferred_maintenance_window="mon:19:00-mon:20:30",
            #XXX: Do not use a reference for "cache_subnet_group_name" - https://github.com/aws/aws-cdk/issues/3098
            #cache_subnet_group_name=es_query_cache_subnet_group.cache_subnet_group_name, # Redis cluster goes to wrong VPC
            cache_subnet_group_name='octember-bizcard-es-cache',
            vpc_security_group_ids=[sg_bizcard_es_cache.security_group_id])

        #XXX: If you're going to launch your cluster in an Amazon VPC, you need to create a subnet group before you start creating a cluster.
        # https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-elasticache-cache-cluster.html#cfn-elasticache-cachecluster-cachesubnetgroupname
        es_query_cache.add_depends_on(es_query_cache_subnet_group)

        #XXX: add more than 2 security groups
        # https://github.com/aws/aws-cdk/blob/ea10f0d141a48819ec0000cd7905feda993870a9/packages/%40aws-cdk/aws-lambda/lib/function.ts#L387
        # https://github.com/aws/aws-cdk/issues/1555
        # https://github.com/aws/aws-cdk/pull/5049
        bizcard_search_lambda_fn = _lambda.Function(
            self,
            "BizcardSearchServer",
            runtime=_lambda.Runtime.PYTHON_3_7,
            function_name="BizcardSearchProxy",
            handler="es_search_bizcard.lambda_handler",
            description="Proxy server to search bizcard text",
            code=_lambda.Code.asset("./src/main/python/SearchBizcard"),
            environment={
                'ES_HOST': es_cfn_domain.attr_domain_endpoint,
                'ES_INDEX': 'octember_bizcard',
                'ES_TYPE': 'bizcard',
                'ELASTICACHE_HOST': es_query_cache.attr_redis_endpoint_address
            },
            timeout=core.Duration.minutes(1),
            layers=[es_lib_layer, redis_lib_layer],
            security_groups=[sg_use_bizcard_es, sg_use_bizcard_es_cache],
            vpc=vpc)

        #XXX: create API Gateway + LambdaProxy
        search_api = apigw.LambdaRestApi(
            self,
            "BizcardSearchAPI",
            handler=bizcard_search_lambda_fn,
            proxy=False,
            rest_api_name="BizcardSearch",
            description="This service serves searching bizcard text.",
            endpoint_types=[apigw.EndpointType.REGIONAL],
            deploy=True,
            deploy_options=apigw.StageOptions(stage_name="v1"))

        bizcard_search = search_api.root.add_resource('search')
        bizcard_search.add_method(
            "GET",
            method_responses=[
                apigw.MethodResponse(
                    status_code="200",
                    response_models={'application/json': apigw.EmptyModel()}),
                apigw.MethodResponse(status_code="400"),
                apigw.MethodResponse(status_code="500")
            ])

        sg_use_bizcard_graph_db = aws_ec2.SecurityGroup(
            self,
            "BizcardGraphDbClientSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for octember bizcard graph db client',
            security_group_name='use-octember-bizcard-neptune')
        core.Tags.of(sg_use_bizcard_graph_db).add(
            'Name', 'use-octember-bizcard-neptune')

        sg_bizcard_graph_db = aws_ec2.SecurityGroup(
            self,
            "BizcardGraphDbSG",
            vpc=vpc,
            allow_all_outbound=True,
            description='security group for octember bizcard graph db',
            security_group_name='octember-bizcard-neptune')
        core.Tags.of(sg_bizcard_graph_db).add('Name',
                                              'octember-bizcard-neptune')

        sg_bizcard_graph_db.add_ingress_rule(
            peer=sg_bizcard_graph_db,
            connection=aws_ec2.Port.tcp(8182),
            description='octember-bizcard-neptune')
        sg_bizcard_graph_db.add_ingress_rule(
            peer=sg_use_bizcard_graph_db,
            connection=aws_ec2.Port.tcp(8182),
            description='use-octember-bizcard-neptune')

        bizcard_graph_db_subnet_group = aws_neptune.CfnDBSubnetGroup(
            self,
            "NeptuneSubnetGroup",
            db_subnet_group_description=
            "subnet group for octember-bizcard-neptune",
            subnet_ids=vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids,
            db_subnet_group_name='octember-bizcard-neptune')

        bizcard_graph_db = aws_neptune.CfnDBCluster(
            self,
            "BizcardGraphDB",
            availability_zones=vpc.availability_zones,
            db_subnet_group_name=bizcard_graph_db_subnet_group.
            db_subnet_group_name,
            db_cluster_identifier="octember-bizcard",
            backup_retention_period=1,
            preferred_backup_window="08:45-09:15",
            preferred_maintenance_window="sun:18:00-sun:18:30",
            vpc_security_group_ids=[sg_bizcard_graph_db.security_group_id])
        bizcard_graph_db.add_depends_on(bizcard_graph_db_subnet_group)

        bizcard_graph_db_instance = aws_neptune.CfnDBInstance(
            self,
            "BizcardGraphDBInstance",
            db_instance_class="db.r5.large",
            allow_major_version_upgrade=False,
            auto_minor_version_upgrade=False,
            availability_zone=vpc.availability_zones[0],
            db_cluster_identifier=bizcard_graph_db.db_cluster_identifier,
            db_instance_identifier="octember-bizcard",
            preferred_maintenance_window="sun:18:00-sun:18:30")
        bizcard_graph_db_instance.add_depends_on(bizcard_graph_db)

        bizcard_graph_db_replica_instance = aws_neptune.CfnDBInstance(
            self,
            "BizcardGraphDBReplicaInstance",
            db_instance_class="db.r5.large",
            allow_major_version_upgrade=False,
            auto_minor_version_upgrade=False,
            availability_zone=vpc.availability_zones[-1],
            db_cluster_identifier=bizcard_graph_db.db_cluster_identifier,
            db_instance_identifier="octember-bizcard-replica",
            preferred_maintenance_window="sun:18:00-sun:18:30")
        bizcard_graph_db_replica_instance.add_depends_on(bizcard_graph_db)
        bizcard_graph_db_replica_instance.add_depends_on(
            bizcard_graph_db_instance)

        gremlinpython_lib_layer = _lambda.LayerVersion(
            self,
            "GremlinPythonLib",
            layer_version_name="gremlinpython-lib",
            compatible_runtimes=[_lambda.Runtime.PYTHON_3_7],
            code=_lambda.Code.from_bucket(
                s3_lib_bucket, "var/octember-gremlinpython-lib.zip"))

        #XXX: https://github.com/aws/aws-cdk/issues/1342
        upsert_to_neptune_lambda_fn = _lambda.Function(
            self,
            "UpsertBizcardToGraphDB",
            runtime=_lambda.Runtime.PYTHON_3_7,
            function_name="UpsertBizcardToNeptune",
            handler="upsert_bizcard_to_graph_db.lambda_handler",
            description="Upsert bizcard into neptune",
            code=_lambda.Code.asset(
                "./src/main/python/UpsertBizcardToGraphDB"),
            environment={
                'REGION_NAME': core.Aws.REGION,
                'NEPTUNE_ENDPOINT': bizcard_graph_db.attr_endpoint,
                'NEPTUNE_PORT': bizcard_graph_db.attr_port
            },
            timeout=core.Duration.minutes(5),
            layers=[gremlinpython_lib_layer],
            security_groups=[sg_use_bizcard_graph_db],
            vpc=vpc)

        upsert_to_neptune_lambda_fn.add_event_source(text_kinesis_event_source)

        log_group = aws_logs.LogGroup(
            self,
            "UpsertBizcardToGraphDBLogGroup",
            log_group_name="/aws/lambda/UpsertBizcardToNeptune",
            retention=aws_logs.RetentionDays.THREE_DAYS)
        log_group.grant_write(upsert_to_neptune_lambda_fn)

        sg_use_bizcard_neptune_cache = aws_ec2.SecurityGroup(
            self,
            "BizcardNeptuneCacheClientSG",
            vpc=vpc,
            allow_all_outbound=True,
            description=
            'security group for octember bizcard recommendation query cache client',
            security_group_name='use-octember-bizcard-neptune-cache')
        core.Tags.of(sg_use_bizcard_neptune_cache).add(
            'Name', 'use-octember-bizcard-neptune-cache')

        sg_bizcard_neptune_cache = aws_ec2.SecurityGroup(
            self,
            "BizcardNeptuneCacheSG",
            vpc=vpc,
            allow_all_outbound=True,
            description=
            'security group for octember bizcard recommendation query cache',
            security_group_name='octember-bizcard-neptune-cache')
        core.Tags.of(sg_bizcard_neptune_cache).add(
            'Name', 'octember-bizcard-neptune-cache')

        sg_bizcard_neptune_cache.add_ingress_rule(
            peer=sg_use_bizcard_neptune_cache,
            connection=aws_ec2.Port.tcp(6379),
            description='use-octember-bizcard-neptune-cache')

        recomm_query_cache_subnet_group = aws_elasticache.CfnSubnetGroup(
            self,
            "RecommQueryCacheSubnetGroup",
            description="subnet group for octember-bizcard-neptune-cache",
            subnet_ids=vpc.select_subnets(
                subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids,
            cache_subnet_group_name='octember-bizcard-neptune-cache')

        recomm_query_cache = aws_elasticache.CfnCacheCluster(
            self,
            "BizcardRecommQueryCache",
            cache_node_type="cache.t3.small",
            num_cache_nodes=1,
            engine="redis",
            engine_version="5.0.5",
            auto_minor_version_upgrade=False,
            cluster_name="octember-bizcard-neptune-cache",
            snapshot_retention_limit=3,
            snapshot_window="17:00-19:00",
            preferred_maintenance_window="mon:19:00-mon:20:30",
            #XXX: Do not use a reference for "cache_subnet_group_name" - https://github.com/aws/aws-cdk/issues/3098
            #cache_subnet_group_name=recomm_query_cache_subnet_group.cache_subnet_group_name, # Redis cluster goes to wrong VPC
            cache_subnet_group_name='octember-bizcard-neptune-cache',
            vpc_security_group_ids=[
                sg_bizcard_neptune_cache.security_group_id
            ])

        recomm_query_cache.add_depends_on(recomm_query_cache_subnet_group)

        bizcard_recomm_lambda_fn = _lambda.Function(
            self,
            "BizcardRecommender",
            runtime=_lambda.Runtime.PYTHON_3_7,
            function_name="BizcardRecommender",
            handler="neptune_recommend_bizcard.lambda_handler",
            description="This service serves PYMK(People You May Know).",
            code=_lambda.Code.asset("./src/main/python/RecommendBizcard"),
            environment={
                'REGION_NAME': core.Aws.REGION,
                'NEPTUNE_ENDPOINT': bizcard_graph_db.attr_read_endpoint,
                'NEPTUNE_PORT': bizcard_graph_db.attr_port,
                'ELASTICACHE_HOST':
                recomm_query_cache.attr_redis_endpoint_address
            },
            timeout=core.Duration.minutes(1),
            layers=[gremlinpython_lib_layer, redis_lib_layer],
            security_groups=[
                sg_use_bizcard_graph_db, sg_use_bizcard_neptune_cache
            ],
            vpc=vpc)

        #XXX: create API Gateway + LambdaProxy
        recomm_api = apigw.LambdaRestApi(
            self,
            "BizcardRecommendAPI",
            handler=bizcard_recomm_lambda_fn,
            proxy=False,
            rest_api_name="BizcardRecommend",
            description="This service serves PYMK(People You May Know).",
            endpoint_types=[apigw.EndpointType.REGIONAL],
            deploy=True,
            deploy_options=apigw.StageOptions(stage_name="v1"))

        bizcard_recomm = recomm_api.root.add_resource('pymk')
        bizcard_recomm.add_method(
            "GET",
            method_responses=[
                apigw.MethodResponse(
                    status_code="200",
                    response_models={'application/json': apigw.EmptyModel()}),
                apigw.MethodResponse(status_code="400"),
                apigw.MethodResponse(status_code="500")
            ])

        sagemaker_notebook_role_policy_doc = aws_iam.PolicyDocument()
        sagemaker_notebook_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "resources": [
                        "arn:aws:s3:::aws-neptune-notebook",
                        "arn:aws:s3:::aws-neptune-notebook/*"
                    ],
                    "actions": ["s3:GetObject", "s3:ListBucket"]
                }))

        sagemaker_notebook_role_policy_doc.add_statements(
            aws_iam.PolicyStatement(
                **{
                    "effect":
                    aws_iam.Effect.ALLOW,
                    "resources": [
                        "arn:aws:neptune-db:{region}:{account}:{cluster_id}/*".
                        format(region=core.Aws.REGION,
                               account=core.Aws.ACCOUNT_ID,
                               cluster_id=bizcard_graph_db.
                               attr_cluster_resource_id)
                    ],
                    "actions": ["neptune-db:connect"]
                }))

        sagemaker_notebook_role = aws_iam.Role(
            self,
            'SageMakerNotebookForNeptuneWorkbenchRole',
            role_name='AWSNeptuneNotebookRole-OctemberBizcard',
            assumed_by=aws_iam.ServicePrincipal('sagemaker.amazonaws.com'),
            #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
            inline_policies={
                'AWSNeptuneNotebook': sagemaker_notebook_role_policy_doc
            })

        neptune_wb_lifecycle_content = '''#!/bin/bash
sudo -u ec2-user -i <<'EOF'
echo "export GRAPH_NOTEBOOK_AUTH_MODE=DEFAULT" >> ~/.bashrc
echo "export GRAPH_NOTEBOOK_HOST={NeptuneClusterEndpoint}" >> ~/.bashrc
echo "export GRAPH_NOTEBOOK_PORT={NeptuneClusterPort}" >> ~/.bashrc
echo "export NEPTUNE_LOAD_FROM_S3_ROLE_ARN=''" >> ~/.bashrc
echo "export AWS_REGION={AWS_Region}" >> ~/.bashrc
aws s3 cp s3://aws-neptune-notebook/graph_notebook.tar.gz /tmp/graph_notebook.tar.gz
rm -rf /tmp/graph_notebook
tar -zxvf /tmp/graph_notebook.tar.gz -C /tmp
/tmp/graph_notebook/install.sh
EOF
'''.format(NeptuneClusterEndpoint=bizcard_graph_db.attr_endpoint,
           NeptuneClusterPort=bizcard_graph_db.attr_port,
           AWS_Region=core.Aws.REGION)

        neptune_wb_lifecycle_config_prop = aws_sagemaker.CfnNotebookInstanceLifecycleConfig.NotebookInstanceLifecycleHookProperty(
            content=core.Fn.base64(neptune_wb_lifecycle_content))

        neptune_wb_lifecycle_config = aws_sagemaker.CfnNotebookInstanceLifecycleConfig(
            self,
            'NeptuneWorkbenchLifeCycleConfig',
            notebook_instance_lifecycle_config_name=
            'AWSNeptuneWorkbenchOctemberBizcardLCConfig',
            on_start=[neptune_wb_lifecycle_config_prop])

        neptune_workbench = aws_sagemaker.CfnNotebookInstance(
            self,
            'NeptuneWorkbench',
            instance_type='ml.t2.medium',
            role_arn=sagemaker_notebook_role.role_arn,
            lifecycle_config_name=neptune_wb_lifecycle_config.
            notebook_instance_lifecycle_config_name,
            notebook_instance_name='OctemberBizcard-NeptuneWorkbench',
            root_access='Disabled',
            security_group_ids=[sg_use_bizcard_graph_db.security_group_id],
            subnet_id=bizcard_graph_db_subnet_group.subnet_ids[0])
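
        # A small optional addition (not in the original stack): exporting the Neptune
        # endpoint and the workbench name as stack outputs makes them easy to look up
        # after deployment. This is a sketch that only reuses constructs defined above.
        core.CfnOutput(self, 'NeptuneClusterEndpoint',
            value=bizcard_graph_db.attr_endpoint,
            description='Neptune cluster endpoint for the bizcard graph database')

        core.CfnOutput(self, 'NeptuneWorkbenchName',
            value=neptune_workbench.notebook_instance_name,
            description='SageMaker notebook instance hosting the Neptune workbench')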
Example #30
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # The code that defines your stack goes here
        table = aws_dynamodb.Table(self, "DashboardModel",
            partition_key=aws_dynamodb.Attribute(name="Pk", type=aws_dynamodb.AttributeType.STRING),
            sort_key=aws_dynamodb.Attribute(name="Sk", type=aws_dynamodb.AttributeType.STRING),
            billing_mode=aws_dynamodb.BillingMode.PAY_PER_REQUEST)
        
        kds_input_stream = aws_kinesis.Stream(self, "kds_dashboard_input_stream",
            shard_count=1, 
            stream_name="kds_dashboard_input_stream")
        
        kds_output_stream = aws_kinesis.Stream(self, "kds_dashboard_output_stream",
            shard_count=1, 
            stream_name="kds_dashboard_output_stream")

        # Creating an ingest bucket for this stack
        ingest_bucket = aws_s3.Bucket(self,'dreis_dboard_ingest_bucket')

        kfh_service_role = aws_iam.Role(self, 'KFH_Dashboard_Role',
            assumed_by=aws_iam.ServicePrincipal('firehose.amazonaws.com')
        )

        kfh_policy_stmt = aws_iam.PolicyStatement(
            actions=["*"],
            resources=["*"]
        )

        kfh_service_role.add_to_policy(kfh_policy_stmt)

        #Creating firehose for this stack
        kfh_source = aws_kinesisfirehose.CfnDeliveryStream.KinesisStreamSourceConfigurationProperty(
            kinesis_stream_arn=kds_input_stream.stream_arn,
            role_arn=kfh_service_role.role_arn
        )

        kfh_datalake = aws_kinesisfirehose.CfnDeliveryStream(self, "kfh_datalake",
            s3_destination_configuration=aws_kinesisfirehose.CfnDeliveryStream.S3DestinationConfigurationProperty(
                bucket_arn=ingest_bucket.bucket_arn,
                buffering_hints=aws_kinesisfirehose.CfnDeliveryStream.BufferingHintsProperty(
                    interval_in_seconds=60,
                    size_in_m_bs=5),
                compression_format="UNCOMPRESSED",
                role_arn=kfh_service_role.role_arn
                ),
            delivery_stream_type="KinesisStreamAsSource",
            kinesis_stream_source_configuration=kfh_source
        )
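
        # Data path: records land on kds_dashboard_input_stream, Firehose reads them via
        # the KinesisStreamAsSource configuration above and buffers a raw copy into the
        # ingest bucket, while Kinesis Data Analytics consumes the same input stream for
        # the aggregations defined below.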

        kda_service_role = aws_iam.Role(self, 'KDA_Dashboard_Role',
            assumed_by=aws_iam.ServicePrincipal('kinesisanalytics.amazonaws.com')
        )

        kda_policy_stmt = aws_iam.PolicyStatement(
            actions=["*"],
            resources=["*"]
        )

        kda_service_role.add_to_policy(kda_policy_stmt)

        # Kinesis Analytics doesn't handle dashes in column names well, so the dashed
        # JSON fields are mapped onto underscore-named columns below.
        col1 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty(
            name="state",
            sql_type="VARCHAR(2)",
            mapping="$.state"
        )

        col2 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty(
            name="event_time",
            sql_type="TIMESTAMP",
            mapping="$.event-time"
        )
        
        col3 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty(
            name="region",  
            sql_type="VARCHAR(12)",
            mapping="$.region"
        )

        col4 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty(
            name="store_id",
            sql_type="INTEGER",
            mapping="$.store-id"
        )

        col5 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty(
            name="kpi_1",
            sql_type="INTEGER",
            mapping="$.kpi-1"
        )
        
        col6 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty(
            name="kpi_2",
            sql_type="INTEGER",
            mapping="$.kpi-2"
        )

        col7 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty(
            name="kpi_3",
            sql_type="INTEGER",
            mapping="$.kpi-3"
        )

        col8 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty(
            name="kpi_4",
            sql_type="INTEGER",
            mapping="$.kpi-4"
        )

        col9 = aws_kinesisanalytics.CfnApplication.RecordColumnProperty(
            name="kpi_5",
            sql_type="INTEGER",
            mapping="$.kpi-5"
        )

        schema = aws_kinesisanalytics.CfnApplication.InputSchemaProperty(
            record_columns=[col2, col1, col3, col4, col5, col6, col7, col8, col9],
            record_encoding="UTF-8",
            record_format=aws_kinesisanalytics.CfnApplication.RecordFormatProperty(
                record_format_type="JSON",
                mapping_parameters=aws_kinesisanalytics.CfnApplication.MappingParametersProperty(
                    json_mapping_parameters=aws_kinesisanalytics.CfnApplication.JSONMappingParametersProperty(
                        record_row_path="$"
                    )
                )
            )
        )
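
        # For reference (derived from the mappings above; the timestamp format is an
        # assumption): an input record is a flat JSON object such as
        #   {"state": "WA", "event-time": "2021-01-01 00:00:00", "region": "us-west-2",
        #    "store-id": 1, "kpi-1": 10, "kpi-2": 20, "kpi-3": 30, "kpi-4": 40, "kpi-5": 50}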

        kda_is = aws_kinesisanalytics.CfnApplication.KinesisStreamsInputProperty(
            resource_arn=kds_input_stream.stream_arn,
            role_arn=kda_service_role.role_arn
        )

        ip = aws_kinesisanalytics.CfnApplication.InputProperty(
            name_prefix="SOURCE_SQL_STREAM",
            input_schema=schema,
            kinesis_streams_input=kda_is
        )

        application_code = "CREATE OR REPLACE STREAM \"DESTINATION_SQL_STREAM_BY_STORE\" (\"region\" VARCHAR(10), \"state\" VARCHAR(2), \"store-id\" INTEGER, kpi_1_sum INTEGER,  kpi_2_sum INTEGER, ingest_time TIMESTAMP);" + \
            "CREATE OR REPLACE STREAM \"DESTINATION_SQL_STREAM_BY_STATE\" (\"region\" VARCHAR(10), \"state\" VARCHAR(2), kpi_1_sum INTEGER,  kpi_2_sum INTEGER, ingest_time TIMESTAMP);" + \
            "CREATE OR REPLACE STREAM \"DESTINATION_SQL_STREAM_BY_REGION\" (\"region\" VARCHAR(10), kpi_1_sum INTEGER,  kpi_2_sum INTEGER, ingest_time TIMESTAMP);" + \
            "CREATE OR REPLACE PUMP \"STREAM_PUMP\" AS INSERT INTO \"DESTINATION_SQL_STREAM_BY_STORE\"" + \
            "SELECT STREAM \"region\", \"state\", \"store-id\", SUM(\"kpi-1\") AS kpi_1_sum, SUM(\"kpi-2\") AS kpi_2_sum, FLOOR(\"SOURCE_SQL_STREAM_001\".APPROXIMATE_ARRIVAL_TIME TO MINUTE) as ingest_time" + \
            "FROM \"SOURCE_SQL_STREAM_001\"" + \
            "GROUP BY \"region\", \"state\", \"store-id\", FLOOR(\"SOURCE_SQL_STREAM_001\".APPROXIMATE_ARRIVAL_TIME TO MINUTE), FLOOR((\"SOURCE_SQL_STREAM_001\".ROWTIME - TIMESTAMP '1970-01-01 00:00:00') SECOND / 10 TO SECOND);" + \
            "CREATE OR REPLACE PUMP \"STREAM_PUMP\" AS INSERT INTO \"DESTINATION_SQL_STREAM_BY_STATE\"" + \
            "SELECT STREAM \"region\", \"state\", SUM(\"kpi-1\") AS kpi_1_sum, SUM(\"kpi-2\") AS kpi_2_sum, FLOOR(\"SOURCE_SQL_STREAM_001\".APPROXIMATE_ARRIVAL_TIME TO MINUTE) as ingest_time" + \
            "FROM \"SOURCE_SQL_STREAM_001\"" + \
            "GROUP BY \"region\", \"state\", FLOOR(\"SOURCE_SQL_STREAM_001\".APPROXIMATE_ARRIVAL_TIME TO MINUTE), FLOOR((\"SOURCE_SQL_STREAM_001\".ROWTIME - TIMESTAMP '1970-01-01 00:00:00') SECOND / 10 TO SECOND);" + \
            "CREATE OR REPLACE PUMP \"STREAM_PUMP\" AS INSERT INTO \"DESTINATION_SQL_STREAM_BY_REGION\"" + \
            "SELECT STREAM \"region\", SUM(\"kpi-1\") AS kpi_1_sum, SUM(\"kpi-2\") AS kpi_2_sum, FLOOR(\"SOURCE_SQL_STREAM_001\".APPROXIMATE_ARRIVAL_TIME TO MINUTE) as ingest_time" + \
            "FROM \"SOURCE_SQL_STREAM_001\"" + \
            "GROUP BY \"region\", FLOOR(\"SOURCE_SQL_STREAM_001\".APPROXIMATE_ARRIVAL_TIME TO MINUTE), FLOOR((\"SOURCE_SQL_STREAM_001\".ROWTIME - TIMESTAMP '1970-01-01 00:00:00') SECOND / 10 TO SECOND);"

        kda_app = aws_kinesisanalytics.CfnApplication(self, "kda_agg",
            inputs=[ip], #kda_inputs,
            application_code=application_code, 
            application_description="Aggregating data", 
            application_name="DashboardMetricsAggregator"
        )

        kda_output_prop = aws_kinesisanalytics.CfnApplicationOutput.KinesisStreamsOutputProperty(
            resource_arn=kds_output_stream.stream_arn,
            role_arn=kda_service_role.role_arn
        )

        kda_dest_schema = aws_kinesisanalytics.CfnApplicationOutput.DestinationSchemaProperty(
            record_format_type="JSON"
        )

        kda_output_prop_by_store = aws_kinesisanalytics.CfnApplicationOutput.OutputProperty(
            destination_schema=kda_dest_schema,
            kinesis_streams_output=kda_output_prop,
            name="DESTINATION_SQL_STREAM_BY_STORE"
        )

        kda_output_prop_by_state = aws_kinesisanalytics.CfnApplicationOutput.OutputProperty(
            destination_schema=kda_dest_schema,
            kinesis_streams_output=kda_output_prop,
            name="DESTINATION_SQL_STREAM_BY_STATE"
        )

        kda_output_prop_by_region = aws_kinesisanalytics.CfnApplicationOutput.OutputProperty(
            destination_schema=kda_dest_schema,
            kinesis_streams_output=kda_output_prop,
            name="DESTINATION_SQL_STREAM_BY_REGION"
        )

        # The application outputs refer to the application by name, so add explicit
        # dependencies to ensure the application is created before its outputs.
        kda_app_output_store = aws_kinesisanalytics.CfnApplicationOutput(self, "kda_agg_output_store",
            application_name="DashboardMetricsAggregator",
            output=kda_output_prop_by_store
        )
        kda_app_output_store.add_depends_on(kda_app)

        kda_app_output_state = aws_kinesisanalytics.CfnApplicationOutput(self, "kda_agg_output_state",
            application_name="DashboardMetricsAggregator",
            output=kda_output_prop_by_state
        )
        kda_app_output_state.add_depends_on(kda_app)

        kda_app_output_region = aws_kinesisanalytics.CfnApplicationOutput(self, "kda_agg_output_region",
            application_name="DashboardMetricsAggregator",
            output=kda_output_prop_by_region
        )
        kda_app_output_region.add_depends_on(kda_app)

        lambda_agg_function = aws_lambda.Function(self, "AggDataLambda",
            runtime=aws_lambda.Runtime.PYTHON_3_7,
            handler="lambda_function.lambda_handler",
            code=aws_lambda.Code.asset("../models/dashboard/lambdas/aggregate_data_lambda"),
            timeout=core.Duration.minutes(5))

        lambda_agg_function.add_environment("DDB_TABLE_DASHBOARD", table.table_name)

        lambda_agg_function.add_to_role_policy(aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            actions=[
                "kinesis:*"
            ],
            resources=["*"]
        ))

        table.grant_read_write_data(lambda_agg_function)

        kes = aws_lambda_event_sources.KinesisEventSource(kds_output_stream,
            starting_position=aws_lambda.StartingPosition.TRIM_HORIZON,
            batch_size=50, 
            #max_batching_window=100
        )

        lambda_agg_function.add_event_source(kes)

        core.CfnOutput(
            self, "TableName_Dashboard",
            description="Table name for Dashboard",
            value=table.table_name
        )

        core.CfnOutput(
            self, "BucketName_Dashboard",
            description="Ingest bucket name",
            value=ingest_bucket.bucket_name
        )

        core.CfnOutput(
            self, "KinesisInputStream_Dashboard",
            description="Kinesis input for Dashboard",
            value=kds_input_stream.stream_name
        )

        core.CfnOutput(
            self, "KinesisOutputStream_Dashboard",
            description="Kinesis output for Dashboard",
            value=kds_output_stream.stream_name
        )
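
A minimal producer sketch (not part of the stack above) for pushing test records onto the input stream: the stream name matches kds_dashboard_input_stream defined in the example, the dashed field names match the JSON mappings in the input schema, and boto3 plus valid AWS credentials and a deployed stack are assumed; the event-time format is also an assumption.

import json
import random
import datetime

import boto3

kinesis = boto3.client("kinesis")


def put_sample_record(stream_name: str = "kds_dashboard_input_stream") -> None:
    """Put one synthetic dashboard record onto the Kinesis input stream."""
    record = {
        "state": "WA",
        # Timestamp format assumed compatible with the TIMESTAMP column mapping.
        "event-time": datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S"),
        "region": "us-west-2",
        "store-id": random.randint(1, 10),
        "kpi-1": random.randint(0, 100),
        "kpi-2": random.randint(0, 100),
        "kpi-3": random.randint(0, 100),
        "kpi-4": random.randint(0, 100),
        "kpi-5": random.randint(0, 100),
    }
    kinesis.put_record(
        StreamName=stream_name,
        Data=json.dumps(record).encode("utf-8"),
        PartitionKey=record["state"])


if __name__ == "__main__":
    put_sample_record()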