def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    """Provision a SageMaker notebook instance for Korean-NLP work.

    Creates: a CFN parameter for the instance type, a security group, an
    execution role with S3 access, and an on-start lifecycle script that
    builds MeCab (Korean morphological analyzer) and installs NLP/DB
    python packages into two conda environments.

    Relies on module-level imports of ``cdk``, ``aws_ec2``, ``aws_iam``,
    ``aws_sagemaker``, ``random`` and ``string``.
    """
    super().__init__(scope, construct_id, **kwargs)

    # Deploy-time CloudFormation parameter so operators can pick the size.
    SAGEMAKER_NOTEBOOK_INSTANCE_TYPE = cdk.CfnParameter(
        self, 'SageMakerNotebookInstanceType',
        type='String',
        description='Amazon SageMaker Notebook instance type',
        default='ml.t2.medium')

    #XXX: To deploy into a specific existing VPC, pass the VPC name as
    # context, for example:
    #   cdk -c vpc_name=your-existing-vpc synth
    # FIX: `vpc_name` must be assigned before Vpc.from_lookup() uses it;
    # in the original source this assignment appeared commented out, which
    # would raise NameError at synth time.
    vpc_name = self.node.try_get_context('vpc_name')
    vpc = aws_ec2.Vpc.from_lookup(self, 'ExistingVPC',
        is_default=True,
        vpc_name=vpc_name)

    #XXX: To use more than 2 AZs, be sure to specify the account and region on your stack.
    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/Vpc.html
    # vpc = aws_ec2.Vpc(self, 'SageMakerStudioVPC',
    #     max_azs=2,
    #     gateway_endpoints={
    #         "S3": aws_ec2.GatewayVpcEndpointOptions(
    #             service=aws_ec2.GatewayVpcEndpointAwsService.S3
    #         )
    #     }
    # )

    # Security group for the notebook; the random 5-letter suffix avoids
    # name collisions on repeated deployments in one account/region.
    sg_sagemaker_notebook_instance = aws_ec2.SecurityGroup(
        self, "SageMakerNotebookSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='Security group with no ingress rule',
        security_group_name='sagemaker-nb-{}-sg'.format(
            ''.join(random.sample(string.ascii_letters, k=5))))
    # Allow members of this SG to talk to each other on any port.
    sg_sagemaker_notebook_instance.add_ingress_rule(
        peer=sg_sagemaker_notebook_instance,
        connection=aws_ec2.Port.all_traffic(),
        description='sagemaker notebook security group')
    cdk.Tags.of(sg_sagemaker_notebook_instance).add('Name', 'sagemaker-nb-sg')

    # Inline policy granting broad S3 object/bucket access to the notebook.
    sagemaker_notebook_role_policy_doc = aws_iam.PolicyDocument()
    sagemaker_notebook_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=["arn:aws:s3:::*"],
            actions=[
                "s3:GetObject",
                "s3:PutObject",
                "s3:DeleteObject",
                "s3:ListBucket"
            ]))

    sagemaker_notebook_role = aws_iam.Role(
        self, 'SageMakerNotebookRole',
        # Random suffix for the same collision-avoidance reason as the SG name.
        role_name='SageMakerNotebookRole-{suffix}'.format(
            suffix=''.join(random.sample(string.ascii_letters, k=5))),
        assumed_by=aws_iam.ServicePrincipal('sagemaker.amazonaws.com'),
        inline_policies={
            'sagemaker-custome-execution-role': sagemaker_notebook_role_policy_doc
        },
        managed_policies=[
            aws_iam.ManagedPolicy.from_aws_managed_policy_name('AmazonSageMakerFullAccess'),
            aws_iam.ManagedPolicy.from_aws_managed_policy_name('AWSCloudFormationReadOnlyAccess')
        ])

    #XXX: skip downloading rds-combined-ca-bundle.pem if not use SSL with a MySQL DB instance
    # https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/CHAP_MySQL.html#MySQL.Concepts.SSLSupport
    # On-start script: builds mecab-ko and its dictionary from source, then
    # installs packages into the python3 and pytorch_latest_p36 conda envs.
    # `${{each}}` is doubled so str.format() leaves a literal `${each}` for bash.
    sagemaker_nb_lifecycle_content = '''#!/bin/bash
sudo -u ec2-user -i <<'EOF'
echo "export AWS_REGION={AWS_Region}" >> ~/.bashrc
curl -LO https://bitbucket.org/eunjeon/mecab-ko/downloads/mecab-0.996-ko-0.9.2.tar.gz
tar zxfv mecab-0.996-ko-0.9.2.tar.gz
pushd mecab-0.996-ko-0.9.2
./configure
make
make check
sudo make install
sudo ldconfig
mecab -v
mecab-config --version
popd
curl -LO https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/mecab-ko-dic-2.1.1-20180720.tar.gz
tar -zxvf mecab-ko-dic-2.1.1-20180720.tar.gz
pushd mecab-ko-dic-2.1.1-20180720
./autogen.sh
./configure
make
sudo make install
popd
for each in python3 pytorch_latest_p36
do
  source /home/ec2-user/anaconda3/bin/activate ${{each}}
  pip install --upgrade pretty_errors
  pip install --upgrade pandas-profiling[notebook]
  pip install --upgrade ipython-sql
  pip install --upgrade PyMySQL
  pip install torchvision
  pip install torchtext
  pip install spacy
  pip install nltk
  pip install requests
  pip install mecab-python
  pip install konlpy
  pip install jpype1-py3
  conda deactivate
done
EOF
'''.format(AWS_Region=cdk.Aws.REGION)

    sagemaker_lifecycle_config_prop = aws_sagemaker.CfnNotebookInstanceLifecycleConfig.NotebookInstanceLifecycleHookProperty(
        content=cdk.Fn.base64(sagemaker_nb_lifecycle_content))

    sagemaker_lifecycle_config = aws_sagemaker.CfnNotebookInstanceLifecycleConfig(
        self, 'SageMakerNotebookLifeCycleConfig',
        notebook_instance_lifecycle_config_name='SageMakerNotebookLifeCycleConfig',
        on_start=[sagemaker_lifecycle_config_prop])

    # NOTE(review): a default VPC (is_default=True lookup above) has no
    # PRIVATE_WITH_NAT subnets, so this select_subnets() would return an
    # empty list and [0] would fail — confirm the target VPC has private
    # subnets with NAT before deploying.
    sagemaker_notebook_instance = aws_sagemaker.CfnNotebookInstance(
        self, 'SageMakerNotebookInstance',
        instance_type=SAGEMAKER_NOTEBOOK_INSTANCE_TYPE.value_as_string,
        role_arn=sagemaker_notebook_role.role_arn,
        lifecycle_config_name=sagemaker_lifecycle_config.notebook_instance_lifecycle_config_name,
        notebook_instance_name='MySageMakerWorkbook',
        root_access='Disabled',
        security_group_ids=[sg_sagemaker_notebook_instance.security_group_id],
        subnet_id=vpc.select_subnets(
            subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).subnet_ids[0])
def __init__(self, scope: Construct, id: str, **kwargs) -> None:
    """Stand up an Amazon Neptune cluster (primary + one replica) in its
    own 2-AZ VPC, plus a SageMaker notebook acting as the graph workbench.
    """
    super().__init__(scope, id, **kwargs)

    # Dedicated VPC with an S3 gateway endpoint for the workbench download.
    vpc = aws_ec2.Vpc(self, "NeptuneHolVPC",
        max_azs=2,
        gateway_endpoints={
            "S3": aws_ec2.GatewayVpcEndpointOptions(
                service=aws_ec2.GatewayVpcEndpointAwsService.S3
            )
        }
    )

    # Client-side SG: attach to anything that must reach Neptune on 8182.
    sg_neptune_client = aws_ec2.SecurityGroup(self, "NeptuneClientSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for neptune client',
        security_group_name='use-neptune-client'
    )
    cdk.Tags.of(sg_neptune_client).add('Name', 'use-neptune-client')

    # Server-side SG: accepts 8182 from itself and from the client SG.
    sg_neptune_server = aws_ec2.SecurityGroup(self, "NeptuneSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for neptune',
        security_group_name='neptune-server'
    )
    cdk.Tags.of(sg_neptune_server).add('Name', 'neptune-server')
    sg_neptune_server.add_ingress_rule(peer=sg_neptune_server,
        connection=aws_ec2.Port.tcp(8182),
        description='neptune-server')
    sg_neptune_server.add_ingress_rule(peer=sg_neptune_client,
        connection=aws_ec2.Port.tcp(8182),
        description='use-neptune-client')

    db_subnet_group = aws_neptune.CfnDBSubnetGroup(self, 'NeptuneHolSubnetGroup',
        db_subnet_group_description='subnet group for neptune hol',
        subnet_ids=vpc.select_subnets(
            subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).subnet_ids,
        db_subnet_group_name='neptune-hol'
    )

    neptune_cluster = aws_neptune.CfnDBCluster(self, 'NeptuneHol',
        availability_zones=vpc.availability_zones,
        db_subnet_group_name=db_subnet_group.db_subnet_group_name,
        db_cluster_identifier='neptune-hol',
        backup_retention_period=1,
        preferred_backup_window='08:45-09:15',
        preferred_maintenance_window='sun:18:00-sun:18:30',
        vpc_security_group_ids=[sg_neptune_server.security_group_id]
    )
    # Subnet group name is passed as a plain string, so order it explicitly.
    neptune_cluster.add_depends_on(db_subnet_group)

    primary_instance = aws_neptune.CfnDBInstance(self, 'NeptuneHolInstance',
        db_instance_class='db.r5.large',
        allow_major_version_upgrade=False,
        auto_minor_version_upgrade=False,
        availability_zone=vpc.availability_zones[0],
        db_cluster_identifier=neptune_cluster.db_cluster_identifier,
        db_instance_identifier='neptune-hol',
        preferred_maintenance_window='sun:18:00-sun:18:30'
    )
    primary_instance.add_depends_on(neptune_cluster)

    # Replica in the other AZ; created only after the primary exists.
    replica_instance = aws_neptune.CfnDBInstance(self, 'NeptuneHolReplicaInstance',
        db_instance_class='db.r5.large',
        allow_major_version_upgrade=False,
        auto_minor_version_upgrade=False,
        availability_zone=vpc.availability_zones[-1],
        db_cluster_identifier=neptune_cluster.db_cluster_identifier,
        db_instance_identifier='neptune-hol-replica',
        preferred_maintenance_window='sun:18:00-sun:18:30'
    )
    replica_instance.add_depends_on(neptune_cluster)
    replica_instance.add_depends_on(primary_instance)

    # Workbench role: read the public notebook bundle + connect to this cluster.
    workbench_policy_doc = aws_iam.PolicyDocument()
    workbench_policy_doc.add_statements(aws_iam.PolicyStatement(
        effect=aws_iam.Effect.ALLOW,
        resources=["arn:aws:s3:::aws-neptune-notebook",
                   "arn:aws:s3:::aws-neptune-notebook/*"],
        actions=["s3:GetObject", "s3:ListBucket"]))
    workbench_policy_doc.add_statements(aws_iam.PolicyStatement(
        effect=aws_iam.Effect.ALLOW,
        resources=["arn:aws:neptune-db:{region}:{account}:{cluster_id}/*".format(
            region=cdk.Aws.REGION,
            account=cdk.Aws.ACCOUNT_ID,
            cluster_id=neptune_cluster.attr_cluster_resource_id)],
        actions=["neptune-db:connect"]))

    workbench_role = aws_iam.Role(self, 'SageMakerNotebookForNeptuneWorkbenchRole',
        role_name='AWSNeptuneNotebookRole-NeptuneHol',
        assumed_by=aws_iam.ServicePrincipal('sagemaker.amazonaws.com'),
        #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
        inline_policies={
            'AWSNeptuneNotebook': workbench_policy_doc
        }
    )

    # On-start script: export the cluster endpoint/port for graph-notebook,
    # then fetch and install the Neptune workbench bundle.
    lifecycle_script = '''#!/bin/bash
sudo -u ec2-user -i <<'EOF'
echo "export GRAPH_NOTEBOOK_AUTH_MODE=DEFAULT" >> ~/.bashrc
echo "export GRAPH_NOTEBOOK_HOST={NeptuneClusterEndpoint}" >> ~/.bashrc
echo "export GRAPH_NOTEBOOK_PORT={NeptuneClusterPort}" >> ~/.bashrc
echo "export NEPTUNE_LOAD_FROM_S3_ROLE_ARN=''" >> ~/.bashrc
echo "export AWS_REGION={AWS_Region}" >> ~/.bashrc
aws s3 cp s3://aws-neptune-notebook/graph_notebook.tar.gz /tmp/graph_notebook.tar.gz
rm -rf /tmp/graph_notebook
tar -zxvf /tmp/graph_notebook.tar.gz -C /tmp
/tmp/graph_notebook/install.sh
EOF
'''.format(NeptuneClusterEndpoint=neptune_cluster.attr_endpoint,
           NeptuneClusterPort=neptune_cluster.attr_port,
           AWS_Region=cdk.Aws.REGION)

    on_start_hook = aws_sagemaker.CfnNotebookInstanceLifecycleConfig.NotebookInstanceLifecycleHookProperty(
        content=cdk.Fn.base64(lifecycle_script)
    )

    lifecycle_config = aws_sagemaker.CfnNotebookInstanceLifecycleConfig(self, 'NpetuneWorkbenchLifeCycleConfig',
        notebook_instance_lifecycle_config_name='NeptuneWorkbenchLifeCycleConfig',
        on_start=[on_start_hook]
    )

    aws_sagemaker.CfnNotebookInstance(self, 'NeptuneWorkbench',
        instance_type='ml.t2.medium',
        role_arn=workbench_role.role_arn,
        lifecycle_config_name=lifecycle_config.notebook_instance_lifecycle_config_name,
        notebook_instance_name='NeptuneHolWorkbench',
        root_access='Disabled',
        security_group_ids=[sg_neptune_client.security_group_id],
        subnet_id=db_subnet_group.subnet_ids[0]
    )
def __init__(self, scope: core.Construct, construct_id: str, **kwargs) -> None:
    """Provision an Aurora MySQL 5.7 cluster in an existing VPC, plus a
    SageMaker notebook instance ("workbench") that can connect to it.

    Context values read: ``vpc_name`` (existing VPC to deploy into) and
    ``db_cluster_name`` (cluster identifier). Exports stack outputs for the
    VPC, cluster endpoint, generated secret, role and workbench names.
    """
    super().__init__(scope, construct_id, **kwargs)

    vpc_name = self.node.try_get_context("vpc_name")
    vpc = aws_ec2.Vpc.from_lookup(self, "ExistingVPC",
        is_default=True,
        vpc_name=vpc_name)

    # Client SG: attach to anything that needs to reach the DB on 3306.
    sg_use_mysql = aws_ec2.SecurityGroup(self, 'MySQLClientSG',
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for mysql client',
        security_group_name='use-mysql-sg')
    core.Tags.of(sg_use_mysql).add('Name', 'mysql-client-sg')

    # Server SG: only members of the client SG may connect.
    sg_mysql_server = aws_ec2.SecurityGroup(self, 'MySQLServerSG',
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for mysql',
        security_group_name='mysql-server-sg')
    sg_mysql_server.add_ingress_rule(peer=sg_use_mysql,
        connection=aws_ec2.Port.tcp(3306),
        description='use-mysql-sg')
    core.Tags.of(sg_mysql_server).add('Name', 'mysql-server-sg')

    rds_subnet_group = aws_rds.SubnetGroup(self, 'RdsSubnetGroup',
        description='subnet group for mysql',
        subnet_group_name='aurora-mysql',
        vpc_subnets=aws_ec2.SubnetSelection(
            subnet_type=aws_ec2.SubnetType.PRIVATE),
        vpc=vpc)

    rds_engine = aws_rds.DatabaseClusterEngine.aurora_mysql(
        version=aws_rds.AuroraMysqlEngineVersion.VER_2_08_1)

    # Cluster-level parameters: relaxed durability + utf8mb4 defaults.
    rds_cluster_param_group = aws_rds.ParameterGroup(self, 'AuroraMySQLClusterParamGroup',
        engine=rds_engine,
        description='Custom cluster parameter group for aurora-mysql5.7',
        parameters={
            'innodb_flush_log_at_trx_commit': '2',
            'slow_query_log': '1',
            'tx_isolation': 'READ-COMMITTED',
            'wait_timeout': '300',
            'character-set-client-handshake': '0',
            'character_set_server': 'utf8mb4',
            'collation_server': 'utf8mb4_unicode_ci',
            'init_connect': 'SET NAMES utf8mb4 COLLATE utf8mb4_unicode_ci'
        })

    # Instance-level parameters applied to each DB instance in the cluster.
    rds_db_param_group = aws_rds.ParameterGroup(self, 'AuroraMySQLDBParamGroup',
        engine=rds_engine,
        description='Custom parameter group for aurora-mysql5.7',
        parameters={
            'slow_query_log': '1',
            'tx_isolation': 'READ-COMMITTED',
            'wait_timeout': '300',
            'init_connect': 'SET NAMES utf8mb4 COLLATE utf8mb4_unicode_ci'
        })

    db_cluster_name = self.node.try_get_context('db_cluster_name')

    #XXX: aws_rds.Credentials.from_username(username, ...) cannot be given a
    # user-specific Secret name; to control the secret name, create the
    # Secret first (aws_secretsmanager.Secret.from_secret_arn) and pass it
    # via aws_rds.Credentials.from_secret(db_secret). Here we simply let RDS
    # generate a secret for the 'admin' user.
    rds_credentials = aws_rds.Credentials.from_generated_secret("admin")

    db_cluster = aws_rds.DatabaseCluster(self, 'Database',
        engine=rds_engine,
        credentials=rds_credentials,
        instance_props={
            'instance_type': aws_ec2.InstanceType.of(
                aws_ec2.InstanceClass.BURSTABLE3, aws_ec2.InstanceSize.MEDIUM),
            'parameter_group': rds_db_param_group,
            'vpc_subnets': {
                'subnet_type': aws_ec2.SubnetType.PRIVATE
            },
            'vpc': vpc,
            'auto_minor_version_upgrade': False,
            'security_groups': [sg_mysql_server]
        },
        instances=2,
        parameter_group=rds_cluster_param_group,
        cloudwatch_logs_retention=aws_logs.RetentionDays.THREE_DAYS,
        cluster_identifier=db_cluster_name,
        subnet_group=rds_subnet_group,
        backup=aws_rds.BackupProps(
            retention=core.Duration.days(3),
            preferred_window="03:00-04:00"))

    # Workbench role: allow reading the generated DB secret only.
    sagemaker_notebook_role_policy_doc = aws_iam.PolicyDocument()
    sagemaker_notebook_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            effect=aws_iam.Effect.ALLOW,
            resources=[db_cluster.secret.secret_full_arn],
            actions=["secretsmanager:GetSecretValue"]))

    sagemaker_notebook_role = aws_iam.Role(self, 'SageMakerNotebookRoleForRDS',
        role_name='AWSSageMakerNotebookRoleForRDS',
        assumed_by=aws_iam.ServicePrincipal('sagemaker.amazonaws.com'),
        inline_policies={
            'AuroraMySQLSecretPolicy': sagemaker_notebook_role_policy_doc
        })

    cf_readonly_access_policy = aws_iam.ManagedPolicy.from_aws_managed_policy_name(
        'AWSCloudFormationReadOnlyAccess')
    sagemaker_notebook_role.add_managed_policy(cf_readonly_access_policy)

    #XXX: skip downloading rds-combined-ca-bundle.pem if not use SSL with a MySQL DB instance
    # https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/CHAP_MySQL.html#MySQL.Concepts.SSLSupport
    # On-start script: installs SQL client packages into the python3 conda
    # env and drops the RDS CA bundle + a sample notebook into ~/SageMaker.
    rds_wb_lifecycle_content = '''#!/bin/bash
sudo -u ec2-user -i <<'EOF'
echo "export AWS_REGION={AWS_Region}" >> ~/.bashrc
source /home/ec2-user/anaconda3/bin/activate python3
pip install --upgrade ipython-sql
pip install --upgrade PyMySQL
pip install --upgrade pretty_errors
source /home/ec2-user/anaconda3/bin/deactivate
cd /home/ec2-user/SageMaker
wget -N https://s3.amazonaws.com/rds-downloads/rds-combined-ca-bundle.pem
wget -N https://raw.githubusercontent.com/ksmin23/my-aws-cdk-examples/main/rds/sagemaker-aurora_mysql/ipython-sql.ipynb
EOF
'''.format(AWS_Region=core.Aws.REGION)

    rds_wb_lifecycle_config_prop = aws_sagemaker.CfnNotebookInstanceLifecycleConfig.NotebookInstanceLifecycleHookProperty(
        content=core.Fn.base64(rds_wb_lifecycle_content))

    rds_wb_lifecycle_config = aws_sagemaker.CfnNotebookInstanceLifecycleConfig(
        self, 'MySQLWorkbenchLifeCycleConfig',
        notebook_instance_lifecycle_config_name='MySQLWorkbenchLifeCycleConfig',
        on_start=[rds_wb_lifecycle_config_prop])

    rds_workbench = aws_sagemaker.CfnNotebookInstance(
        self, 'AuroraMySQLWorkbench',
        instance_type='ml.t3.xlarge',
        role_arn=sagemaker_notebook_role.role_arn,
        lifecycle_config_name=rds_wb_lifecycle_config.notebook_instance_lifecycle_config_name,
        notebook_instance_name='AuroraMySQLWorkbench',
        root_access='Disabled',
        # FIX: SecurityGroupIds requires security group *IDs*; the original
        # passed sg_use_mysql.security_group_name, which CloudFormation
        # rejects for a VPC notebook instance.
        security_group_ids=[sg_use_mysql.security_group_id],
        subnet_id=vpc.select_subnets(
            subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids[0])

    core.CfnOutput(self, 'StackName', value=self.stack_name,
        export_name='StackName')
    core.CfnOutput(self, 'VpcId', value=vpc.vpc_id,
        export_name='VpcId')
    core.CfnOutput(self, 'DBClusterName', value=db_cluster.cluster_identifier,
        export_name='DBClusterName')
    core.CfnOutput(self, 'DBCluster', value=db_cluster.cluster_endpoint.socket_address,
        export_name='DBCluster')
    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_secretsmanager/README.html
    # secret_arn="arn:aws:secretsmanager:<region>:<account-id-number>:secret:<secret-name>-<random-6-characters>",
    core.CfnOutput(self, 'DBSecret', value=db_cluster.secret.secret_name,
        export_name='DBSecret')
    core.CfnOutput(self, 'SageMakerRole', value=sagemaker_notebook_role.role_name,
        export_name='SageMakerRole')
    core.CfnOutput(self, 'SageMakerNotebookInstance',
        value=rds_workbench.notebook_instance_name,
        export_name='SageMakerNotebookInstance')
    core.CfnOutput(self, 'SageMakerNotebookInstanceLifecycleConfig',
        value=rds_workbench.lifecycle_config_name,
        export_name='SageMakerNotebookInstanceLifecycleConfig')
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None: super().__init__(scope, id, **kwargs) vpc = aws_ec2.Vpc( self, "OctemberVPC", max_azs=2, # subnet_configuration=[{ # "cidrMask": 24, # "name": "Public", # "subnetType": aws_ec2.SubnetType.PUBLIC, # }, # { # "cidrMask": 24, # "name": "Private", # "subnetType": aws_ec2.SubnetType.PRIVATE # }, # { # "cidrMask": 28, # "name": "Isolated", # "subnetType": aws_ec2.SubnetType.ISOLATED, # "reserved": True # } # ], gateway_endpoints={ "S3": aws_ec2.GatewayVpcEndpointOptions( service=aws_ec2.GatewayVpcEndpointAwsService.S3) }) dynamo_db_endpoint = vpc.add_gateway_endpoint( "DynamoDbEndpoint", service=aws_ec2.GatewayVpcEndpointAwsService.DYNAMODB) s3_bucket = s3.Bucket( self, "s3bucket", bucket_name="octember-bizcard-{region}-{account}".format( region=core.Aws.REGION, account=core.Aws.ACCOUNT_ID)) api = apigw.RestApi( self, "BizcardImageUploader", rest_api_name="BizcardImageUploader", description="This service serves uploading bizcard images into s3.", endpoint_types=[apigw.EndpointType.REGIONAL], binary_media_types=["image/png", "image/jpg"], deploy=True, deploy_options=apigw.StageOptions(stage_name="v1")) rest_api_role = aws_iam.Role( self, "ApiGatewayRoleForS3", role_name="ApiGatewayRoleForS3FullAccess", assumed_by=aws_iam.ServicePrincipal("apigateway.amazonaws.com"), managed_policies=[ aws_iam.ManagedPolicy.from_aws_managed_policy_name( "AmazonS3FullAccess") ]) list_objects_responses = [ apigw.IntegrationResponse( status_code="200", #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_apigateway/IntegrationResponse.html#aws_cdk.aws_apigateway.IntegrationResponse.response_parameters # The response parameters from the backend response that API Gateway sends to the method response. # Use the destination as the key and the source as the value: # - The destination must be an existing response parameter in the MethodResponse property. 
# - The source must be an existing method request parameter or a static value. response_parameters={ 'method.response.header.Timestamp': 'integration.response.header.Date', 'method.response.header.Content-Length': 'integration.response.header.Content-Length', 'method.response.header.Content-Type': 'integration.response.header.Content-Type' }), apigw.IntegrationResponse(status_code="400", selection_pattern="4\d{2}"), apigw.IntegrationResponse(status_code="500", selection_pattern="5\d{2}") ] list_objects_integration_options = apigw.IntegrationOptions( credentials_role=rest_api_role, integration_responses=list_objects_responses) get_s3_integration = apigw.AwsIntegration( service="s3", integration_http_method="GET", path='/', options=list_objects_integration_options) api.root.add_method( "GET", get_s3_integration, authorization_type=apigw.AuthorizationType.IAM, api_key_required=False, method_responses=[ apigw.MethodResponse( status_code="200", response_parameters={ 'method.response.header.Timestamp': False, 'method.response.header.Content-Length': False, 'method.response.header.Content-Type': False }, response_models={'application/json': apigw.EmptyModel()}), apigw.MethodResponse(status_code="400"), apigw.MethodResponse(status_code="500") ], request_parameters={'method.request.header.Content-Type': False}) get_s3_folder_integration_options = apigw.IntegrationOptions( credentials_role=rest_api_role, integration_responses=list_objects_responses, #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_apigateway/IntegrationOptions.html#aws_cdk.aws_apigateway.IntegrationOptions.request_parameters # Specify request parameters as key-value pairs (string-to-string mappings), with a destination as the key and a source as the value. # The source must be an existing method request parameter or a static value. 
request_parameters={ "integration.request.path.bucket": "method.request.path.folder" }) get_s3_folder_integration = apigw.AwsIntegration( service="s3", integration_http_method="GET", path="{bucket}", options=get_s3_folder_integration_options) s3_folder = api.root.add_resource('{folder}') s3_folder.add_method( "GET", get_s3_folder_integration, authorization_type=apigw.AuthorizationType.IAM, api_key_required=False, method_responses=[ apigw.MethodResponse( status_code="200", response_parameters={ 'method.response.header.Timestamp': False, 'method.response.header.Content-Length': False, 'method.response.header.Content-Type': False }, response_models={'application/json': apigw.EmptyModel()}), apigw.MethodResponse(status_code="400"), apigw.MethodResponse(status_code="500") ], request_parameters={ 'method.request.header.Content-Type': False, 'method.request.path.folder': True }) get_s3_item_integration_options = apigw.IntegrationOptions( credentials_role=rest_api_role, integration_responses=list_objects_responses, request_parameters={ "integration.request.path.bucket": "method.request.path.folder", "integration.request.path.object": "method.request.path.item" }) get_s3_item_integration = apigw.AwsIntegration( service="s3", integration_http_method="GET", path="{bucket}/{object}", options=get_s3_item_integration_options) s3_item = s3_folder.add_resource('{item}') s3_item.add_method( "GET", get_s3_item_integration, authorization_type=apigw.AuthorizationType.IAM, api_key_required=False, method_responses=[ apigw.MethodResponse( status_code="200", response_parameters={ 'method.response.header.Timestamp': False, 'method.response.header.Content-Length': False, 'method.response.header.Content-Type': False }, response_models={'application/json': apigw.EmptyModel()}), apigw.MethodResponse(status_code="400"), apigw.MethodResponse(status_code="500") ], request_parameters={ 'method.request.header.Content-Type': False, 'method.request.path.folder': True, 'method.request.path.item': True 
}) put_s3_item_integration_options = apigw.IntegrationOptions( credentials_role=rest_api_role, integration_responses=[ apigw.IntegrationResponse(status_code="200"), apigw.IntegrationResponse(status_code="400", selection_pattern="4\d{2}"), apigw.IntegrationResponse(status_code="500", selection_pattern="5\d{2}") ], request_parameters={ "integration.request.header.Content-Type": "method.request.header.Content-Type", "integration.request.path.bucket": "method.request.path.folder", "integration.request.path.object": "method.request.path.item" }) put_s3_item_integration = apigw.AwsIntegration( service="s3", integration_http_method="PUT", path="{bucket}/{object}", options=put_s3_item_integration_options) s3_item.add_method( "PUT", put_s3_item_integration, authorization_type=apigw.AuthorizationType.IAM, api_key_required=False, method_responses=[ apigw.MethodResponse( status_code="200", response_parameters={ 'method.response.header.Content-Type': False }, response_models={'application/json': apigw.EmptyModel()}), apigw.MethodResponse(status_code="400"), apigw.MethodResponse(status_code="500") ], request_parameters={ 'method.request.header.Content-Type': False, 'method.request.path.folder': True, 'method.request.path.item': True }) ddb_table = dynamodb.Table( self, "BizcardImageMetaInfoDdbTable", table_name="OctemberBizcardImgMeta", partition_key=dynamodb.Attribute( name="image_id", type=dynamodb.AttributeType.STRING), billing_mode=dynamodb.BillingMode.PROVISIONED, read_capacity=15, write_capacity=5) img_kinesis_stream = kinesis.Stream( self, "BizcardImagePath", stream_name="octember-bizcard-image") # create lambda function trigger_textract_lambda_fn = _lambda.Function( self, "TriggerTextExtractorFromImage", runtime=_lambda.Runtime.PYTHON_3_7, function_name="TriggerTextExtractorFromImage", handler="trigger_text_extract_from_s3_image.lambda_handler", description="Trigger to extract text from an image in S3", code=_lambda.Code.asset( 
"./src/main/python/TriggerTextExtractFromS3Image"), environment={ 'REGION_NAME': core.Aws.REGION, 'DDB_TABLE_NAME': ddb_table.table_name, 'KINESIS_STREAM_NAME': img_kinesis_stream.stream_name }, timeout=core.Duration.minutes(5)) ddb_table_rw_policy_statement = aws_iam.PolicyStatement( effect=aws_iam.Effect.ALLOW, resources=[ddb_table.table_arn], actions=[ "dynamodb:BatchGetItem", "dynamodb:Describe*", "dynamodb:List*", "dynamodb:GetItem", "dynamodb:Query", "dynamodb:Scan", "dynamodb:BatchWriteItem", "dynamodb:DeleteItem", "dynamodb:PutItem", "dynamodb:UpdateItem", "dax:Describe*", "dax:List*", "dax:GetItem", "dax:BatchGetItem", "dax:Query", "dax:Scan", "dax:BatchWriteItem", "dax:DeleteItem", "dax:PutItem", "dax:UpdateItem" ]) trigger_textract_lambda_fn.add_to_role_policy( ddb_table_rw_policy_statement) trigger_textract_lambda_fn.add_to_role_policy( aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW, resources=[img_kinesis_stream.stream_arn], actions=[ "kinesis:Get*", "kinesis:List*", "kinesis:Describe*", "kinesis:PutRecord", "kinesis:PutRecords" ])) # assign notification for the s3 event type (ex: OBJECT_CREATED) s3_event_filter = s3.NotificationKeyFilter(prefix="bizcard-raw-img/", suffix=".jpg") s3_event_source = S3EventSource(s3_bucket, events=[s3.EventType.OBJECT_CREATED], filters=[s3_event_filter]) trigger_textract_lambda_fn.add_event_source(s3_event_source) #XXX: https://github.com/aws/aws-cdk/issues/2240 # To avoid to create extra Lambda Functions with names like LogRetentionaae0aa3c5b4d4f87b02d85b201efdd8a # if log_retention=aws_logs.RetentionDays.THREE_DAYS is added to the constructor props log_group = aws_logs.LogGroup( self, "TriggerTextractLogGroup", log_group_name="/aws/lambda/TriggerTextExtractorFromImage", retention=aws_logs.RetentionDays.THREE_DAYS) log_group.grant_write(trigger_textract_lambda_fn) text_kinesis_stream = kinesis.Stream( self, "BizcardTextData", stream_name="octember-bizcard-txt") textract_lambda_fn = _lambda.Function( self, 
"GetTextFromImage", runtime=_lambda.Runtime.PYTHON_3_7, function_name="GetTextFromImage", handler="get_text_from_s3_image.lambda_handler", description="extract text from an image in S3", code=_lambda.Code.asset("./src/main/python/GetTextFromS3Image"), environment={ 'REGION_NAME': core.Aws.REGION, 'DDB_TABLE_NAME': ddb_table.table_name, 'KINESIS_STREAM_NAME': text_kinesis_stream.stream_name }, timeout=core.Duration.minutes(5)) textract_lambda_fn.add_to_role_policy(ddb_table_rw_policy_statement) textract_lambda_fn.add_to_role_policy( aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW, resources=[text_kinesis_stream.stream_arn], actions=[ "kinesis:Get*", "kinesis:List*", "kinesis:Describe*", "kinesis:PutRecord", "kinesis:PutRecords" ])) textract_lambda_fn.add_to_role_policy( aws_iam.PolicyStatement( **{ "effect": aws_iam.Effect.ALLOW, "resources": [ s3_bucket.bucket_arn, "{}/*".format( s3_bucket.bucket_arn) ], "actions": [ "s3:AbortMultipartUpload", "s3:GetBucketLocation", "s3:GetObject", "s3:ListBucket", "s3:ListBucketMultipartUploads", "s3:PutObject" ] })) textract_lambda_fn.add_to_role_policy( aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW, resources=["*"], actions=["textract:*"])) img_kinesis_event_source = KinesisEventSource( img_kinesis_stream, batch_size=100, starting_position=_lambda.StartingPosition.LATEST) textract_lambda_fn.add_event_source(img_kinesis_event_source) log_group = aws_logs.LogGroup( self, "GetTextFromImageLogGroup", log_group_name="/aws/lambda/GetTextFromImage", retention=aws_logs.RetentionDays.THREE_DAYS) log_group.grant_write(textract_lambda_fn) sg_use_bizcard_es = aws_ec2.SecurityGroup( self, "BizcardSearchClientSG", vpc=vpc, allow_all_outbound=True, description= 'security group for octember bizcard elasticsearch client', security_group_name='use-octember-bizcard-es') core.Tags.of(sg_use_bizcard_es).add('Name', 'use-octember-bizcard-es') sg_bizcard_es = aws_ec2.SecurityGroup( self, "BizcardSearchSG", vpc=vpc, allow_all_outbound=True, 
description='security group for octember bizcard elasticsearch', security_group_name='octember-bizcard-es') core.Tags.of(sg_bizcard_es).add('Name', 'octember-bizcard-es') sg_bizcard_es.add_ingress_rule(peer=sg_bizcard_es, connection=aws_ec2.Port.all_tcp(), description='octember-bizcard-es') sg_bizcard_es.add_ingress_rule(peer=sg_use_bizcard_es, connection=aws_ec2.Port.all_tcp(), description='use-octember-bizcard-es') sg_ssh_access = aws_ec2.SecurityGroup( self, "BastionHostSG", vpc=vpc, allow_all_outbound=True, description='security group for bastion host', security_group_name='octember-bastion-host-sg') core.Tags.of(sg_ssh_access).add('Name', 'octember-bastion-host') sg_ssh_access.add_ingress_rule(peer=aws_ec2.Peer.any_ipv4(), connection=aws_ec2.Port.tcp(22), description='ssh access') bastion_host = aws_ec2.BastionHostLinux( self, "BastionHost", vpc=vpc, instance_type=aws_ec2.InstanceType('t3.nano'), security_group=sg_ssh_access, subnet_selection=aws_ec2.SubnetSelection( subnet_type=aws_ec2.SubnetType.PUBLIC)) bastion_host.instance.add_security_group(sg_use_bizcard_es) #XXX: aws cdk elastsearch example - https://github.com/aws/aws-cdk/issues/2873 es_cfn_domain = aws_elasticsearch.CfnDomain( self, 'BizcardSearch', elasticsearch_cluster_config={ "dedicatedMasterCount": 3, "dedicatedMasterEnabled": True, "dedicatedMasterType": "t2.medium.elasticsearch", "instanceCount": 2, "instanceType": "t2.medium.elasticsearch", "zoneAwarenessEnabled": True }, ebs_options={ "ebsEnabled": True, "volumeSize": 10, "volumeType": "gp2" }, domain_name="octember-bizcard", elasticsearch_version="7.9", encryption_at_rest_options={"enabled": False}, access_policies={ "Version": "2012-10-17", "Statement": [{ "Effect": "Allow", "Principal": { "AWS": "*" }, "Action": ["es:Describe*", "es:List*", "es:Get*", "es:ESHttp*"], "Resource": self.format_arn(service="es", resource="domain", resource_name="octember-bizcard/*") }] }, snapshot_options={"automatedSnapshotStartHour": 17}, vpc_options={ 
"securityGroupIds": [sg_bizcard_es.security_group_id], "subnetIds": vpc.select_subnets( subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids }) core.Tags.of(es_cfn_domain).add('Name', 'octember-bizcard-es') s3_lib_bucket_name = self.node.try_get_context("lib_bucket_name") #XXX: https://github.com/aws/aws-cdk/issues/1342 s3_lib_bucket = s3.Bucket.from_bucket_name(self, id, s3_lib_bucket_name) es_lib_layer = _lambda.LayerVersion( self, "ESLib", layer_version_name="es-lib", compatible_runtimes=[_lambda.Runtime.PYTHON_3_7], code=_lambda.Code.from_bucket(s3_lib_bucket, "var/octember-es-lib.zip")) redis_lib_layer = _lambda.LayerVersion( self, "RedisLib", layer_version_name="redis-lib", compatible_runtimes=[_lambda.Runtime.PYTHON_3_7], code=_lambda.Code.from_bucket(s3_lib_bucket, "var/octember-redis-lib.zip")) #XXX: Deploy lambda in VPC - https://github.com/aws/aws-cdk/issues/1342 upsert_to_es_lambda_fn = _lambda.Function( self, "UpsertBizcardToES", runtime=_lambda.Runtime.PYTHON_3_7, function_name="UpsertBizcardToElasticSearch", handler="upsert_bizcard_to_es.lambda_handler", description="Upsert bizcard text into elasticsearch", code=_lambda.Code.asset("./src/main/python/UpsertBizcardToES"), environment={ 'ES_HOST': es_cfn_domain.attr_domain_endpoint, 'ES_INDEX': 'octember_bizcard', 'ES_TYPE': 'bizcard' }, timeout=core.Duration.minutes(5), layers=[es_lib_layer], security_groups=[sg_use_bizcard_es], vpc=vpc) text_kinesis_event_source = KinesisEventSource( text_kinesis_stream, batch_size=99, starting_position=_lambda.StartingPosition.LATEST) upsert_to_es_lambda_fn.add_event_source(text_kinesis_event_source) log_group = aws_logs.LogGroup( self, "UpsertBizcardToESLogGroup", log_group_name="/aws/lambda/UpsertBizcardToElasticSearch", retention=aws_logs.RetentionDays.THREE_DAYS) log_group.grant_write(upsert_to_es_lambda_fn) firehose_role_policy_doc = aws_iam.PolicyDocument() firehose_role_policy_doc.add_statements( aws_iam.PolicyStatement( **{ "effect": aws_iam.Effect.ALLOW, 
"resources": [ s3_bucket.bucket_arn, "{}/*".format( s3_bucket.bucket_arn) ], "actions": [ "s3:AbortMultipartUpload", "s3:GetBucketLocation", "s3:GetObject", "s3:ListBucket", "s3:ListBucketMultipartUploads", "s3:PutObject" ] })) firehose_role_policy_doc.add_statements( aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW, resources=["*"], actions=[ "glue:GetTable", "glue:GetTableVersion", "glue:GetTableVersions" ])) firehose_role_policy_doc.add_statements( aws_iam.PolicyStatement(effect=aws_iam.Effect.ALLOW, resources=[text_kinesis_stream.stream_arn], actions=[ "kinesis:DescribeStream", "kinesis:GetShardIterator", "kinesis:GetRecords" ])) firehose_log_group_name = "/aws/kinesisfirehose/octember-bizcard-txt-to-s3" firehose_role_policy_doc.add_statements( aws_iam.PolicyStatement( effect=aws_iam.Effect.ALLOW, #XXX: The ARN will be formatted as follows: # arn:{partition}:{service}:{region}:{account}:{resource}{sep}}{resource-name} resources=[ self.format_arn(service="logs", resource="log-group", resource_name="{}:log-stream:*".format( firehose_log_group_name), sep=":") ], actions=["logs:PutLogEvents"])) firehose_role = aws_iam.Role( self, "FirehoseDeliveryRole", role_name="FirehoseDeliveryRole", assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"), #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221 inline_policies={"firehose_role_policy": firehose_role_policy_doc}) bizcard_text_to_s3_delivery_stream = aws_kinesisfirehose.CfnDeliveryStream( self, "BizcardTextToS3", delivery_stream_name="octember-bizcard-txt-to-s3", delivery_stream_type="KinesisStreamAsSource", kinesis_stream_source_configuration={ "kinesisStreamArn": text_kinesis_stream.stream_arn, "roleArn": firehose_role.role_arn }, extended_s3_destination_configuration={ "bucketArn": s3_bucket.bucket_arn, "bufferingHints": { "intervalInSeconds": 60, "sizeInMBs": 1 }, "cloudWatchLoggingOptions": { "enabled": True, "logGroupName": firehose_log_group_name, "logStreamName": 
"S3Delivery" }, "compressionFormat": "GZIP", "prefix": "bizcard-text/", "roleArn": firehose_role.role_arn }) sg_use_bizcard_es_cache = aws_ec2.SecurityGroup( self, "BizcardSearchCacheClientSG", vpc=vpc, allow_all_outbound=True, description= 'security group for octember bizcard search query cache client', security_group_name='use-octember-bizcard-es-cache') core.Tags.of(sg_use_bizcard_es_cache).add( 'Name', 'use-octember-bizcard-es-cache') sg_bizcard_es_cache = aws_ec2.SecurityGroup( self, "BizcardSearchCacheSG", vpc=vpc, allow_all_outbound=True, description= 'security group for octember bizcard search query cache', security_group_name='octember-bizcard-es-cache') core.Tags.of(sg_bizcard_es_cache).add('Name', 'octember-bizcard-es-cache') sg_bizcard_es_cache.add_ingress_rule( peer=sg_use_bizcard_es_cache, connection=aws_ec2.Port.tcp(6379), description='use-octember-bizcard-es-cache') es_query_cache_subnet_group = aws_elasticache.CfnSubnetGroup( self, "QueryCacheSubnetGroup", description="subnet group for octember-bizcard-es-cache", subnet_ids=vpc.select_subnets( subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids, cache_subnet_group_name='octember-bizcard-es-cache') es_query_cache = aws_elasticache.CfnCacheCluster( self, "BizcardSearchQueryCache", cache_node_type="cache.t3.small", num_cache_nodes=1, engine="redis", engine_version="5.0.5", auto_minor_version_upgrade=False, cluster_name="octember-bizcard-es-cache", snapshot_retention_limit=3, snapshot_window="17:00-19:00", preferred_maintenance_window="mon:19:00-mon:20:30", #XXX: Do not use referece for "cache_subnet_group_name" - https://github.com/aws/aws-cdk/issues/3098 #cache_subnet_group_name=es_query_cache_subnet_group.cache_subnet_group_name, # Redis cluster goes to wrong VPC cache_subnet_group_name='octember-bizcard-es-cache', vpc_security_group_ids=[sg_bizcard_es_cache.security_group_id]) #XXX: If you're going to launch your cluster in an Amazon VPC, you need to create a subnet group before you start creating a 
cluster. # https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-elasticache-cache-cluster.html#cfn-elasticache-cachecluster-cachesubnetgroupname es_query_cache.add_depends_on(es_query_cache_subnet_group) #XXX: add more than 2 security groups # https://github.com/aws/aws-cdk/blob/ea10f0d141a48819ec0000cd7905feda993870a9/packages/%40aws-cdk/aws-lambda/lib/function.ts#L387 # https://github.com/aws/aws-cdk/issues/1555 # https://github.com/aws/aws-cdk/pull/5049 bizcard_search_lambda_fn = _lambda.Function( self, "BizcardSearchServer", runtime=_lambda.Runtime.PYTHON_3_7, function_name="BizcardSearchProxy", handler="es_search_bizcard.lambda_handler", description="Proxy server to search bizcard text", code=_lambda.Code.asset("./src/main/python/SearchBizcard"), environment={ 'ES_HOST': es_cfn_domain.attr_domain_endpoint, 'ES_INDEX': 'octember_bizcard', 'ES_TYPE': 'bizcard', 'ELASTICACHE_HOST': es_query_cache.attr_redis_endpoint_address }, timeout=core.Duration.minutes(1), layers=[es_lib_layer, redis_lib_layer], security_groups=[sg_use_bizcard_es, sg_use_bizcard_es_cache], vpc=vpc) #XXX: create API Gateway + LambdaProxy search_api = apigw.LambdaRestApi( self, "BizcardSearchAPI", handler=bizcard_search_lambda_fn, proxy=False, rest_api_name="BizcardSearch", description="This service serves searching bizcard text.", endpoint_types=[apigw.EndpointType.REGIONAL], deploy=True, deploy_options=apigw.StageOptions(stage_name="v1")) bizcard_search = search_api.root.add_resource('search') bizcard_search.add_method( "GET", method_responses=[ apigw.MethodResponse( status_code="200", response_models={'application/json': apigw.EmptyModel()}), apigw.MethodResponse(status_code="400"), apigw.MethodResponse(status_code="500") ]) sg_use_bizcard_graph_db = aws_ec2.SecurityGroup( self, "BizcardGraphDbClientSG", vpc=vpc, allow_all_outbound=True, description='security group for octember bizcard graph db client', security_group_name='use-octember-bizcard-neptune') 
core.Tags.of(sg_use_bizcard_graph_db).add( 'Name', 'use-octember-bizcard-neptune') sg_bizcard_graph_db = aws_ec2.SecurityGroup( self, "BizcardGraphDbSG", vpc=vpc, allow_all_outbound=True, description='security group for octember bizcard graph db', security_group_name='octember-bizcard-neptune') core.Tags.of(sg_bizcard_graph_db).add('Name', 'octember-bizcard-neptune') sg_bizcard_graph_db.add_ingress_rule( peer=sg_bizcard_graph_db, connection=aws_ec2.Port.tcp(8182), description='octember-bizcard-neptune') sg_bizcard_graph_db.add_ingress_rule( peer=sg_use_bizcard_graph_db, connection=aws_ec2.Port.tcp(8182), description='use-octember-bizcard-neptune') bizcard_graph_db_subnet_group = aws_neptune.CfnDBSubnetGroup( self, "NeptuneSubnetGroup", db_subnet_group_description= "subnet group for octember-bizcard-neptune", subnet_ids=vpc.select_subnets( subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids, db_subnet_group_name='octember-bizcard-neptune') bizcard_graph_db = aws_neptune.CfnDBCluster( self, "BizcardGraphDB", availability_zones=vpc.availability_zones, db_subnet_group_name=bizcard_graph_db_subnet_group. 
db_subnet_group_name, db_cluster_identifier="octember-bizcard", backup_retention_period=1, preferred_backup_window="08:45-09:15", preferred_maintenance_window="sun:18:00-sun:18:30", vpc_security_group_ids=[sg_bizcard_graph_db.security_group_id]) bizcard_graph_db.add_depends_on(bizcard_graph_db_subnet_group) bizcard_graph_db_instance = aws_neptune.CfnDBInstance( self, "BizcardGraphDBInstance", db_instance_class="db.r5.large", allow_major_version_upgrade=False, auto_minor_version_upgrade=False, availability_zone=vpc.availability_zones[0], db_cluster_identifier=bizcard_graph_db.db_cluster_identifier, db_instance_identifier="octember-bizcard", preferred_maintenance_window="sun:18:00-sun:18:30") bizcard_graph_db_instance.add_depends_on(bizcard_graph_db) bizcard_graph_db_replica_instance = aws_neptune.CfnDBInstance( self, "BizcardGraphDBReplicaInstance", db_instance_class="db.r5.large", allow_major_version_upgrade=False, auto_minor_version_upgrade=False, availability_zone=vpc.availability_zones[-1], db_cluster_identifier=bizcard_graph_db.db_cluster_identifier, db_instance_identifier="octember-bizcard-replica", preferred_maintenance_window="sun:18:00-sun:18:30") bizcard_graph_db_replica_instance.add_depends_on(bizcard_graph_db) bizcard_graph_db_replica_instance.add_depends_on( bizcard_graph_db_instance) gremlinpython_lib_layer = _lambda.LayerVersion( self, "GremlinPythonLib", layer_version_name="gremlinpython-lib", compatible_runtimes=[_lambda.Runtime.PYTHON_3_7], code=_lambda.Code.from_bucket( s3_lib_bucket, "var/octember-gremlinpython-lib.zip")) #XXX: https://github.com/aws/aws-cdk/issues/1342 upsert_to_neptune_lambda_fn = _lambda.Function( self, "UpsertBizcardToGraphDB", runtime=_lambda.Runtime.PYTHON_3_7, function_name="UpsertBizcardToNeptune", handler="upsert_bizcard_to_graph_db.lambda_handler", description="Upsert bizcard into neptune", code=_lambda.Code.asset( "./src/main/python/UpsertBizcardToGraphDB"), environment={ 'REGION_NAME': core.Aws.REGION, 
'NEPTUNE_ENDPOINT': bizcard_graph_db.attr_endpoint, 'NEPTUNE_PORT': bizcard_graph_db.attr_port }, timeout=core.Duration.minutes(5), layers=[gremlinpython_lib_layer], security_groups=[sg_use_bizcard_graph_db], vpc=vpc) upsert_to_neptune_lambda_fn.add_event_source(text_kinesis_event_source) log_group = aws_logs.LogGroup( self, "UpsertBizcardToGraphDBLogGroup", log_group_name="/aws/lambda/UpsertBizcardToNeptune", retention=aws_logs.RetentionDays.THREE_DAYS) log_group.grant_write(upsert_to_neptune_lambda_fn) sg_use_bizcard_neptune_cache = aws_ec2.SecurityGroup( self, "BizcardNeptuneCacheClientSG", vpc=vpc, allow_all_outbound=True, description= 'security group for octember bizcard recommendation query cache client', security_group_name='use-octember-bizcard-neptune-cache') core.Tags.of(sg_use_bizcard_neptune_cache).add( 'Name', 'use-octember-bizcard-es-cache') sg_bizcard_neptune_cache = aws_ec2.SecurityGroup( self, "BizcardNeptuneCacheSG", vpc=vpc, allow_all_outbound=True, description= 'security group for octember bizcard recommendation query cache', security_group_name='octember-bizcard-neptune-cache') core.Tags.of(sg_bizcard_neptune_cache).add( 'Name', 'octember-bizcard-neptune-cache') sg_bizcard_neptune_cache.add_ingress_rule( peer=sg_use_bizcard_neptune_cache, connection=aws_ec2.Port.tcp(6379), description='use-octember-bizcard-neptune-cache') recomm_query_cache_subnet_group = aws_elasticache.CfnSubnetGroup( self, "RecommQueryCacheSubnetGroup", description="subnet group for octember-bizcard-neptune-cache", subnet_ids=vpc.select_subnets( subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids, cache_subnet_group_name='octember-bizcard-neptune-cache') recomm_query_cache = aws_elasticache.CfnCacheCluster( self, "BizcardRecommQueryCache", cache_node_type="cache.t3.small", num_cache_nodes=1, engine="redis", engine_version="5.0.5", auto_minor_version_upgrade=False, cluster_name="octember-bizcard-neptune-cache", snapshot_retention_limit=3, snapshot_window="17:00-19:00", 
preferred_maintenance_window="mon:19:00-mon:20:30", #XXX: Do not use referece for "cache_subnet_group_name" - https://github.com/aws/aws-cdk/issues/3098 #cache_subnet_group_name=recomm_query_cache_subnet_group.cache_subnet_group_name, # Redis cluster goes to wrong VPC cache_subnet_group_name='octember-bizcard-neptune-cache', vpc_security_group_ids=[ sg_bizcard_neptune_cache.security_group_id ]) recomm_query_cache.add_depends_on(recomm_query_cache_subnet_group) bizcard_recomm_lambda_fn = _lambda.Function( self, "BizcardRecommender", runtime=_lambda.Runtime.PYTHON_3_7, function_name="BizcardRecommender", handler="neptune_recommend_bizcard.lambda_handler", description="This service serves PYMK(People You May Know).", code=_lambda.Code.asset("./src/main/python/RecommendBizcard"), environment={ 'REGION_NAME': core.Aws.REGION, 'NEPTUNE_ENDPOINT': bizcard_graph_db.attr_read_endpoint, 'NEPTUNE_PORT': bizcard_graph_db.attr_port, 'ELASTICACHE_HOST': recomm_query_cache.attr_redis_endpoint_address }, timeout=core.Duration.minutes(1), layers=[gremlinpython_lib_layer, redis_lib_layer], security_groups=[ sg_use_bizcard_graph_db, sg_use_bizcard_neptune_cache ], vpc=vpc) #XXX: create API Gateway + LambdaProxy recomm_api = apigw.LambdaRestApi( self, "BizcardRecommendAPI", handler=bizcard_recomm_lambda_fn, proxy=False, rest_api_name="BizcardRecommend", description="This service serves PYMK(People You May Know).", endpoint_types=[apigw.EndpointType.REGIONAL], deploy=True, deploy_options=apigw.StageOptions(stage_name="v1")) bizcard_recomm = recomm_api.root.add_resource('pymk') bizcard_recomm.add_method( "GET", method_responses=[ apigw.MethodResponse( status_code="200", response_models={'application/json': apigw.EmptyModel()}), apigw.MethodResponse(status_code="400"), apigw.MethodResponse(status_code="500") ]) sagemaker_notebook_role_policy_doc = aws_iam.PolicyDocument() sagemaker_notebook_role_policy_doc.add_statements( aws_iam.PolicyStatement( **{ "effect": aws_iam.Effect.ALLOW, 
"resources": [ "arn:aws:s3:::aws-neptune-notebook", "arn:aws:s3:::aws-neptune-notebook/*" ], "actions": ["s3:GetObject", "s3:ListBucket"] })) sagemaker_notebook_role_policy_doc.add_statements( aws_iam.PolicyStatement( **{ "effect": aws_iam.Effect.ALLOW, "resources": [ "arn:aws:neptune-db:{region}:{account}:{cluster_id}/*". format(region=core.Aws.REGION, account=core.Aws.ACCOUNT_ID, cluster_id=bizcard_graph_db. attr_cluster_resource_id) ], "actions": ["neptune-db:connect"] })) sagemaker_notebook_role = aws_iam.Role( self, 'SageMakerNotebookForNeptuneWorkbenchRole', role_name='AWSNeptuneNotebookRole-OctemberBizcard', assumed_by=aws_iam.ServicePrincipal('sagemaker.amazonaws.com'), #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221 inline_policies={ 'AWSNeptuneNotebook': sagemaker_notebook_role_policy_doc }) neptune_wb_lifecycle_content = '''#!/bin/bash sudo -u ec2-user -i <<'EOF' echo "export GRAPH_NOTEBOOK_AUTH_MODE=DEFAULT" >> ~/.bashrc echo "export GRAPH_NOTEBOOK_HOST={NeptuneClusterEndpoint}" >> ~/.bashrc echo "export GRAPH_NOTEBOOK_PORT={NeptuneClusterPort}" >> ~/.bashrc echo "export NEPTUNE_LOAD_FROM_S3_ROLE_ARN=''" >> ~/.bashrc echo "export AWS_REGION={AWS_Region}" >> ~/.bashrc aws s3 cp s3://aws-neptune-notebook/graph_notebook.tar.gz /tmp/graph_notebook.tar.gz rm -rf /tmp/graph_notebook tar -zxvf /tmp/graph_notebook.tar.gz -C /tmp /tmp/graph_notebook/install.sh EOF '''.format(NeptuneClusterEndpoint=bizcard_graph_db.attr_endpoint, NeptuneClusterPort=bizcard_graph_db.attr_port, AWS_Region=core.Aws.REGION) neptune_wb_lifecycle_config_prop = aws_sagemaker.CfnNotebookInstanceLifecycleConfig.NotebookInstanceLifecycleHookProperty( content=core.Fn.base64(neptune_wb_lifecycle_content)) neptune_wb_lifecycle_config = aws_sagemaker.CfnNotebookInstanceLifecycleConfig( self, 'NpetuneWorkbenchLifeCycleConfig', notebook_instance_lifecycle_config_name= 'AWSNeptuneWorkbenchOctemberBizcardLCConfig', on_start=[neptune_wb_lifecycle_config_prop]) 
neptune_workbench = aws_sagemaker.CfnNotebookInstance( self, 'NeptuneWorkbench', instance_type='ml.t2.medium', role_arn=sagemaker_notebook_role.role_arn, lifecycle_config_name=neptune_wb_lifecycle_config. notebook_instance_lifecycle_config_name, notebook_instance_name='OctemberBizcard-NeptuneWorkbench', root_access='Disabled', security_group_ids=[sg_use_bizcard_graph_db.security_group_name], subnet_id=bizcard_graph_db_subnet_group.subnet_ids[0])
def __init__(self, scope: core.Construct, construct_id: str, **kwargs) -> None:
    """Provision an Amazon DocumentDB cluster plus a SageMaker notebook workbench.

    Creates client/server security groups in an existing VPC, a 3-instance
    DocumentDB cluster, an IAM role allowed to read the cluster's Secrets
    Manager secret, a notebook lifecycle config that installs pymongo/xgboost
    and downloads the RDS CA bundle, the notebook instance itself, and
    CloudFormation outputs for the key identifiers.
    """
    super().__init__(scope, construct_id, **kwargs)

    # Look up an existing VPC (pass -c vpc_name=...); falls back to the default VPC.
    vpc_name = self.node.try_get_context('vpc_name')
    vpc = aws_ec2.Vpc.from_lookup(self,
                                  'ExistingVPC',
                                  is_default=True,
                                  vpc_name=vpc_name)

    # Security group attached to clients that need to reach DocumentDB.
    sg_use_docdb = aws_ec2.SecurityGroup(
        self,
        'DocDBClientSG',
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for documentdb client',
        security_group_name='use-docdb-sg')
    core.Tags.of(sg_use_docdb).add('Name', 'docdb-client-sg')

    # Server-side security group; only the client group gets ingress.
    sg_docdb_server = aws_ec2.SecurityGroup(
        self,
        'DocDBServerSG',
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for documentdb',
        security_group_name='{stack_name}-server-sg-docdb'.format(
            stack_name=self.stack_name))
    # 27017 is the MongoDB-compatible port DocumentDB listens on.
    sg_docdb_server.add_ingress_rule(peer=sg_use_docdb,
                                     connection=aws_ec2.Port.tcp(27017),
                                     description='docdb-client-sg')
    core.Tags.of(sg_docdb_server).add('Name', 'docdb-server-sg')

    # Cluster name comes from the CDK context, defaulting to the stack name.
    docdb_cluster_name = self.node.try_get_context('docdb_cluster_name') or self.stack_name

    docdb_cluster = aws_docdb.DatabaseCluster(
        self,
        'DocDB',
        db_cluster_name=docdb_cluster_name,
        master_user=aws_docdb.Login(username='******'),
        instance_props={
            'instance_type':
            aws_ec2.InstanceType.of(aws_ec2.InstanceClass.MEMORY5,
                                    aws_ec2.InstanceSize.LARGE),
            'vpc_subnets': {
                'subnet_type': aws_ec2.SubnetType.PRIVATE
            },
            'vpc': vpc,
            'security_group': sg_docdb_server
        },
        instances=3,
        preferred_maintenance_window='sun:18:00-sun:18:30',
        removal_policy=core.RemovalPolicy.RETAIN)

    #[Warning at /docdb-sm/Database/RotationSingleUser/SecurityGroup] Ignoring Egress rule since 'allowAllOutbound' is set to true; To add customize rules, set allowAllOutbound=false on the SecurityGroup
    #docdb_cluster.add_rotation_single_user()

    # Allow the notebook role to read the DocumentDB credentials secret only.
    sagemaker_notebook_role_policy_doc = aws_iam.PolicyDocument()
    sagemaker_notebook_role_policy_doc.add_statements(
        aws_iam.PolicyStatement(
            **{
                "effect": aws_iam.Effect.ALLOW,
                "resources": [docdb_cluster.secret.secret_full_arn],
                "actions": ["secretsmanager:GetSecretValue"]
            }))

    sagemaker_notebook_role = aws_iam.Role(
        self,
        'SageMakerNotebookRoleForDocDB',
        role_name='AWSSageMakerNotebookRoleForDocDB',
        assumed_by=aws_iam.ServicePrincipal('sagemaker.amazonaws.com'),
        inline_policies={
            'DocumentDBSecretPolicy': sagemaker_notebook_role_policy_doc
        })

    # On-start script: install client libraries and fetch the RDS CA bundle
    # plus the example notebook into the SageMaker home directory.
    docdb_wb_lifecycle_content = '''#!/bin/bash
sudo -u ec2-user -i <<'EOF'
echo "export AWS_REGION={AWS_Region}" >> ~/.bashrc
source /home/ec2-user/anaconda3/bin/activate python3
pip install --upgrade pymongo
pip install --upgrade xgboost
source /home/ec2-user/anaconda3/bin/deactivate
cd /home/ec2-user/SageMaker
wget https://s3.amazonaws.com/rds-downloads/rds-combined-ca-bundle.pem
wget https://raw.githubusercontent.com/aws-samples/documentdb-sagemaker-example/main/script.ipynb
EOF
'''.format(AWS_Region=core.Aws.REGION)

    docdb_wb_lifecycle_config_prop = aws_sagemaker.CfnNotebookInstanceLifecycleConfig.NotebookInstanceLifecycleHookProperty(
        content=core.Fn.base64(docdb_wb_lifecycle_content))

    docdb_wb_lifecycle_config = aws_sagemaker.CfnNotebookInstanceLifecycleConfig(
        self,
        'DocDBWorkbenchLifeCycleConfig',
        notebook_instance_lifecycle_config_name='DocDBWorkbenchLifeCycleConfig',
        on_start=[docdb_wb_lifecycle_config_prop])

    docdb_workbench = aws_sagemaker.CfnNotebookInstance(
        self,
        'DocDBWorkbench',
        instance_type='ml.t3.xlarge',
        role_arn=sagemaker_notebook_role.role_arn,
        lifecycle_config_name=docdb_wb_lifecycle_config.
        notebook_instance_lifecycle_config_name,
        notebook_instance_name='DocDBWorkbench',
        root_access='Disabled',
        # BUG FIX: SecurityGroupIds expects security group IDs; the original
        # passed security_group_name, which is not a valid ID for a VPC
        # security group.
        security_group_ids=[sg_use_docdb.security_group_id],
        subnet_id=vpc.select_subnets(
            subnet_type=aws_ec2.SubnetType.PRIVATE).subnet_ids[0])

    core.CfnOutput(self, 'StackName',
                   value=self.stack_name,
                   export_name='StackName')
    core.CfnOutput(self, 'VpcId', value=vpc.vpc_id, export_name='VpcId')
    core.CfnOutput(self, 'DocumentDBClusterName',
                   value=docdb_cluster.cluster_identifier,
                   export_name='DocumentDBClusterName')
    core.CfnOutput(self, 'DocumentDBCluster',
                   value=docdb_cluster.cluster_endpoint.socket_address,
                   export_name='DocumentDBCluster')
    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_secretsmanager/README.html
    # secret_arn="arn:aws:secretsmanager:<region>:<account-id-number>:secret:<secret-name>-<random-6-characters>",
    core.CfnOutput(self, 'DocDBSecret',
                   value=docdb_cluster.secret.secret_name,
                   export_name='DocDBSecret')
    core.CfnOutput(self, 'SageMakerRole',
                   value=sagemaker_notebook_role.role_name,
                   export_name='SageMakerRole')
    core.CfnOutput(self, 'SageMakerNotebookInstance',
                   value=docdb_workbench.notebook_instance_name,
                   export_name='SageMakerNotebookInstance')
    core.CfnOutput(self, 'SageMakerNotebookInstanceLifecycleConfig',
                   value=docdb_workbench.lifecycle_config_name,
                   export_name='SageMakerNotebookInstanceLifecycleConfig')
def __init__(self, scope: core.Construct, id: str, *, prefix: str,
             environment: str, configuration, **kwargs):
    """Create a SageMaker notebook instance with lifecycle scripts and an IAM role.

    :param scope: Stack class, used by CDK.
    :param id: ID of the construct, used by CDK.
    :param prefix: Prefix of the construct, used for naming purposes.
    :param environment: Environment of the construct, used for naming purposes.
    :param configuration: Configuration of the construct. In this case SAGEMAKER_NOTEBOOK.
    :param kwargs: Other parameters that could be used by the construct.
    """
    super().__init__(scope, id, **kwargs)
    self.prefix = prefix
    self.environment_ = environment
    self._configuration = configuration

    # Validating that the payload passed is correct
    validate_configuration(configuration_schema=SAGEMAKER_NOTEBOOK_SCHEMA,
                           configuration_received=configuration)

    base_name = self._configuration["name"]

    # Resolve the on-create script: use the configured file when it exists,
    # otherwise fall back to the bundled default script.
    # NOTE(review): `file_path` is expected to be a module-level constant
    # pointing at the package root — confirm against the surrounding file.
    on_create_list = list()
    if validate_file(file_path=self._configuration["scripts"]["on_create"]):
        on_create_file = self._configuration["scripts"]["on_create"]
    else:
        on_create_file = file_path + "/scripts/iot_analytics_notebook/on_create.sh"

    with open(on_create_file) as on_create:
        on_create_contents = {"content": core.Fn.base64(on_create.read())}
    on_create_list.append(on_create_contents)

    # Resolve the on-start script the same way.
    # BUG FIX: the original validated the "on_create" entry here (copy-paste
    # mistake), so a missing/invalid on_start script was never detected.
    on_start_list = list()
    if validate_file(file_path=self._configuration["scripts"]["on_start"]):
        on_start_file = self._configuration["scripts"]["on_start"]
    else:
        on_start_file = file_path + "/scripts/iot_analytics_notebook/on_start.sh"

    with open(on_start_file) as on_start:
        on_start_contents = {"content": core.Fn.base64(on_start.read())}
    on_start_list.append(on_start_contents)

    # Lifecycle configuration that runs the scripts above.
    lifecycle_configuration_name = self.prefix + "-" + base_name + "lifecycle-" + self.environment_
    self._lifecycle_configuration = sagemaker.CfnNotebookInstanceLifecycleConfig(
        self,
        id=lifecycle_configuration_name,
        notebook_instance_lifecycle_config_name=lifecycle_configuration_name,
        on_create=on_create_list,
        on_start=on_start_list,
    )

    # Execution role assumed by the notebook instance.
    role_name = self.prefix + "_" + base_name + "sagemaker_role_" + self.environment_
    self._role = iam.Role(
        self,
        id=role_name,
        role_name=role_name,
        assumed_by=iam.ServicePrincipal(service="sagemaker.amazonaws.com"))
    managed_policy = iam.ManagedPolicy.from_aws_managed_policy_name(
        managed_policy_name="AmazonSageMakerFullAccess")
    self._role.add_managed_policy(policy=managed_policy)

    # Inline policy granting the ECR and S3 permissions from SAGEMAKER_POLICY.
    policy_name = self.prefix + "_" + base_name + "sagemaker_policy" + self.environment_
    ecr_statement = iam.PolicyStatement(
        actions=SAGEMAKER_POLICY["ecr_actions"],
        resources=SAGEMAKER_POLICY["ecr_resources"])
    s3_statement = iam.PolicyStatement(
        actions=SAGEMAKER_POLICY["s3_actions"],
        resources=SAGEMAKER_POLICY["s3_resources"])
    policy = iam.Policy(self,
                        id=policy_name,
                        policy_name=policy_name,
                        statements=[ecr_statement, s3_statement])
    self._role.attach_inline_policy(policy=policy)

    # The notebook instance itself, wired to the role and lifecycle config.
    sagemaker_notebook_name = self.prefix + "_" + base_name + "sagemaker_notebook_" + self.environment_
    self._sagemaker_notebook = sagemaker.CfnNotebookInstance(
        self,
        id=sagemaker_notebook_name,
        notebook_instance_name=sagemaker_notebook_name,
        lifecycle_config_name=self._lifecycle_configuration.
        notebook_instance_lifecycle_config_name,
        role_arn=self._role.role_arn,
        instance_type=self._configuration["instance_type"],
    )
def __init__(self, app: core.App, id: str) -> None:
    """Create a shared EFS file system and a fleet of SageMaker notebook
    instances that mount it via a lifecycle-config script."""
    super().__init__(app, id)

    #Create mount instance
    efs = efs_.CfnFileSystem(self,
                             "commonEFS4Notebooks",
                             encrypted=False,
                             performance_mode='generalPurpose',
                             throughput_mode='bursting')
    print(efs.ref)

    #Create mount target
    # NOTE(review): default_sg / default_subnet are expected to be
    # module-level values — confirm against the surrounding file.
    mount = efs_.CfnMountTarget(
        self,
        "MountID",
        file_system_id=efs.ref,
        security_groups=default_sg,
        subnet_id=default_subnet,
    )

    #Create role for Notebook instance (broad S3 access).
    nRole = iam_.Role(self,
                      "notebookAccessRole",
                      assumed_by=iam_.ServicePrincipal('sagemaker'))
    # attach_to_role returns None, so there is nothing useful to bind here.
    iam_.Policy(self,
                "notebookAccessPolicy",
                policy_name="notebookAccessPolicy",
                statements=[
                    iam_.PolicyStatement(actions=[
                        's3:*',
                    ], resources=[
                        '*',
                    ]),
                ]).attach_to_role(nRole)

    #Create notebook instances cluster
    instances = []
    # BUG FIX: get_att('attr_ip_address') asked CloudFormation for a
    # non-existent attribute ('attr_ip_address' is the Python property name,
    # not the Fn::GetAtt attribute 'IpAddress'); use the generated property.
    print(mount.attr_ip_address)
    encodedScript = LifecycleScriptStr.format(efs.ref)
    print("Adding following script to the lifecycle config..\n___\n\n" +
          encodedScript)
    code = [{"content": core.Fn.base64(encodedScript)}]
    lifecycleconfig = sagemaker_.CfnNotebookInstanceLifecycleConfig(
        self,
        LifeCycleConfigName,
        notebook_instance_lifecycle_config_name=LifeCycleConfigName,
        on_create=None,
        on_start=code)

    # One notebook instance per user, all sharing the same lifecycle config.
    for i in range(num_instances):
        nid = 'CDK-Notebook-Instance-User-' + str(i)
        instances.append(
            sagemaker_.CfnNotebookInstance(
                self,
                nid,
                instance_type='ml.t2.medium',
                volume_size_in_gb=5,
                security_group_ids=default_sg,
                subnet_id=default_subnet,
                notebook_instance_name=nid,
                role_arn=nRole.role_arn,
                lifecycle_config_name=LifeCycleConfigName,
            ))
def __init__(self, scope: core.Construct, construct_id: str, **kwargs) -> None:
    """Provision a 2-AZ VPC, an encrypted shared EFS file system, and a fleet
    of SageMaker notebook instances wired together by a lifecycle script."""
    super().__init__(scope, construct_id, **kwargs)

    # VPC with one public and one private subnet per AZ and a single NAT gateway.
    self.vpc = ec2.Vpc(
        self,
        "VPC",
        max_azs=2,
        cidr="10.10.0.0/16",
        subnet_configuration=[
            ec2.SubnetConfiguration(subnet_type=ec2.SubnetType.PUBLIC,
                                    name="Public",
                                    cidr_mask=24),
            ec2.SubnetConfiguration(subnet_type=ec2.SubnetType.PRIVATE,
                                    name="Private",
                                    cidr_mask=24)
        ],
        nat_gateways=1)

    # Reuse the VPC's default security group (imported as immutable).
    self.sg = ec2.SecurityGroup.from_security_group_id(
        self,
        "securityGroup",
        self.vpc.vpc_default_security_group,
        mutable=False)

    # Encrypted EFS file system shared by all notebook instances.
    # Idiom fix: reference the enum members directly instead of constructing
    # the enums from their string values.
    self.efs = efs.FileSystem(self,
                              "commonEFS4Notebooks",
                              vpc=self.vpc,
                              encrypted=True,
                              enable_automatic_backups=True,
                              performance_mode=efs.PerformanceMode.MAX_IO,
                              throughput_mode=efs.ThroughputMode.BURSTING,
                              security_group=self.sg)

    #Create role for Notebook instance (broad S3 access).
    nRole = iam_.Role(self,
                      "notebookAccessRole",
                      assumed_by=iam_.ServicePrincipal('sagemaker'))
    # attach_to_role returns None, so there is nothing useful to bind here.
    iam_.Policy(self,
                "notebookAccessPolicy",
                policy_name="notebookAccessPolicy",
                statements=[
                    iam_.PolicyStatement(actions=[
                        's3:*',
                    ], resources=[
                        '*',
                    ]),
                ]).attach_to_role(nRole)

    # Lifecycle config whose on-start script mounts the EFS file system.
    encodedScript = LifecycleScriptStr.format(self.efs.file_system_id)
    code = [{"content": core.Fn.base64(encodedScript)}]
    lifecycleconfig = sm.CfnNotebookInstanceLifecycleConfig(
        self,
        "LifeCycleConfig",
        notebook_instance_lifecycle_config_name=LifeCycleConfigName,
        on_create=None,
        on_start=code)

    # One notebook instance per user, all in the first private subnet.
    instances = []
    for i in range(num_instances):
        nid = 'CDK-Notebook-Instance-User-' + str(i)
        instances.append(
            sm.CfnNotebookInstance(
                self,
                nid,
                instance_type='ml.t2.medium',
                volume_size_in_gb=5,
                security_group_ids=[self.sg.security_group_id],
                subnet_id=self.vpc.private_subnets[0].subnet_id,
                notebook_instance_name=nid,
                role_arn=nRole.role_arn,
                lifecycle_config_name=lifecycleconfig.
                notebook_instance_lifecycle_config_name))

    core.CfnOutput(self, "VPC_id", value=self.vpc.vpc_id)
    core.CfnOutput(self, "EFS_id", value=self.efs.file_system_id)
    # Idiom fix: plain loop instead of a list comprehension used only
    # for its side effects.
    for c, notebook in enumerate(instances):
        core.CfnOutput(self,
                       "NotebookInstance_" + str(c),
                       value=notebook.notebook_instance_name)