def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    MEMORYDB_USER_NAME = cdk.CfnParameter(self, 'MemoryDBUserName',
        type='String',
        description='memory db user name',
        default='memdb-admin')

    MEMORYDB_USER_PASSWORD = cdk.CfnParameter(self, 'MemoryDBUserPassword',
        type='String',
        description='memory db user password (16~128 printable characters)')

    memorydb_user = aws_memorydb.CfnUser(self, 'MemoryDBUser',
        user_name=MEMORYDB_USER_NAME.value_as_string,
        # refer to https://redis.io/topics/acl
        access_string='on ~* &* +@all',
        # refer to https://docs.aws.amazon.com/cli/latest/reference/memorydb/create-user.html
        authentication_mode={
            "Type": "password",
            "Passwords": [MEMORYDB_USER_PASSWORD.value_as_string]
        })

    self.memorydb_acl = aws_memorydb.CfnACL(self, 'MemoryDBAcl',
        acl_name='my-memorydb-acl',
        user_names=[memorydb_user.user_name])

    cdk.CfnOutput(self, 'MemoryDBACL',
        value=self.memorydb_acl.acl_name,
        export_name='MemoryDBACL')
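
    # The exported ACL is meant to be attached to a MemoryDB cluster. A minimal
    # sketch of that attachment follows; the cluster name, node type, and shard
    # settings are illustrative assumptions, not part of the original stack,
    # and a real deployment would also need a subnet group and security groups.
    memorydb_cluster = aws_memorydb.CfnCluster(self, 'MemoryDBCluster',
        cluster_name='my-memorydb-cluster',  # hypothetical name
        node_type='db.r6g.large',
        acl_name=self.memorydb_acl.acl_name,
        num_shards=1,
        num_replicas_per_shard=1,
        tls_enabled=True)
    # Make sure the ACL exists before the cluster references it.
    memorydb_cluster.add_depends_on(self.memorydb_acl)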
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    s3_bucket_name = cdk.CfnParameter(self, 'S3BucketForStaticContents',
        type='String',
        description='s3 bucket that the site contents are deployed to')

    site_bucket = s3.Bucket.from_bucket_name(self, 'S3BucketForStaticSite',
        s3_bucket_name.value_as_string)

    cloudfrontOAI = cloudfront.OriginAccessIdentity(self, 'CloudFrontOAI',
        comment="Allows CloudFront to reach the bucket: {name}".format(name=s3_bucket_name.value_as_string))
    cloudfrontOAI.apply_removal_policy(cdk.RemovalPolicy.DESTROY)

    #XXX: Add a policy document to the existing S3 bucket
    #XXX: https://stackoverflow.com/questions/60087302/how-to-add-resource-policy-to-existing-s3-bucket-with-cdk-in-javascript
    site_bucket_policy_statement = aws_iam.PolicyStatement(**{
        'actions': ['s3:GetObject'],
        'resources': [site_bucket.arn_for_objects('*')],
        'principals': [aws_iam.CanonicalUserPrincipal(cloudfrontOAI.cloud_front_origin_access_identity_s3_canonical_user_id)]
    })

    s3.CfnBucketPolicy(self, 'SiteBucketPolicy',
        bucket=site_bucket.bucket_name,
        policy_document=aws_iam.PolicyDocument(statements=[site_bucket_policy_statement]))

    distribution = cloudfront.Distribution(self, "myDist",
        default_behavior=cloudfront.BehaviorOptions(
            origin=cf_origins.S3Origin(bucket=site_bucket,
                origin_access_identity=cloudfrontOAI)),
        error_responses=[
            #XXX: When the root page of the CloudFront URL (i.e. https://your-domain.cloudfront.net/) is accessed,
            #XXX: a 403 Forbidden error can occur. To prevent this error,
            #XXX: configure the 403 Forbidden error response page to be 'index.html'
            cloudfront.ErrorResponse(http_status=403,
                response_http_status=200,
                response_page_path='/index.html',
                ttl=cdk.Duration.seconds(10)),
            #XXX: Configure the 404 Not Found error response page to be 'error.html'
            cloudfront.ErrorResponse(http_status=404,
                response_http_status=404,
                response_page_path='/error.html',
                ttl=cdk.Duration.seconds(10))
        ])

    cdk.CfnOutput(self, 'StackName', value=self.stack_name, export_name='StackName')
    cdk.CfnOutput(self, 'SiteBucket', value=site_bucket.bucket_name, export_name='SiteBucket')
    cdk.CfnOutput(self, 'DistributionId', value=distribution.distribution_id, export_name='DistributionId')
    cdk.CfnOutput(self, 'DistributionDomainName', value=distribution.distribution_domain_name, export_name='DistributionDomainName')
    cdk.CfnOutput(self, 'CloudFrontOriginAccessId', value=cloudfrontOAI.cloud_front_origin_access_identity_s3_canonical_user_id, export_name='CloudFrontOAI')
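
    # A possible companion step, not in the original stack: deploy local site
    # contents to the bucket and invalidate the distribution on each `cdk deploy`.
    # Assumes `from aws_cdk import aws_s3_deployment` and a local './site-contents'
    # folder; both are illustrative assumptions.
    aws_s3_deployment.BucketDeployment(self, 'DeployStaticContents',
        sources=[aws_s3_deployment.Source.asset('./site-contents')],
        destination_bucket=site_bucket,
        # Invalidate cached copies so the new contents are served immediately.
        distribution=distribution,
        distribution_paths=['/*'])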
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    EC2_KEY_PAIR_NAME = cdk.CfnParameter(self, 'EC2KeyPairName',
        type='String',
        description='Amazon EC2 Instance KeyPair name')

    vpc_name = self.node.try_get_context("vpc_name")
    vpc = aws_ec2.Vpc.from_lookup(self, "ExistingVPC",
        is_default=True,
        vpc_name=vpc_name)

    # vpc = aws_ec2.Vpc(self, "Ec2WithPemKeyStackVPC",
    #     max_azs=2,
    #     gateway_endpoints={
    #         "S3": aws_ec2.GatewayVpcEndpointOptions(
    #             service=aws_ec2.GatewayVpcEndpointAwsService.S3
    #         )
    #     })

    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceClass.html
    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceSize.html#aws_cdk.aws_ec2.InstanceSize
    ec2_instance_type = aws_ec2.InstanceType.of(
        aws_ec2.InstanceClass.BURSTABLE3, aws_ec2.InstanceSize.MEDIUM)

    sg_bastion_host = aws_ec2.SecurityGroup(self, "BastionHostSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for a bastion host',
        security_group_name='bastion-host-sg')
    cdk.Tags.of(sg_bastion_host).add('Name', 'bastion-host-sg')

    #TODO: SHOULD restrict the IP range allowed SSH access
    sg_bastion_host.add_ingress_rule(peer=aws_ec2.Peer.ipv4("0.0.0.0/0"),
        connection=aws_ec2.Port.tcp(22),
        description='SSH access')

    bastion_host = aws_ec2.Instance(self, "BastionHost",
        vpc=vpc,
        instance_type=ec2_instance_type,
        machine_image=aws_ec2.MachineImage.latest_amazon_linux(),
        vpc_subnets=aws_ec2.SubnetSelection(subnet_type=aws_ec2.SubnetType.PUBLIC),
        security_group=sg_bastion_host,
        key_name=EC2_KEY_PAIR_NAME.value_as_string)

    cdk.CfnOutput(self, 'BastionHostId',
        value=bastion_host.instance_id,
        export_name='BastionHostId')
    cdk.CfnOutput(self, 'BastionHostPublicDNSName',
        value=bastion_host.instance_public_dns_name,
        export_name='BastionHostPublicDNSName')
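
    # One way to address the TODO above, sketched as a replacement for the
    # 0.0.0.0/0 rule: take the allowed CIDR as a stack parameter. The parameter
    # name and allowed_pattern are illustrative assumptions.
    SSH_ALLOWED_CIDR = cdk.CfnParameter(self, 'SshAllowedCidr',
        type='String',
        description='CIDR block allowed to SSH into the bastion host',
        allowed_pattern=r'(\d{1,3}\.){3}\d{1,3}/\d{1,2}')

    sg_bastion_host.add_ingress_rule(peer=aws_ec2.Peer.ipv4(SSH_ALLOWED_CIDR.value_as_string),
        connection=aws_ec2.Port.tcp(22),
        description='SSH access from the allowed CIDR only')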
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    # vpc_name = self.node.try_get_context("vpc_name")
    # vpc = aws_ec2.Vpc.from_lookup(self, "ExistingVPC",
    #     is_default=True,
    #     vpc_name=vpc_name)

    vpc = aws_ec2.Vpc(self, "FirehoseToS3VPC",
        max_azs=2,
        gateway_endpoints={
            "S3": aws_ec2.GatewayVpcEndpointOptions(
                service=aws_ec2.GatewayVpcEndpointAwsService.S3)
        })

    S3_BUCKET_SUFFIX = ''.join(random.sample((string.ascii_lowercase + string.digits), k=7))
    s3_bucket = s3.Bucket(self, "s3bucket",
        removal_policy=cdk.RemovalPolicy.DESTROY,  #XXX: Default: cdk.RemovalPolicy.RETAIN - The bucket will be orphaned
        bucket_name="firehose-to-s3-{region}-{suffix}".format(region=cdk.Aws.REGION, suffix=S3_BUCKET_SUFFIX))

    FIREHOSE_STREAM_NAME = cdk.CfnParameter(self, 'FirehoseStreamName',
        type='String',
        description='kinesis data firehose stream name',
        default='PUT-S3-{}'.format(''.join(random.sample((string.ascii_letters), k=5))))

    FIREHOSE_BUFFER_SIZE = cdk.CfnParameter(self, 'FirehoseBufferSize',
        type='Number',
        description='kinesis data firehose buffer size',
        min_value=1,
        max_value=128,
        default=128)

    FIREHOSE_BUFFER_INTERVAL = cdk.CfnParameter(self, 'FirehoseBufferInterval',
        type='Number',
        description='kinesis data firehose buffer interval',
        min_value=60,
        max_value=300,
        default=60)

    FIREHOSE_LAMBDA_BUFFER_SIZE = cdk.CfnParameter(self, 'FirehoseLambdaBufferSize',
        type='Number',
        description='kinesis data firehose buffer size for AWS Lambda to transform records',
        min_value=1,
        max_value=3,
        default=3)

    FIREHOSE_LAMBDA_BUFFER_INTERVAL = cdk.CfnParameter(self, 'FirehoseLambdaBufferInterval',
        type='Number',
        description='kinesis data firehose buffer interval for AWS Lambda to transform records',
        min_value=60,
        max_value=900,
        default=300)

    FIREHOSE_LAMBDA_NUMBER_OF_RETRIES = cdk.CfnParameter(self, 'FirehoseLambdaNumberOfRetries',
        type='Number',
        description='Number of retries for AWS Lambda to transform records in kinesis data firehose',
        min_value=1,
        max_value=5,
        default=3)

    FIREHOSE_TO_S3_PREFIX = cdk.CfnParameter(self, 'FirehosePrefix',
        type='String',
        description='kinesis data firehose S3 prefix')

    FIREHOSE_TO_S3_ERROR_OUTPUT_PREFIX = cdk.CfnParameter(self, 'FirehoseErrorOutputPrefix',
        type='String',
        description='kinesis data firehose S3 error output prefix',
        default='error/year=!{timestamp:yyyy}/month=!{timestamp:MM}/day=!{timestamp:dd}/hour=!{timestamp:HH}/!{firehose:error-output-type}')

    METADATA_EXTRACT_LAMBDA_FN_NAME = "MetadataExtractor"
    metadata_extract_lambda_fn = aws_lambda.Function(self, "MetadataExtractor",
        runtime=aws_lambda.Runtime.PYTHON_3_7,
        function_name=METADATA_EXTRACT_LAMBDA_FN_NAME,
        handler="metadata_extractor.lambda_handler",
        description="Extract partition keys from records",
        code=aws_lambda.Code.from_asset(os.path.join(os.path.dirname(__file__), 'src/main/python')),
        timeout=cdk.Duration.minutes(5))

    log_group = aws_logs.LogGroup(self, "MetadataExtractorLogGroup",
        #XXX: A circular dependency between resources occurs
        # if aws_lambda.Function.function_name is used
        # instead of the literal name of the lambda function such as "MetadataExtractor"
        log_group_name="/aws/lambda/{}".format(METADATA_EXTRACT_LAMBDA_FN_NAME),
        retention=aws_logs.RetentionDays.THREE_DAYS,
        removal_policy=cdk.RemovalPolicy.DESTROY)
    log_group.grant_write(metadata_extract_lambda_fn)

    firehose_role_policy_doc = aws_iam.PolicyDocument()
    firehose_role_policy_doc.add_statements(aws_iam.PolicyStatement(**{
        "effect": aws_iam.Effect.ALLOW,
        "resources": [s3_bucket.bucket_arn, "{}/*".format(s3_bucket.bucket_arn)],
        "actions": [
            "s3:AbortMultipartUpload",
            "s3:GetBucketLocation",
            "s3:GetObject",
            "s3:ListBucket",
            "s3:ListBucketMultipartUploads",
            "s3:PutObject"
        ]
    }))

    firehose_role_policy_doc.add_statements(aws_iam.PolicyStatement(
        effect=aws_iam.Effect.ALLOW,
        resources=["*"],
        actions=[
            "ec2:DescribeVpcs",
            "ec2:DescribeVpcAttribute",
            "ec2:DescribeSubnets",
            "ec2:DescribeSecurityGroups",
            "ec2:DescribeNetworkInterfaces",
            "ec2:CreateNetworkInterface",
            "ec2:CreateNetworkInterfacePermission",
            "ec2:DeleteNetworkInterface"
        ]))

    #XXX: https://docs.aws.amazon.com/ko_kr/cdk/latest/guide/tokens.html
    # String-encoded tokens:
    #  Avoid manipulating the string in other ways. For example,
    #  taking a substring of a string is likely to break the string token.
    firehose_log_group_name = f"/aws/kinesisfirehose/{FIREHOSE_STREAM_NAME.value_as_string}"
    firehose_role_policy_doc.add_statements(aws_iam.PolicyStatement(
        effect=aws_iam.Effect.ALLOW,
        #XXX: The ARN will be formatted as follows:
        # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
        resources=[self.format_arn(service="logs", resource="log-group",
            resource_name="{}:log-stream:*".format(firehose_log_group_name),
            arn_format=cdk.ArnFormat.COLON_RESOURCE_NAME)],
        actions=["logs:PutLogEvents"]))

    firehose_role_policy_doc.add_statements(aws_iam.PolicyStatement(**{
        "effect": aws_iam.Effect.ALLOW,
        #XXX: The ARN will be formatted as follows:
        # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
        "resources": [self.format_arn(partition="aws", service="lambda",
            region=cdk.Aws.REGION, account=cdk.Aws.ACCOUNT_ID, resource="function",
            resource_name="{}:*".format(metadata_extract_lambda_fn.function_name),
            arn_format=cdk.ArnFormat.COLON_RESOURCE_NAME)],
        "actions": [
            "lambda:InvokeFunction",
            "lambda:GetFunctionConfiguration"
        ]
    }))

    firehose_role = aws_iam.Role(self, "KinesisFirehoseServiceRole",
        role_name="KinesisFirehoseServiceRole-{stream_name}-{region}".format(
            stream_name=FIREHOSE_STREAM_NAME.value_as_string, region=cdk.Aws.REGION),
        assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"),
        path='/service-role/',
        #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
        inline_policies={"firehose_role_policy": firehose_role_policy_doc})

    lambda_proc = cfn.ProcessorProperty(
        type="Lambda",
        parameters=[
            cfn.ProcessorParameterProperty(
                parameter_name="LambdaArn",
                parameter_value='{}:{}'.format(metadata_extract_lambda_fn.function_arn,
                    metadata_extract_lambda_fn.current_version.version)),
            cfn.ProcessorParameterProperty(
                parameter_name="NumberOfRetries",
                parameter_value=FIREHOSE_LAMBDA_NUMBER_OF_RETRIES.value_as_string),
            cfn.ProcessorParameterProperty(
                parameter_name="RoleArn",
                parameter_value=firehose_role.role_arn),
            cfn.ProcessorParameterProperty(
                parameter_name="BufferSizeInMBs",
                parameter_value=FIREHOSE_LAMBDA_BUFFER_SIZE.value_as_string),
            cfn.ProcessorParameterProperty(
                parameter_name="BufferIntervalInSeconds",
                parameter_value=FIREHOSE_LAMBDA_BUFFER_INTERVAL.value_as_string)
        ])

    record_deaggregation_proc = cfn.ProcessorProperty(
        type="RecordDeAggregation",
        parameters=[
            cfn.ProcessorParameterProperty(
                parameter_name="SubRecordType",
                parameter_value="JSON")
        ])

    #XXX: Add a new line delimiter when delivering data to S3.
    # This is particularly useful when dynamic partitioning is applied to aggregated data,
    # because multirecord deaggregation (which must be applied to aggregated data
    # before it can be dynamically partitioned) removes new lines from records as part of the parsing process.
    # https://docs.aws.amazon.com/firehose/latest/dev/dynamic-partitioning.html#dynamic-partitioning-new-line-delimiter
    append_delim_to_record_proc = cfn.ProcessorProperty(
        type="AppendDelimiterToRecord",
        parameters=[])

    firehose_processing_config = cfn.ProcessingConfigurationProperty(
        enabled=True,
        processors=[
            record_deaggregation_proc,
            append_delim_to_record_proc,
            lambda_proc
        ])

    ext_s3_dest_config = cfn.ExtendedS3DestinationConfigurationProperty(
        bucket_arn=s3_bucket.bucket_arn,
        role_arn=firehose_role.role_arn,
        buffering_hints={
            "intervalInSeconds": FIREHOSE_BUFFER_INTERVAL.value_as_number,
            "sizeInMBs": FIREHOSE_BUFFER_SIZE.value_as_number
        },
        cloud_watch_logging_options={
            "enabled": True,
            "logGroupName": firehose_log_group_name,
            "logStreamName": "DestinationDelivery"
        },
        compression_format="UNCOMPRESSED",  # [GZIP | HADOOP_SNAPPY | Snappy | UNCOMPRESSED | ZIP]
        data_format_conversion_configuration={
            "enabled": False
        },
        dynamic_partitioning_configuration={
            "enabled": True,
            "retryOptions": {
                "durationInSeconds": 300
            }
        },
        error_output_prefix=FIREHOSE_TO_S3_ERROR_OUTPUT_PREFIX.value_as_string,
        prefix=FIREHOSE_TO_S3_PREFIX.value_as_string,
        processing_configuration=firehose_processing_config)

    firehose_to_s3_delivery_stream = aws_kinesisfirehose.CfnDeliveryStream(self, "FirehoseToS3",
        delivery_stream_name=FIREHOSE_STREAM_NAME.value_as_string,
        delivery_stream_type="DirectPut",
        extended_s3_destination_configuration=ext_s3_dest_config,
        tags=[{
            "key": "Name",
            "value": FIREHOSE_STREAM_NAME.value_as_string
        }])

    cdk.CfnOutput(self, 'StackName', value=self.stack_name, export_name='StackName')
    cdk.CfnOutput(self, '{}_S3DestBucket'.format(self.stack_name),
        value=s3_bucket.bucket_name, export_name='S3DestBucket')
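
# The stack above packages `src/main/python/metadata_extractor.py`, whose source
# is not shown here. A minimal sketch of what a Firehose transformation handler
# that emits dynamic-partitioning keys could look like; the `region` and
# `device_id` partition keys are illustrative assumptions about the record schema.
import base64
import json

def lambda_handler(event, context):
    output = []
    for record in event['records']:
        # Firehose hands each record base64-encoded; decode it to inspect the payload.
        payload = json.loads(base64.b64decode(record['data']))
        output.append({
            'recordId': record['recordId'],  # must be echoed back unchanged
            'result': 'Ok',                  # Ok | Dropped | ProcessingFailed
            'data': record['data'],          # pass the record through untouched
            # Keys referenced by !{partitionKeyFromLambda:...} in the S3 prefix
            'metadata': {
                'partitionKeys': {
                    'region': payload.get('region', 'unknown'),
                    'device_id': payload.get('device_id', 'unknown')
                }
            }
        })
    return {'records': output}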
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    JENKINS_USER = cdk.CfnParameter(self, 'JenkinsUser',
        type='String',
        description='Jenkins user name')

    JENKINS_API_TOKEN = cdk.CfnParameter(self, 'JenkinsAPIToken',
        type='String',
        description='Jenkins user api token',
        no_echo=True)

    JENKINS_URL = cdk.CfnParameter(self, 'JenkinsUrl',
        type='String',
        description='Jenkins url')

    secret_value = aws_secretsmanager.SecretStringValueBeta1.from_token(json.dumps({
        JENKINS_USER.value_as_string: JENKINS_API_TOKEN.value_as_string
    }))

    jenkins_api_user_token_secret = aws_secretsmanager.Secret(self, "JenkinsAPIUserTokenSecret",
        secret_string_beta1=secret_value,
        description="Secret to store the Jenkins username and personal access token")

    jenkins_trigger_lambda_fn = aws_lambda.Function(self, "LambdaJenkinsTrigger",
        runtime=aws_lambda.Runtime.PYTHON_3_8,
        function_name="SageMakerJenkins-LambdaJenkinsTrigger",
        handler="lambda_jenkins_trigger.lambda_handler",
        description="Lambda function invoked by SageMaker Model Package State change",
        code=aws_lambda.Code.from_asset(os.path.join(os.path.dirname(__file__), 'src/main/python')),
        environment={
            "JenkinsAPIUserTokenSecret": jenkins_api_user_token_secret.secret_name,
            "JenkinsUrl": JENKINS_URL.value_as_string,
        },
        timeout=cdk.Duration.minutes(5))

    jenkins_trigger_lambda_fn.add_to_role_policy(aws_iam.PolicyStatement(
        effect=aws_iam.Effect.ALLOW,
        resources=["arn:aws:secretsmanager:*:*:*"],
        actions=["secretsmanager:GetSecretValue"]))

    jenkins_trigger_lambda_fn.add_to_role_policy(aws_iam.PolicyStatement(
        effect=aws_iam.Effect.ALLOW,
        resources=["arn:aws:logs:*:*:*"],
        actions=["logs:*"]))

    event_rule = aws_events.Rule(self, "JenkinsTriggerRule",
        rule_name="SageMakerJenkinsTriggerRule",
        event_pattern={
            "account": [self.account],
            "source": ["aws.sagemaker"],
            "detail_type": ["SageMaker Model Package State Change"],
            "detail": {
                "ModelApprovalStatus": ["Approved", "Rejected"]
            }
        },
        description='''Rule to trigger a deployment when the SageMaker Model Registry is updated with a new model package.
For example, a new model package is registered with the Registry.''')

    event_rule.add_target(aws_events_targets.LambdaFunction(jenkins_trigger_lambda_fn))
    event_rule.apply_removal_policy(cdk.RemovalPolicy.DESTROY)

    cdk.CfnOutput(self, 'JenkinsAPIUserTokenSecretName',
        value=jenkins_api_user_token_secret.secret_name,
        export_name='JenkinsAPIUserTokenSecret')
    cdk.CfnOutput(self, 'JenkinsTriggerLambdaFunctionName',
        value=jenkins_trigger_lambda_fn.function_name,
        export_name='LambdaJenkinsTrigger')
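
# The packaged `lambda_jenkins_trigger.py` is not shown in this section. A rough
# sketch of such a handler, assuming a Jenkins job named 'sagemaker-model-deploy'
# (hypothetical) and the environment variables wired up in the stack above; it
# uses only the standard library plus boto3, which the Lambda runtime provides.
import base64
import json
import os
import urllib.request

import boto3

def lambda_handler(event, context):
    # Only act on approved model packages.
    if event['detail']['ModelApprovalStatus'] != 'Approved':
        return {'status': 'skipped'}

    # The secret was stored as {"<jenkins-user>": "<api-token>"}.
    secret_string = boto3.client('secretsmanager').get_secret_value(
        SecretId=os.environ['JenkinsAPIUserTokenSecret'])['SecretString']
    user, api_token = next(iter(json.loads(secret_string).items()))

    # Trigger the job with HTTP Basic auth; Jenkins accepts user API tokens here.
    req = urllib.request.Request(
        '{}/job/sagemaker-model-deploy/build'.format(os.environ['JenkinsUrl']),
        method='POST')
    credentials = base64.b64encode(f'{user}:{api_token}'.encode()).decode()
    req.add_header('Authorization', f'Basic {credentials}')
    with urllib.request.urlopen(req) as resp:
        return {'status': resp.status}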
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    SAGEMAKER_NOTEBOOK_INSTANCE_TYPE = cdk.CfnParameter(self, 'SageMakerNotebookInstanceType',
        type='String',
        description='Amazon SageMaker Notebook instance type',
        default='ml.t2.medium')

    #XXX: To create the SageMaker notebook instance in an existing VPC,
    # keep the lookup below and comment out the vpc = aws_ec2.Vpc(..) code,
    # then pass -c vpc_name=your-existing-vpc to the cdk command,
    # for example,
    # cdk -c vpc_name=your-existing-vpc synth
    vpc_name = self.node.try_get_context('vpc_name')
    vpc = aws_ec2.Vpc.from_lookup(self, 'ExistingVPC',
        is_default=True,
        vpc_name=vpc_name)

    #XXX: To use more than 2 AZs, be sure to specify the account and region on your stack.
    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/Vpc.html
    # vpc = aws_ec2.Vpc(self, 'SageMakerStudioVPC',
    #     max_azs=2,
    #     gateway_endpoints={
    #         "S3": aws_ec2.GatewayVpcEndpointOptions(
    #             service=aws_ec2.GatewayVpcEndpointAwsService.S3
    #         )
    #     })

    sg_sagemaker_notebook_instance = aws_ec2.SecurityGroup(self, "SageMakerNotebookSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='Security group with no ingress rule',
        security_group_name='sagemaker-nb-{}-sg'.format(''.join(random.sample((string.ascii_letters), k=5))))
    sg_sagemaker_notebook_instance.add_ingress_rule(
        peer=sg_sagemaker_notebook_instance,
        connection=aws_ec2.Port.all_traffic(),
        description='sagemaker notebook security group')
    cdk.Tags.of(sg_sagemaker_notebook_instance).add('Name', 'sagemaker-nb-sg')

    sagemaker_notebook_role_policy_doc = aws_iam.PolicyDocument()
    sagemaker_notebook_role_policy_doc.add_statements(aws_iam.PolicyStatement(**{
        "effect": aws_iam.Effect.ALLOW,
        "resources": ["arn:aws:s3:::*"],
        "actions": [
            "s3:GetObject",
            "s3:PutObject",
            "s3:DeleteObject",
            "s3:ListBucket"
        ]
    }))

    sagemaker_notebook_role = aws_iam.Role(self, 'SageMakerNotebookRole',
        role_name='SageMakerNotebookRole-{suffix}'.format(suffix=''.join(random.sample((string.ascii_letters), k=5))),
        assumed_by=aws_iam.ServicePrincipal('sagemaker.amazonaws.com'),
        inline_policies={
            'sagemaker-custom-execution-role': sagemaker_notebook_role_policy_doc
        },
        managed_policies=[
            aws_iam.ManagedPolicy.from_aws_managed_policy_name('AmazonSageMakerFullAccess'),
            aws_iam.ManagedPolicy.from_aws_managed_policy_name('AWSCloudFormationReadOnlyAccess')
        ])

    #XXX: skip downloading rds-combined-ca-bundle.pem if you do not use SSL with a MySQL DB instance
    # https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/CHAP_MySQL.html#MySQL.Concepts.SSLSupport
    sagemaker_nb_lifecycle_content = '''#!/bin/bash
sudo -u ec2-user -i <<'EOF'
echo "export AWS_REGION={AWS_Region}" >> ~/.bashrc

curl -LO https://bitbucket.org/eunjeon/mecab-ko/downloads/mecab-0.996-ko-0.9.2.tar.gz
tar zxfv mecab-0.996-ko-0.9.2.tar.gz
pushd mecab-0.996-ko-0.9.2
./configure
make
make check
sudo make install
sudo ldconfig
mecab -v
mecab-config --version
popd

curl -LO https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/mecab-ko-dic-2.1.1-20180720.tar.gz
tar -zxvf mecab-ko-dic-2.1.1-20180720.tar.gz
pushd mecab-ko-dic-2.1.1-20180720
./autogen.sh
./configure
make
sudo make install
popd

for each in python3 pytorch_latest_p36
do
    source /home/ec2-user/anaconda3/bin/activate ${{each}}
    pip install --upgrade pretty_errors
    pip install --upgrade pandas-profiling[notebook]
    pip install --upgrade ipython-sql
    pip install --upgrade PyMySQL
    pip install torchvision
    pip install torchtext
    pip install spacy
    pip install nltk
    pip install requests
    pip install mecab-python
    pip install konlpy
    pip install jpype1-py3
    conda deactivate
done
EOF
'''.format(AWS_Region=cdk.Aws.REGION)

    sagemaker_lifecycle_config_prop = aws_sagemaker.CfnNotebookInstanceLifecycleConfig.NotebookInstanceLifecycleHookProperty(
        content=cdk.Fn.base64(sagemaker_nb_lifecycle_content))

    sagemaker_lifecycle_config = aws_sagemaker.CfnNotebookInstanceLifecycleConfig(self, 'SageMakerNotebookLifeCycleConfig',
        notebook_instance_lifecycle_config_name='SageMakerNotebookLifeCycleConfig',
        on_start=[sagemaker_lifecycle_config_prop])

    sagemaker_notebook_instance = aws_sagemaker.CfnNotebookInstance(self, 'SageMakerNotebookInstance',
        instance_type=SAGEMAKER_NOTEBOOK_INSTANCE_TYPE.value_as_string,
        role_arn=sagemaker_notebook_role.role_arn,
        lifecycle_config_name=sagemaker_lifecycle_config.notebook_instance_lifecycle_config_name,
        notebook_instance_name='MySageMakerWorkbook',
        root_access='Disabled',
        security_group_ids=[sg_sagemaker_notebook_instance.security_group_id],
        subnet_id=vpc.select_subnets(subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).subnet_ids[0])
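
# Since the notebook instance sits in a private subnet, it is reached through a
# presigned URL rather than a public endpoint. An illustrative boto3 snippet
# (the notebook name matches the stack above; the expiration value is arbitrary):
import boto3

sagemaker_client = boto3.client('sagemaker')
presigned = sagemaker_client.create_presigned_notebook_instance_url(
    NotebookInstanceName='MySageMakerWorkbook',
    SessionExpirationDurationInSeconds=1800)
print(presigned['AuthorizedUrl'])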
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    OPENSEARCH_DOMAIN_NAME = cdk.CfnParameter(self, 'OpenSearchDomainName',
        type='String',
        description='Amazon OpenSearch Service domain name',
        default='opensearch-{}'.format(''.join(random.sample((string.ascii_letters), k=5))),
        allowed_pattern='[a-z]+[A-Za-z0-9\-]+')

    OPENSEARCH_INDEX_NAME = cdk.CfnParameter(self, 'SearchIndexName',
        type='String',
        description='Amazon OpenSearch Service index name')

    EC2_KEY_PAIR_NAME = cdk.CfnParameter(self, 'EC2KeyPairName',
        type='String',
        description='Amazon EC2 Instance KeyPair name')

    # vpc_name = self.node.try_get_context("vpc_name")
    # vpc = aws_ec2.Vpc.from_lookup(self, "ExistingVPC",
    #     is_default=True,
    #     vpc_name=vpc_name)

    vpc = aws_ec2.Vpc(self, "EKKStackVPC",
        max_azs=3,
        gateway_endpoints={
            "S3": aws_ec2.GatewayVpcEndpointOptions(
                service=aws_ec2.GatewayVpcEndpointAwsService.S3)
        })

    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceClass.html
    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceSize.html#aws_cdk.aws_ec2.InstanceSize
    ec2_instance_type = aws_ec2.InstanceType.of(
        aws_ec2.InstanceClass.BURSTABLE3, aws_ec2.InstanceSize.MEDIUM)

    sg_bastion_host = aws_ec2.SecurityGroup(self, "BastionHostSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for a bastion host',
        security_group_name='bastion-host-sg')
    cdk.Tags.of(sg_bastion_host).add('Name', 'bastion-host-sg')

    #TODO: SHOULD restrict the IP range allowed SSH access
    sg_bastion_host.add_ingress_rule(peer=aws_ec2.Peer.ipv4("0.0.0.0/0"),
        connection=aws_ec2.Port.tcp(22),
        description='SSH access')

    bastion_host = aws_ec2.Instance(self, "BastionHost",
        vpc=vpc,
        instance_type=ec2_instance_type,
        machine_image=aws_ec2.MachineImage.latest_amazon_linux(),
        vpc_subnets=aws_ec2.SubnetSelection(subnet_type=aws_ec2.SubnetType.PUBLIC),
        security_group=sg_bastion_host,
        key_name=EC2_KEY_PAIR_NAME.value_as_string)

    sg_use_opensearch = aws_ec2.SecurityGroup(self, "OpenSearchClientSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for an opensearch client',
        security_group_name='use-opensearch-cluster-sg')
    cdk.Tags.of(sg_use_opensearch).add('Name', 'use-opensearch-cluster-sg')

    sg_opensearch_cluster = aws_ec2.SecurityGroup(self, "OpenSearchSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for an opensearch cluster',
        security_group_name='opensearch-cluster-sg')
    cdk.Tags.of(sg_opensearch_cluster).add('Name', 'opensearch-cluster-sg')

    sg_opensearch_cluster.add_ingress_rule(peer=sg_opensearch_cluster,
        connection=aws_ec2.Port.all_tcp(),
        description='opensearch-cluster-sg')
    sg_opensearch_cluster.add_ingress_rule(peer=sg_use_opensearch,
        connection=aws_ec2.Port.tcp(443),
        description='use-opensearch-cluster-sg')
    sg_opensearch_cluster.add_ingress_rule(peer=sg_use_opensearch,
        connection=aws_ec2.Port.tcp_range(9200, 9300),
        description='use-opensearch-cluster-sg')
    sg_opensearch_cluster.add_ingress_rule(peer=sg_bastion_host,
        connection=aws_ec2.Port.tcp(443),
        description='bastion-host-sg')
    sg_opensearch_cluster.add_ingress_rule(peer=sg_bastion_host,
        connection=aws_ec2.Port.tcp_range(9200, 9300),
        description='bastion-host-sg')

    master_user_secret = aws_secretsmanager.Secret(self, "OpenSearchMasterUserSecret",
        generate_secret_string=aws_secretsmanager.SecretStringGenerator(
            secret_string_template=json.dumps({"username": "******"}),
            generate_string_key="password",
            # Master password must be at least 8 characters long and contain at least one uppercase letter,
            # one lowercase letter, one number, and one special character.
            password_length=8))

    #XXX: aws cdk elasticsearch example - https://github.com/aws/aws-cdk/issues/2873
    # You should camelCase the property names instead of PascalCase
    opensearch_domain = aws_opensearchservice.Domain(self, "OpenSearch",
        domain_name=OPENSEARCH_DOMAIN_NAME.value_as_string,
        version=aws_opensearchservice.EngineVersion.OPENSEARCH_1_0,
        #XXX: You cannot use graviton instances with non-graviton instances.
        # Use graviton instances as data nodes or use non-graviton instances as master nodes.
        capacity={
            "master_nodes": 3,
            "master_node_instance_type": "r6g.large.search",
            "data_nodes": 3,
            "data_node_instance_type": "r6g.large.search"
        },
        ebs={
            "volume_size": 10,
            "volume_type": aws_ec2.EbsDeviceVolumeType.GP2
        },
        #XXX: az_count must be equal to vpc subnets count.
        zone_awareness={
            "availability_zone_count": 3
        },
        logging={
            "slow_search_log_enabled": True,
            "app_log_enabled": True,
            "slow_index_log_enabled": True
        },
        fine_grained_access_control=aws_opensearchservice.AdvancedSecurityOptions(
            master_user_name=master_user_secret.secret_value_from_json("username").to_string(),
            master_user_password=master_user_secret.secret_value_from_json("password")),
        # Enforcing HTTPS is required when fine-grained access control is enabled.
        enforce_https=True,
        # Node-to-node encryption is required when fine-grained access control is enabled.
        node_to_node_encryption=True,
        # Encryption-at-rest is required when fine-grained access control is enabled.
        encryption_at_rest={
            "enabled": True
        },
        use_unsigned_basic_auth=True,
        security_groups=[sg_opensearch_cluster],
        automated_snapshot_start_hour=17,  # 2 AM (GMT+9)
        vpc=vpc,
        vpc_subnets=[aws_ec2.SubnetSelection(one_per_az=True, subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT)],
        removal_policy=cdk.RemovalPolicy.DESTROY  # default: cdk.RemovalPolicy.RETAIN
    )
    cdk.Tags.of(opensearch_domain).add('Name', f'{OPENSEARCH_DOMAIN_NAME.value_as_string}')

    S3_BUCKET_SUFFIX = ''.join(random.sample((string.ascii_lowercase + string.digits), k=7))
    s3_bucket = s3.Bucket(self, "s3bucket",
        removal_policy=cdk.RemovalPolicy.DESTROY,  #XXX: Default: cdk.RemovalPolicy.RETAIN - The bucket will be orphaned
        bucket_name="opskk-stack-{region}-{suffix}".format(region=cdk.Aws.REGION, suffix=S3_BUCKET_SUFFIX))

    firehose_role_policy_doc = aws_iam.PolicyDocument()
    firehose_role_policy_doc.add_statements(aws_iam.PolicyStatement(**{
        "effect": aws_iam.Effect.ALLOW,
        "resources": [s3_bucket.bucket_arn, "{}/*".format(s3_bucket.bucket_arn)],
        "actions": [
            "s3:AbortMultipartUpload",
            "s3:GetBucketLocation",
            "s3:GetObject",
            "s3:ListBucket",
            "s3:ListBucketMultipartUploads",
            "s3:PutObject"
        ]
    }))

    firehose_role_policy_doc.add_statements(aws_iam.PolicyStatement(
        effect=aws_iam.Effect.ALLOW,
        resources=["*"],
        actions=[
            "ec2:DescribeVpcs",
            "ec2:DescribeVpcAttribute",
            "ec2:DescribeSubnets",
            "ec2:DescribeSecurityGroups",
            "ec2:DescribeNetworkInterfaces",
            "ec2:CreateNetworkInterface",
            "ec2:CreateNetworkInterfacePermission",
            "ec2:DeleteNetworkInterface"
        ]))

    firehose_role_policy_doc.add_statements(aws_iam.PolicyStatement(
        effect=aws_iam.Effect.ALLOW,
        resources=[opensearch_domain.domain_arn, "{}/*".format(opensearch_domain.domain_arn)],
        actions=[
            "es:DescribeElasticsearchDomain",
            "es:DescribeElasticsearchDomains",
            "es:DescribeElasticsearchDomainConfig",
            "es:ESHttpPost",
            "es:ESHttpPut"
        ]))

    firehose_role_policy_doc.add_statements(aws_iam.PolicyStatement(
        effect=aws_iam.Effect.ALLOW,
        #XXX: https://aws.amazon.com/premiumsupport/knowledge-center/kinesis-data-firehose-delivery-failure/
        resources=[
            opensearch_domain.domain_arn,
            f"{opensearch_domain.domain_arn}/_all/_settings",
            f"{opensearch_domain.domain_arn}/_cluster/stats",
            f"{opensearch_domain.domain_arn}/{OPENSEARCH_INDEX_NAME.value_as_string}*/_mapping/%FIREHOSE_POLICY_TEMPLATE_PLACEHOLDER%",
            f"{opensearch_domain.domain_arn}/_nodes",
            f"{opensearch_domain.domain_arn}/_nodes/stats",
            f"{opensearch_domain.domain_arn}/_nodes/*/stats",
            f"{opensearch_domain.domain_arn}/_stats",
            f"{opensearch_domain.domain_arn}/{OPENSEARCH_INDEX_NAME.value_as_string}*/_stats"
        ],
        actions=["es:ESHttpGet"]))

    firehose_log_group_name = f"/aws/kinesisfirehose/{OPENSEARCH_INDEX_NAME.value_as_string}"
    firehose_role_policy_doc.add_statements(aws_iam.PolicyStatement(
        effect=aws_iam.Effect.ALLOW,
        #XXX: The ARN will be formatted as follows:
        # arn:{partition}:{service}:{region}:{account}:{resource}{sep}{resource-name}
        resources=[self.format_arn(service="logs", resource="log-group",
            resource_name="{}:log-stream:*".format(firehose_log_group_name),
            arn_format=cdk.ArnFormat.COLON_RESOURCE_NAME)],
        actions=["logs:PutLogEvents"]))

    firehose_role = aws_iam.Role(self, "KinesisFirehoseServiceRole",
        role_name=f"KinesisFirehoseServiceRole-{OPENSEARCH_INDEX_NAME.value_as_string}-{cdk.Aws.REGION}",
        assumed_by=aws_iam.ServicePrincipal("firehose.amazonaws.com"),
        #XXX: use inline_policies to work around https://github.com/aws/aws-cdk/issues/5221
        inline_policies={"firehose_role_policy": firehose_role_policy_doc})

    opensearch_dest_vpc_config = aws_kinesisfirehose.CfnDeliveryStream.VpcConfigurationProperty(
        role_arn=firehose_role.role_arn,
        security_group_ids=[sg_use_opensearch.security_group_id],
        subnet_ids=vpc.select_subnets(subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).subnet_ids)

    opensearch_dest_config = aws_kinesisfirehose.CfnDeliveryStream.ElasticsearchDestinationConfigurationProperty(
        index_name=OPENSEARCH_INDEX_NAME.value_as_string,
        role_arn=firehose_role.role_arn,
        s3_configuration={
            "bucketArn": s3_bucket.bucket_arn,
            "bufferingHints": {
                "intervalInSeconds": 60,
                "sizeInMBs": 1
            },
            "cloudWatchLoggingOptions": {
                "enabled": True,
                "logGroupName": firehose_log_group_name,
                "logStreamName": "S3Backup"
            },
            "compressionFormat": "UNCOMPRESSED",  # [GZIP | HADOOP_SNAPPY | Snappy | UNCOMPRESSED | ZIP]
            # Kinesis Data Firehose automatically appends the "YYYY/MM/dd/HH/" UTC prefix to delivered S3 files. You can also specify
            # an extra prefix in front of the time format and add "/" to the end to have it appear as a folder in the S3 console.
            "prefix": f"{OPENSEARCH_INDEX_NAME.value_as_string}/",
            "roleArn": firehose_role.role_arn
        },
        buffering_hints={
            "intervalInSeconds": 60,
            "sizeInMBs": 1
        },
        cloud_watch_logging_options={
            "enabled": True,
            "logGroupName": firehose_log_group_name,
            "logStreamName": "ElasticsearchDelivery"
        },
        domain_arn=opensearch_domain.domain_arn,
        index_rotation_period="NoRotation",  # [NoRotation | OneDay | OneHour | OneMonth | OneWeek]
        retry_options={
            "durationInSeconds": 60
        },
        s3_backup_mode="FailedDocumentsOnly",  # [AllDocuments | FailedDocumentsOnly]
        vpc_configuration=opensearch_dest_vpc_config)

    firehose_to_ops_delivery_stream = aws_kinesisfirehose.CfnDeliveryStream(self, "KinesisFirehoseToES",
        delivery_stream_name=OPENSEARCH_INDEX_NAME.value_as_string,
        delivery_stream_type="DirectPut",
        elasticsearch_destination_configuration=opensearch_dest_config,
        tags=[{
            "key": "Name",
            "value": OPENSEARCH_INDEX_NAME.value_as_string
        }])

    cdk.CfnOutput(self, 'BastionHostId',
        value=bastion_host.instance_id,
        export_name='BastionHostId')
    cdk.CfnOutput(self, 'OpenSearchDomainEndpoint',
        value=opensearch_domain.domain_endpoint,
        export_name='OpenSearchDomainEndpoint')
    cdk.CfnOutput(self, 'OpenSearchDashboardsURL',
        value=f"{opensearch_domain.domain_endpoint}/_dashboards/",
        export_name='OpenSearchDashboardsURL')
    cdk.CfnOutput(self, 'MasterUserSecretId',
        value=master_user_secret.secret_name,
        export_name='MasterUserSecretId')
    cdk.CfnOutput(self, '{}_S3DestBucket'.format(self.stack_name),
        value=s3_bucket.bucket_name,
        export_name='S3DestBucket')
    cdk.CfnOutput(self, 'FirehoseRoleArn',
        value=firehose_role.role_arn,
        export_name='FirehoseRoleArn')
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    EMR_EC2_KEY_PAIR_NAME = cdk.CfnParameter(self, 'EMREC2KeyPairName',
        type='String',
        description='Amazon EMR EC2 Instance KeyPair name',
        default='emr')

    EMR_CLUSTER_NAME = cdk.CfnParameter(self, 'EMRClusterName',
        type='String',
        description='Amazon EMR Cluster name',
        default='my-emr-cluster')

    vpc_name = self.node.try_get_context("vpc_name")
    vpc = aws_ec2.Vpc.from_lookup(self, "ExistingVPC",
        is_default=True,
        vpc_name=vpc_name)

    # vpc = aws_ec2.Vpc(self, "EMRStackVPC",
    #     max_azs=2,
    #     gateway_endpoints={
    #         "S3": aws_ec2.GatewayVpcEndpointOptions(
    #             service=aws_ec2.GatewayVpcEndpointAwsService.S3
    #         )
    #     })

    emr_instances = aws_emr.CfnCluster.JobFlowInstancesConfigProperty(
        core_instance_group=aws_emr.CfnCluster.InstanceGroupConfigProperty(
            instance_count=2,
            instance_type="m5.xlarge",
            market="ON_DEMAND"),
        ec2_subnet_id=vpc.public_subnets[0].subnet_id,
        keep_job_flow_alive_when_no_steps=True,  # After the last step completes: Cluster waits
        master_instance_group=aws_emr.CfnCluster.InstanceGroupConfigProperty(
            instance_count=1,
            instance_type="m5.xlarge",
            market="ON_DEMAND"),
        termination_protected=True)

    emr_cfn_cluster = aws_emr.CfnCluster(self, "MyEMRCluster",
        instances=emr_instances,
        # In order to use the default role for `job_flow_role`, you must have already created it using the CLI or console
        job_flow_role="EMR_EC2_DefaultRole",
        name=EMR_CLUSTER_NAME.value_as_string,
        # service_role="EMR_DefaultRole_V2",
        service_role="EMR_DefaultRole",
        applications=[
            aws_emr.CfnCluster.ApplicationProperty(name="Hadoop"),
            aws_emr.CfnCluster.ApplicationProperty(name="Hive"),
            aws_emr.CfnCluster.ApplicationProperty(name="JupyterHub"),
            aws_emr.CfnCluster.ApplicationProperty(name="Livy"),
            aws_emr.CfnCluster.ApplicationProperty(name="Spark"),
            aws_emr.CfnCluster.ApplicationProperty(name="JupyterEnterpriseGateway")
        ],
        bootstrap_actions=None,
        configurations=[
            aws_emr.CfnCluster.ConfigurationProperty(
                classification="hive-site",
                configuration_properties={
                    "hive.metastore.client.factory.class": "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory"
                }),
            aws_emr.CfnCluster.ConfigurationProperty(
                classification="spark-hive-site",
                configuration_properties={
                    "hive.metastore.client.factory.class": "com.amazonaws.glue.catalog.metastore.AWSGlueDataCatalogHiveClientFactory"
                })
        ],
        ebs_root_volume_size=10,
        log_uri="s3n://aws-logs-{account}-{region}/elasticmapreduce/".format(account=cdk.Aws.ACCOUNT_ID, region=cdk.Aws.REGION),
        release_label="emr-6.5.0",
        scale_down_behavior="TERMINATE_AT_TASK_COMPLETION",
        # tags=[cdk.CfnTag(
        #     key="for-use-with-amazon-emr-managed-policies",
        #     value="true"
        # )],
        visible_to_all_users=True)
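
# With keep_job_flow_alive_when_no_steps=True, the cluster waits for work to be
# submitted after creation. A hedged sketch of submitting a Spark step with
# boto3; the cluster id placeholder and the S3 script location are illustrative.
import boto3

emr_client = boto3.client('emr')

response = emr_client.add_job_flow_steps(
    JobFlowId='j-XXXXXXXXXXXXX',  # from the console or `aws emr list-clusters`
    Steps=[{
        'Name': 'spark-etl-example',
        'ActionOnFailure': 'CONTINUE',
        'HadoopJarStep': {
            # command-runner.jar lets a step run spark-submit on the master node.
            'Jar': 'command-runner.jar',
            'Args': ['spark-submit', 's3://my-bucket/scripts/etl_job.py']
        }
    }])
print(response['StepIds'])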
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    EC2_KEY_PAIR_NAME = cdk.CfnParameter(self, 'EC2KeyPairName',
        type='String',
        description='Amazon EC2 Instance KeyPair name')

    vpc_name = self.node.try_get_context("vpc_name")
    vpc = aws_ec2.Vpc.from_lookup(self, "ExistingVPC",
        is_default=True,
        vpc_name=vpc_name)

    # vpc = aws_ec2.Vpc(self, "JenkinsOnEC2Stack",
    #     max_azs=2,
    #     gateway_endpoints={
    #         "S3": aws_ec2.GatewayVpcEndpointOptions(
    #             service=aws_ec2.GatewayVpcEndpointAwsService.S3
    #         )
    #     })

    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceClass.html
    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceSize.html#aws_cdk.aws_ec2.InstanceSize
    ec2_instance_type = aws_ec2.InstanceType.of(
        aws_ec2.InstanceClass.BURSTABLE3, aws_ec2.InstanceSize.MEDIUM)

    sg_jenkins_host = aws_ec2.SecurityGroup(self, "JenkinsHostSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for a jenkins host',
        security_group_name='jenkins-host-sg')
    cdk.Tags.of(sg_jenkins_host).add('Name', 'jenkins-host-sg')

    #TODO: SHOULD restrict the IP range allowed SSH access
    sg_jenkins_host.add_ingress_rule(peer=aws_ec2.Peer.ipv4("0.0.0.0/0"),
        connection=aws_ec2.Port.tcp(22),
        description='SSH access')
    sg_jenkins_host.add_ingress_rule(peer=aws_ec2.Peer.ipv4("0.0.0.0/0"),
        connection=aws_ec2.Port.tcp(80),
        description='HTTP access')

    jenkins_host = aws_ec2.Instance(self, "JenkinsHost",
        vpc=vpc,
        instance_type=ec2_instance_type,
        machine_image=aws_ec2.MachineImage.latest_amazon_linux(
            generation=aws_ec2.AmazonLinuxGeneration.AMAZON_LINUX_2,
            edition=aws_ec2.AmazonLinuxEdition.STANDARD,
            kernel=aws_ec2.AmazonLinuxKernel.KERNEL5_X),
        vpc_subnets=aws_ec2.SubnetSelection(subnet_type=aws_ec2.SubnetType.PUBLIC),
        security_group=sg_jenkins_host,
        key_name=EC2_KEY_PAIR_NAME.value_as_string)

    # Script in S3 as Asset
    user_data_asset = aws_s3_assets.Asset(self, "JenkinsEC2UserData",
        path=os.path.join(os.path.dirname(__file__), "user-data/install_jenkins.sh"))

    local_path = jenkins_host.user_data.add_s3_download_command(
        bucket=user_data_asset.bucket,
        bucket_key=user_data_asset.s3_object_key)

    # Userdata executes the script downloaded from S3
    jenkins_host.user_data.add_execute_file_command(file_path=local_path)
    user_data_asset.grant_read(jenkins_host.role)

    cdk.CfnOutput(self, 'JenkinsHostId',
        value=jenkins_host.instance_id,
        export_name='JenkinsHostId')
    cdk.CfnOutput(self, 'JenkinsHostPublicDNSName',
        value=jenkins_host.instance_public_dns_name,
        export_name='JenkinsHostPublicDNSName')
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    OPENSEARCH_DOMAIN_NAME = cdk.CfnParameter(self, 'OpenSearchDomainName',
        type='String',
        description='Amazon OpenSearch Service domain name',
        default='opensearch-{}'.format(''.join(random.sample((string.ascii_letters), k=5))),
        allowed_pattern='[a-z]+[A-Za-z0-9\-]+')

    EC2_KEY_PAIR_NAME = cdk.CfnParameter(self, 'EC2KeyPairName',
        type='String',
        description='Amazon EC2 Instance KeyPair name')

    #XXX: To create the OpenSearch cluster in an existing VPC,
    # remove the comments from the code below,
    # comment out the vpc = aws_ec2.Vpc(..) code,
    # then pass -c vpc_name=your-existing-vpc to the cdk command,
    # for example,
    # cdk -c vpc_name=your-existing-vpc synth
    #
    # vpc_name = self.node.try_get_context('vpc_name')
    # vpc = aws_ec2.Vpc.from_lookup(self, 'ExistingVPC',
    #     is_default=True,
    #     vpc_name=vpc_name
    # )

    vpc = aws_ec2.Vpc(self, "OpenSearchVPC",
        max_azs=3,
        gateway_endpoints={
            "S3": aws_ec2.GatewayVpcEndpointOptions(
                service=aws_ec2.GatewayVpcEndpointAwsService.S3)
        })

    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceClass.html
    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceSize.html#aws_cdk.aws_ec2.InstanceSize
    ec2_instance_type = aws_ec2.InstanceType.of(
        aws_ec2.InstanceClass.BURSTABLE3, aws_ec2.InstanceSize.MEDIUM)

    sg_bastion_host = aws_ec2.SecurityGroup(self, "BastionHostSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for a bastion host',
        security_group_name='bastion-host-sg')
    cdk.Tags.of(sg_bastion_host).add('Name', 'bastion-host-sg')

    #TODO: SHOULD restrict the IP range allowed SSH access
    sg_bastion_host.add_ingress_rule(peer=aws_ec2.Peer.ipv4("0.0.0.0/0"),
        connection=aws_ec2.Port.tcp(22),
        description='SSH access')

    bastion_host = aws_ec2.Instance(self, "BastionHost",
        vpc=vpc,
        instance_type=ec2_instance_type,
        machine_image=aws_ec2.MachineImage.latest_amazon_linux(),
        vpc_subnets=aws_ec2.SubnetSelection(subnet_type=aws_ec2.SubnetType.PUBLIC),
        security_group=sg_bastion_host,
        key_name=EC2_KEY_PAIR_NAME.value_as_string)

    sg_use_opensearch = aws_ec2.SecurityGroup(self, "OpenSearchClientSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for an opensearch client',
        security_group_name='use-opensearch-cluster-sg')
    cdk.Tags.of(sg_use_opensearch).add('Name', 'use-opensearch-cluster-sg')

    sg_opensearch_cluster = aws_ec2.SecurityGroup(self, "OpenSearchSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for an opensearch cluster',
        security_group_name='opensearch-cluster-sg')
    cdk.Tags.of(sg_opensearch_cluster).add('Name', 'opensearch-cluster-sg')

    sg_opensearch_cluster.add_ingress_rule(peer=sg_opensearch_cluster,
        connection=aws_ec2.Port.all_tcp(),
        description='opensearch-cluster-sg')
    sg_opensearch_cluster.add_ingress_rule(peer=sg_use_opensearch,
        connection=aws_ec2.Port.tcp(443),
        description='use-opensearch-cluster-sg')
    sg_opensearch_cluster.add_ingress_rule(peer=sg_use_opensearch,
        connection=aws_ec2.Port.tcp_range(9200, 9300),
        description='use-opensearch-cluster-sg')
    sg_opensearch_cluster.add_ingress_rule(peer=sg_bastion_host,
        connection=aws_ec2.Port.tcp(443),
        description='bastion-host-sg')
    sg_opensearch_cluster.add_ingress_rule(peer=sg_bastion_host,
        connection=aws_ec2.Port.tcp_range(9200, 9300),
        description='bastion-host-sg')

    master_user_secret = aws_secretsmanager.Secret(self, "OpenSearchMasterUserSecret",
        generate_secret_string=aws_secretsmanager.SecretStringGenerator(
            secret_string_template=json.dumps({"username": "******"}),
            generate_string_key="password",
            # Master password must be at least 8 characters long and contain at least one uppercase letter,
            # one lowercase letter, one number, and one special character.
            password_length=8))

    #XXX: aws cdk elasticsearch example - https://github.com/aws/aws-cdk/issues/2873
    # You should camelCase the property names instead of PascalCase
    opensearch_domain = aws_opensearchservice.Domain(self, "OpenSearch",
        domain_name=OPENSEARCH_DOMAIN_NAME.value_as_string,
        version=aws_opensearchservice.EngineVersion.OPENSEARCH_1_0,
        capacity={
            "master_nodes": 3,
            "master_node_instance_type": "r6g.large.search",
            "data_nodes": 3,
            "data_node_instance_type": "r6g.large.search"
        },
        ebs={
            "volume_size": 10,
            "volume_type": aws_ec2.EbsDeviceVolumeType.GP2
        },
        #XXX: az_count must be equal to vpc subnets count.
        zone_awareness={
            "availability_zone_count": 3
        },
        logging={
            "slow_search_log_enabled": True,
            "app_log_enabled": True,
            "slow_index_log_enabled": True
        },
        fine_grained_access_control=aws_opensearchservice.AdvancedSecurityOptions(
            master_user_name=master_user_secret.secret_value_from_json("username").to_string(),
            master_user_password=master_user_secret.secret_value_from_json("password")),
        # Enforcing HTTPS is required when fine-grained access control is enabled.
        enforce_https=True,
        # Node-to-node encryption is required when fine-grained access control is enabled.
        node_to_node_encryption=True,
        # Encryption-at-rest is required when fine-grained access control is enabled.
        encryption_at_rest={
            "enabled": True
        },
        use_unsigned_basic_auth=True,
        security_groups=[sg_opensearch_cluster],
        automated_snapshot_start_hour=17,  # 2 AM (GMT+9)
        vpc=vpc,
        vpc_subnets=[aws_ec2.SubnetSelection(one_per_az=True, subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT)],
        removal_policy=cdk.RemovalPolicy.DESTROY  # default: cdk.RemovalPolicy.RETAIN
    )
    cdk.Tags.of(opensearch_domain).add('Name', f'{OPENSEARCH_DOMAIN_NAME.value_as_string}')

    cdk.CfnOutput(self, 'BastionHostId',
        value=bastion_host.instance_id,
        export_name='BastionHostId')
    cdk.CfnOutput(self, 'OpenSearchDomainEndpoint',
        value=opensearch_domain.domain_endpoint,
        export_name='OpenSearchDomainEndpoint')
    cdk.CfnOutput(self, 'OpenSearchDashboardsURL',
        value=f"{opensearch_domain.domain_endpoint}/_dashboards/",
        export_name='OpenSearchDashboardsURL')
    cdk.CfnOutput(self, 'MasterUserSecretId',
        value=master_user_secret.secret_name,
        export_name='MasterUserSecretId')
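
# An illustrative smoke test, run from the bastion host (the domain is
# VPC-only): fetch the master user credentials and call the cluster health API.
# The SecretId and endpoint values below are placeholders for the
# MasterUserSecretId and OpenSearchDomainEndpoint stack outputs.
import base64
import json
import urllib.request

import boto3

secret_string = boto3.client('secretsmanager').get_secret_value(
    SecretId='<MasterUserSecretId output>')['SecretString']
secret = json.loads(secret_string)

endpoint = '<OpenSearchDomainEndpoint output>'
req = urllib.request.Request(f'https://{endpoint}/_cluster/health')
# Fine-grained access control accepts HTTP Basic auth with the master user.
credentials = base64.b64encode(f"{secret['username']}:{secret['password']}".encode()).decode()
req.add_header('Authorization', f'Basic {credentials}')
with urllib.request.urlopen(req) as resp:
    print(json.loads(resp.read()))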
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    vpc_name = self.node.try_get_context('vpc_name')
    vpc = aws_ec2.Vpc.from_lookup(self, 'ExistingVPC',
        is_default=True,
        vpc_name=vpc_name)

    #XXX: create a new vpc for the msk cluster
    # vpc = aws_ec2.Vpc(self, 'VpcStack',
    #     max_azs=3,
    #     gateway_endpoints={
    #         "S3": aws_ec2.GatewayVpcEndpointOptions(
    #             service=aws_ec2.GatewayVpcEndpointAwsService.S3
    #         )
    #     })

    MSK_CLUSTER_NAME = cdk.CfnParameter(self, 'KafkaClusterName',
        type='String',
        description='Managed Streaming for Apache Kafka cluster name',
        default='MSK-{}'.format(''.join(random.sample((string.ascii_letters), k=5))),
        allowed_pattern='[A-Za-z0-9\-]+')

    KAFKA_VERSION = cdk.CfnParameter(self, 'KafkaVersion',
        type='String',
        description='Apache Kafka version',
        default='2.6.2',
        # Supported Apache Kafka versions
        # https://docs.aws.amazon.com/msk/latest/developerguide/supported-kafka-versions.html
        allowed_values=[
            '2.8.1', '2.8.0', '2.7.1', '2.6.2', '2.6.1', '2.6.0',
            '2.5.1', '2.4.1.1', '2.3.1', '2.2.1'
        ])

    #XXX: change broker instance type
    KAFKA_BROKER_INSTANCE_TYPE = cdk.CfnParameter(self, 'KafkaBrokerInstanceType',
        type='String',
        description='Apache Kafka Broker instance type',
        default='kafka.m5.large')

    #XXX: change volume size
    KAFKA_BROKER_EBS_VOLUME_SIZE = cdk.CfnParameter(self, 'KafkaBrokerEBSVolumeSize',
        type='Number',
        description='Apache Kafka Broker EBS Volume size (Minimum: 1 GiB, Maximum: 16384 GiB)',
        default='100',
        min_value=1,
        max_value=16384)

    MSK_CLIENT_SG_NAME = 'use-msk-sg-{}'.format(''.join(random.sample((string.ascii_lowercase), k=5)))
    sg_use_msk = aws_ec2.SecurityGroup(self, 'KafkaClientSecurityGroup',
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for Amazon MSK client',
        security_group_name=MSK_CLIENT_SG_NAME)
    cdk.Tags.of(sg_use_msk).add('Name', MSK_CLIENT_SG_NAME)

    MSK_CLUSTER_SG_NAME = 'msk-sg-{}'.format(''.join(random.sample((string.ascii_lowercase), k=5)))
    sg_msk_cluster = aws_ec2.SecurityGroup(self, 'MSKSecurityGroup',
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for Amazon MSK Cluster',
        security_group_name=MSK_CLUSTER_SG_NAME)
    sg_msk_cluster.add_ingress_rule(peer=sg_use_msk,
        connection=aws_ec2.Port.tcp(2181),
        description='msk client security group')
    sg_msk_cluster.add_ingress_rule(peer=sg_use_msk,
        connection=aws_ec2.Port.tcp(9092),
        description='msk client security group')
    sg_msk_cluster.add_ingress_rule(peer=sg_use_msk,
        connection=aws_ec2.Port.tcp(9094),
        description='msk client security group')
    cdk.Tags.of(sg_msk_cluster).add('Name', MSK_CLUSTER_SG_NAME)

    msk_broker_ebs_storage_info = aws_msk.CfnCluster.EBSStorageInfoProperty(
        volume_size=KAFKA_BROKER_EBS_VOLUME_SIZE.value_as_number)
    msk_broker_storage_info = aws_msk.CfnCluster.StorageInfoProperty(
        ebs_storage_info=msk_broker_ebs_storage_info)

    msk_broker_node_group_info = aws_msk.CfnCluster.BrokerNodeGroupInfoProperty(
        client_subnets=vpc.select_subnets(subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).subnet_ids,
        instance_type=KAFKA_BROKER_INSTANCE_TYPE.value_as_string,
        security_groups=[sg_use_msk.security_group_id, sg_msk_cluster.security_group_id],
        storage_info=msk_broker_storage_info)

    msk_encryption_info = aws_msk.CfnCluster.EncryptionInfoProperty(
        encryption_in_transit=aws_msk.CfnCluster.EncryptionInTransitProperty(
            client_broker='TLS_PLAINTEXT',
            in_cluster=True))

    msk_cluster = aws_msk.CfnCluster(self, 'AWSKafkaCluster',
        broker_node_group_info=msk_broker_node_group_info,
        cluster_name=MSK_CLUSTER_NAME.value_as_string,
        #XXX: Supported Apache Kafka versions
        # https://docs.aws.amazon.com/msk/latest/developerguide/supported-kafka-versions.html
        kafka_version=KAFKA_VERSION.value_as_string,
        number_of_broker_nodes=3,
        encryption_info=msk_encryption_info,
        enhanced_monitoring='PER_TOPIC_PER_BROKER')

    amzn_linux = aws_ec2.MachineImage.latest_amazon_linux(
        generation=aws_ec2.AmazonLinuxGeneration.AMAZON_LINUX_2,
        edition=aws_ec2.AmazonLinuxEdition.STANDARD,
        virtualization=aws_ec2.AmazonLinuxVirt.HVM,
        storage=aws_ec2.AmazonLinuxStorage.GENERAL_PURPOSE,
        cpu_type=aws_ec2.AmazonLinuxCpuType.X86_64)

    KAFKA_CLIENT_EC2_SG_NAME = 'kafka-client-ec2-sg-{}'.format(''.join(random.sample((string.ascii_lowercase), k=5)))
    sg_kafka_client_ec2_instance = aws_ec2.SecurityGroup(self, 'KafkaClientEC2InstanceSG',
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for Kafka Client EC2 Instance',
        security_group_name=KAFKA_CLIENT_EC2_SG_NAME)
    cdk.Tags.of(sg_kafka_client_ec2_instance).add('Name', KAFKA_CLIENT_EC2_SG_NAME)
    sg_kafka_client_ec2_instance.add_ingress_rule(peer=aws_ec2.Peer.ipv4("0.0.0.0/0"),
        connection=aws_ec2.Port.tcp(22))

    kafka_client_ec2_instance_role = aws_iam.Role(self, 'KafkaClientEC2InstanceRole',
        role_name='{}-KafkaClientEC2InstanceRole'.format(self.stack_name),
        assumed_by=aws_iam.ServicePrincipal('ec2.amazonaws.com'),
        managed_policies=[
            aws_iam.ManagedPolicy.from_aws_managed_policy_name('AmazonSSMManagedInstanceCore'),
            aws_iam.ManagedPolicy.from_aws_managed_policy_name('AmazonMSKReadOnlyAccess')
        ])

    msk_client_ec2_instance = aws_ec2.Instance(self, 'KafkaClientEC2Instance',
        instance_type=aws_ec2.InstanceType.of(instance_class=aws_ec2.InstanceClass.BURSTABLE2,
            instance_size=aws_ec2.InstanceSize.MICRO),
        machine_image=amzn_linux,
        vpc=vpc,
        availability_zone=vpc.select_subnets(subnet_type=aws_ec2.SubnetType.PRIVATE_WITH_NAT).availability_zones[0],
        instance_name='KafkaClientInstance',
        role=kafka_client_ec2_instance_role,
        security_group=sg_kafka_client_ec2_instance,
        vpc_subnets=aws_ec2.SubnetSelection(subnet_type=aws_ec2.SubnetType.PUBLIC))
    msk_client_ec2_instance.add_security_group(sg_use_msk)

    commands = '''
yum update -y
yum install python3.7 -y
yum install java-1.8.0-openjdk-devel -y

cd /home/ec2-user
echo "export PATH=.local/bin:$PATH" >> .bash_profile

mkdir -p opt
cd opt
wget https://archive.apache.org/dist/kafka/2.2.1/kafka_2.12-2.2.1.tgz
tar -xzf kafka_2.12-2.2.1.tgz
ln -nsf kafka_2.12-2.2.1 kafka

cd /home/ec2-user
wget https://bootstrap.pypa.io/get-pip.py
su -c "python3.7 get-pip.py --user" -s /bin/sh ec2-user
su -c "/home/ec2-user/.local/bin/pip3 install boto3 --user" -s /bin/sh ec2-user

chown -R ec2-user ./opt
chgrp -R ec2-user ./opt
'''
    msk_client_ec2_instance.user_data.add_commands(commands)

    cdk.CfnOutput(self, 'StackName', value=self.stack_name, export_name='StackName')
    cdk.CfnOutput(self, 'VpcId', value=vpc.vpc_id, export_name='VpcId')
    cdk.CfnOutput(self, 'MSKSecurityGroupID', value=sg_msk_cluster.security_group_id, export_name='MSKSecurityGroupID')
    cdk.CfnOutput(self, 'KafkaClientSecurityGroupID', value=sg_use_msk.security_group_id, export_name='KafkaClientSecurityGroupID')
    cdk.CfnOutput(self, 'MSKClusterArn', value=msk_cluster.ref, export_name='MSKClusterArn')
    cdk.CfnOutput(self, 'KafkaClientEC2InstancePublicDNS', value=msk_client_ec2_instance.instance_public_dns_name, export_name='KafkaClientEC2InstancePublicDNS')
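
# Once the cluster is active, Kafka clients need its bootstrap broker string.
# A hedged boto3 sketch; the ARN below is a placeholder for the MSKClusterArn
# stack output. With client_broker='TLS_PLAINTEXT', both the plaintext (9092)
# and TLS (9094) bootstrap strings are available.
import boto3

kafka_client = boto3.client('kafka')

brokers = kafka_client.get_bootstrap_brokers(
    ClusterArn='<MSKClusterArn output>')
print(brokers['BootstrapBrokerString'])
print(brokers['BootstrapBrokerStringTls'])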