def _generate_sam_template_with_assets(self, chalice_out_dir, package_id):
    # type: (str, str) -> str
    deployment_zip_path = os.path.join(self._sam_package_dir,
                                       'deployment.zip')
    sam_deployment_asset = assets.Asset(self, 'ChaliceAppCode',
                                        path=deployment_zip_path)
    sam_template_path = os.path.join(self._sam_package_dir, 'sam.json')
    sam_template_with_assets_path = os.path.join(
        chalice_out_dir, '%s.sam_with_assets.json' % package_id)
    with open(sam_template_path) as sam_template_file:
        sam_template = json.load(sam_template_file)
    for function in self._filter_resources(
            sam_template, 'AWS::Serverless::Function'):
        function['Properties']['CodeUri'] = {
            'Bucket': sam_deployment_asset.s3_bucket_name,
            'Key': sam_deployment_asset.s3_object_key
        }
    managed_layers = self._filter_resources(
        sam_template, 'AWS::Serverless::LayerVersion')
    if len(managed_layers) == 1:
        layer_filename = os.path.join(self._sam_package_dir,
                                      'layer-deployment.zip')
        layer_asset = assets.Asset(self, 'ChaliceManagedLayer',
                                   path=layer_filename)
        managed_layers[0]['Properties']['ContentUri'] = {
            'Bucket': layer_asset.s3_bucket_name,
            'Key': layer_asset.s3_object_key
        }
    with open(sam_template_with_assets_path, 'w') as f:
        f.write(json.dumps(sam_template, indent=2))
    return sam_template_with_assets_path
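# The method above relies on a _filter_resources helper that is not shown; a
# minimal sketch of what the call sites imply (this implementation is an
# assumption, not the original):
def _filter_resources(self, template, resource_type):
    # type: (dict, str) -> list
    return [resource for resource in template.get('Resources', {}).values()
            if resource.get('Type') == resource_type]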
def _emr_spark_step_task(self):
    # Add an EMR Step to run our PySpark job; an asset with our application
    # will be created and referenced in the job definition
    root_path = Path(os.path.dirname(os.path.abspath(__file__)))
    pyspark_script = root_path.joinpath('pyspark', 'example.py').as_posix()
    pyspark_example_asset = s3_assets.Asset(self, "PythonScript",
                                            path=pyspark_script)
    sample_spark_step = sfn.Task(
        self,
        "RunSparkExample",
        task=sfnt.EmrAddStep(
            # the concrete ClusterId will be picked up from the current state
            # of the state machine
            cluster_id=sfn.Data.string_at("$.ClusterId"),
            name="SparkExample",
            # `command-runner.jar` is a jar from AWS that can be used to
            # execute generic commands (like `spark-submit`); if you write
            # your programs in Java/Scala you can directly insert your jar
            # file here instead of the script location
            jar="command-runner.jar",
            args=[
                "spark-submit",
                "--deploy-mode",
                "cluster",
                "--master",
                "yarn",
                f"s3://{pyspark_example_asset.s3_bucket_name}/{pyspark_example_asset.s3_object_key}",
            ],
        ),
        result_path="DISCARD",
    )
    return sample_spark_step
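# A hypothetical usage sketch (the state machine name is illustrative): the
# task above expects an earlier state to have placed ClusterId on the
# execution state, after which it can be chained into a definition.
spark_step = self._emr_spark_step_task()
sfn.StateMachine(
    self, "SparkExampleStateMachine",
    definition=spark_step,
)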
def upload_to_assets_bucket(self, construct_id, file_name):
    file_path = './assets'
    asset_bucket = s3assets.Asset(self, id=f'{construct_id}-AssetBucket',
                                  path=os.path.join(file_path, file_name))
    return asset_bucket
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    self._role = iam.Role(
        self,
        id='role',
        assumed_by=iam.ServicePrincipal('ec2.amazonaws.com'))

    self._files = s3.Asset(self, id='files', path='files')
    self._files.grant_read(self._role)
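# A short usage sketch (an assumption, not from the original construct): with
# read access granted above, an instance launched with self._role can copy the
# asset down in its user data.
user_data = ec2.UserData.for_linux()
user_data.add_s3_download_command(
    bucket=self._files.bucket,
    bucket_key=self._files.s3_object_key,
    local_file='/opt/files',
)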
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    # The code that defines your stack goes here
    # bucket = s3.Bucket(self, "sitebucket",
    #                    bucket_name="fadhil-getting-started-bucket",
    #                    public_read_access=True,
    #                    website_index_document="index.html")
    # core.CfnOutput(self, "sitebucketname", value=bucket.bucket_name)
    # core.CfnOutput(self, "siteBucketWebsite", value=bucket.bucket_website_url)

    asset = assets.Asset(self, "SampleAsset", path="./sample-asset/index.html")
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    cfn_role = iam.Role(
        self, 'CloudFormationRole',
        assumed_by=iam.ServicePrincipal('cloudformation.amazonaws.com'),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'AmazonEC2FullAccess'),
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'IAMFullAccess'),
        ])

    alice = iam.User(
        self, 'Alice',
        user_name='alice',
        password=SecretValue.ssm_secure(
            parameter_name='/demo/permission-boundary/password',
            version='2',
        ),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'AmazonEC2ReadOnlyAccess'),
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'AWSCloudFormationFullAccess'),
        ],
    )
    alice.add_to_principal_policy(
        iam.PolicyStatement(
            actions=['iam:PassRole'],
            resources=[cfn_role.role_arn],
        ))
    alice.add_to_principal_policy(
        iam.PolicyStatement(
            actions=['iam:ListRoles'],
            resources=['*'],
        ))

    template = assets.Asset(
        self, 'Template',
        path='./files/template.yml',
        readers=[alice, cfn_role],
    )

    CfnOutput(self, 'TemplateUrl', value=template.http_url)
def _update_sam_template(self):
    deployment_zip_path = os.path.join(self.sam_package_dir, 'deployment.zip')
    sam_deployment_asset = assets.Asset(
        self, 'ChaliceAppCode', path=deployment_zip_path)
    sam_template_path = os.path.join(self.sam_package_dir, 'sam.json')
    with open(sam_template_path) as sam_template_file:
        sam_template = json.load(sam_template_file)
        functions = filter(
            lambda resource: resource['Type'] == 'AWS::Serverless::Function',
            sam_template['Resources'].values()
        )
        for function in functions:
            function['Properties']['CodeUri'] = {
                'Bucket': sam_deployment_asset.s3_bucket_name,
                'Key': sam_deployment_asset.s3_object_key
            }
    return sam_template
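# A hypothetical follow-up (not part of the original method): the returned
# dict can be spliced into the stack with CfnInclude so the rewritten SAM
# resources are deployed as part of the CDK app.
sam_template = self._update_sam_template()
core.CfnInclude(self, 'ChaliceApp', template=sam_template)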
def create_api_gateway(self) -> None:
    """Create API gateway and lambda integration"""

    # api_stage = core.CfnParameter(self, id="ApiStage", type=str)
    openapi_asset = s3_assets.Asset(
        self,
        "openapi_asset",
        path="cbers2stac/openapi/core-item-search-query-integrated.yaml",
    )
    data = core.Fn.transform("AWS::Include",
                             {"Location": openapi_asset.s3_object_url})
    definition = apigateway.AssetApiDefinition.from_inline(data)
    apigateway.SpecRestApi(
        self,
        id="stacapi",
        api_definition=definition,
        deploy_options=apigateway.StageOptions(
            logging_level=apigateway.MethodLoggingLevel.INFO),
    )
def __init__(
    self,
    scope: core.Construct,
    construct_id: str,
    **kwargs,
) -> None:
    super().__init__(scope, construct_id, **kwargs)

    execution_role_arn = core.CfnParameter(
        self,
        "ExecutionRoleArn",
        type="String",
        description="The SageMaker Studio execution role",
    )
    portfolio_name = core.CfnParameter(
        self,
        "PortfolioName",
        type="String",
        description="The name of the portfolio",
        default="SageMaker Organization Templates",
    )
    portfolio_owner = core.CfnParameter(
        self,
        "PortfolioOwner",
        type="String",
        description="The owner of the portfolio.",
        default="administrator",
    )
    product_version = core.CfnParameter(
        self,
        "ProductVersion",
        type="String",
        description="The product version to deploy",
        default="1.0",
    )

    portfolio = aws_servicecatalog.CfnPortfolio(
        self,
        "Portfolio",
        display_name=portfolio_name.value_as_string,
        description="Organization templates for AB Testing pipeline",
        provider_name=portfolio_owner.value_as_string,
    )

    asset = aws_s3_assets.Asset(
        self, "TemplateAsset", path="./ab-testing-pipeline.yml"
    )

    product = aws_servicecatalog.CfnCloudFormationProduct(
        self,
        "Product",
        name="A/B Testing Deployment Pipeline",
        description="Amazon SageMaker Project for A/B Testing models",
        owner=portfolio_owner.value_as_string,
        provisioning_artifact_parameters=[
            aws_servicecatalog.CfnCloudFormationProduct.ProvisioningArtifactPropertiesProperty(
                name=product_version.value_as_string,
                info={"LoadTemplateFromURL": asset.s3_url},
            ),
        ],
        tags=[
            core.CfnTag(key="sagemaker:studio-visibility", value="true"),
        ],
    )

    aws_servicecatalog.CfnPortfolioProductAssociation(
        self,
        "ProductAssoication",
        portfolio_id=portfolio.ref,
        product_id=product.ref,
    )

    launch_role = aws_iam.Role.from_role_arn(
        self,
        "LaunchRole",
        role_arn=f"arn:{self.partition}:iam::{self.account}:role/service-role/AmazonSageMakerServiceCatalogProductsLaunchRole",
    )

    aws_servicecatalog.CfnLaunchRoleConstraint(
        self,
        "LaunchRoleConstraint",
        portfolio_id=portfolio.ref,
        product_id=product.ref,
        role_arn=launch_role.role_arn,
        description=f"Launch as {launch_role.role_arn}",
    )

    aws_servicecatalog.CfnPortfolioPrincipalAssociation(
        self,
        "PortfolioPrincipalAssociation",
        portfolio_id=portfolio.ref,
        principal_arn=execution_role_arn.value_as_string,
        principal_type="IAM",
    )

    # Create the deployment asset as an output to pass to pipeline stack
    deployment_asset = aws_s3_assets.Asset(
        self, "DeploymentAsset", path="./deployment_pipeline"
    )
    deployment_asset.grant_read(grantee=launch_role)

    # Output the deployment bucket and key, for input into pipeline stack
    core.CfnOutput(
        self,
        "CodeCommitSeedBucket",
        value=deployment_asset.s3_bucket_name,
    )
    core.CfnOutput(self, "CodeCommitSeedKey",
                   value=deployment_asset.s3_object_key)
def __init__(self, scope: core.Construct, id: str, vpc: ec2.IVpc,
             cluster: NeptuneCluster, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    role = iam.Role(
        self,
        'Ec2Role',
        assumed_by=iam.ServicePrincipal('ec2.amazonaws.com'),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'AmazonSSMManagedInstanceCore'),
        ],
    )

    config_asset = s3_assets.Asset(
        self,
        'ConfigYaml',
        path='./files/neptune-remote.yaml',
        readers=[role],
    )

    sg = ec2.SecurityGroup(
        self,
        'SecurityGroup',
        vpc=vpc,
    )

    user_data = ec2.UserData.for_linux()
    user_data.add_commands(
        'yum update -y',
        'yum install -y java-1.8.0-devel',
        'cd ~',
        # Install the CA certificate
        'mkdir /tmp/certs/',
        'cp /etc/pki/java/cacerts /tmp/certs/cacerts',
        'wget https://www.amazontrust.com/repository/SFSRootCAG2.cer',
        'keytool -importcert -alias neptune-ca -keystore /tmp/certs/cacerts -file /root/SFSRootCAG2.cer -noprompt -storepass changeit',
        # Download Gremlin console
        'wget https://archive.apache.org/dist/tinkerpop/3.4.8/apache-tinkerpop-gremlin-console-3.4.8-bin.zip',
        'unzip apache-tinkerpop-gremlin-console-3.4.8-bin.zip',
        # Download default configuration and update endpoint url
        'cd apache-tinkerpop-gremlin-console-3.4.8',
        'aws s3 cp s3://{bucket}/{key} conf/neptune-remote.yaml'.format(
            bucket=config_asset.s3_bucket_name,
            key=config_asset.s3_object_key,
        ),
        'sed -i "s/ENDPOINT_URL/{endpoint_url}/g" conf/neptune-remote.yaml'
        .format(endpoint_url=cluster.endpoint),
    )

    ec2.Instance(
        self,
        'Instance',
        role=role,
        vpc=vpc,
        security_group=sg,
        user_data=user_data,
        user_data_causes_replacement=True,
        instance_type=ec2.InstanceType.of(
            instance_class=ec2.InstanceClass.BURSTABLE3_AMD,
            instance_size=ec2.InstanceSize.NANO,
        ),
        machine_image=ec2.AmazonLinuxImage(
            generation=ec2.AmazonLinuxGeneration.AMAZON_LINUX_2,
        ),
    )

    self.role = role
    self.security_group = sg
def define_userdata_asset(self, path, filename):
    full_path = os.path.join(path, filename)
    if os.path.isfile(full_path):
        return s3_assets.Asset(self, "UserDataAsset", path=full_path)
    else:
        print(f"Could not find {full_path}")
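# Illustrative usage of the helper above; it returns None when the file is
# missing, so callers should guard before wiring the asset into user data
# (`instance` is assumed to be an ec2.Instance defined elsewhere; names are
# hypothetical):
userdata_asset = self.define_userdata_asset('scripts', 'bootstrap.sh')
if userdata_asset is not None:
    local_path = instance.user_data.add_s3_download_command(
        bucket=userdata_asset.bucket,
        bucket_key=userdata_asset.s3_object_key,
    )
    instance.user_data.add_execute_file_command(file_path=local_path)
    userdata_asset.grant_read(instance.role)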
def __init__(self, app: core.App, id: str) -> None:
    super().__init__(app, id)

    ##################################
    # Lambda Timeouts (seconds) & Queue Redrive
    ##################################
    lambda_gatherer_timeout = 600
    lambda_joiner_timeout = 350
    # pa11y's timeout is set to 50, so the lambda is just a little longer
    lambda_a11y_scan_timeout = 55
    max_receive_count = 2

    ##################################
    # S3 Bucket with Domains
    ##################################
    asset = aws_s3_assets.Asset(
        self, 'domain-list', path=os.path.abspath('./domains/domains.csv'))

    ##################################
    # Domain Gatherer Lambda and Queue
    ##################################
    domain_queue = sqs.Queue(
        self,
        'domain-queue',
        visibility_timeout=core.Duration.seconds(
            (max_receive_count + 1) * lambda_gatherer_timeout),
        dead_letter_queue=sqs.DeadLetterQueue(
            max_receive_count=max_receive_count,
            queue=sqs.Queue(self, 'domain-queue-dlq',
                            retention_period=core.Duration.days(5))))

    lambda_gatherer = lambda_.Function(
        self,
        "domain-gatherer",
        code=lambda_.Code.from_asset('./lambdas/domain_gatherer'),
        handler="handler.main",
        timeout=core.Duration.seconds(lambda_gatherer_timeout),
        runtime=lambda_.Runtime.PYTHON_3_7,
        memory_size=150)
    lambda_gatherer.add_environment('SQS_URL', domain_queue.queue_url)
    lambda_gatherer.add_environment('BUCKET_NAME', asset.s3_bucket_name)
    lambda_gatherer.add_environment('OBJECT_KEY', asset.s3_object_key)

    lambda_gatherer_sqs_exec_policy = iam.PolicyStatement(
        effect=iam.Effect.ALLOW,
        actions=[
            'lambda:InvokeFunction', 'sqs:SendMessage', 'sqs:DeleteMessage',
            'sqs:SendMessageBatch', 'sqs:SetQueueAttributes',
            'sqs:GetQueueAttributes', 'sqs:GetQueueUrl'
        ],
        resources=[domain_queue.queue_arn])
    lambda_gatherer.add_to_role_policy(lambda_gatherer_sqs_exec_policy)
    domain_queue.grant_send_messages(lambda_gatherer)

    # trigger for 1st and 15th of the month at 18:00 UTC (1pm EST)
    lambda_gatherer_rule = events.Rule(
        self, "Lambda Gatherer Rule",
        schedule=events.Schedule.cron(
            minute='0', hour='18', day="1,15", month='*', year='*'))
    lambda_gatherer_rule.add_target(targets.LambdaFunction(lambda_gatherer))
    asset.grant_read(lambda_gatherer)

    ##################################
    # A11y Scanner Lambda and S3
    ##################################
    layer = lambda_.LayerVersion(
        self,
        'chrome-aws-lambda',
        code=lambda_.Code.from_asset('./lambdas/chrome_aws_lambda.zip'),
        compatible_runtimes=[lambda_.Runtime.NODEJS_12_X],
        description='A layer of chrome-aws-lambda')

    lambda_a11y_scan = lambda_.Function(
        self,
        "a11y-scan",
        code=lambda_.Code.from_asset('./lambdas/a11y_scan'),
        handler="index.handler",
        timeout=core.Duration.seconds(lambda_a11y_scan_timeout),
        runtime=lambda_.Runtime.NODEJS_12_X,
        memory_size=1000,
        layers=[layer])
    lambda_a11y_scan.add_event_source(
        sources.SqsEventSource(domain_queue, batch_size=1))

    # create s3 bucket to put results
    results_bucket = s3.Bucket(
        self,
        'results-bucket',
        versioned=False,
        removal_policy=core.RemovalPolicy.DESTROY,
        block_public_access=s3.BlockPublicAccess(
            block_public_acls=True,
            ignore_public_acls=True,
            block_public_policy=True,
            restrict_public_buckets=True),
        lifecycle_rules=[
            s3.LifecycleRule(enabled=True, expiration=core.Duration.days(10))
        ])
    lambda_a11y_scan.add_environment('BUCKET_NAME',
                                     results_bucket.bucket_name)
    results_bucket.grant_put(lambda_a11y_scan)

    ##################################
    # Results Joiner Lambda
    ##################################
    # create s3 bucket to put site data
    data_bucket = s3.Bucket(
        self,
        'data-bucket',
        versioned=False,
        removal_policy=core.RemovalPolicy.DESTROY,
        block_public_access=s3.BlockPublicAccess(
            block_public_acls=True,
            ignore_public_acls=True,
            block_public_policy=True,
            restrict_public_buckets=True))

    lambda_joiner = lambda_.Function(
        self,
        "results-joiner",
        code=lambda_.Code.from_asset('./lambda-releases/results_joiner.zip'),
        handler="handler.main",
        timeout=core.Duration.seconds(lambda_joiner_timeout),
        runtime=lambda_.Runtime.PYTHON_3_7,
        memory_size=400)
    lambda_joiner.add_environment('DATA_BUCKET_NAME',
                                  data_bucket.bucket_name)
    lambda_joiner.add_environment('RESULTS_BUCKET_NAME',
                                  results_bucket.bucket_name)
    results_bucket.grant_read_write(lambda_joiner)
    data_bucket.grant_read_write(lambda_joiner)

    # trigger for 8th and 23rd of the month at 18:00 UTC (1pm EST)
    lambda_joiner_rule = events.Rule(
        self, "Lambda Joiner Rule",
        schedule=events.Schedule.cron(
            minute='0', hour='18', day="8,23", month='*', year='*'))
    lambda_joiner_rule.add_target(targets.LambdaFunction(lambda_joiner))
def __init__(self, scope: cdk.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    ### Parameters
    bootstrap_script_args = cdk.CfnParameter(
        self, 'BootstrapScriptArgs',
        type='String',
        default='',
        description='Space separated arguments passed to the bootstrap script.'
    )

    # create a VPC
    vpc = ec2.Vpc(self, 'VPC', cidr='10.0.0.0/16', max_azs=99)

    # create a private and public subnet per vpc
    selection = vpc.select_subnets(subnet_type=ec2.SubnetType.PRIVATE)

    # Output created subnets
    for i, public_subnet in enumerate(vpc.public_subnets):
        cdk.CfnOutput(self, 'PublicSubnet%i' % i,
                      value=public_subnet.subnet_id)
    for i, private_subnet in enumerate(vpc.private_subnets):
        cdk.CfnOutput(self, 'PrivateSubnet%i' % i,
                      value=private_subnet.subnet_id)
    cdk.CfnOutput(self, 'VPCId', value=vpc.vpc_id)

    # Create a Bucket
    bucket = s3.Bucket(self, "DataRepository")
    quickstart_bucket = s3.Bucket.from_bucket_name(self, 'QuickStartBucket',
                                                   'aws-quickstart')

    # Upload Bootstrap Script to that bucket
    bootstrap_script = assets.Asset(self, 'BootstrapScript',
                                    path='scripts/bootstrap.sh')

    # Upload parallel cluster post_install_script to that bucket
    pcluster_post_install_script = assets.Asset(
        self, 'PclusterPostInstallScript',
        path='scripts/post_install_script.sh')

    # Setup CloudTrail
    cloudtrail.Trail(self, 'CloudTrail', bucket=bucket)

    # Create a Cloud9 instance
    # Cloud9 doesn't have the ability to provide userdata
    # Because of this we need to use SSM run command
    cloud9_instance = cloud9.Ec2Environment(
        self, 'Cloud9Env',
        vpc=vpc,
        instance_type=ec2.InstanceType(instance_type_identifier='c5.large'))
    cdk.CfnOutput(self, 'URL', value=cloud9_instance.ide_url)

    # Create a keypair in lambda and store the private key in SecretsManager
    c9_createkeypair_role = iam.Role(
        self, 'Cloud9CreateKeypairRole',
        assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'))
    c9_createkeypair_role.add_managed_policy(
        iam.ManagedPolicy.from_aws_managed_policy_name(
            'service-role/AWSLambdaBasicExecutionRole'))
    # Add IAM permissions to the lambda role
    c9_createkeypair_role.add_to_policy(iam.PolicyStatement(
        actions=['ec2:CreateKeyPair', 'ec2:DeleteKeyPair'],
        resources=['*'],
    ))

    # Lambda for Cloud9 keypair
    c9_createkeypair_lambda = _lambda.Function(
        self, 'C9CreateKeyPairLambda',
        runtime=_lambda.Runtime.PYTHON_3_6,
        handler='lambda_function.handler',
        timeout=cdk.Duration.seconds(300),
        role=c9_createkeypair_role,
        code=_lambda.Code.asset('functions/source/c9keypair'),
        # code=_lambda.Code.from_bucket(
    )

    c9_createkeypair_provider = cr.Provider(
        self, "C9CreateKeyPairProvider",
        on_event_handler=c9_createkeypair_lambda)

    c9_createkeypair_cr = cfn.CustomResource(
        self, "C9CreateKeyPair",
        provider=c9_createkeypair_provider,
        properties={'ServiceToken': c9_createkeypair_lambda.function_arn})
    #c9_createkeypair_cr.node.add_dependency(instance_id)
    c9_ssh_private_key_secret = secretsmanager.CfnSecret(
        self, 'SshPrivateKeySecret',
        secret_string=c9_createkeypair_cr.get_att_string('PrivateKey'))

    # The iam policy has a <REGION> parameter that needs to be replaced.
    # We do it programmatically so future versions of the synth'd stack
    # template include all regions.
    with open('iam/ParallelClusterUserPolicy.json') as json_file:
        data = json.load(json_file)
        for s in data['Statement']:
            if s['Sid'] == 'S3ParallelClusterReadOnly':
                s['Resource'] = []
                for r in region_info.RegionInfo.regions:
                    s['Resource'].append(
                        'arn:aws:s3:::{0}-aws-parallelcluster*'.format(r.name))
        parallelcluster_user_policy = iam.CfnManagedPolicy(
            self, 'ParallelClusterUserPolicy',
            policy_document=iam.PolicyDocument.from_json(data))

    # Cloud9 IAM Role
    cloud9_role = iam.Role(
        self, 'Cloud9Role',
        assumed_by=iam.ServicePrincipal('ec2.amazonaws.com'))
    cloud9_role.add_managed_policy(
        iam.ManagedPolicy.from_aws_managed_policy_name(
            'AmazonSSMManagedInstanceCore'))
    cloud9_role.add_managed_policy(
        iam.ManagedPolicy.from_aws_managed_policy_name('AWSCloud9User'))
    cloud9_role.add_managed_policy(
        iam.ManagedPolicy.from_managed_policy_arn(
            self, 'AttachParallelClusterUserPolicy',
            parallelcluster_user_policy.ref))
    cloud9_role.add_to_policy(iam.PolicyStatement(
        resources=['*'],
        actions=[
            'ec2:DescribeInstances',
            'ec2:DescribeVolumes',
            'ec2:ModifyVolume'
        ]
    ))
    cloud9_role.add_to_policy(iam.PolicyStatement(
        resources=[c9_ssh_private_key_secret.ref],
        actions=['secretsmanager:GetSecretValue']
    ))
    bootstrap_script.grant_read(cloud9_role)
    pcluster_post_install_script.grant_read(cloud9_role)

    # Cloud9 User
    # user = iam.User(self, 'Cloud9User', password=cdk.SecretValue.plain_text('supersecretpassword'), password_reset_required=True)

    # Cloud9 Setup IAM Role
    cloud9_setup_role = iam.Role(
        self, 'Cloud9SetupRole',
        assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'))
    cloud9_setup_role.add_managed_policy(
        iam.ManagedPolicy.from_aws_managed_policy_name(
            'service-role/AWSLambdaBasicExecutionRole'))
    # Allow pcluster to be run in bootstrap
    cloud9_setup_role.add_managed_policy(
        iam.ManagedPolicy.from_managed_policy_arn(
            self, 'AttachParallelClusterUserPolicySetup',
            parallelcluster_user_policy.ref))

    # Add IAM permissions to the lambda role
    cloud9_setup_role.add_to_policy(iam.PolicyStatement(
        actions=[
            'cloudformation:DescribeStackResources',
            'ec2:AssociateIamInstanceProfile',
            'ec2:AuthorizeSecurityGroupIngress',
            'ec2:DescribeInstances',
            'ec2:DescribeInstanceStatus',
            'ec2:DescribeInstanceAttribute',
            'ec2:DescribeIamInstanceProfileAssociations',
            'ec2:DescribeVolumes',
            'ec2:DescribeVolumeAttribute',
            'ec2:DescribeVolumesModifications',
            'ec2:DescribeVolumeStatus',
            'ssm:DescribeInstanceInformation',
            'ec2:ModifyVolume',
            'ec2:ReplaceIamInstanceProfileAssociation',
            'ec2:ReportInstanceStatus',
            'ssm:SendCommand',
            'ssm:GetCommandInvocation',
            's3:GetObject',
            'lambda:AddPermission',
            'lambda:RemovePermission',
            'events:PutRule',
            'events:DeleteRule',
            'events:PutTargets',
            'events:RemoveTargets',
        ],
        resources=['*'],
    ))
    cloud9_setup_role.add_to_policy(iam.PolicyStatement(
        actions=['iam:PassRole'],
        resources=[cloud9_role.role_arn]
    ))
    cloud9_setup_role.add_to_policy(iam.PolicyStatement(
        actions=[
            'lambda:AddPermission',
            'lambda:RemovePermission'
        ],
        resources=['*']
    ))

    # Cloud9 Instance Profile
    c9_instance_profile = iam.CfnInstanceProfile(
        self, "Cloud9InstanceProfile",
        roles=[cloud9_role.role_name])

    # Lambda to add Instance Profile to Cloud9
    c9_instance_profile_lambda = _lambda.Function(
        self, 'C9InstanceProfileLambda',
        runtime=_lambda.Runtime.PYTHON_3_6,
        handler='lambda_function.handler',
        timeout=cdk.Duration.seconds(900),
        role=cloud9_setup_role,
        code=_lambda.Code.asset('functions/source/c9InstanceProfile'),
    )

    c9_instance_profile_provider = cr.Provider(
        self, "C9InstanceProfileProvider",
        on_event_handler=c9_instance_profile_lambda,
    )

    instance_id = cfn.CustomResource(
        self, "C9InstanceProfile",
        provider=c9_instance_profile_provider,
        properties={
            'InstanceProfile': c9_instance_profile.ref,
            'Cloud9Environment': cloud9_instance.environment_id,
        }
    )
    instance_id.node.add_dependency(cloud9_instance)

    # Lambda for Cloud9 Bootstrap
    c9_bootstrap_lambda = _lambda.Function(
        self, 'C9BootstrapLambda',
        runtime=_lambda.Runtime.PYTHON_3_6,
        handler='lambda_function.handler',
        timeout=cdk.Duration.seconds(900),
        role=cloud9_setup_role,
        code=_lambda.Code.asset('functions/source/c9bootstrap'),
    )

    c9_bootstrap_provider = cr.Provider(
        self, "C9BootstrapProvider",
        on_event_handler=c9_bootstrap_lambda)

    c9_bootstrap_cr = cfn.CustomResource(
        self, "C9Bootstrap",
        provider=c9_bootstrap_provider,
        properties={
            'Cloud9Environment': cloud9_instance.environment_id,
            'BootstrapPath': 's3://%s/%s' % (bootstrap_script.s3_bucket_name,
                                             bootstrap_script.s3_object_key),
            'BootstrapArguments': bootstrap_script_args,
            'VPCID': vpc.vpc_id,
            'MasterSubnetID': vpc.public_subnets[0].subnet_id,
            'ComputeSubnetID': vpc.private_subnets[0].subnet_id,
            'PostInstallScriptS3Url': "".join(
                ['s3://', pcluster_post_install_script.s3_bucket_name,
                 "/", pcluster_post_install_script.s3_object_key]),
            'PostInstallScriptBucket': pcluster_post_install_script.s3_bucket_name,
            'KeyPairId': c9_createkeypair_cr.ref,
            'KeyPairSecretArn': c9_ssh_private_key_secret.ref
        }
    )
    c9_bootstrap_cr.node.add_dependency(instance_id)
    c9_bootstrap_cr.node.add_dependency(c9_createkeypair_cr)
    c9_bootstrap_cr.node.add_dependency(c9_ssh_private_key_secret)
def __init__(
    self, scope: cdk.Construct, construct_id: str, domain: SMSDomainStack,
    **kwargs
) -> None:
    super().__init__(scope, construct_id, **kwargs)

    # Create the Lambda Stack for pre-populating the user home directory
    studio_user_lambda = StudioUserLambda(
        self, "FnPopulateStudioUser", vpc=domain.vpc, domain=domain.domain
    )

    # Generate the CF template for the studio user
    stage = cdk.Stage(self, "IntermediateStage")
    SMSIAMUserStack(
        stage,
        "StudioUserStack",
        synthesizer=cdk.BootstraplessSynthesizer(),
    )
    assembly = stage.synth(force=True)

    # Retrieve the local path of the CF template
    template_full_path = assembly.stacks[0].template_full_path

    # Upload CF template to s3 to create an asset to reference
    s3_asset = s3assets.Asset(
        self,
        "TemplateAsset",
        path=template_full_path,
    )

    # Create the Service Catalog product referencing the CF template
    sc_product = servicecatalog.CfnCloudFormationProduct(
        self,
        "StudioUser",
        owner="SageMakerStudio",
        provisioning_artifact_parameters=[
            servicecatalog.CfnCloudFormationProduct.ProvisioningArtifactPropertiesProperty(
                info={"LoadTemplateFromURL": s3_asset.s3_url}
            )
        ],
        name="StudioUser",
    )

    # Create the Product Portfolio
    sc_portfolio = servicecatalog.CfnPortfolio(
        self,
        "SageMakerPortfolio",
        display_name="SageMakerPortfolio",
        provider_name="SageMakerTemplate",
    )

    # Associate the Studio User Template to the Portfolio
    servicecatalog.CfnPortfolioProductAssociation(
        self,
        "ProductAssociation",
        portfolio_id=sc_portfolio.ref,
        product_id=sc_product.ref,
    )

    # create a role and associate it with the portfolio
    sc_role = iam.Role(
        self,
        "StudioAdminRole",
        assumed_by=iam.AnyPrincipal(),
        role_name="SageMakerStudioAdminRole",
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name(
                "AWSServiceCatalogEndUserFullAccess"
            ),
            iam.ManagedPolicy.from_aws_managed_policy_name(
                "AmazonSageMakerFullAccess"
            ),
        ],
    )
    sc_role.add_to_policy(
        iam.PolicyStatement(
            effect=iam.Effect.ALLOW,
            actions=[
                "sagemaker:CreateUserProfile",
            ],
            resources=["*"],
        )
    )
    sc_role.add_to_policy(
        iam.PolicyStatement(
            effect=iam.Effect.ALLOW,
            actions=[
                "lambda:InvokeFunction",
            ],
            resources=[studio_user_lambda.provider.service_token],
        )
    )
    sc_role.add_to_policy(
        iam.PolicyStatement(
            effect=iam.Effect.ALLOW,
            actions=[
                "s3:GetObject",
                "s3:ListBucket",
            ],
            resources=["*"],
        )
    )
    cdk.CfnOutput(
        self,
        "SageMakerStudioAdminRole",
        value=sc_role.role_arn,
        description="SageMakerStudioAdminRole",
        # export_name="SageMakerStudioAdminRole",
    )

    servicecatalog.CfnPortfolioPrincipalAssociation(
        self,
        "PortfolioPrincipalAssociacion",
        portfolio_id=sc_portfolio.ref,
        principal_arn=sc_role.role_arn,
        principal_type="IAM",
    )
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    stack = core.Stack.of(self)

    vpc = ec2.Vpc(
        self,
        'Vpc',
        cidr='10.0.0.0/24',
        max_azs=2,  # Need at least 2 AZs for Neptune
        nat_gateways=1,  # Saving on cost by only using 1 NAT
    )

    # Custom Neptune construct
    cluster = NeptuneCluster(
        self,
        'Cluster',
        vpc=vpc,
        db_instance_class='db.r5.large',
    )

    # An EC2 instance to run commands from
    instance = Instance(
        self,
        'Instance',
        vpc=vpc,
        cluster=cluster,
    )

    # Allow EC2 instance connect to Neptune cluster
    cluster.security_group.add_ingress_rule(instance.security_group,
                                            ec2.Port.tcp(8182))

    # Demo files
    vertices_asset = s3_assets.Asset(
        self,
        'VerticesCsv',
        path='./files/vertices.csv',
        readers=[cluster.role],
    )
    edges_asset = s3_assets.Asset(
        self,
        'EdgesCsv',
        path='./files/edges.csv',
        readers=[cluster.role],
    )

    core.CfnOutput(
        self,
        'Command1LoadVertices',
        value='curl -X POST -H \'{headers}\' {url} -d \'{request_body}\''
        .format(
            headers='Content-Type: application/json',
            url='https://{endpoint}:8182/loader'.format(
                endpoint=cluster.endpoint),
            request_body=json_encode({
                'failOnError': 'FALSE',
                'format': 'csv',
                'region': stack.region,
                'iamRoleArn': cluster.role.role_arn,
                'source': 's3://{bucket}/{key}'.format(
                    bucket=vertices_asset.s3_bucket_name,
                    key=vertices_asset.s3_object_key,
                ),
            }),
        ))
    core.CfnOutput(
        self,
        'Command2LoadEdges',
        value='curl -X POST -H \'{headers}\' {url} -d \'{request_body}\''
        .format(
            headers='Content-Type: application/json',
            url='https://{endpoint}:8182/loader'.format(
                endpoint=cluster.endpoint),
            request_body=json_encode({
                'failOnError': 'FALSE',
                'format': 'csv',
                'region': stack.region,
                'iamRoleArn': cluster.role.role_arn,
                'source': 's3://{bucket}/{key}'.format(
                    bucket=edges_asset.s3_bucket_name,
                    key=edges_asset.s3_object_key,
                ),
            }),
        ))
    core.CfnOutput(
        self,
        'Command3ListAllVertices',
        value=':remote connect tinkerpop.server conf/neptune-remote.yaml',
    )
    core.CfnOutput(
        self,
        'Command4ListAllVertices',
        value=':remote console',
    )
    core.CfnOutput(
        self,
        'Command5ListAllGamers',
        value='g.V().hasLabel("person")',
    )
    core.CfnOutput(
        self,
        'Command6ListAllGamers',
        value='g.V().hasLabel("game").groupCount().by("GameGenre")',
    )
    core.CfnOutput(
        self,
        'Command7ListAllGamers',
        value='g.V().has("GamerAlias","groundWalker").as("TargetGamer").out("likes").aggregate("self").in("likes").where(neq("TargetGamer")).out("likes").where(without("self")).dedup().values("GameTitle")',
    )
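# The outputs above use a json_encode helper that is not shown; presumably a
# thin wrapper over the standard library (this implementation is an
# assumption):
def json_encode(data):
    # Compact JSON keeps the generated curl command on a single line.
    return json.dumps(data)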
def __init__(
    self,
    scope: core.Construct,
    id: str,
    source_directory: Union[Path, str],
    stage_config: Optional[dict] = None,
    lambda_configs: Optional[dict] = None,
    environment: Optional[dict] = None,
    **kwargs,
):
    """
    Args:
        scope: cdk stack or construct
        id: identifier
        source_directory: the output directory of `chalice package` or the
            base path of the chalice codebase
        environment: environment variables to apply across lambdas
        stage_config: stage-level configuration options, e.g.
            `api_gateway_endpoint_type`; overwrites `dev`
        lambda_configs: lambda-level configurations, will be passed to
            `lambda_functions` in `dev`
        **kwargs:
    """
    super().__init__(scope, id, **kwargs)
    stage_config = stage_config if stage_config is not None else {}
    lambda_configs = lambda_configs if lambda_configs is not None else {}
    environment = environment if environment is not None else {}

    source_path = Path(source_directory)
    if Path(source_path, "app.py").exists():
        logging.debug("assuming app has not been packaged")
        config_path = Path(source_path, ".chalice", "config.json")
        original_config_text = config_path.read_text()
        config_data = json.loads(original_config_text)
        config_data["stages"]["dev"].update(stage_config)
        if lambda_configs:
            config_data["stages"]["dev"]["lambda_functions"] = {
                **config_data["stages"]["dev"].get("lambda_functions", {}),
                **lambda_configs,
            }
        updated_config = json.dumps(config_data, indent=2)
        logging.debug(updated_config)
        config_path.write_text(updated_config)
        output_dir = "chalice.out"
        sp.run(f"chalice package {output_dir}", shell=True, check=True)
        config_path.write_text(original_config_text)
        package_path = Path(output_dir)
    else:
        package_path = Path(source_directory)

    sam_path = Path(package_path, "sam.json")
    text = sam_path.read_text()
    self.template = json.loads(text)

    zip_path = Path(package_path, "deployment.zip")
    s3_asset = aws_s3_assets.Asset(self, "chalice-app-s3-object",
                                   path=zip_path.__fspath__())

    for resource_name, resource in self.template["Resources"].items():
        if resource["Type"] == "AWS::Serverless::Function":
            properties = resource["Properties"]
            properties["CodeUri"] = {
                "Bucket": s3_asset.s3_bucket_name,
                "Key": s3_asset.s3_object_key,
            }
            properties.setdefault("Environment", {}).setdefault(
                "Variables", {}).update(environment)

    core.CfnInclude(self, "chalice-app", template=self.template)
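# A hypothetical usage sketch, assuming the construct above is exposed as a
# class named ChaliceApp (the stack and directory names are illustrative):
class ChaliceAppStack(core.Stack):
    def __init__(self, scope, id, **kwargs):
        super().__init__(scope, id, **kwargs)
        ChaliceApp(
            self, "chalice-app",
            source_directory="./my-chalice-app",  # contains app.py
            environment={"LOG_LEVEL": "DEBUG"},
        )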
def __init__(
    self, scope: core.Construct, id: str, vpc_stack, kafka_stack, **kwargs
) -> None:
    super().__init__(scope, id, **kwargs)

    # log generator asset
    log_generator_py = assets.Asset(
        self, "log_generator", path=os.path.join(dirname, "log_generator.py")
    )
    # log generator requirements.txt asset
    log_generator_requirements_txt = assets.Asset(
        self,
        "log_generator_requirements_txt",
        path=os.path.join(dirname, "log_generator_requirements.txt"),
    )

    # get kafka brokers
    kafka_brokers = f'''"{kafka_get_brokers().replace(",", '", "')}"'''

    # update filebeat.yml to .asset
    filebeat_yml_asset = file_updated(
        os.path.join(dirname, "filebeat.yml"),
        {"$kafka_brokers": kafka_brokers},
    )
    filebeat_yml = assets.Asset(self, "filebeat_yml", path=filebeat_yml_asset)
    elastic_repo = assets.Asset(
        self, "elastic_repo", path=os.path.join(dirname, "elastic.repo")
    )

    # userdata for Filebeat
    fb_userdata = user_data_init(log_group_name="elkk/filebeat/instance")
    # instance for Filebeat
    fb_instance = ec2.Instance(
        self,
        "filebeat_client",
        instance_type=ec2.InstanceType(constants["FILEBEAT_INSTANCE"]),
        machine_image=ec2.AmazonLinuxImage(
            generation=ec2.AmazonLinuxGeneration.AMAZON_LINUX_2
        ),
        vpc=vpc_stack.get_vpc,
        vpc_subnets={"subnet_type": ec2.SubnetType.PUBLIC},
        key_name=constants["KEY_PAIR"],
        security_group=kafka_stack.get_kafka_client_security_group,
        user_data=fb_userdata,
    )
    core.Tag.add(fb_instance, "project", constants["PROJECT_TAG"])

    # create policies for EC2 to connect to kafka
    access_kafka_policy = iam.PolicyStatement(
        effect=iam.Effect.ALLOW,
        actions=["kafka:ListClusters", "kafka:GetBootstrapBrokers"],
        resources=["*"],
    )
    # add the role permissions
    fb_instance.add_to_role_policy(statement=access_kafka_policy)
    # add log permissions
    instance_add_log_permissions(fb_instance)
    # add access to the file assets
    filebeat_yml.grant_read(fb_instance)
    elastic_repo.grant_read(fb_instance)
    log_generator_py.grant_read(fb_instance)
    log_generator_requirements_txt.grant_read(fb_instance)

    # add commands to the userdata
    fb_userdata.add_commands(
        # get setup assets files
        f"aws s3 cp s3://{filebeat_yml.s3_bucket_name}/{filebeat_yml.s3_object_key} /home/ec2-user/filebeat.yml",
        f"aws s3 cp s3://{elastic_repo.s3_bucket_name}/{elastic_repo.s3_object_key} /home/ec2-user/elastic.repo",
        f"aws s3 cp s3://{log_generator_py.s3_bucket_name}/{log_generator_py.s3_object_key} /home/ec2-user/log_generator.py",
        f"aws s3 cp s3://{log_generator_requirements_txt.s3_bucket_name}/{log_generator_requirements_txt.s3_object_key} /home/ec2-user/requirements.txt",
        # get python3
        "yum install python3 -y",
        # get pip
        "yum install python-pip -y",
        # make log generator executable
        "chmod +x /home/ec2-user/log_generator.py",
        # get log generator requirements
        "python3 -m pip install -r /home/ec2-user/requirements.txt",
        # Filebeat
        "rpm --import https://packages.elastic.co/GPG-KEY-elasticsearch",
        # move Filebeat repo file
        "mv -f /home/ec2-user/elastic.repo /etc/yum.repos.d/elastic.repo",
        # install Filebeat
        "yum install filebeat -y",
        # move filebeat.yml to final location
        "mv -f /home/ec2-user/filebeat.yml /etc/filebeat/filebeat.yml",
        # update log generator ownership
        "chown -R ec2-user:ec2-user /home/ec2-user",
        # start Filebeat
        "systemctl start filebeat",
    )
    # add the signal
    fb_userdata.add_signal_on_exit_command(resource=fb_instance)
    # attach the userdata
    fb_instance.add_user_data(fb_userdata.render())
    # add creation policy for instance
    fb_instance.instance.cfn_options.creation_policy = core.CfnCreationPolicy(
        resource_signal=core.CfnResourceSignal(count=1, timeout="PT10M")
    )
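# The stack above relies on a file_updated helper that is not shown; a rough
# sketch of what the call site and the ".asset" comment imply (the suffix and
# implementation details are assumptions):
def file_updated(file_path: str, substitutions: dict) -> str:
    """Write a copy of file_path with placeholders replaced; return its path."""
    with open(file_path) as f:
        contents = f.read()
    for placeholder, value in substitutions.items():
        contents = contents.replace(placeholder, value)
    updated_path = f"{file_path}.asset"
    with open(updated_path, "w") as f:
        f.write(contents)
    return updated_path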
def __init__(self, app: core.App, cfn_name: str, stack_env):
    super().__init__(scope=app, id=f"{cfn_name}-{stack_env}")

    glue_code = s3_assets.Asset(
        scope=self,
        id=f"{cfn_name}-glue-script",
        path="./glue_script/glue_job_script.py",
    )

    glue_s3_access_role = iam.Role(
        scope=self,
        id=f"glue_s3_access_role_{stack_env}",
        role_name=f"glue_s3_access_role_{stack_env}",
        assumed_by=iam.ServicePrincipal("glue.amazonaws.com"))
    # add policy to access S3
    glue_s3_access_role.add_to_policy(
        iam.PolicyStatement(effect=iam.Effect.ALLOW,
                            resources=["*"],
                            actions=["s3:*"]))
    # add policy to access CloudWatch Logs
    glue_s3_access_role.add_to_policy(
        iam.PolicyStatement(effect=iam.Effect.ALLOW,
                            resources=["arn:aws:logs:*:*:*"],
                            actions=[
                                "logs:CreateLogGroup",
                                "logs:CreateLogStream",
                                "logs:PutLogEvents",
                                "logs:DescribeLogStreams"
                            ]))

    # glue
    # specify the job name explicitly, because an auto-generated name could
    # not be referenced by the Step Functions task below
    glue_job_name = f"{cfn_name}-glue-job"
    _ = glue.CfnJob(
        scope=self,
        id=glue_job_name,
        name=glue_job_name,
        command=glue.CfnJob.JobCommandProperty(
            # glueetl or pythonshell
            name=self.GLUE_JOB_COMMAND_GLUE_ETL,
            script_location=f"s3://{glue_code.s3_bucket_name}/{glue_code.s3_object_key}"),
        # set role-name!
        role=glue_s3_access_role.role_name,
        glue_version=self.GLUE_VERSION_2_0,
        number_of_workers=2,
        worker_type=self.GLUE_WORKER_TYPE_STANDARD,
        timeout=1800)

    # StepFunction Tasks
    sfn_task_pass = sfn.Pass(scope=self,
                             id=f"{cfn_name}-sfn-pass",
                             comment="pass example",
                             input_path="$",
                             result_path="$.source",
                             result=sfn.Result.from_string("example"),
                             output_path="$")

    # wait until the JOB completed: sfn.IntegrationPattern.RUN_JOB
    # process next step without waiting: sfn.IntegrationPattern.REQUEST_RESPONSE
    sfn_task_glue_job = sfn_tasks.GlueStartJobRun(
        scope=self,
        id=f"{cfn_name}-sfn-lambda-task",
        glue_job_name=glue_job_name,
        integration_pattern=sfn.IntegrationPattern.RUN_JOB,
        input_path="$",
        result_path="$.result",
        output_path="$.output")

    # stepfunctions
    definition = sfn_task_pass.next(sfn_task_glue_job)
    _ = sfn.StateMachine(scope=self,
                         id=f"{cfn_name}-SFn-{stack_env}",
                         definition=definition)
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    EC2_KEY_PAIR_NAME = cdk.CfnParameter(
        self, 'EC2KeyPairName',
        type='String',
        description='Amazon EC2 Instance KeyPair name')

    vpc_name = self.node.try_get_context("vpc_name")
    vpc = aws_ec2.Vpc.from_lookup(self, "ExistingVPC",
                                  is_default=True,
                                  vpc_name=vpc_name)
    # vpc = aws_ec2.Vpc(self, "JenkinsOnEC2Stack",
    #     max_azs=2,
    #     gateway_endpoints={
    #         "S3": aws_ec2.GatewayVpcEndpointOptions(
    #             service=aws_ec2.GatewayVpcEndpointAwsService.S3
    #         )
    #     }
    # )

    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceClass.html
    #XXX: https://docs.aws.amazon.com/cdk/api/latest/python/aws_cdk.aws_ec2/InstanceSize.html#aws_cdk.aws_ec2.InstanceSize
    ec2_instance_type = aws_ec2.InstanceType.of(
        aws_ec2.InstanceClass.BURSTABLE3, aws_ec2.InstanceSize.MEDIUM)

    sg_jenkins_host = aws_ec2.SecurityGroup(
        self, "JenkinsHostSG",
        vpc=vpc,
        allow_all_outbound=True,
        description='security group for a Jenkins host',
        security_group_name='jenkins-host-sg')
    cdk.Tags.of(sg_jenkins_host).add('Name', 'jenkins-host-sg')

    #TODO: SHOULD restrict the IP range allowed for SSH access
    sg_jenkins_host.add_ingress_rule(peer=aws_ec2.Peer.ipv4("0.0.0.0/0"),
                                     connection=aws_ec2.Port.tcp(22),
                                     description='SSH access')
    sg_jenkins_host.add_ingress_rule(peer=aws_ec2.Peer.ipv4("0.0.0.0/0"),
                                     connection=aws_ec2.Port.tcp(80),
                                     description='HTTP access')

    jenkins_host = aws_ec2.Instance(
        self, "JenkinsHost",
        vpc=vpc,
        instance_type=ec2_instance_type,
        machine_image=aws_ec2.MachineImage.latest_amazon_linux(
            generation=aws_ec2.AmazonLinuxGeneration.AMAZON_LINUX_2,
            edition=aws_ec2.AmazonLinuxEdition.STANDARD,
            kernel=aws_ec2.AmazonLinuxKernel.KERNEL5_X),
        vpc_subnets=aws_ec2.SubnetSelection(
            subnet_type=aws_ec2.SubnetType.PUBLIC),
        security_group=sg_jenkins_host,
        key_name=EC2_KEY_PAIR_NAME.value_as_string)

    # Script in S3 as Asset
    user_data_asset = aws_s3_assets.Asset(
        self, "JenkinsEC2UserData",
        path=os.path.join(os.path.dirname(__file__),
                          "user-data/install_jenkins.sh"))

    local_path = jenkins_host.user_data.add_s3_download_command(
        bucket=user_data_asset.bucket,
        bucket_key=user_data_asset.s3_object_key)

    # Userdata executes script from S3
    jenkins_host.user_data.add_execute_file_command(file_path=local_path)
    user_data_asset.grant_read(jenkins_host.role)

    cdk.CfnOutput(self, 'JenkinsHostId',
                  value=jenkins_host.instance_id,
                  export_name='JenkinsHostId')
    cdk.CfnOutput(self, 'JenkinsHostPublicDNSName',
                  value=jenkins_host.instance_public_dns_name,
                  export_name='JenkinsHostPublicDNSName')
def __init__(
    self,
    scope: cdk.Construct,
    construct_id: str,
    stack_log_level: str,
    glue_db_name: str,
    glue_table_name: str,
    etl_bkt,
    src_stream,
    **kwargs,
) -> None:
    super().__init__(scope, construct_id, **kwargs)

    self.template_options.metadata = {"License": "Miztiik Corp."}

    # Glue Job IAM Role
    self._glue_etl_role = _iam.Role(
        self,
        "glueJobRole",
        assumed_by=_iam.ServicePrincipal("glue.amazonaws.com"),
        managed_policies=[
            _iam.ManagedPolicy.from_aws_managed_policy_name(
                "AmazonS3ReadOnlyAccess"),
            _iam.ManagedPolicy.from_aws_managed_policy_name(
                "service-role/AWSGlueServiceRole")
        ])
    self._glue_etl_role.add_to_policy(
        _iam.PolicyStatement(
            actions=["s3:*"],
            resources=[f"{etl_bkt.bucket_arn}", f"{etl_bkt.bucket_arn}/*"]))
    self._glue_etl_role.add_to_policy(
        _iam.PolicyStatement(actions=["kinesis:DescribeStream"],
                             resources=[f"{src_stream.stream_arn}"]))
    src_stream.grant_read(self._glue_etl_role)

    # Create the Glue job to convert incoming JSON to parquet
    # Read the Glue Spark code up front so a missing script fails early;
    # the asset below uploads the same file
    try:
        with open(
            "stacks/back_end/glue_stacks/glue_job_scripts/kinesis_streams_batch_to_s3_etl.py",
            encoding="utf-8",
            mode="r",
        ) as f:
            kinesis_streams_batch_to_s3_etl = f.read()
    except OSError:
        print("Unable to read Glue Job Code")
        raise

    etl_script_asset = _s3_assets.Asset(
        self,
        "etlScriptAsset",
        path="stacks/back_end/glue_stacks/glue_job_scripts/kinesis_streams_batch_to_s3_etl.py"
    )

    self.etl_prefix = "stream-etl"
    _glue_etl_job = _glue.CfnJob(
        self,
        "glueJsonToParquetJob",
        name="stream-etl-processor",
        description="Glue Job to process stream of events from Kinesis data stream and store them in parquet format in S3",
        role=self._glue_etl_role.role_arn,
        glue_version="2.0",
        command=_glue.CfnJob.JobCommandProperty(
            name="gluestreaming",
            script_location=f"s3://{etl_script_asset.s3_bucket_name}/{etl_script_asset.s3_object_key}",
            python_version="3"),
        default_arguments={
            "--src_db_name": glue_db_name,
            "--src_tbl_name": glue_table_name,
            "--datalake_bkt_name": etl_bkt.bucket_name,
            "--datalake_bkt_prefix": f"{self.etl_prefix}/",
            "--job-bookmark-option": "job-bookmark-enable"
        },
        allocated_capacity=1,
        # timeout=2,
        max_retries=2,
        execution_property=_glue.CfnJob.ExecutionPropertyProperty(
            max_concurrent_runs=1))

    # Configure a Trigger - the cron below fires daily at 01:00 UTC
    _glue_etl_job_trigger = _glue.CfnTrigger(
        self,
        "glueEtlJobtrigger",
        type="SCHEDULED",
        description="Miztiik Automation: Trigger streaming etl glue job",
        schedule="cron(0 1 * * ? *)",
        start_on_creation=False,
        actions=[
            _glue.CfnTrigger.ActionProperty(job_name=f"{_glue_etl_job.name}",
                                            timeout=2)
        ])
    _glue_etl_job_trigger.add_depends_on(_glue_etl_job)

    ###########################################
    ################# OUTPUTS #################
    ###########################################
    output_0 = cdk.CfnOutput(
        self,
        "AutomationFrom",
        value=f"{GlobalArgs.SOURCE_INFO}",
        description="To know more about this automation stack, check out our github page.",
    )
    output_1 = cdk.CfnOutput(
        self,
        "StreamingETLGlueJob",
        value=f"https://console.aws.amazon.com/gluestudio/home?region={cdk.Aws.REGION}#/jobs",
        description="Glue ETL Job.",
    )
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    domain_name = self.node.try_get_context('domain_name')
    subdomain = 'enclave.{}'.format(domain_name)

    zone = route53.HostedZone.from_lookup(
        self,
        'Zone',
        domain_name=domain_name,
    )
    certificate = acm.DnsValidatedCertificate(
        self,
        'Certificate',
        domain_name=subdomain,
        hosted_zone=zone,
    )

    vpc = ec2.Vpc(
        self,
        'Vpc',
        cidr='10.11.12.0/24',
        max_azs=2,
        # Only need public IPs, so no need for private subnets
        subnet_configuration=[
            ec2.SubnetConfiguration(name='public',
                                    subnet_type=ec2.SubnetType.PUBLIC)
        ])

    role = iam.Role(
        self,
        'Ec2SsmRole',
        assumed_by=iam.ServicePrincipal('ec2.amazonaws.com'),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'AmazonSSMManagedInstanceCore')
        ],
    )
    role.add_to_policy(
        iam.PolicyStatement(
            actions=['ec2:AssociateEnclaveCertificateIamRole'],
            resources=[
                certificate.certificate_arn,
                role.role_arn,
            ],
        ))
    role.add_to_policy(
        iam.PolicyStatement(
            actions=['s3:GetObject'],
            resources=['arn:aws:s3:::aws-ec2-enclave-certificate-*/*'],
        ))
    role.add_to_policy(
        iam.PolicyStatement(
            actions=['kms:Decrypt'],
            resources=['arn:aws:kms:*:*:key/*'],
        ))
    role.add_to_policy(
        iam.PolicyStatement(
            actions=['iam:GetRole'],
            resources=[role.role_arn],
        ))

    nginx_config = s3_assets.Asset(
        self,
        'NginxConfig',
        path='./files/nginx.conf',
        readers=[role],
    )
    enclave_config = s3_assets.Asset(
        self,
        'EncalveConfig',
        path='./files/acm.yaml',
        readers=[role],
    )

    # Source: https://docs.aws.amazon.com/enclaves/latest/user/nitro-enclave-refapp.html
    user_data = ec2.UserData.for_linux()
    user_data.add_commands(
        'curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"',
        'unzip awscliv2.zip',
        './aws/install',
        '/usr/local/bin/aws ec2 associate-enclave-certificate-iam-role --certificate-arn {certificate_arn} --role-arn {role_arn} --region {region}'
        .format(
            certificate_arn=certificate.certificate_arn,
            role_arn=role.role_arn,
            region=self.region,
        ),
        'aws s3 cp s3://{bucket}/{key} /etc/nginx/nginx.conf'.format(
            bucket=nginx_config.s3_bucket_name,
            key=nginx_config.s3_object_key,
        ),
        'sed -i "s+DOMAIN_NAME+{domain_name}+g" /etc/nginx/nginx.conf'
        .format(domain_name=subdomain),
        'aws s3 cp s3://{bucket}/{key} /etc/nitro_enclaves/acm.yaml'.format(
            bucket=enclave_config.s3_bucket_name,
            key=enclave_config.s3_object_key,
        ),
        'sed -i "s+CERTIFICATE_ARN+{certificate_arn}+g" /etc/nitro_enclaves/acm.yaml'
        .format(certificate_arn=certificate.certificate_arn),
        'systemctl start nitro-enclaves-acm.service',
        'systemctl enable nitro-enclaves-acm',
    )

    instance = ec2.Instance(
        self,
        'Instance',
        role=role,
        vpc=vpc,
        user_data=user_data,
        # AWS Marketplace AMI: AWS Certificate Manager for Nitro Enclaves
        # Source: https://aws.amazon.com/marketplace/server/configuration?productId=3f5ee4f8-1439-4bce-ac57-e794a4ca82f9&ref_=psb_cfg_continue
        machine_image=ec2.MachineImage.lookup(
            name='ACM-For-Nitro-Enclaves-*',
            owners=['679593333241'],
        ),
        # Nitro Enclaves requires at least 4 vCPUs and does not run on Graviton
        instance_type=ec2.InstanceType.of(
            instance_class=ec2.InstanceClass.COMPUTE5_AMD,
            instance_size=ec2.InstanceSize.XLARGE,
        ),
    )
    # Unsupported property by CDK
    instance.instance.enclave_options = {'enabled': True}

    # Allow inbound HTTPS requests
    instance.connections.allow_from_any_ipv4(ec2.Port.tcp(443))

    # CDK route53 construct does not support EC2 instance as target
    route53.CfnRecordSet(
        self,
        'DnsRecord',
        name=subdomain,
        type='A',
        ttl='60',
        resource_records=[instance.instance_public_ip],
        hosted_zone_id=zone.hosted_zone_id,
    )
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    # store
    dynamodb_table = dynamodb.Table(
        self,
        'dynamodb_table',
        table_name=f'{PROJECT}_{STAGE}',
        partition_key=dynamodb.Attribute(
            name='date', type=dynamodb.AttributeType.STRING),
        billing_mode=dynamodb.BillingMode.PAY_PER_REQUEST,
        point_in_time_recovery=False,
        removal_policy=core.RemovalPolicy.DESTROY,
        server_side_encryption=True,
    )

    # public api
    public_api = appsync.CfnGraphQLApi(
        self,
        'public_api',
        name=f'{PROJECT}_{STAGE}',
        authentication_type='API_KEY',
    )
    now = time.localtime()
    epoch = time.mktime(now)
    public_api_key = appsync.CfnApiKey(
        self,
        'public_api_key',
        api_id=public_api.attr_api_id,
        expires=epoch + core.Duration.days(90).to_seconds(),
    )

    with open('schema.gql', mode='r') as f:
        graphql_schema = f.read()
    appsync.CfnGraphQLSchema(self,
                             'public_api_schema',
                             api_id=public_api.attr_api_id,
                             definition=graphql_schema)

    public_api_role = iam.Role(
        self,
        'public_api_role',
        assumed_by=iam.ServicePrincipal('appsync.amazonaws.com'),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'AmazonDynamoDBFullAccess')
        ],
    )
    public_api_datasource = appsync.CfnDataSource(
        self,
        'public_api_datasource',
        api_id=public_api.attr_api_id,
        name=f'{PROJECT}_{STAGE}_dynamodb',
        type='AMAZON_DYNAMODB',
        dynamo_db_config={
            'awsRegion': 'us-east-1',
            'tableName': dynamodb_table.table_name,
        },
        service_role_arn=public_api_role.role_arn,
    )

    with open('mapping_templates/get_holiday.json', mode='r') as f:
        get_holiday_json = f.read()
    appsync.CfnResolver(
        self,
        'public_api_resolver_get_holiday',
        api_id=public_api.attr_api_id,
        type_name='Query',
        field_name='getHoliday',
        data_source_name=public_api_datasource.attr_name,
        kind='UNIT',
        request_mapping_template=get_holiday_json,
        response_mapping_template='$util.toJson($context.result)',
    )

    with open('mapping_templates/list_holidays.json', mode='r') as f:
        list_holidays_json = f.read()
    appsync.CfnResolver(
        self,
        'public_api_resolver_list_holidays',
        api_id=public_api.attr_api_id,
        type_name='Query',
        field_name='listHolidays',
        data_source_name=public_api_datasource.attr_name,
        kind='UNIT',
        request_mapping_template=list_holidays_json,
        response_mapping_template='$util.toJson($context.result)',
    )

    # lambda source code upload to s3
    lambda_assets = s3_assets.Asset(self, 'lambda_assets',
                                    path='./function/.artifact/')

    # update function
    func_api = lambda_.Function(
        self,
        f'{PROJECT}-{STAGE}-func',
        function_name=f'{PROJECT}-{STAGE}-func',
        code=lambda_.Code.from_bucket(bucket=lambda_assets.bucket,
                                      key=lambda_assets.s3_object_key),
        handler='app.handler',
        runtime=lambda_.Runtime.PYTHON_3_7,
        timeout=core.Duration.seconds(120),
        log_retention=logs.RetentionDays.SIX_MONTHS,
        memory_size=128,
        tracing=lambda_.Tracing.ACTIVE,
    )
    func_api.add_environment('TABLE_NAME', dynamodb_table.table_name)
    func_api.add_environment('CSV_URL', CSV_URL)
    func_api.add_to_role_policy(
        iam.PolicyStatement(
            actions=[
                'dynamodb:Get*',
                'dynamodb:Put*',
                'dynamodb:Batch*',
            ],
            resources=[dynamodb_table.table_arn],
        ))

    # schedule execute
    events.Rule(
        self,
        f'{PROJECT}-{STAGE}-schedule',
        enabled=True,
        schedule=events.Schedule.rate(core.Duration.days(10)),
        targets=[events_targets.LambdaFunction(func_api)],
    )

    # lambda@edge
    func_lambdaedge = lambda_.Function(
        self,
        f'{PROJECT}-{STAGE}-func-lambdaedge',
        function_name=f'{PROJECT}-{STAGE}-func-lambdaedge',
        code=lambda_.Code.from_inline(
            open('./function/src/lambdaedge.py').read().replace(
                '__X_API_KEY__', public_api_key.attr_api_key)),
        handler='index.handler',
        runtime=lambda_.Runtime.PYTHON_3_7,
        timeout=core.Duration.seconds(30),
        memory_size=128,
        role=iam.Role(
            self,
            f'{PROJECT}-{STAGE}-func-lambdaedge-role',
            assumed_by=iam.CompositePrincipal(
                iam.ServicePrincipal('edgelambda.amazonaws.com'),
                iam.ServicePrincipal('lambda.amazonaws.com'),
            ),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    'service-role/AWSLambdaBasicExecutionRole'),
            ],
        ),
    )
    lambdaedge_version = func_lambdaedge.add_version(
        hashlib.sha256(
            open('./function/src/lambdaedge.py').read().replace(
                '__X_API_KEY__',
                public_api_key.attr_api_key).encode()).hexdigest())

    # ACM
    certificates = acm.Certificate(
        self,
        'certificates',
        domain_name=DOMAIN,
        validation_method=acm.ValidationMethod.DNS,
    )

    # CDN
    cdn = cloudfront.CloudFrontWebDistribution(
        self,
        f'{PROJECT}-{STAGE}-cloudfront',
        origin_configs=[
            cloudfront.SourceConfiguration(
                behaviors=[
                    # default behavior
                    cloudfront.Behavior(
                        allowed_methods=cloudfront.CloudFrontAllowedMethods.ALL,
                        default_ttl=core.Duration.seconds(0),
                        max_ttl=core.Duration.seconds(0),
                        min_ttl=core.Duration.seconds(0),
                        is_default_behavior=True,
                        lambda_function_associations=[
                            cloudfront.LambdaFunctionAssociation(
                                event_type=cloudfront.LambdaEdgeEventType.ORIGIN_REQUEST,
                                lambda_function=lambdaedge_version,
                            ),
                        ])
                ],
                custom_origin_source=cloudfront.CustomOriginConfig(
                    domain_name=core.Fn.select(
                        2, core.Fn.split('/', public_api.attr_graph_ql_url)),
                ),
            )
        ],
        alias_configuration=cloudfront.AliasConfiguration(
            acm_cert_ref=certificates.certificate_arn,
            names=[DOMAIN],
            security_policy=cloudfront.SecurityPolicyProtocol.TLS_V1_2_2018,
        ),
        price_class=cloudfront.PriceClass.PRICE_CLASS_ALL,
    )

    core.CfnOutput(
        self,
        'cloudfront-domain',
        value=cdn.domain_name,
    )
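# A hypothetical sketch of ./function/src/lambdaedge.py, which is not shown in
# the stack above; it assumes the origin-request handler exists to inject the
# AppSync API key (spliced in for the __X_API_KEY__ placeholder) into requests
# forwarded to the GraphQL origin.
def handler(event, context):
    # CloudFront origin-request events carry the request under Records[0].cf
    request = event['Records'][0]['cf']['request']
    request['headers']['x-api-key'] = [
        {'key': 'x-api-key', 'value': '__X_API_KEY__'}
    ]
    return request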
def __init__(self, scope: core.Construct, id: str, vpc: ec2.IVpc,
             cluster: neptune.CfnDBCluster, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    role = iam.Role(
        self,
        'Ec2Role',
        assumed_by=iam.ServicePrincipal('ec2.amazonaws.com'),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'AmazonSSMManagedInstanceCore'),
        ],
    )

    config_asset = s3_assets.Asset(
        self,
        'ConfigYaml',
        path='./files/neptune-remote.yaml',
        readers=[role],
    )

    sg = ec2.SecurityGroup(
        self,
        'SecurityGroup',
        vpc=vpc,
    )

    user_data = ec2.UserData.for_linux()
    user_data.add_commands(
        'yum update -y',
        'yum install -y java-1.8.0-devel',
        'cd ~',  # Execute subsequent commands in home directory
        'wget https://archive.apache.org/dist/tinkerpop/3.4.1/apache-tinkerpop-gremlin-console-3.4.1-bin.zip',
        'unzip apache-tinkerpop-gremlin-console-3.4.1-bin.zip',
        'cd apache-tinkerpop-gremlin-console-3.4.1',
        'wget https://www.amazontrust.com/repository/SFSRootCAG2.pem',
        'aws s3 cp s3://{bucket}/{key} conf/neptune-remote.yaml'.format(
            bucket=config_asset.s3_bucket_name,
            key=config_asset.s3_object_key,
        ),
        'sed -i "s/ENDPOINT_URL/{endpoint_url}/g" conf/neptune-remote.yaml'
        .format(endpoint_url=cluster.endpoint),
        'systemctl start awslogsd',
    )

    ec2.Instance(
        self,
        'Instance',
        role=role,
        vpc=vpc,
        security_group=sg,
        user_data=user_data,
        instance_type=ec2.InstanceType.of(
            instance_class=ec2.InstanceClass.BURSTABLE3_AMD,
            instance_size=ec2.InstanceSize.NANO,
        ),
        machine_image=ec2.AmazonLinuxImage(
            generation=ec2.AmazonLinuxGeneration.AMAZON_LINUX_2,
        ),
    )

    self.role = role
    self.security_group = sg
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    # IAM resources
    function_role = iam.Role(
        self,
        'LambdaRole',
        assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'service-role/AWSLambdaBasicExecutionRole'),
        ],
    )
    function_role.add_to_policy(
        iam.PolicyStatement(
            actions=['ec2:TerminateInstances'],
            resources=['*'],
        ))

    instance_role = iam.Role(
        self,
        'Ec2Role',
        assumed_by=iam.ServicePrincipal('ec2.amazonaws.com'),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'AmazonSSMManagedInstanceCore'),
            iam.ManagedPolicy.from_aws_managed_policy_name(
                'CloudWatchAgentAdminPolicy'),
        ],
    )

    # Lambda resources
    function = lambda_.Function(
        self,
        'Shutdown',
        runtime=lambda_.Runtime.PYTHON_3_7,  # Current version on my machines
        code=lambda_.Code.from_asset('files/shutdown'),
        handler='index.handler',
        role=function_role,
    )

    # Log resources
    awslogs_config = s3_assets.Asset(
        self,
        'AwslogsConfig',
        path='./files/awslogs.conf',
        readers=[instance_role],
    )
    log_group = logs.LogGroup(
        self,
        'LogSecure',
        removal_policy=core.RemovalPolicy.DESTROY,
    )
    logs.SubscriptionFilter(
        self,
        'SshdSession',
        log_group=log_group,
        filter_pattern=logs.FilterPattern.all_terms('sshd', 'session opened'),
        destination=logs_destinations.LambdaDestination(function))

    ## EC2 resources
    vpc = ec2.Vpc(
        self,
        'Vpc',
        cidr='10.0.0.0/24',
        subnet_configuration=[
            ec2.SubnetConfiguration(
                name='Public',
                subnet_type=ec2.SubnetType.PUBLIC,
            )
        ],
    )
    key_pair = core.CfnParameter(
        self,
        'KeyPair',
        type='AWS::EC2::KeyPair::KeyName',
    )

    # https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/EC2NewInstanceCWL.html
    user_data = ec2.UserData.for_linux()
    user_data.add_commands(
        'yum update -y',
        'yum install -y awslogs',
        'aws s3 cp s3://{bucket}/{key} /etc/awslogs/awslogs.conf'.format(
            bucket=awslogs_config.s3_bucket_name,
            key=awslogs_config.s3_object_key,
        ),
        'sed -i "s/LOG_GROUP_NAME/{log_group_name}/g" /etc/awslogs/awslogs.conf'.format(
            log_group_name=log_group.log_group_name,
        ),
        'sed -i "s/us-east-1/{region}/g" /etc/awslogs/awscli.conf'.format(
            region=self.region,
        ),
        'systemctl start awslogsd',
    )

    # Using an autoscaling group to utilize the rolling update
    asg = autoscaling.AutoScalingGroup(
        self,
        'Instance',
        role=instance_role,
        vpc=vpc,
        user_data=user_data,
        key_name=key_pair.value_as_string,
        instance_type=ec2.InstanceType.of(
            instance_class=ec2.InstanceClass.BURSTABLE4_GRAVITON,
            instance_size=ec2.InstanceSize.NANO,
        ),
        machine_image=ec2.AmazonLinuxImage(
            generation=ec2.AmazonLinuxGeneration.AMAZON_LINUX_2,
            edition=ec2.AmazonLinuxEdition.STANDARD,
            cpu_type=ec2.AmazonLinuxCpuType.ARM_64,
        ),
        min_capacity=3,
        max_capacity=3,
        update_type=autoscaling.UpdateType.ROLLING_UPDATE,
        rolling_update_configuration=autoscaling.RollingUpdateConfiguration(
            max_batch_size=3,
        ))
    asg.connections.allow_from_any_ipv4(ec2.Port.tcp(22))
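# A hypothetical sketch of the files/shutdown handler wired up above. It
# assumes awslogs.conf names each log stream after the instance id, so the
# stream name in the subscription payload identifies the instance to
# terminate; none of this code is shown in the original stack.
import base64
import gzip
import json

import boto3

def handler(event, context):
    # CloudWatch Logs subscriptions deliver a base64-encoded, gzipped payload
    payload = json.loads(
        gzip.decompress(base64.b64decode(event['awslogs']['data'])))
    instance_id = payload['logStream']
    boto3.client('ec2').terminate_instances(InstanceIds=[instance_id])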
def __init__(self, scope: core.Construct, construct_id: str,
             stream: kinesis.IStream, kda_path: str,
             database: timestream.CfnDatabase, table: timestream.CfnTable,
             **kwargs) -> None:
    super().__init__(scope, construct_id, **kwargs)

    asset = assets.Asset(self, "flink-source", path=kda_path)

    log_group = logs.LogGroup(self, "KdaLogGroup",
                              retention=RetentionDays.FIVE_DAYS,
                              removal_policy=RemovalPolicy.DESTROY)
    log_stream = log_group.add_stream("KdaLogStream")

    kda_role = iam.Role(
        self,
        "KdaRole",
        assumed_by=iam.ServicePrincipal("kinesisanalytics.amazonaws.com"),
    )

    asset.grant_read(kda_role)
    stream.grant_read(kda_role)
    cloudwatch.Metric.grant_put_metric_data(kda_role)
    log_group.grant(kda_role, "logs:DescribeLogStreams")
    log_group.grant_write(kda_role)

    kda_role.add_to_policy(
        iam.PolicyStatement(actions=[
            "timestream:DescribeEndpoints",
            "timestream:ListTables",
            "timestream:ListDatabases",
            "timestream:DescribeTable",
            "timestream:DescribeDatabase",
        ],
                            resources=["*"]))
    kda_role.add_to_policy(
        iam.PolicyStatement(actions=["timestream:*Database"],
                            resources=[database.attr_arn]))
    kda_role.add_to_policy(
        iam.PolicyStatement(
            actions=["timestream:*Table", "timestream:WriteRecords"],
            resources=[table.attr_arn]))
    kda_role.add_to_policy(
        iam.PolicyStatement(actions=["kms:DescribeKey"], resources=["*"]))
    kda_role.add_to_policy(
        iam.PolicyStatement(actions=["kms:CreateGrant"],
                            resources=["*"],
                            conditions={
                                "ForAnyValue:StringEquals": {
                                    "kms:EncryptionContextKeys":
                                    "aws:timestream:database-name"
                                },
                                "Bool": {
                                    "kms:GrantIsForAWSResource": True
                                },
                                "StringLike": {
                                    "kms:ViaService":
                                    "timestream.*.amazonaws.com"
                                }
                            }))
    kda_role.add_to_policy(
        iam.PolicyStatement(actions=["kinesis:ListShards"],
                            resources=[stream.stream_arn]))

    self._asset = asset
    self._kda_role = kda_role
    self._log_group_name = log_group.log_group_name
    self._log_stream_name = log_stream.log_stream_name
def __init__(self, scope: cdk.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    # Version of ParallelCluster for Cloud9.
    pcluster_version = cdk.CfnParameter(
        self,
        'ParallelClusterVersion',
        description='Specify a custom parallelcluster version. See https://pypi.org/project/aws-parallelcluster/#history for options.',
        default='2.8.0',
        type='String',
        allowed_values=get_version_list('aws-parallelcluster'))

    # S3 URI for Config file
    config = cdk.CfnParameter(
        self,
        'ConfigS3URI',
        description='Set a custom parallelcluster config file.',
        default='https://notearshpc-quickstart.s3.amazonaws.com/{0}/config.ini'.format(__version__))

    # Password
    password = cdk.CfnParameter(
        self,
        'UserPasswordParameter',
        description='Set a password for the hpc-quickstart user',
        no_echo=True)

    # create a VPC
    vpc = ec2.Vpc(
        self,
        'VPC',
        cidr='10.0.0.0/16',
        gateway_endpoints={
            "S3": ec2.GatewayVpcEndpointOptions(
                service=ec2.GatewayVpcEndpointAwsService.S3),
            "DynamoDB": ec2.GatewayVpcEndpointOptions(
                service=ec2.GatewayVpcEndpointAwsService.DYNAMODB)
        },
        max_azs=99)

    # select the private subnets created in the VPC
    selection = vpc.select_subnets(subnet_type=ec2.SubnetType.PRIVATE)

    # Output created subnets
    for i, public_subnet in enumerate(vpc.public_subnets):
        cdk.CfnOutput(self, 'PublicSubnet%i' % i, value=public_subnet.subnet_id)
    for i, private_subnet in enumerate(vpc.private_subnets):
        cdk.CfnOutput(self, 'PrivateSubnet%i' % i, value=private_subnet.subnet_id)
    cdk.CfnOutput(self, 'VPCId', value=vpc.vpc_id)

    # Create a Bucket
    data_bucket = s3.Bucket(self, "DataRepository")
    cdk.CfnOutput(self, 'DataRepositoryName', value=data_bucket.bucket_name)
    cloudtrail_bucket = s3.Bucket(self, "CloudTrailLogs")
    quickstart_bucket = s3.Bucket.from_bucket_name(self, 'QuickStartBucket', 'aws-quickstart')

    # Upload Bootstrap Script to that bucket
    bootstrap_script = assets.Asset(self, 'BootstrapScript', path='scripts/bootstrap.sh')
    # Upload parallel cluster post_install_script to that bucket
    pcluster_post_install_script = assets.Asset(
        self, 'PclusterPostInstallScript',
        path='scripts/post_install_script.sh')
    # Upload the ParallelCluster config file to that bucket
    pcluster_config_script = assets.Asset(self, 'PclusterConfigScript', path='scripts/config.ini')

    # Setup CloudTrail
    cloudtrail.Trail(self, 'CloudTrail', bucket=cloudtrail_bucket)

    # Create a Cloud9 instance
    # Cloud9 doesn't have the ability to provide userdata
    # Because of this we need to use SSM run command
    cloud9_instance = cloud9.Ec2Environment(
        self,
        'ResearchWorkspace',
        vpc=vpc,
        instance_type=ec2.InstanceType(instance_type_identifier='c5.large'))
    cdk.CfnOutput(self, 'Research Workspace URL', value=cloud9_instance.ide_url)

    # Create a keypair in lambda and store the private key in SecretsManager
    c9_createkeypair_role = iam.Role(
        self,
        'Cloud9CreateKeypairRole',
        assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'))
    c9_createkeypair_role.add_managed_policy(
        iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AWSLambdaBasicExecutionRole'))
    # Add IAM permissions to the lambda role
    c9_createkeypair_role.add_to_policy(
        iam.PolicyStatement(
            actions=['ec2:CreateKeyPair', 'ec2:DeleteKeyPair'],
            resources=['*'],
        ))

    # Lambda for Cloud9 keypair
    c9_createkeypair_lambda = _lambda.Function(
        self,
        'C9CreateKeyPairLambda',
        runtime=_lambda.Runtime.PYTHON_3_6,
        handler='lambda_function.handler',
        timeout=cdk.Duration.seconds(300),
        role=c9_createkeypair_role,
        code=_lambda.Code.from_asset('functions/source/c9keypair'),
    )

    c9_createkeypair_provider = cr.Provider(
        self, "C9CreateKeyPairProvider",
        on_event_handler=c9_createkeypair_lambda)

    c9_createkeypair_cr = cfn.CustomResource(
        self, "C9CreateKeyPair",
        provider=c9_createkeypair_provider,
        properties={'ServiceToken': c9_createkeypair_lambda.function_arn})
    # c9_createkeypair_cr.node.add_dependency(instance_id)
    c9_ssh_private_key_secret = secretsmanager.CfnSecret(
        self,
        'SshPrivateKeySecret',
        secret_string=c9_createkeypair_cr.get_att_string('PrivateKey'))

    # The iam policy has a <REGION> parameter that needs to be replaced.
    # We do it programmatically so future versions of the synth'd stack
    # template include all regions.
    with open('iam/ParallelClusterUserPolicy.json') as json_file:
        data = json.load(json_file)
    for s in data['Statement']:
        if s['Sid'] == 'S3ParallelClusterReadOnly':
            s['Resource'] = []
            for r in region_info.RegionInfo.regions:
                s['Resource'].append('arn:aws:s3:::{0}-aws-parallelcluster*'.format(r.name))

    parallelcluster_user_policy = iam.CfnManagedPolicy(
        self,
        'ParallelClusterUserPolicy',
        policy_document=iam.PolicyDocument.from_json(data))

    # Cloud9 IAM Role
    cloud9_role = iam.Role(
        self,
        'Cloud9Role',
        assumed_by=iam.ServicePrincipal('ec2.amazonaws.com'))
    cloud9_role.add_managed_policy(
        iam.ManagedPolicy.from_aws_managed_policy_name('AmazonSSMManagedInstanceCore'))
    cloud9_role.add_managed_policy(
        iam.ManagedPolicy.from_aws_managed_policy_name('AWSCloud9User'))
    cloud9_role.add_managed_policy(
        iam.ManagedPolicy.from_managed_policy_arn(
            self, 'AttachParallelClusterUserPolicy',
            parallelcluster_user_policy.ref))
    cloud9_role.add_to_policy(
        iam.PolicyStatement(
            resources=['*'],
            actions=[
                'ec2:DescribeInstances',
                'ec2:DescribeVolumes',
                'ec2:ModifyVolume'
            ]))
    cloud9_role.add_to_policy(
        iam.PolicyStatement(
            resources=[c9_ssh_private_key_secret.ref],
            actions=['secretsmanager:GetSecretValue']))
    cloud9_role.add_to_policy(
        iam.PolicyStatement(
            actions=["s3:Get*", "s3:List*"],
            resources=[
                "arn:aws:s3:::%s/*" % (data_bucket.bucket_name),
                "arn:aws:s3:::%s" % (data_bucket.bucket_name)
            ]))
    bootstrap_script.grant_read(cloud9_role)
    pcluster_post_install_script.grant_read(cloud9_role)
    pcluster_config_script.grant_read(cloud9_role)

    # Admin Group
    admin_group = iam.Group(self, 'AdminGroup')
    admin_group.add_managed_policy(
        iam.ManagedPolicy.from_aws_managed_policy_name('AdministratorAccess'))
    admin_group.add_managed_policy(
        iam.ManagedPolicy.from_aws_managed_policy_name('AWSCloud9Administrator'))

    # PowerUser Group
    poweruser_group = iam.Group(self, 'PowerUserGroup')
    poweruser_group.add_managed_policy(
        iam.ManagedPolicy.from_aws_managed_policy_name('PowerUserAccess'))
    poweruser_group.add_managed_policy(
        iam.ManagedPolicy.from_aws_managed_policy_name('AWSCloud9Administrator'))

    # HPC User
    user = iam.CfnUser(
        self,
        'Researcher',
        groups=[admin_group.node.default_child.ref],
        login_profile=iam.CfnUser.LoginProfileProperty(
            password_reset_required=True,
            password=cdk.SecretValue.cfn_parameter(password).to_string()))

    create_user = cdk.CfnParameter(
        self, "CreateUser",
        default="false",
        type="String",
        allowed_values=['true', 'false']).value_as_string
    user_condition = cdk.CfnCondition(
        self, "UserCondition",
        expression=cdk.Fn.condition_equals(create_user, "true"))
    user.cfn_options.condition = user_condition
    cdk.CfnOutput(
        self, 'UserLoginUrl',
        value="".join(["https://", self.account, ".signin.aws.amazon.com/console"]),
        condition=user_condition)
    cdk.CfnOutput(self, 'UserName', value=user.ref, condition=user_condition)

    # Cloud9 Setup IAM Role
    cloud9_setup_role = iam.Role(
        self,
        'Cloud9SetupRole',
        assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'))
    cloud9_setup_role.add_managed_policy(
        iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AWSLambdaBasicExecutionRole'))
    # Allow pcluster to be run in bootstrap
    cloud9_setup_role.add_managed_policy(
        iam.ManagedPolicy.from_managed_policy_arn(
            self, 'AttachParallelClusterUserPolicySetup',
            parallelcluster_user_policy.ref))
    # Add IAM permissions to the lambda role
    cloud9_setup_role.add_to_policy(
        iam.PolicyStatement(
            actions=[
                'cloudformation:DescribeStackResources',
                'ec2:AssociateIamInstanceProfile',
                'ec2:AuthorizeSecurityGroupIngress',
                'ec2:DescribeInstances',
                'ec2:DescribeInstanceStatus',
                'ec2:DescribeInstanceAttribute',
                'ec2:DescribeIamInstanceProfileAssociations',
                'ec2:DescribeVolumes',
                'ec2:DescribeVolumeAttribute',
                'ec2:DescribeVolumesModifications',
                'ec2:DescribeVolumeStatus',
                'ssm:DescribeInstanceInformation',
                'ec2:ModifyVolume',
                'ec2:ReplaceIamInstanceProfileAssociation',
                'ec2:ReportInstanceStatus',
                'ssm:SendCommand',
                'ssm:GetCommandInvocation',
                's3:GetObject',
                'lambda:AddPermission',
                'lambda:RemovePermission',
                'events:PutRule',
                'events:DeleteRule',
                'events:PutTargets',
                'events:RemoveTargets',
                'cloud9:CreateEnvironmentMembership',
            ],
            resources=['*'],
        ))
    cloud9_setup_role.add_to_policy(
        iam.PolicyStatement(
            actions=['iam:PassRole'],
            resources=[cloud9_role.role_arn]))
    cloud9_setup_role.add_to_policy(
        iam.PolicyStatement(
            actions=['lambda:AddPermission', 'lambda:RemovePermission'],
            resources=['*']))

    # Cloud9 Instance Profile
    c9_instance_profile = iam.CfnInstanceProfile(
        self, "Cloud9InstanceProfile",
        roles=[cloud9_role.role_name])

    # Lambda to add Instance Profile to Cloud9
    c9_instance_profile_lambda = _lambda.Function(
        self,
        'C9InstanceProfileLambda',
        runtime=_lambda.Runtime.PYTHON_3_6,
        handler='lambda_function.handler',
        timeout=cdk.Duration.seconds(900),
        role=cloud9_setup_role,
        code=_lambda.Code.from_asset('functions/source/c9InstanceProfile'),
    )
    c9_instance_profile_provider = cr.Provider(
        self, "C9InstanceProfileProvider",
        on_event_handler=c9_instance_profile_lambda,
    )
    instance_id = cfn.CustomResource(
        self, "C9InstanceProfile",
        provider=c9_instance_profile_provider,
        properties={
            'InstanceProfile': c9_instance_profile.ref,
            'Cloud9Environment': cloud9_instance.environment_id,
        })
    instance_id.node.add_dependency(cloud9_instance)

    # Lambda for Cloud9 Bootstrap
    c9_bootstrap_lambda = _lambda.Function(
        self,
        'C9BootstrapLambda',
        runtime=_lambda.Runtime.PYTHON_3_6,
        handler='lambda_function.handler',
        timeout=cdk.Duration.seconds(900),
        role=cloud9_setup_role,
        code=_lambda.Code.from_asset('functions/source/c9bootstrap'),
    )
    c9_bootstrap_provider = cr.Provider(
        self, "C9BootstrapProvider",
        on_event_handler=c9_bootstrap_lambda)
    c9_bootstrap_cr = cfn.CustomResource(
        self, "C9Bootstrap",
        provider=c9_bootstrap_provider,
        properties={
            'Cloud9Environment': cloud9_instance.environment_id,
            'BootstrapPath': 's3://%s/%s' % (bootstrap_script.s3_bucket_name,
                                             bootstrap_script.s3_object_key),
            'Config': config.value_as_string,
            'VPCID': vpc.vpc_id,
            'MasterSubnetID': vpc.public_subnets[0].subnet_id,
            'ComputeSubnetID': vpc.private_subnets[0].subnet_id,
            'PostInstallScriptS3Url': "".join([
                's3://', pcluster_post_install_script.s3_bucket_name,
                "/", pcluster_post_install_script.s3_object_key
            ]),
            'PostInstallScriptBucket': pcluster_post_install_script.s3_bucket_name,
            'S3ReadWriteResource': data_bucket.bucket_arn,
            'S3ReadWriteUrl': 's3://%s' % (data_bucket.bucket_name),
            'KeyPairId': c9_createkeypair_cr.ref,
            'KeyPairSecretArn': c9_ssh_private_key_secret.ref,
            'UserArn': user.attr_arn,
            'PclusterVersion': pcluster_version.value_as_string
        })
    c9_bootstrap_cr.node.add_dependency(instance_id)
    c9_bootstrap_cr.node.add_dependency(c9_createkeypair_cr)
    c9_bootstrap_cr.node.add_dependency(c9_ssh_private_key_secret)
    c9_bootstrap_cr.node.add_dependency(data_bucket)

    enable_budget = cdk.CfnParameter(
        self, "EnableBudget",
        default="true",
        type="String",
        allowed_values=['true', 'false']).value_as_string

    # Budgets
    budget_properties = {
        'budgetType': "COST",
        'timeUnit': "ANNUALLY",
        'budgetLimit': {
            'amount': cdk.CfnParameter(
                self,
                'BudgetLimit',
                description='The initial budget for this project in USD ($).',
                default=2000,
                type='Number').value_as_number,
            'unit': "USD",
        },
        'costFilters': None,
        'costTypes': {
            'includeCredit': False,
            'includeDiscount': True,
            'includeOtherSubscription': True,
            'includeRecurring': True,
            'includeRefund': True,
            'includeSubscription': True,
            'includeSupport': True,
            'includeTax': True,
            'includeUpfront': True,
            'useAmortized': False,
            'useBlended': False,
        },
        'plannedBudgetLimits': None,
        'timePeriod': None,
    }

    email = {
        'notification': {
            'comparisonOperator': "GREATER_THAN",
            'notificationType': "ACTUAL",
            'threshold': 80,
            'thresholdType': "PERCENTAGE",
        },
        'subscribers': [{
            'address': cdk.CfnParameter(
                self,
                'NotificationEmail',
                description='This email address will receive billing alarm notifications when 80% of the budget limit is reached.',
                default='*****@*****.**').value_as_string,
            'subscriptionType': "EMAIL",
        }]
    }

    overall_budget = budgets.CfnBudget(
        self,
        "HPCBudget",
        budget=budget_properties,
        notifications_with_subscribers=[email],
    )
    overall_budget.cfn_options.condition = cdk.CfnCondition(
        self, "BudgetCondition",
        expression=cdk.Fn.condition_equals(enable_budget, "true"))
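# A minimal sketch of the get_version_list() helper used for the
# ParallelClusterVersion parameter above (assumption -- the real helper is defined
# elsewhere in this project). It assumes the public PyPI JSON API is reachable at
# synth time; a real implementation would likely sort releases semantically rather
# than lexically.
import json
from urllib.request import urlopen


def get_version_list(package_name):
    # https://pypi.org/pypi/<package>/json lists every published release.
    with urlopen('https://pypi.org/pypi/%s/json' % package_name) as response:
        releases = json.load(response)['releases']
    return sorted(releases.keys())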
def __init__(
    self,
    scope: cdk.Construct,
    construct_id: str,
    stack_log_level: str,
    vpc,
    my_sql_db_sg,
    store_events_db_endpoint,
    sales_events_bkt,
    _glue_etl_role,
    glue_db_name: str,
    glue_table_name: str,
    tgt_db_secret,
    **kwargs,
) -> None:
    super().__init__(scope, construct_id, **kwargs)

    self.template_options.metadata = {"License": "Miztiik Corp."}

    # Add permissions to our Glue job role to access secrets
    tgt_db_secret.grant_read(_glue_etl_role)

    # Create GLUE JDBC Connection for RDS MySQL
    # Allow ALL PORTS within SG for GLUE Connections to connect
    # https://docs.aws.amazon.com/glue/latest/dg/connection-defining.html#connection-properties-jdbc
    # https://docs.aws.amazon.com/glue/latest/dg/setup-vpc-for-glue-access.html
    # https://docs.amazonaws.cn/en_us/glue/latest/dg/connection-defining.html
    rds_mysql_conn_props = _glue.CfnConnection.ConnectionInputProperty(
        connection_type="JDBC",
        description="Glue Connection for RDS MySQL Store Events Database",
        name="rdsMySQL57Conn",
        physical_connection_requirements=_glue.CfnConnection.PhysicalConnectionRequirementsProperty(
            security_group_id_list=[my_sql_db_sg.security_group_id],
            subnet_id=vpc.select_subnets(
                subnet_type=_ec2.SubnetType.PRIVATE
            ).subnet_ids[1]
        ),
        connection_properties={
            "JDBC_CONNECTION_URL": f"jdbc:mysql://{store_events_db_endpoint}:3306/store_events",
            "JDBC_ENFORCE_SSL": "false",
            "USERNAME": "******",
            "PASSWORD": "******"
        }
    )
    rds_mysql_conn = _glue.CfnConnection(
        self,
        "rdsMySQLGlueConnection",
        catalog_id=f"{cdk.Aws.ACCOUNT_ID}",
        connection_input=rds_mysql_conn_props
    )

    # Create the Glue job to ingest the incoming JSON data from S3 into RDS
    # Read Glue Spark code
    try:
        with open(
            "stacks/back_end/glue_stacks/glue_job_scripts/load_json_to_rds.py",
            encoding="utf-8",
            mode="r",
        ) as f:
            load_json_to_rds = f.read()
    except OSError:
        print("Unable to read Glue Job Code")
        raise

    etl_script_asset = _s3_assets.Asset(
        self,
        "etlScriptAsset",
        path="stacks/back_end/glue_stacks/glue_job_scripts/load_json_to_rds.py"
    )

    self.etl_prefix = "stream-etl"
    _glue_etl_job = _glue.CfnJob(
        self,
        "glues3ToRdsIngestorJob",
        name="s3-to-rds-ingestor",
        description="Glue Job to ingest JSON data from S3 to RDS",
        role=_glue_etl_role.role_arn,
        glue_version="2.0",
        command=_glue.CfnJob.JobCommandProperty(
            name="glueetl",
            script_location=f"s3://{etl_script_asset.s3_bucket_name}/{etl_script_asset.s3_object_key}",
            python_version="3"
        ),
        connections={"connections": [rds_mysql_conn_props.name]},
        default_arguments={
            # Glue default arguments must be strings, not booleans
            "--enable-metrics": "true",
            "--enable-continuous-cloudwatch-log": "true",
            "--job-bookmark-option": "job-bookmark-enable",
            "--TempDir": f"s3://{sales_events_bkt.bucket_name}/bookmarks",
            "--src_db_name": glue_db_name,
            "--src_etl_bkt": f"{sales_events_bkt.bucket_name}",
            "--crawler_tbl_prefix": "txns_",
            "--tgt_db_secret_arn": tgt_db_secret.secret_arn,
            "--tgt_tbl_name": glue_table_name,
            "--conn_name": f"{rds_mysql_conn_props.name}"
        },
        allocated_capacity=1,
        # timeout=2,
        max_retries=2,
        execution_property=_glue.CfnJob.ExecutionPropertyProperty(
            max_concurrent_runs=2)
    )

    # Configure a Trigger - Every hour
    _glue_etl_job_trigger = _glue.CfnTrigger(
        self,
        "glueEtlJobtrigger",
        type="SCHEDULED",
        description="Miztiik Automation: Trigger S3 to RDS Ingestor glue job every hour",
        schedule="cron(0 * * * ? *)",
        start_on_creation=False,
        actions=[
            _glue.CfnTrigger.ActionProperty(
                job_name=f"{_glue_etl_job.name}",
                timeout=2
            )
        ]
    )
    _glue_etl_job_trigger.add_depends_on(_glue_etl_job)

    # Configure Glue Workflow
    _glue_etl_job_workflow = _glue.CfnWorkflow(
        self,
        "glueEtlJobWorkflow"
    )

    ###########################################
    ################# OUTPUTS #################
    ###########################################
    output_0 = cdk.CfnOutput(
        self,
        "AutomationFrom",
        value=f"{GlobalArgs.SOURCE_INFO}",
        description="To know more about this automation stack, check out our github page.",
    )
    output_1 = cdk.CfnOutput(
        self,
        "RDSIngestorETLGlueJob",
        value=f"https://console.aws.amazon.com/gluestudio/home?region={cdk.Aws.REGION}#/jobs",
        description="Glue Job to ingest JSON data from S3 to RDS.",
    )
def __init__(self, scope: core.Construct, id: str, props, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    dirname = os.path.dirname(__file__)

    ecr_repo = ecr.Repository.from_repository_name(
        self,
        'UmccriseEcrRepo',
        repository_name='umccrise'
    )

    ################################################################################
    # Set up permissions
    ro_buckets = set()
    for bucket in props['ro_buckets']:
        tmp_bucket = s3.Bucket.from_bucket_name(
            self,
            bucket,
            bucket_name=bucket
        )
        ro_buckets.add(tmp_bucket)

    rw_buckets = set()
    for bucket in props['rw_buckets']:
        tmp_bucket = s3.Bucket.from_bucket_name(
            self,
            bucket,
            bucket_name=bucket
        )
        rw_buckets.add(tmp_bucket)

    batch_service_role = iam.Role(
        self,
        'BatchServiceRole',
        assumed_by=iam.ServicePrincipal('batch.amazonaws.com'),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AWSBatchServiceRole')
        ]
    )

    spotfleet_role = iam.Role(
        self,
        'AmazonEC2SpotFleetRole',
        assumed_by=iam.ServicePrincipal('spotfleet.amazonaws.com'),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AmazonEC2SpotFleetTaggingRole')
        ]
    )

    # Create role for Batch instances
    batch_instance_role = iam.Role(
        self,
        'BatchInstanceRole',
        role_name='UmccriseBatchInstanceRole',
        assumed_by=iam.CompositePrincipal(
            iam.ServicePrincipal('ec2.amazonaws.com'),
            iam.ServicePrincipal('ecs.amazonaws.com')
        ),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AmazonEC2RoleforSSM'),
            iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AmazonEC2ContainerServiceforEC2Role')
        ]
    )
    batch_instance_role.add_to_policy(
        iam.PolicyStatement(
            actions=[
                "ec2:Describe*",
                "ec2:AttachVolume",
                "ec2:CreateVolume",
                "ec2:CreateTags",
                "ec2:ModifyInstanceAttribute"
            ],
            resources=["*"]
        )
    )
    batch_instance_role.add_to_policy(
        iam.PolicyStatement(
            actions=[
                "ecs:ListClusters"
            ],
            resources=["*"]
        )
    )
    for bucket in ro_buckets:
        bucket.grant_read(batch_instance_role)
    for bucket in rw_buckets:
        # restrict write to paths matching */umccrised/*
        bucket.grant_read_write(batch_instance_role, '*/umccrised/*')

    # Turn the instance role into an instance profile
    batch_instance_profile = iam.CfnInstanceProfile(
        self,
        'BatchInstanceProfile',
        instance_profile_name='UmccriseBatchInstanceProfile',
        roles=[batch_instance_role.role_name]
    )

    ################################################################################
    # Network
    # (import common infrastructure, maintained via Terraform)

    # VPC
    vpc = ec2.Vpc.from_lookup(
        self,
        'UmccrMainVpc',
        tags={'Name': 'main-vpc', 'Stack': 'networking'}
    )

    batch_security_group = ec2.SecurityGroup(
        self,
        "BatchSecurityGroup",
        vpc=vpc,
        description="Allow all outbound, no inbound traffic"
    )

    ################################################################################
    # Setup Batch compute resources

    # Configure BlockDevice to expand instance disk space (if needed?)
    block_device_mappings = [
        {
            'deviceName': '/dev/xvdf',
            'ebs': {
                'deleteOnTermination': True,
                'encrypted': True,
                'volumeSize': 2048,
                'volumeType': 'gp2'
            }
        }
    ]

    # Set up custom user data to configure the Batch instances
    umccrise_wrapper_asset = assets.Asset(
        self,
        'UmccriseWrapperAsset',
        path=os.path.join(dirname, '..', 'assets', "umccrise-wrapper.sh")
    )
    umccrise_wrapper_asset.grant_read(batch_instance_role)

    user_data_asset = assets.Asset(
        self,
        'UserDataAsset',
        path=os.path.join(dirname, '..', 'assets', "batch-user-data.sh")
    )
    user_data_asset.grant_read(batch_instance_role)

    user_data = ec2.UserData.for_linux()
    local_path = user_data.add_s3_download_command(
        bucket=user_data_asset.bucket,
        bucket_key=user_data_asset.s3_object_key
    )
    user_data.add_execute_file_command(
        file_path=local_path,
        arguments=f"s3://{umccrise_wrapper_asset.bucket.bucket_name}/{umccrise_wrapper_asset.s3_object_key}"
    )

    # Generate user data wrapper to comply with LaunchTemplate required MIME
    # multi-part archive format for user data
    mime_wrapper = ec2.UserData.custom('MIME-Version: 1.0')
    mime_wrapper.add_commands('Content-Type: multipart/mixed; boundary="==MYBOUNDARY=="')
    mime_wrapper.add_commands('')
    mime_wrapper.add_commands('--==MYBOUNDARY==')
    mime_wrapper.add_commands('Content-Type: text/x-shellscript; charset="us-ascii"')
    mime_wrapper.add_commands('')
    # install AWS CLI, as it's unexpectedly missing from the Amazon Linux 2 AMI...
    mime_wrapper.add_commands('yum -y install unzip')
    mime_wrapper.add_commands('cd /opt')
    mime_wrapper.add_commands('curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"')
    mime_wrapper.add_commands('unzip awscliv2.zip')
    mime_wrapper.add_commands('sudo ./aws/install --bin-dir /usr/bin')
    # insert our actual user data payload
    mime_wrapper.add_commands(user_data.render())
    mime_wrapper.add_commands('--==MYBOUNDARY==--')

    launch_template = ec2.CfnLaunchTemplate(
        self,
        'UmccriseBatchComputeLaunchTemplate',
        launch_template_name='UmccriseBatchComputeLaunchTemplate',
        launch_template_data={
            'userData': core.Fn.base64(mime_wrapper.render()),
            'blockDeviceMappings': block_device_mappings
        }
    )

    launch_template_spec = batch.LaunchTemplateSpecification(
        launch_template_name=launch_template.launch_template_name,
        version='$Latest'
    )

    my_compute_res = batch.ComputeResources(
        type=(batch.ComputeResourceType.SPOT
              if props['compute_env_type'].lower() == 'spot'
              else batch.ComputeResourceType.ON_DEMAND),
        allocation_strategy=batch.AllocationStrategy.BEST_FIT_PROGRESSIVE,
        desiredv_cpus=0,
        maxv_cpus=320,
        minv_cpus=0,
        image=ec2.MachineImage.generic_linux(ami_map={'ap-southeast-2': props['compute_env_ami']}),
        launch_template=launch_template_spec,
        spot_fleet_role=spotfleet_role,
        instance_role=batch_instance_profile.instance_profile_name,
        vpc=vpc,
        vpc_subnets=ec2.SubnetSelection(
            subnet_type=ec2.SubnetType.PRIVATE,
            # availability_zones=["ap-southeast-2a"]
        ),
        security_groups=[batch_security_group]
        # compute_resources_tags=core.Tag('Creator', 'Batch')
    )
    # XXX: How to add more than one tag above??
    # https://github.com/aws/aws-cdk/issues/7350
    # core.Tag.add(my_compute_res, 'Foo', 'Bar')

    my_compute_env = batch.ComputeEnvironment(
        self,
        'UmccriseBatchComputeEnv',
        compute_environment_name="cdk-umccrise-batch-compute-env",
        service_role=batch_service_role,
        compute_resources=my_compute_res
    )
    # child = my_compute_env.node.default_child
    # child_comp_res = child.compute_resources
    # child_comp_res.tags = "{'Foo': 'Bar'}"

    job_queue = batch.JobQueue(
        self,
        'UmccriseJobQueue',
        job_queue_name='cdk-umccrise_job_queue',
        compute_environments=[
            batch.JobQueueComputeEnvironment(
                compute_environment=my_compute_env,
                order=1
            )
        ],
        priority=10
    )

    job_container = batch.JobDefinitionContainer(
        image=ecs.ContainerImage.from_registry(name=props['container_image']),
        vcpus=32,
        memory_limit_mib=100000,
        command=[
            "/opt/container/umccrise-wrapper.sh",
            "Ref::vcpus"
        ],
        mount_points=[
            ecs.MountPoint(
                container_path='/work',
                read_only=False,
                source_volume='work'
            ),
            ecs.MountPoint(
                container_path='/opt/container',
                read_only=True,
                source_volume='container'
            )
        ],
        volumes=[
            ecs.Volume(
                name='container',
                host=ecs.Host(
                    source_path='/opt/container'
                )
            ),
            ecs.Volume(
                name='work',
                host=ecs.Host(
                    source_path='/mnt'
                )
            )
        ],
        privileged=True
    )

    job_definition = batch.JobDefinition(
        self,
        'UmccriseJobDefinition',
        job_definition_name='cdk-umccrise-job-definition',
        parameters={'vcpus': '1'},
        container=job_container,
        timeout=core.Duration.hours(5)
    )

    ################################################################################
    # Set up job submission Lambda

    lambda_role = iam.Role(
        self,
        'UmccriseLambdaRole',
        assumed_by=iam.ServicePrincipal('lambda.amazonaws.com'),
        managed_policies=[
            iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AWSLambdaBasicExecutionRole'),
            iam.ManagedPolicy.from_aws_managed_policy_name('AWSBatchFullAccess')  # TODO: restrict!
        ]
    )

    for bucket in ro_buckets:
        bucket.grant_read(lambda_role)
    for bucket in rw_buckets:
        bucket.grant_read(lambda_role)
    ecr_repo.grant(lambda_role, 'ecr:ListImages')

    # TODO: support dev/prod split, i.e. image being configurable on dev, but fixed on prod
    # may need a default JobDefinition to be set up
    lmbda.Function(
        self,
        'UmccriseLambda',
        function_name='umccrise_batch_lambda',
        handler='umccrise.lambda_handler',
        runtime=lmbda.Runtime.PYTHON_3_7,
        code=lmbda.Code.from_asset('lambdas/umccrise'),
        environment={
            'JOBNAME_PREFIX': "UMCCRISE_",
            'JOBQUEUE': job_queue.job_queue_name,
            'UMCCRISE_MEM': '100000',
            'UMCCRISE_VCPUS': '32',
            'JOBDEF': job_definition.job_definition_name,
            'REFDATA_BUCKET': props['refdata_bucket'],
            'INPUT_BUCKET': props['input_bucket'],
            'RESULT_BUCKET': props['result_bucket'],
            'IMAGE_CONFIGURABLE': props['image_configurable']
        },
        role=lambda_role
    )
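# A minimal sketch (hypothetical -- the real handler lives in lambdas/umccrise and is
# not part of this stack) of how the submission Lambda above could turn its
# environment variables into a Batch job. The `job_suffix` event field is an
# illustrative assumption.
import os

import boto3

batch_client = boto3.client('batch')


def lambda_handler(event, context):
    return batch_client.submit_job(
        jobName=os.environ['JOBNAME_PREFIX'] + event['job_suffix'],
        jobQueue=os.environ['JOBQUEUE'],
        jobDefinition=os.environ['JOBDEF'],
        containerOverrides={
            # sized via the UMCCRISE_* environment variables set on the function
            'vcpus': int(os.environ['UMCCRISE_VCPUS']),
            'memory': int(os.environ['UMCCRISE_MEM']),
        },
    )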
def __init__(
    self,
    scope: core.Construct,
    id: str,
    vpc_stack,
    logstash_ec2=True,
    logstash_fargate=True,
    **kwargs,
) -> None:
    super().__init__(scope, id, **kwargs)

    # get s3 bucket name
    s3client = boto3.client("s3")
    s3_bucket_list = s3client.list_buckets()
    s3_bucket_name = ""
    for bkt in s3_bucket_list["Buckets"]:
        try:
            bkt_tags = s3client.get_bucket_tagging(Bucket=bkt["Name"])["TagSet"]
            for keypairs in bkt_tags:
                if (keypairs["Key"] == "aws:cloudformation:stack-name"
                        and keypairs["Value"] == "elkk-athena"):
                    s3_bucket_name = bkt["Name"]
        except ClientError as err:
            if err.response["Error"]["Code"] in ["NoSuchTagSet", "NoSuchBucket"]:
                pass
            else:
                print(f"Unexpected error: {err}")

    # get elastic endpoint
    esclient = boto3.client("es")
    es_domains = esclient.list_domain_names()
    try:
        es_domain = [
            dom["DomainName"] for dom in es_domains["DomainNames"]
            if "elkk-" in dom["DomainName"]
        ][0]
        es_endpoint = esclient.describe_elasticsearch_domain(DomainName=es_domain)
        es_endpoint = es_endpoint["DomainStatus"]["Endpoints"]["vpc"]
    except IndexError:
        es_endpoint = ""

    # assets for logstash stack
    logstash_yml = assets.Asset(self, "logstash_yml",
                                path=os.path.join(dirname, "logstash.yml"))
    logstash_repo = assets.Asset(self, "logstash_repo",
                                 path=os.path.join(dirname, "logstash.repo"))

    # update the conf file and stage it as an asset;
    # the kafka broker string does not need reformatting
    logstash_conf_asset = file_updated(
        os.path.join(dirname, "logstash.conf"),
        {
            "$s3_bucket": s3_bucket_name,
            "$es_endpoint": es_endpoint,
            "$kafka_brokers": kafka_get_brokers(),
            "$elkk_region": os.environ["CDK_DEFAULT_REGION"],
        },
    )
    logstash_conf = assets.Asset(
        self,
        "logstash.conf",
        path=logstash_conf_asset,
    )

    # logstash security group
    logstash_security_group = ec2.SecurityGroup(
        self,
        "logstash_security_group",
        vpc=vpc_stack.get_vpc,
        description="logstash security group",
        allow_all_outbound=True,
    )
    core.Tags.of(logstash_security_group).add("project", constants["PROJECT_TAG"])
    core.Tags.of(logstash_security_group).add("Name", "logstash_sg")

    # Open port 22 for SSH
    logstash_security_group.add_ingress_rule(
        ec2.Peer.ipv4(f"{external_ip}/32"),
        ec2.Port.tcp(22),
        "from own public ip",
    )

    # get security group for kafka
    ec2client = boto3.client("ec2")
    security_groups = ec2client.describe_security_groups(
        Filters=[{"Name": "tag-value", "Values": [constants["PROJECT_TAG"]]}],
    )

    # if the kafka security group does not exist ... don't add the ingress rule
    try:
        kafka_sg_id = [
            sg["GroupId"] for sg in security_groups["SecurityGroups"]
            if "kafka security group" in sg["Description"]
        ][0]
        kafka_security_group = ec2.SecurityGroup.from_security_group_id(
            self, "kafka_security_group", security_group_id=kafka_sg_id)
        # let in logstash
        kafka_security_group.connections.allow_from(
            logstash_security_group,
            ec2.Port.all_traffic(),
            "from logstash",
        )
    except IndexError:
        # print("kafka_sg_id and kafka_security_group not found")
        pass

    # get security group for elastic
    try:
        elastic_sg_id = [
            sg["GroupId"] for sg in security_groups["SecurityGroups"]
            if "elastic security group" in sg["Description"]
        ][0]
        elastic_security_group = ec2.SecurityGroup.from_security_group_id(
            self, "elastic_security_group", security_group_id=elastic_sg_id)
        # let in logstash
        elastic_security_group.connections.allow_from(
            logstash_security_group,
            ec2.Port.all_traffic(),
            "from logstash",
        )
    except IndexError:
        pass

    # elastic policy
    access_elastic_policy = iam.PolicyStatement(
        effect=iam.Effect.ALLOW,
        actions=[
            "es:ListDomainNames",
            "es:DescribeElasticsearchDomain",
            "es:ESHttpPut",
        ],
        resources=["*"],
    )

    # kafka policy
    access_kafka_policy = iam.PolicyStatement(
        effect=iam.Effect.ALLOW,
        actions=["kafka:ListClusters", "kafka:GetBootstrapBrokers"],
        resources=["*"],
    )

    # s3 policy
    access_s3_policy = iam.PolicyStatement(
        effect=iam.Effect.ALLOW,
        actions=["s3:ListBucket", "s3:PutObject"],
        resources=["*"],
    )

    # create the Logstash instance
    if logstash_ec2:
        # userdata for Logstash
        logstash_userdata = user_data_init(log_group_name="elkk/logstash/instance")
        # create the instance
        logstash_instance = ec2.Instance(
            self,
            "logstash_client",
            instance_type=ec2.InstanceType(constants["LOGSTASH_INSTANCE"]),
            machine_image=ec2.AmazonLinuxImage(
                generation=ec2.AmazonLinuxGeneration.AMAZON_LINUX_2),
            vpc=vpc_stack.get_vpc,
            vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PUBLIC),
            key_name=constants["KEY_PAIR"],
            security_group=logstash_security_group,
            user_data=logstash_userdata,
        )
        core.Tags.of(logstash_instance).add("project", constants["PROJECT_TAG"])

        # add access to the file assets
        logstash_yml.grant_read(logstash_instance)
        logstash_repo.grant_read(logstash_instance)
        logstash_conf.grant_read(logstash_instance)

        # add permissions to instance
        logstash_instance.add_to_role_policy(statement=access_elastic_policy)
        logstash_instance.add_to_role_policy(statement=access_kafka_policy)
        logstash_instance.add_to_role_policy(statement=access_s3_policy)

        # add log permissions
        instance_add_log_permissions(logstash_instance)

        # add commands to the userdata
        logstash_userdata.add_commands(
            # get setup assets files
            f"aws s3 cp s3://{logstash_yml.s3_bucket_name}/{logstash_yml.s3_object_key} /home/ec2-user/logstash.yml",
            f"aws s3 cp s3://{logstash_repo.s3_bucket_name}/{logstash_repo.s3_object_key} /home/ec2-user/logstash.repo",
            f"aws s3 cp s3://{logstash_conf.s3_bucket_name}/{logstash_conf.s3_object_key} /home/ec2-user/logstash.conf",
            # install java
            "amazon-linux-extras install java-openjdk11 -y",
            # install git
            "yum install git -y",
            # install pip
            "yum install python-pip -y",
            # get elastic output to es
            "git clone https://github.com/awslabs/logstash-output-amazon_es.git /home/ec2-user/logstash-output-amazon_es",
            # import the logstash signing key
            "rpm --import https://artifacts.elastic.co/GPG-KEY-elasticsearch",
            # move logstash repo file
            "mv -f /home/ec2-user/logstash.repo /etc/yum.repos.d/logstash.repo",
            # install logstash from the repo
            "yum install logstash -y",
            # add user to logstash group
            "usermod -a -G logstash ec2-user",
            # move logstash.yml to final location
            "mv -f /home/ec2-user/logstash.yml /etc/logstash/logstash.yml",
            # move logstash.conf to final location
            "mv -f /home/ec2-user/logstash.conf /etc/logstash/conf.d/logstash.conf",
            # move plugin
            "mkdir /usr/share/logstash/plugins",
            "mv -f /home/ec2-user/logstash-output-amazon_es /usr/share/logstash/plugins/logstash-output-amazon_es",
            # update gemfile
            """sed -i '5igem "logstash-output-amazon_es", :path => "/usr/share/logstash/plugins/logstash-output-amazon_es"' /usr/share/logstash/Gemfile""",
            # update ownership
            "chown -R logstash:logstash /etc/logstash",
            # start logstash
            "systemctl start logstash.service",
        )

        # add the signal
        logstash_userdata.add_signal_on_exit_command(resource=logstash_instance)
        # add creation policy for instance
        logstash_instance.instance.cfn_options.creation_policy = core.CfnCreationPolicy(
            resource_signal=core.CfnResourceSignal(count=1, timeout="PT10M"))

    # fargate for logstash
    if logstash_fargate:
        # cloudwatch log group for containers
        logstash_logs_containers = logs.LogGroup(
            self,
            "logstash_logs_containers",
            log_group_name="elkk/logstash/container",
            removal_policy=core.RemovalPolicy.DESTROY,
            retention=logs.RetentionDays.ONE_WEEK,
        )
        # docker image for logstash
        logstash_image_asset = ecr_assets.DockerImageAsset(
            self, "logstash_image_asset",
            directory=dirname  # , file="Dockerfile"
        )

        # create the fargate cluster
        logstash_cluster = ecs.Cluster(self, "logstash_cluster", vpc=vpc_stack.get_vpc)
        core.Tags.of(logstash_cluster).add("project", constants["PROJECT_TAG"])

        # the task
        logstash_task = ecs.FargateTaskDefinition(
            self,
            "logstash_task",
            cpu=512,
            memory_limit_mib=1024,
        )

        # add container to the task
        logstash_task.add_container(
            logstash_image_asset.source_hash,
            image=ecs.ContainerImage.from_docker_image_asset(logstash_image_asset),
            logging=ecs.LogDrivers.aws_logs(
                stream_prefix="elkk", log_group=logstash_logs_containers),
        )

        # add permissions to the task
        logstash_task.add_to_task_role_policy(access_s3_policy)
        logstash_task.add_to_task_role_policy(access_elastic_policy)

        # the service
        logstash_service = ecs.FargateService(
            self,
            "logstash_service",
            cluster=logstash_cluster,
            task_definition=logstash_task,
            security_group=logstash_security_group,
            deployment_controller=ecs.DeploymentController(
                type=ecs.DeploymentControllerType.ECS),
        )

        # autoscale the task count on cpu utilization
        logstash_scaling = logstash_service.auto_scale_task_count(
            min_capacity=3, max_capacity=10)
        logstash_scaling.scale_on_cpu_utilization(
            "logstash_scaling",
            target_utilization_percent=75,
            scale_in_cooldown=core.Duration.seconds(60),
            scale_out_cooldown=core.Duration.seconds(60),
        )
def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
    super().__init__(scope, id, **kwargs)

    with open('./props/tasksetting.json', 'r') as f1:
        py_json1 = json.load(f1)
        ts = json.dumps(py_json1)

    # with open('./props/mappingrule.json', 'r') as f2:
    #     py_json2 = json.load(f2)
    #     mr = json.dumps(py_json2)

    with open('./props/config.json', 'r') as f2:
        configuration = json.load(f2)

    def getMappingrules(self, table_list):
        rules = []
        for index, value in enumerate(table_list, 1):
            rules.append({
                "rule-type": "selection",
                "rule-id": str(index),
                "rule-name": str(index),
                "object-locator": {
                    "schema-name": value['schemaName'],
                    "table-name": value['tableName']
                },
                "rule-action": "include",
                "filters": []
            })
        mapping_rules = {"rules": rules}
        return json.dumps(mapping_rules)

    # The code that defines your stack goes here
    S3Accessrole = _iam.Role(
        self,
        'dmsrole',
        assumed_by=_iam.ServicePrincipal('dms.amazonaws.com'),
        managed_policies=[
            _iam.ManagedPolicy.from_aws_managed_policy_name('AmazonS3FullAccess')
        ])

    raw_bucket = s3.Bucket(self, 'rawbucket', bucket_name='rawbucket-datalake-cdk-oregon')
    raw_bucket.add_lifecycle_rule(
        enabled=configuration['s3LifecycleRule']['enabled'],
        expiration=core.Duration.days(configuration['s3LifecycleRule']['expiration']))

    # my_table = ddb.Table(self, id='dynamoTable', table_name='testcdktable',
    #     partition_key=ddb.Attribute(name='lastname', type=ddb.AttributeType.STRING))

    dl_dms = _dms.CfnReplicationInstance(
        self,
        'dmsreplication',
        replication_instance_class=configuration['DMS_instance_setting']['instance_class'],
        replication_instance_identifier='datalake-instance-cdk',
        allocated_storage=configuration['DMS_instance_setting']['allocated_storage'])

    source_endpoint = _dms.CfnEndpoint(
        self,
        'sourceendpoint',
        endpoint_type='source',
        engine_name=configuration['engineName'],
        database_name=configuration['databaseName'],
        username=configuration['username'],
        password=configuration['password'],
        port=configuration['port'],
        server_name=configuration['serverName'],
    )

    target_endpoint = _dms.CfnEndpoint(
        self,
        'targetendpoint',
        endpoint_type='target',
        engine_name='s3',
        s3_settings={
            'bucketName': raw_bucket.bucket_name,
            'serviceAccessRoleArn': S3Accessrole.role_arn
        },
        extra_connection_attributes='dataFormat=parquet')

    dms_task = _dms.CfnReplicationTask(
        self,
        'data2lake-task',
        migration_type='full-load-and-cdc',
        replication_instance_arn=dl_dms.ref,
        source_endpoint_arn=source_endpoint.ref,
        target_endpoint_arn=target_endpoint.ref,
        replication_task_settings=ts,
        table_mappings=getMappingrules(self, configuration['tableList']))

    my_table = ddb.Table(
        self,
        id='dynamoTable',
        table_name='ControllerTable',
        partition_key=ddb.Attribute(name='path', type=ddb.AttributeType.STRING),
        billing_mode=ddb.BillingMode.PAY_PER_REQUEST)

    datalake_bucket = s3.Bucket(self, 'datalakebucket', bucket_name='datalake-bucket-cdk-oregon')

    glue_role = _iam.Role(
        self,
        'gluerole',
        assumed_by=_iam.ServicePrincipal('glue.amazonaws.com'),
        managed_policies=[
            _iam.ManagedPolicy.from_aws_managed_policy_name('service-role/AWSGlueServiceRole')
        ])
    raw_bucket.grant_read(glue_role)
    datalake_bucket.grant_read_write(glue_role)

    # Lake Formation settings.
    # If the managed policy 'AWSLakeFormationDataAdmin' is attached to your own IAM
    # user, extend that policy to allow "lakeformation:PutDataLakeSettings" so the
    # CfnDataLakeSettings below can be deployed.
    lake_admin_setting = _lakeformation.CfnDataLakeSettings(
        self,
        'data-lake-GrantAdmin',
        admins=[
            _lakeformation.CfnDataLakeSettings.DataLakePrincipalProperty(
                data_lake_principal_identifier=configuration['executiveArn'])
        ])

    glue_database = _glue.Database(self, 'gluedatabase', database_name='data_lake_gluedb')
    glue_database.node.add_dependency(lake_admin_setting)

    glue_role_permission_inLakeFormation = _lakeformation.CfnPermissions(
        self,
        'permission-glueRole',
        data_lake_principal=_lakeformation.CfnPermissions.DataLakePrincipalProperty(
            data_lake_principal_identifier=glue_role.role_arn),
        resource=_lakeformation.CfnPermissions.ResourceProperty(
            database_resource=_lakeformation.CfnPermissions.DatabaseResourceProperty(
                name=glue_database.database_name)),
        permissions=['ALL'])

    crawler = _glue.CfnCrawler(
        self,
        'datalakecrawler',
        name='Crawler-datalake-cdk',
        role=glue_role.role_arn,
        targets={
            's3Targets': [{
                'path': 's3://' + datalake_bucket.bucket_name + '/datalake/'
            }]
        },
        database_name='data_lake_gluedb',
        configuration="{\"Version\":1.0,\"CrawlerOutput\":{\"Partitions\":{\"AddOrUpdateBehavior\":\"InheritFromTable\"},\"Tables\":{\"AddOrUpdateBehavior\":\"MergeNewColumns\"}}}"
    )

    initialload_script = S3Assets.Asset(self, 'initial-load-code', path='./Gluejob/InitialLoad.py')
    incrementalload_script = S3Assets.Asset(
        self, 'incremental-load-code', path='./Gluejob/IncrementalLoad.py')
    initialload_script.grant_read(glue_role)
    incrementalload_script.grant_read(glue_role)
    my_table.grant_full_access(glue_role)

    initial_load_job = _glue.CfnJob(
        self,
        'initial-job',
        name='InitialLoad-cdk',
        command=_glue.CfnJob.JobCommandProperty(
            name='glueetl',
            python_version='3',
            script_location='s3://' + initialload_script.s3_bucket_name + '/' + initialload_script.s3_object_key),
        role=glue_role.role_arn,
        default_arguments={
            '--prefix': str(configuration['tableList']),
            '--bucket': raw_bucket.bucket_name,
            '--datalake_bucket': datalake_bucket.bucket_name,
            '--datalake_prefix': 'datalake/',
            '--region': CdkpyStack.of(self).region,
            '--controller_table_name': my_table.table_name
        },
        allocated_capacity=configuration['glue_job_setting']['job_capacity'],
        execution_property=_glue.CfnJob.ExecutionPropertyProperty(
            max_concurrent_runs=configuration['glue_job_setting']['max_concurrent_run_JobExecution']))

    incremental_load_job = _glue.CfnJob(
        self,
        'increment-job',
        name='IncrementalLoad-cdk',
        command=_glue.CfnJob.JobCommandProperty(
            name='glueetl',
            script_location='s3://' + incrementalload_script.s3_bucket_name + '/' + incrementalload_script.s3_object_key,
            python_version='3'),
        role=glue_role.role_arn,
        default_arguments={
            '--prefix': str(configuration['tableList']),
            '--bucket': raw_bucket.bucket_name,
            '--datalake_bucket': datalake_bucket.bucket_name,
            '--datalake_prefix': 'datalake/',
            '--region': CdkpyStack.of(self).region,
            '--controller_table_name': my_table.table_name
        },
        allocated_capacity=2,
        execution_property=_glue.CfnJob.ExecutionPropertyProperty(max_concurrent_runs=1))

    job_trigger = _glue.CfnTrigger(
        self,
        'datalake-glue-trigger',
        type='SCHEDULED',
        schedule=configuration['job_trigger_schedule'],
        start_on_creation=False,
        actions=[
            _glue.CfnTrigger.ActionProperty(job_name='IncrementalLoad-cdk')
        ])

    dl_sns = _sns.Topic(self, 'datalake_sns', display_name='data-lake-sns')
    endpoint_email = configuration['emailSubscriptionList']
    for emails in endpoint_email:
        dl_sns.add_subscription(_subscrption.EmailSubscription(emails))
    # Another way to subscribe:
    # dl_subscription = _sns.Subscription(
    #     self, 'email-subscription', topic=dl_sns, endpoint='*****@*****.**',
    #     protocol=_sns.SubscriptionProtocol.EMAIL)

    glue_events_target = _events_targets.SnsTopic(dl_sns)
    glue_events_rule = _events.Rule(
        self,
        'gluejobevents-datalake',
        description='Used for tracking failed glue jobs of the data lake',
        rule_name='dl-gluejob-event',
        event_pattern=_events.EventPattern(
            source=['aws.glue'],
            detail_type=['Glue Job State Change'],
            detail={
                "jobName": [initial_load_job.name],
                "state": ["FAILED"]
            }),
        targets=[glue_events_target])

    dms_subscription = _dms.CfnEventSubscription(
        self,
        'dmsevents-datalake',
        sns_topic_arn=dl_sns.topic_arn,
        subscription_name='datalake-dmsevents',
        source_type='replication-task',
        event_categories=['failure'])
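# For reference, a hypothetical ./props/config.json matching the keys read above.
# Every key comes from the code; all values are illustrative assumptions only
# (masked credentials stay masked):
#
# {
#     "engineName": "mysql",
#     "databaseName": "sourcedb",
#     "username": "...",
#     "password": "...",
#     "port": 3306,
#     "serverName": "source-db.example.com",
#     "tableList": [{"schemaName": "sourcedb", "tableName": "orders"}],
#     "s3LifecycleRule": {"enabled": true, "expiration": 90},
#     "DMS_instance_setting": {"instance_class": "dms.t3.medium", "allocated_storage": 50},
#     "glue_job_setting": {"job_capacity": 2, "max_concurrent_run_JobExecution": 1},
#     "job_trigger_schedule": "cron(0 2 * * ? *)",
#     "executiveArn": "arn:aws:iam::123456789012:user/admin",
#     "emailSubscriptionList": ["ops@example.com"]
# }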