Example #1
    def __init__(self,
                 scope: core.Construct,
                 id: str,
                 *,
                 emr_step: emr_code.EMRStep,
                 cluster_id: str,
                 result_path: Optional[str] = None,
                 output_path: Optional[str] = None,
                 fail_chain: Optional[sfn.IChainable] = None,
                 wait_for_step_completion: bool = True):
        super().__init__(scope, id)

        override_step_args = emr_lambdas.OverrideStepArgsBuilder.get_or_build(
            self)

        override_step_args_task = sfn.Task(
            self,
            f'{emr_step.name} - Override Args',
            result_path=f'$.{id}ResultArgs',
            task=sfn_tasks.InvokeFunction(
                override_step_args,
                payload={
                    'ExecutionInput':
                    sfn.TaskInput.from_context_at('$$.Execution.Input').value,
                    'StepName':
                    emr_step.name,
                    'Args':
                    emr_step.args
                }))

        resolved_step = emr_step.resolve(self)
        resolved_step['HadoopJarStep']['Args'] = sfn.TaskInput.from_data_at(
            f'$.{id}ResultArgs').value

        integration_pattern = sfn.ServiceIntegrationPattern.SYNC if wait_for_step_completion \
            else sfn.ServiceIntegrationPattern.FIRE_AND_FORGET

        add_step_task = sfn.Task(self,
                                 emr_step.name,
                                 output_path=output_path,
                                 result_path=result_path,
                                 task=emr_tasks.EmrAddStepTask(
                                     cluster_id=cluster_id,
                                     step=resolved_step,
                                     integration_pattern=integration_pattern))

        if fail_chain:
            override_step_args_task.add_catch(fail_chain,
                                              errors=['States.ALL'],
                                              result_path='$.Error')
            add_step_task.add_catch(fail_chain,
                                    errors=['States.ALL'],
                                    result_path='$.Error')

        override_step_args_task.next(add_step_task)

        self._start = override_step_args_task
        self._end = add_step_task
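
The OverrideStepArgs Lambda built above is not shown in this listing. A minimal sketch of what its handler might look like, assuming per-step overrides are carried in the execution input under a StepArgs key (the key name and structure are assumptions):

def handler(event, context):
    # Payload fields passed by the 'Override Args' task above
    execution_input = event['ExecutionInput']
    step_name = event['StepName']
    default_args = event['Args']

    # Hypothetical convention: overrides live at ExecutionInput.StepArgs[StepName]
    overrides = execution_input.get('StepArgs', {})
    return overrides.get(step_name, default_args)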
Example #2
    def __init__(self, scope: core.App, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        pass_through_lambda = _lambda.Function(
            self,
            'PassThroughLambda',
            runtime=_lambda.Runtime.PYTHON_3_7,
            code=_lambda.Code.asset('lambda'),
            handler='pass_through_lambda.handler')

        loop_count_lambda = _lambda.Function(
            self,
            'LoopCountLambda',
            runtime=_lambda.Runtime.PYTHON_3_7,
            code=_lambda.Code.asset('lambda'),
            handler='loop_count_lambda.handler')

        start_state_machine = sfn.Task(
            self,
            "Start CodeBuild Lambda",
            task=sfn_tasks.InvokeFunction(pass_through_lambda))

        wait_x = sfn.Wait(
            self,
            "Wait X Seconds",
            time=sfn.WaitTime.seconds_path('$.wait_time'),
        )

        get_state_machine_status = sfn.Task(
            self,
            "Get Build Status",
            task=sfn_tasks.InvokeFunction(loop_count_lambda))

        is_complete = sfn.Choice(self, "Job Complete?")

        state_machine_failed = sfn.Fail(self,
                                        "Build Failed",
                                        cause="AWS Batch Job Failed",
                                        error="DescribeJob returned FAILED")

        state_machine_success = sfn.Pass(self, "Build Success")

        definition = start_state_machine\
            .next(wait_x)\
            .next(get_state_machine_status)\
            .next(is_complete
                  .when(sfn.Condition.string_equals(
                      "$.status", "FAILED"), state_machine_failed)
                  .when(sfn.Condition.string_equals(
                      "$.status", "SUCCEEDED"), state_machine_success)
                  .otherwise(wait_x))

        sfn.StateMachine(
            self,
            "StateMachine",
            definition=definition,
            timeout=core.Duration.seconds(60),
        )
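
Neither Lambda handler referenced above is included. A plausible sketch of loop_count_lambda.handler, which must eventually set the status field the Choice state inspects (the counter field and threshold are assumptions):

def handler(event, context):
    # Count trips through the wait loop and report SUCCEEDED after a few passes;
    # returning the whole event preserves fields like wait_time for the Wait state
    count = event.get('count', 0) + 1
    event['count'] = count
    event['status'] = 'SUCCEEDED' if count >= 3 else 'RUNNING'
    return event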
Example #3
    def __init__(self, app: cdk.App, id: str, **kwargs) -> None:
        super().__init__(app, id, **kwargs)

        submit_job_activity = sfn.Activity(
            self, "SubmitJob"
        )
        check_job_activity = sfn.Activity(
            self, "CheckJob"
        )

        submit_job = sfn.Task(
            self, "Submit Job",
            task=sfn_tasks.InvokeActivity(submit_job_activity),
            result_path="$.guid",
        )
        wait_x = sfn.Wait(
            self, "Wait X Seconds",
            time=sfn.WaitTime.seconds_path('$.wait_time'),
        )
        get_status = sfn.Task(
            self, "Get Job Status",
            task=sfn_tasks.InvokeActivity(check_job_activity),
            input_path="$.guid",
            result_path="$.status",
        )
        is_complete = sfn.Choice(
            self, "Job Complete?"
        )
        job_failed = sfn.Fail(
            self, "Job Failed",
            cause="AWS Batch Job Failed",
            error="DescribeJob returned FAILED"
        )
        final_status = sfn.Task(
            self, "Get Final Job Status",
            task=sfn_tasks.InvokeActivity(check_job_activity),
            input_path="$.guid",
        )

        definition = submit_job\
            .next(wait_x)\
            .next(get_status)\
            .next(is_complete
                  .when(sfn.Condition.string_equals(
                      "$.status", "FAILED"), job_failed)
                  .when(sfn.Condition.string_equals(
                      "$.status", "SUCCEEDED"), final_status)
                  .otherwise(wait_x))

        sfn.StateMachine(
            self, "StateMachine",
            definition=definition,
            timeout=cdk.Duration.seconds(30),
        )
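
Activities are completed by an external worker that polls Step Functions for work. A minimal sketch of a worker for the SubmitJob activity above, using boto3 (the activity ARN, worker name, and output shape are assumptions):

import json

import boto3

sfn_client = boto3.client('stepfunctions')

def run_worker(activity_arn):
    while True:
        # Long-polls for up to 60 seconds; taskToken is empty when no work arrived
        task = sfn_client.get_activity_task(activityArn=activity_arn,
                                            workerName='submit-job-worker')
        token = task.get('taskToken')
        if not token:
            continue
        job_input = json.loads(task['input'])
        # ... submit the actual job using job_input here ...
        sfn_client.send_task_success(taskToken=token,
                                     output=json.dumps({'jobId': 'job-1234'}))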
Example #4
    def _lambda_glue_crawler_task(self):
        root_path = Path(os.path.dirname(os.path.abspath(__file__)))
        lambda_handler = root_path.joinpath('lambdas',
                                            'trigger_glue_crawler').as_posix()

        func = lambda_.Function(
            self,
            "TriggerGlueCrawlerLambdaHandler",
            handler="lambda.lambda_handler",
            code=lambda_.AssetCode(lambda_handler),
            environment={"crawlerName": f"{self.glue_crawler.name}"},
            initial_policy=[
                iam.PolicyStatement(
                    actions=["glue:StartCrawler"],
                    resources=["*"],
                ),
            ],
            timeout=core.Duration.seconds(30),
            runtime=lambda_.Runtime.PYTHON_3_7,
        )

        # turn the lambda into a stepfunction task so we can use it in our state machine
        task = sfn.Task(
            self,
            "TriggerGlueCrawlerLambda",
            task=sfnt.InvokeFunction(func),
        )

        return task
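
The trigger_glue_crawler handler itself is not shown. A minimal sketch of what it might contain, reading crawlerName from the environment configured above (error handling omitted):

import os

import boto3

glue = boto3.client('glue')

def lambda_handler(event, context):
    # Start the crawler named in the environment; this is the call the
    # glue:StartCrawler policy statement above permits
    glue.start_crawler(Name=os.environ['crawlerName'])
    return event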
Example #5
    def _emr_spark_step_task(self):
        # Add a EMR Step to run our pyspark job; an asset with our application will be
        # created and referenced in the job definition
        root_path = Path(os.path.dirname(os.path.abspath(__file__)))
        pyspark_script = root_path.joinpath('pyspark', 'example.py').as_posix()
        pyspark_example_asset = s3_assets.Asset(self,
                                                "PythonScript",
                                                path=pyspark_script)

        sample_spark_step = sfn.Task(
            self,
            "RunSparkExample",
            task=sfnt.EmrAddStep(
                # the concrete ClusterId will be picked up from the current state of the state machine
                cluster_id=sfn.Data.string_at("$.ClusterId"),
                name="SparkExample",
                # `command-runner.jar` is a jar from AWS that can be used to execute generic commands (like `spark-submit`)
                # if you write your programs in Java/Scala you can directly insert your jar file here instead of script location
                jar="command-runner.jar",
                args=[
                    "spark-submit",
                    "--deploy-mode",
                    "cluster",
                    "--master",
                    "yarn",
                    f"s3://{pyspark_example_asset.s3_bucket_name}/{pyspark_example_asset.s3_object_key}",
                ],
            ),
            result_path="DISCARD",
        )
        return sample_spark_step
Example #6
def test_emr_add_step_task():
    default_task_json = {
        'End': True,
        'Parameters': {
            'ClusterId': 'test-cluster-id',
            'Step': {
                'Key1': {
                    'Key2': 'Value2'
                }
            }
        },
        'Resource': {
            'Fn::Join': [
                '',
                [
                    'arn:', {
                        'Ref': 'AWS::Partition'
                    }, ':states:::elasticmapreduce:addStep.sync'
                ]
            ]
        },
        'Type': 'Task'
    }

    stack = core.Stack(core.App(), 'test-stack')

    task = sfn.Task(stack,
                    'test-task',
                    task=emr_tasks.EmrAddStepTask('test-cluster-id',
                                                  {'Key1': {
                                                      'Key2': 'Value2'
                                                  }}))

    print_and_assert(default_task_json, task)
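
The print_and_assert helper used by these tests is not part of the listing. One plausible implementation resolves the task's rendered state JSON against the stack before comparing (to_state_json and Stack.resolve are standard CDK calls):

def print_and_assert(expected: dict, task: sfn.Task):
    # to_state_json() renders the ASL for this state; Stack.resolve replaces
    # CDK tokens (e.g. the Fn::Join for the resource ARN) with their
    # CloudFormation representation so the dicts can be compared directly
    resolved = core.Stack.of(task).resolve(task.to_state_json())
    print(resolved)
    assert expected == resolved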
Example #7
    def __init__(self,
                 scope: core.Construct,
                 id: str,
                 *,
                 message: sfn.TaskInput,
                 subject: Optional[str] = None,
                 topic: Optional[sns.Topic] = None,
                 result_path: str = '$.PublishResult',
                 output_path: str = '$',
                 cause: Optional[str] = None,
                 comment: Optional[str] = None,
                 error: Optional[str] = None):
        super().__init__(scope, id)

        self._end = sfn.Fail(self,
                             'Execution Failed',
                             cause=cause,
                             comment=comment,
                             error=error)

        if topic is not None:
            self._start = sfn.Task(
                self,
                'Failure Notification',
                input_path='$',
                output_path=output_path,
                result_path=result_path,
                task=sfn_tasks.PublishToTopic(topic,
                                              message=message,
                                              subject=subject))
            self._start.next(self._end)
        else:
            self._start = self._end
Example #8
    def build(
            scope: core.Construct,
            id: str,
            *,
            cluster_configuration_path: str = '$.ClusterConfiguration.Cluster',
            output_path: str = '$',
            result_path: str = '$.ClusterConfiguration.Cluster') -> sfn.Task:
        # We use a nested Construct to avoid collisions with Lambda and Task ids
        construct = core.Construct(scope, id)

        update_cluster_tags_lambda = emr_lambdas.UpdateClusterTagsBuilder.get_or_build(
            construct)

        return sfn.Task(
            construct,
            'Update Cluster Tags',
            output_path=output_path,
            result_path=result_path,
            task=sfn_tasks.InvokeFunction(
                update_cluster_tags_lambda,
                payload={
                    'ExecutionInput':
                    sfn.TaskInput.from_context_at('$$.Execution.Input').value,
                    'ClusterConfiguration':
                    sfn.TaskInput.from_data_at(
                        cluster_configuration_path).value
                }))
Example #9
    def build(
            scope: core.Construct,
            id: str,
            *,
            default_fail_if_cluster_running: bool,
            cluster_configuration_path: str = '$.ClusterConfiguration.Cluster',
            output_path: str = '$',
            result_path: str = '$.ClusterConfiguration.Cluster') -> sfn.Task:
        # We use a nested Construct to avoid collisions with Lambda and Task ids
        construct = core.Construct(scope, id)

        fail_if_cluster_running_lambda = emr_lambdas.FailIfClusterRunningBuilder.get_or_build(
            construct)

        return sfn.Task(
            construct,
            'Fail If Cluster Running',
            output_path=output_path,
            result_path=result_path,
            task=sfn_tasks.InvokeFunction(
                fail_if_cluster_running_lambda,
                payload={
                    'ExecutionInput':
                    sfn.TaskInput.from_context_at('$$.Execution.Input').value,
                    'DefaultFailIfClusterRunning':
                    default_fail_if_cluster_running,
                    'ClusterConfiguration':
                    sfn.TaskInput.from_data_at(
                        cluster_configuration_path).value
                }))
Example #10
def test_start_execution_task():
    default_task_json = {
        'End': True,
        'Parameters': {
            'StateMachineArn': {
                'Ref': 'teststatemachine7F4C511D'
            },
            'Input.$': '$$.Execution.Input'
        },
        'Type': 'Task',
        'Resource': {
            'Fn::Join': [
                '',
                [
                    'arn:', {
                        'Ref': 'AWS::Partition'
                    }, ':states:::states:startExecution.sync'
                ]
            ]
        }
    }

    stack = core.Stack(core.App(), 'test-stack')

    state_machine = sfn.StateMachine(stack,
                                     'test-state-machine',
                                     definition=sfn.Chain.start(
                                         sfn.Succeed(stack, 'Succeeded')))

    task = sfn.Task(stack,
                    'test-task',
                    task=emr_tasks.StartExecutionTask(state_machine))

    print_and_assert(default_task_json, task)
Example #11
    def __init__(self,
                 scope: core.Construct,
                 id: str,
                 *,
                 message: sfn.TaskInput,
                 subject: Optional[str] = None,
                 topic: Optional[sns.Topic] = None,
                 result_path: str = '$.PublishResult',
                 output_path: Optional[str] = None):
        super().__init__(scope, id)

        self._end = sfn.Succeed(self, 'Succeeded', output_path=output_path)

        if topic is not None:
            self._start = sfn.Task(
                self,
                'Success Notification',
                input_path='$',
                output_path='$',
                result_path=result_path,
                task=sfn_tasks.PublishToTopic(topic,
                                              message=message,
                                              subject=subject))
            self._start.next(self._end)
        else:
            self._start = self._end
Example #12
    def build(
            scope: core.Construct,
            id: str,
            *,
            override_cluster_configs_lambda: Optional[
                aws_lambda.Function] = None,
            allowed_cluster_config_overrides: Optional[Dict[str, str]] = None,
            cluster_configuration_path: str = '$.ClusterConfiguration.Cluster',
            output_path: str = '$',
            result_path: str = '$.ClusterConfiguration.Cluster') -> sfn.Task:
        # We use a nested Construct to avoid collisions with Lambda and Task ids
        construct = core.Construct(scope, id)

        override_cluster_configs_lambda = \
            emr_lambdas.OverrideClusterConfigsBuilder.get_or_build(construct) \
            if override_cluster_configs_lambda is None \
            else override_cluster_configs_lambda

        return sfn.Task(
            construct,
            'Override Cluster Configs',
            output_path=output_path,
            result_path=result_path,
            task=sfn_tasks.InvokeFunction(
                override_cluster_configs_lambda,
                payload={
                    'ExecutionInput':
                    sfn.TaskInput.from_context_at('$$.Execution.Input').value,
                    'ClusterConfiguration':
                    sfn.TaskInput.from_data_at(
                        cluster_configuration_path).value,
                    'AllowedClusterConfigOverrides':
                    allowed_cluster_config_overrides
                }))
Example #13
    def create_state_machine(self, lambda_functions, page_sqs):

        task_wrapup = aws_stepfunctions.Task(
            self, "task_wrapup",
            task=aws_stepfunctions_tasks.RunLambdaTask(lambda_functions["wrapup"])
        )

        task_analyze_with_scale = aws_stepfunctions.Task(
            self, "AnalyzeWithScale",
            task=aws_stepfunctions_tasks.SendToQueue(
                queue=page_sqs,
                message_body=aws_stepfunctions.TaskInput.from_object(
                    {
                        "token": aws_stepfunctions.Context.task_token,
                        "id.$": "$.id",
                        "bucket.$": "$.bucket",
                        "original_upload_pdf.$": "$.original_upload_pdf",
                        "SAGEMAKER_WORKFLOW_AUGMENTED_AI_ARN.$": "$.SAGEMAKER_WORKFLOW_AUGMENTED_AI_ARN",
                        "key.$": "$.key"
                    }
                ),
                delay=None, 
                integration_pattern=aws_stepfunctions.ServiceIntegrationPattern.WAIT_FOR_TASK_TOKEN
            )
        )

        process_map = aws_stepfunctions.Map(
            self, "Process_Map",
            items_path="$.image_keys",
            result_path="DISCARD",
            parameters={
                "id.$": "$.id",
                "bucket.$": "$.bucket",
                "original_upload_pdf.$": "$.original_upload_pdf",
                "SAGEMAKER_WORKFLOW_AUGMENTED_AI_ARN.$": "$.SAGEMAKER_WORKFLOW_AUGMENTED_AI_ARN",
                "key.$": "$$.Map.Item.Value"
            }
        ).iterator(task_analyze_with_scale)

        definition = process_map.next(task_wrapup)

        aws_stepfunctions.StateMachine(
            scope=self,
            id="multipagepdfa2i_fancy_stepfunction",
            state_machine_name="multipagepdfa2i_fancy_stepfunction",
            definition=definition
        )
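
The SendToQueue task above pauses each Map iteration until someone returns its task token. A minimal sketch of the consumer side, which would process the queued page and resume the execution (queue URL handling and the actual analysis are assumptions):

import json

import boto3

sqs = boto3.client('sqs')
sfn_client = boto3.client('stepfunctions')

def poll_queue(queue_url):
    messages = sqs.receive_message(QueueUrl=queue_url,
                                   MaxNumberOfMessages=1,
                                   WaitTimeSeconds=20).get('Messages', [])
    for message in messages:
        body = json.loads(message['Body'])
        # ... analyze the page referenced by body['key'] here ...
        # Resume the paused iteration with the token carried in the message
        sfn_client.send_task_success(taskToken=body['token'],
                                     output=json.dumps({'key': body['key']}))
        sqs.delete_message(QueueUrl=queue_url,
                           ReceiptHandle=message['ReceiptHandle'])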
Example #14
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # Step Function Starts Here

        # The first thing we need to do is see if they are asking for pineapple on a pizza
        pineapple_check_lambda = _lambda.Function(self, "pineappleCheckLambdaHandler",
                                                  runtime=_lambda.Runtime.NODEJS_12_X,
                                                  handler="orderPizza.handler",
                                                  code=_lambda.Code.from_asset("lambdas"),
                                                  )

        # Step functions are built up of steps, we need to define our first step
        order_pizza = step_fn.Task(self, 'Order Pizza Job',
                                   task=step_fn_tasks.InvokeFunction(pineapple_check_lambda),
                                   input_path='$.flavour',
                                   result_path='$.pineappleAnalysis'
                                   )

        # Pizza Order failure step defined
        job_failed = step_fn.Fail(self, "Sorry, We Don't add Pineapple",
                                  cause='Failed To Make Pizza',
                                  error='They asked for Pineapple')

        # If they didn't ask for pineapple let's cook the pizza
        cook_pizza = step_fn.Pass(self, 'Lets make your pizza')

        # If they ask for a pizza with pineapple, fail. Otherwise cook the pizza
        definition = step_fn.Chain \
            .start(order_pizza) \
            .next(step_fn.Choice(self, 'With Pineapple?') \
                  .when(step_fn.Condition.boolean_equals('$.pineappleAnalysis.containsPineapple', True), job_failed) \
                  .otherwise(cook_pizza))

        state_machine = step_fn.StateMachine(self, 'StateMachine', definition=definition, timeout=core.Duration.minutes(5))

        # Dead Letter Queue Setup
        dlq = sqs.Queue(self, 'stateMachineLambdaDLQ', visibility_timeout=core.Duration.seconds(300))

        # defines an AWS Lambda resource to connect to our API Gateway
        state_machine_lambda = _lambda.Function(self, "stateMachineLambdaHandler",
                                                runtime=_lambda.Runtime.NODEJS_12_X,
                                                handler="stateMachineLambda.handler",
                                                code=_lambda.Code.from_asset("lambdas"),
                                                environment={
                                                    'statemachine_arn': state_machine.state_machine_arn
                                                }
                                                )

        state_machine.grant_start_execution(state_machine_lambda)

        # defines an API Gateway REST API resource backed by our "sqs_publish_lambda" function.
        api_gw.LambdaRestApi(self, 'Endpoint',
                             handler=state_machine_lambda
                             )
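
The Node.js handler behind the endpoint is not shown. A Python equivalent of what stateMachineLambda might do, assuming it forwards the API request body as the execution input:

import json
import os

import boto3

sfn_client = boto3.client('stepfunctions')

def handler(event, context):
    # Start the pizza state machine; grant_start_execution above is what
    # allows this call from the Lambda's role
    execution = sfn_client.start_execution(
        stateMachineArn=os.environ['statemachine_arn'],
        input=event.get('body') or json.dumps({'flavour': 'pepperoni'}))
    return {'statusCode': 200,
            'body': json.dumps({'executionArn': execution['executionArn']})}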
Example #15
    def __init__(self,
                 scope: core.Construct,
                 id: str,
                 name: str,
                 state_machine: sfn.StateMachine,
                 input: Optional[Mapping[str, Any]] = None,
                 fail_chain: Optional[sfn.IChainable] = None):
        super().__init__(scope, id)

        state_machine_task = sfn.Task(
            self,
            name,
            task=emr_tasks.StartExecutionTask(
                state_machine=state_machine,
                input=input,
                integration_pattern=sfn.ServiceIntegrationPattern.SYNC))

        parse_json_string = emr_lambdas.ParseJsonStringBuilder.get_or_build(
            self)

        parse_json_string_task = sfn.Task(
            self,
            f'{name} - Parse JSON Output',
            result_path='$',
            task=sfn_tasks.InvokeFunction(
                parse_json_string,
                payload={
                    'JsonString': sfn.TaskInput.from_data_at('$.Output').value
                }))

        if fail_chain:
            state_machine_task.add_catch(fail_chain,
                                         errors=['States.ALL'],
                                         result_path='$.Error')
            parse_json_string_task.add_catch(fail_chain,
                                             errors=['States.ALL'],
                                             result_path='$.Error')

        state_machine_task.next(parse_json_string_task)

        self._start = state_machine_task
        self._end = parse_json_string_task
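
Fragments like this one expose _start and _end so they can be chained into a larger definition. Assuming the class implements sfn.IChainable (for example by extending sfn.StateMachineFragment), the accessors might look like:

    @property
    def start_state(self) -> sfn.State:
        # Entry point used when another state chains into this fragment
        return self._start

    @property
    def end_states(self) -> List[sfn.INextable]:
        # States that the next state in the chain is attached to
        return [self._end]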
Example #16
 def _emr_terminate_cluster_task(self):
     # Shut down the cluster
     terminate_cluster = sfn.Task(
         self,
         "TerminateCluster",
         task=sfnt.EmrTerminateCluster(
             cluster_id=sfn.Data.string_at("$.ClusterId"),
             integration_pattern=sfn.ServiceIntegrationPattern.SYNC,
         ),
         result_path="DISCARD",
     )
     return terminate_cluster
Example #17
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)
        #lf1= Function(self, id="my_stack_lambda", runtime=Runtime.PYTHON_3_7, handler='handlers/my_lambda_handler', code='', function_name='my_example_lambda')
        my_table = _dynamodb.Table(self,
                                   id='dynamoTable',
                                   table_name='testcdktable',
                                   partition_key=_dynamodb.Attribute(
                                       name='lastname',
                                       type=_dynamodb.AttributeType.STRING))
        my_s3_bucket = _s3.Bucket(self,
                                  id='s3bucket',
                                  bucket_name='mynpbsample3bucket')

        my_lambda_function = _lambda.Function(
            self,
            id='lambdafunction',
            runtime=_lambda.Runtime.PYTHON_3_7,
            handler='hello.handler',
            code=_lambda.Code.asset('lambdacode'))

        process_purchase_function = _lambda.Function(
            self,
            id='process_purchase',
            runtime=_lambda.Runtime.PYTHON_3_7,
            handler='process_purchase.handler',
            code=_lambda.Code.asset('lambdacode'))

        process_refund_function = _lambda.Function(
            self,
            id='process_refund',
            runtime=_lambda.Runtime.PYTHON_3_7,
            handler='process_refund.handler',
            code=_lambda.Code.asset('lambdacode'))

        #start_state = sfn.Pass(self, "start_state")

        definition = sfn.Task(
            self,
            'Get Process Type',
            task=tasks.InvokeFunction(process_purchase_function))

        sfn.StateMachine(
            self,
            "MyStateMachine",
            definition=definition,
            timeout=core.Duration.seconds(30),
        )

        my_topic = sns.Topic(self,
                             "MyTopic",
                             display_name="Customer Subscription")
Example #18
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # space for feeder Lambda function
        feeder = aws_lambda.Function(
            self,
            id='_feeder',
            code=aws_lambda.Code.asset('./code'),
            handler='feeder.handler',
            runtime=aws_lambda.Runtime.PYTHON_3_7,
            description='Feeder function for the Witness project')

        # space for saver Lambda function
        saver = aws_lambda.Function(
            self,
            id='_saver',
            code=aws_lambda.Code.asset('./code'),
            handler='saver.handler',
            runtime=aws_lambda.Runtime.PYTHON_3_7,
            description='Saver function for the Witness project')
        # trigger the feeder Lambda on object uploads; `archive` is an S3
        # bucket assumed to be defined elsewhere in this stack
        archive.add_event_notification(aws_s3.EventType.OBJECT_CREATED_PUT,
                                       s3n.LambdaDestination(feeder))


        # space for stepfunction
        feederTask = aws_stepfunctions.Task(
            self,
            id='_feederTask',
            task=aws_tasks.InvokeFunction(feeder))

        saverTask = aws_stepfunctions.Task(
            self,
            id='_saverTask',
            task=aws_tasks.InvokeFunction(saver))

        definition = feederTask.next(saverTask)

        orchestrator = aws_stepfunctions.StateMachine(
            self,
            id='_orchestrator',
            state_machine_name='witness_orchestrator',
            definition=definition)
Example #19
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        logging_lambda = lambda_func.Function(
            scope=self,
            id="logging_lambda",
            function_name="logging_lambda",
            handler="logging-lambda.main",
            runtime=lambda_func.Runtime.PYTHON_3_7,
            code=lambda_func.Code.from_asset("./code"))

        second_lambda = lambda_func.Function(
            scope=self,
            id="second_lambda",
            function_name="second_lambda",
            handler="second-lambda.main",
            runtime=lambda_func.Runtime.PYTHON_3_7,
            code=lambda_func.Code.from_asset("./code"))

        logging_lambda_task = tasks.InvokeFunction(logging_lambda)
        logging_step = stepfunctions.Task(scope=self,
                                          id="invoke_logging_function",
                                          task=logging_lambda_task)

        second_lambda_task = tasks.InvokeFunction(second_lambda)
        second_step = stepfunctions.Task(scope=self,
                                         id="invoke_second_function",
                                         task=second_lambda_task)

        definition = logging_step.next(second_step)

        stepfunctions.StateMachine(
            scope=self,
            id="state_machine",
            state_machine_name="state_machine",
            definition=definition,
        )
Example #20
    def __init__(self, app: core.App, cfn_name: str, stack_env):
        super().__init__(scope=app, id=f"{cfn_name}-{stack_env}")

        # lambda
        lambda_task = lambda_.Function(
            scope=self,
            id=f"{cfn_name}-lambda-task",
            code=lambda_.AssetCode.from_asset("lambda_script"),
            handler="lambda_handler.lambda_task",
            timeout=core.Duration.seconds(10),
            runtime=self.LAMBDA_PYTHON_RUNTIME,
            memory_size=128
        )

        # StepFunction Tasks
        sns_source = sfn.Pass(
            scope=self,
            id=f"{cfn_name}-sfn-pass",
            comment="pass example",
            input_path="$",
            result_path="$.source",
            result=sfn.Result.from_string("example"),
            output_path="$"
        )

        arguments_generation = sfn.Task(
            scope=self,
            id=f"{cfn_name}-sfn-lambda-task",
            task=sfn_tasks.RunLambdaTask(
                lambda_function=lambda_task,
                payload=sfn.TaskInput.from_object({
                    "time.$": "$.time",
                    "source.$": "$.source"
                })),
            input_path="$",
            result_path="$.arguments",
            output_path="$.arguments.Payload"
        )

        # stepfunctions
        definition = sns_source.next(arguments_generation)

        _ = sfn.StateMachine(
            scope=self,
            id=f"{cfn_name}-SFn-{stack_env}",
            definition=definition
        )
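
Unlike InvokeFunction, RunLambdaTask records the full invocation result, so the function's return value lands under a Payload key; that is why the task above sets output_path="$.arguments.Payload". A sketch of the handler it calls (field names beyond time and source are assumptions):

def lambda_task(event, context):
    # event carries the fields selected by the payload mapping above
    return {
        'time': event['time'],
        'source': event['source'],
        'generated': True,
    }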
Example #21
    def build(scope: core.Construct,
              id: str,
              *,
              name: str,
              cluster_id: str,
              result_path: Optional[str] = None,
              output_path: Optional[str] = None) -> sfn.Task:
        # We use a nested Construct to avoid collisions with Task ids
        construct = core.Construct(scope, id)

        return sfn.Task(
            construct,
            name,
            output_path=output_path,
            result_path=result_path,
            task=sfn_tasks.EmrTerminateCluster(
                cluster_id=cluster_id,
                integration_pattern=sfn.ServiceIntegrationPattern.SYNC))
Example #22
    def build(scope: core.Construct,
              id: str,
              *,
              cluster_name: str,
              cluster_tags: List[core.Tag],
              profile_namespace: str,
              profile_name: str,
              configuration_namespace: str,
              configuration_name: str,
              output_path: str = '$',
              result_path: str = '$.ClusterConfiguration') -> sfn.Task:
        # We use a nested Construct to avoid collisions with Lambda and Task ids
        construct = core.Construct(scope, id)

        load_cluster_configuration_lambda = emr_lambdas.LoadClusterConfigurationBuilder.build(
            construct,
            profile_namespace=profile_namespace,
            profile_name=profile_name,
            configuration_namespace=configuration_namespace,
            configuration_name=configuration_name)

        return sfn.Task(construct,
                        'Load Cluster Configuration',
                        output_path=output_path,
                        result_path=result_path,
                        task=sfn_tasks.InvokeFunction(
                            load_cluster_configuration_lambda,
                            payload={
                                'ClusterName':
                                cluster_name,
                                'ClusterTags': [{
                                    'Key': t.key,
                                    'Value': t.value
                                } for t in cluster_tags],
                                'ProfileNamespace':
                                profile_namespace,
                                'ProfileName':
                                profile_name,
                                'ConfigurationNamespace':
                                configuration_namespace,
                                'ConfigurationName':
                                configuration_name,
                            }))
Example #23
 def _emr_create_cluster_task(self, pipeline_name):
     # Let the Step Function create a uniform instance group cluster
     # with 1 Master and 4 Core nodes
     create_cluster = sfn.Task(
         self,
         "CreateCluster",
         # this is very similar to the specification menu in AWS UI we used during the course
         task=sfnt.EmrCreateCluster(
             name=pipeline_name,
             applications=[
                 sfnt.EmrCreateCluster.ApplicationConfigProperty(
                     name="spark")
             ],
             # specify the cluster worker/master hardware
             instances=sfnt.EmrCreateCluster.InstancesConfigProperty(
                 instance_groups=[
                     sfnt.EmrCreateCluster.InstanceGroupConfigProperty(
                         instance_count=1,
                         instance_role=sfnt.EmrCreateCluster.
                         InstanceRoleType.MASTER,
                         instance_type="m5.xlarge",
                         name="Master",
                     ),
                     sfnt.EmrCreateCluster.InstanceGroupConfigProperty(
                         instance_count=4,
                         instance_role=sfnt.EmrCreateCluster.
                         InstanceRoleType.CORE,
                         instance_type="m5.xlarge",
                         name="Core",
                     ),
                 ], ),
             cluster_role=self.emr_instance_role,
             service_role=self.emr_service_role,
             release_label="emr-6.0.0",
             log_uri=
             f"s3://{self.emr_logging_bucket.bucket_name}/{pipeline_name}"),
         # emit only the ClusterId as this task's output
         output_path="$.ClusterId",
         result_path="$.ClusterId",
     )
     return create_cluster
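
Assuming the _emr_create_cluster_task, _emr_spark_step_task, and _emr_terminate_cluster_task helpers from Examples #23, #5, and #16 live on the same stack, they might be wired into a single pipeline like this (the construct id is an assumption):

    def _build_pipeline_state_machine(self, pipeline_name):
        # create the cluster, run the pyspark step, then tear the cluster down
        definition = self._emr_create_cluster_task(pipeline_name) \
            .next(self._emr_spark_step_task()) \
            .next(self._emr_terminate_cluster_task())

        return sfn.StateMachine(self, "EmrPipeline", definition=definition)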
Example #24
    def build(scope: core.Construct,
              id: str,
              *,
              emr_step: emr_code.EMRStep,
              cluster_id: str,
              result_path: Optional[str] = None,
              output_path: Optional[str] = None,
              wait_for_step_completion: bool = True) -> sfn.Task:
        # We use a nested Construct to avoid collisions with Task ids
        construct = core.Construct(scope, id)
        resolved_step = emr_step.resolve(construct)

        integration_pattern = sfn.ServiceIntegrationPattern.SYNC if wait_for_step_completion \
            else sfn.ServiceIntegrationPattern.FIRE_AND_FORGET

        return sfn.Task(construct,
                        emr_step.name,
                        output_path=output_path,
                        result_path=result_path,
                        task=EmrAddStepTask(
                            cluster_id=cluster_id,
                            step=resolved_step,
                            integration_pattern=integration_pattern))
Example #25
    def build(
            scope: core.Construct,
            id: str,
            *,
            roles: emr_roles.EMRRoles,
            cluster_configuration_path: str = '$.ClusterConfiguration.Cluster',
            result_path: Optional[str] = None,
            output_path: Optional[str] = None,
            wait_for_cluster_start: bool = True) -> sfn.Task:
        # We use a nested Construct to avoid collisions with Lambda and Task ids
        construct = core.Construct(scope, id)

        integration_pattern = sfn.ServiceIntegrationPattern.SYNC if wait_for_cluster_start \
            else sfn.ServiceIntegrationPattern.FIRE_AND_FORGET

        return sfn.Task(
            construct,
            'Start EMR Cluster',
            output_path=output_path,
            result_path=result_path,
            task=EmrCreateClusterTask(
                roles=roles,
                cluster_configuration_path=cluster_configuration_path,
                integration_pattern=integration_pattern))
Example #26
    def _lambda_quality_check_task(self):
        lambda_role = iam.Role(
            self,
            id=f"QualityLambdaRole",
            assumed_by=iam.ServicePrincipal("lambda.amazonaws.com"),
            managed_policies=[
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    "AmazonS3FullAccess"),
                iam.ManagedPolicy.from_aws_managed_policy_name(
                    "AmazonAthenaFullAccess"),
            ],
        )

        root_path = Path(os.path.dirname(os.path.abspath(__file__)))
        lambda_handler = root_path.joinpath('lambdas',
                                            'quality_check').as_posix()

        func = lambda_.Function(
            self,
            "QualityCheckAthenaLambdaHandler",
            handler="lambda.lambda_handler",
            code=lambda_.AssetCode(lambda_handler),
            environment={"athenaDatabase": f"{self.glue_db_name}"},
            role=lambda_role,
            timeout=core.Duration.seconds(30),
            runtime=lambda_.Runtime.PYTHON_3_7,
        )

        # turn the lambda into a stepfunction task so we can use it in our state machine
        task = sfn.Task(
            self,
            "QualityCheckAthenaLambda",
            task=sfnt.InvokeFunction(func),
        )

        return task
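
A sketch of the quality_check handler, which might run a validation query through Athena against the database passed in the environment (the query, table, and results location are assumptions):

import os

import boto3

athena = boto3.client('athena')

def lambda_handler(event, context):
    # Fire a validation query against the Glue database configured above;
    # the AmazonAthenaFullAccess managed policy on the role permits this
    response = athena.start_query_execution(
        QueryString='SELECT COUNT(*) FROM example_table',
        QueryExecutionContext={'Database': os.environ['athenaDatabase']},
        ResultConfiguration={'OutputLocation': 's3://example-results-bucket/'})
    return {'QueryExecutionId': response['QueryExecutionId']}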
Example #27
    def __init__(self, app: core.App, id: str, **kwargs) -> None:
        super().__init__(app, id, **kwargs)

        # Create both lambdas

        with open("lambda-submit.py", encoding="utf8") as fp:
            lambda_submit_code = fp.read()

        lambdaFn1 = lambda_.Function(
            self,
            "submitsmbatch",
            code=lambda_.InlineCode(lambda_submit_code),
            handler="index.lambda_handler",
            timeout=core.Duration.seconds(300),
            runtime=lambda_.Runtime.PYTHON_3_7,
            environment={
                "transform_job_name": transform_job_name,
                "model_name": model_name,
                "max_concurrent": max_concurrent,
                "max_payload_size": max_payload_size,
                "s3_uri_in": s3_uri_in,
                "s3_uri_out": s3_uri_out,
                "instance_type": instance_type,
                "instance_count": instance_count,
            }
        )

        # Add perms
        lambdaFn1.add_to_role_policy(aws_iam.PolicyStatement(
            actions=['sagemaker:CreateTransformJob'],
            resources=['arn:aws:sagemaker:{}:{}:transform-job/{}*'.format(
                my_region, my_acc_id, transform_job_name)]))

        with open("lambda-check.py", encoding="utf8") as fp:
            lambda_check_code = fp.read()

        lambdaFn2 = lambda_.Function(
            self,
            "checksmbatch",
            code=lambda_.InlineCode(lambda_check_code),
            handler="index.lambda_handler",
            timeout=core.Duration.seconds(300),
            runtime=lambda_.Runtime.PYTHON_3_7,
            environment={"model_name":model_name, # CHANGE TO YOUR ENDPOINT NAME!!
                        "content_type":"text/csv"}
        )
        # Add perms
        lambdaFn2.add_to_role_policy(aws_iam.PolicyStatement(
            actions=['sagemaker:DescribeTransformJob'],
            resources=['arn:aws:sagemaker:{}:{}:transform-job/{}*'.format(
                my_region, my_acc_id, transform_job_name)]))
        # Define state machine

        # submit_job_activity = sfn.Activity(
        #     self, "SubmitJob"
        # )
        # check_job_activity = sfn.Activity(
        #     self, "CheckJob"
        # )

        submit_job = sfn.Task(
            self, "Submit Job",
            task=sfn_tasks.InvokeFunction(lambdaFn1),
        )

        wait_x = sfn.Wait(
            self, "Wait 1 minute",
            time=sfn.WaitTime.duration(core.Duration.minutes(1)),
        )
        get_status = sfn.Task(
            self, "Get Job Status",
            task=sfn_tasks.InvokeFunction(lambdaFn2),
        )
        is_complete = sfn.Choice(
            self, "Job Complete?"
        )
        job_failed = sfn.Fail(
            self, "Job Failed",
            cause="AWS Batch Job Failed",
            error="DescribeJob returned FAILED"
        )
        final_status = sfn.Task(
            self, "Get Final Job Status",
            task=sfn_tasks.InvokeFunction(lambdaFn2),
        )

        definition = submit_job\
            .next(wait_x)\
            .next(get_status)\
            .next(is_complete
                  .when(sfn.Condition.string_equals(
                      "$.status", "Failed"), job_failed)
                  .when(sfn.Condition.string_equals(
                      "$.status", "Completed"), final_status)
                  .otherwise(wait_x))

        sfn.StateMachine(
            self, "SMbatchInference",
            definition=definition,
        )
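
A sketch of what lambda-check.py might contain, assuming the transform job name reaches the function through the event (the environment shown for checksmbatch carries only model_name and content_type):

import boto3

sagemaker = boto3.client('sagemaker')

def lambda_handler(event, context):
    # TransformJobStatus is InProgress / Completed / Failed / Stopping / Stopped,
    # which matches the 'Failed' and 'Completed' conditions in the Choice state
    job = sagemaker.describe_transform_job(
        TransformJobName=event['transform_job_name'])
    event['status'] = job['TransformJobStatus']
    return event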
Example #28
    def build(scope: core.Construct,
              id: str,
              *,
              roles: emr_roles.EMRRoles,
              kerberos_attributes_secret: Optional[
                  secretsmanager.Secret] = None,
              secret_configurations: Optional[Dict[
                  str, secretsmanager.Secret]] = None,
              cluster_configuration_path: str = '$.ClusterConfiguration',
              result_path: Optional[str] = None,
              output_path: Optional[str] = None,
              wait_for_cluster_start: bool = True) -> sfn.Task:
        # We use a nested Construct to avoid collisions with Lambda and Task ids
        construct = core.Construct(scope, id)

        event_rule = core.Stack.of(scope).node.try_find_child('EventRule')
        if event_rule is None:
            event_rule = events.Rule(construct,
                                     'EventRule',
                                     enabled=False,
                                     schedule=events.Schedule.rate(
                                         core.Duration.minutes(1)))
            BaseBuilder.tag_construct(event_rule)

        run_job_flow_lambda = emr_lambdas.RunJobFlowBuilder.get_or_build(
            construct, roles, event_rule)
        check_cluster_status_lambda = emr_lambdas.CheckClusterStatusBuilder.get_or_build(
            construct, event_rule)

        if kerberos_attributes_secret:
            run_job_flow_lambda.add_to_role_policy(
                iam.PolicyStatement(
                    effect=iam.Effect.ALLOW,
                    actions=['secretsmanager:GetSecretValue'],
                    resources=[f'{kerberos_attributes_secret.secret_arn}*']))

        if secret_configurations is not None:
            for secret in secret_configurations.values():
                run_job_flow_lambda.add_to_role_policy(
                    iam.PolicyStatement(
                        effect=iam.Effect.ALLOW,
                        actions=['secretsmanager:GetSecretValue'],
                        resources=[f'{secret.secret_arn}*']))

        return sfn.Task(
            construct,
            'Start EMR Cluster (with Secrets)',
            output_path=output_path,
            result_path=result_path,
            task=sfn_tasks.RunLambdaTask(
                run_job_flow_lambda,
                integration_pattern=sfn.ServiceIntegrationPattern.
                WAIT_FOR_TASK_TOKEN,
                payload=sfn.TaskInput.from_object({
                    'ExecutionInput':
                    sfn.TaskInput.from_context_at('$$.Execution.Input').value,
                    'ClusterConfiguration':
                    sfn.TaskInput.from_data_at(
                        cluster_configuration_path).value,
                    'TaskToken':
                    sfn.Context.task_token,
                    'CheckStatusLambda':
                    check_cluster_status_lambda.function_arn,
                    'RuleName':
                    event_rule.rule_name,
                    'FireAndForget':
                    not wait_for_cluster_start
                })))
Example #29
    def __init__(self, scope: core.Construct, id: str, **kwargs) -> None:
        super().__init__(scope, id, **kwargs)

        # The start of the image pipeline
        imageBucket = aws_s3.Bucket(self, "imageBucket")

        # Capture API activity with a trail
        imageBucketTrail = aws_cloudtrail.Trail(self,
                                                "imageBucketTrail",
                                                is_multi_region_trail=False)

        # Restrict to S3 data-plane events
        imageBucketTrail.add_s3_event_selector(
            include_management_events=False,
            prefixes=[f"{imageBucket.bucket_arn}/"],
            read_write_type=aws_cloudtrail.ReadWriteType.WRITE_ONLY)

        # Filter to just PutObject and CopyObject events
        imageBucketRule = aws_events.Rule(
            self,
            "imageBucketRule",
            event_pattern={
                "source": ["aws.s3"],
                "detail": {
                    "eventSource": ["s3.amazonaws.com"],
                    "eventName": ["PutObject", "CopyObject"],
                    "requestParameters": {
                        "bucketName": [imageBucket.bucket_name]
                    }
                }
            })

        #--
        #  Lambda Layers
        #--------------------#

        opencvLayer = aws_lambda.LayerVersion(
            self,
            'opencvLayer',
            code=aws_lambda.AssetCode('layers/opencvLayer'),
            compatible_runtimes=[aws_lambda.Runtime.PYTHON_3_6])

        boto3Layer = aws_lambda.LayerVersion(
            self,
            'boto3Layer',
            code=aws_lambda.AssetCode('layers/boto3Layer'),
            compatible_runtimes=[aws_lambda.Runtime.PYTHON_3_6])

        #--
        #  Lambda Functions
        #--------------------#

        # Gather info about an image, name, extension, etc
        getImageInfoFunc = aws_lambda.Function(
            self,
            "getImageInfoFunc",
            code=aws_lambda.AssetCode('functions/getImageInfoFunc'),
            handler="lambda.handler",
            runtime=aws_lambda.Runtime.PYTHON_3_6)

        # The home for the website
        webBucket = aws_s3.Bucket(self,
                                  "webBucket",
                                  website_index_document='index.html')

        # Copy the image to the web bucket
        copyImageFunc = aws_lambda.Function(
            self,
            "copyImageFunc",
            code=aws_lambda.AssetCode('functions/copyImageFunc'),
            handler="lambda.handler",
            runtime=aws_lambda.Runtime.PYTHON_3_6,
            layers=[boto3Layer],
            environment={
                'OUTPUTBUCKET': webBucket.bucket_name,
                'OUTPUTPREFIX': 'images/'
            })

        # Grant permissions to read from the source and write to the destination
        imageBucket.grant_read(copyImageFunc)
        webBucket.grant_write(copyImageFunc)

        # Create a thumbnail of the image and place in the web bucket
        createThumbnailFunc = aws_lambda.Function(
            self,
            "createThumbnailFunc",
            code=aws_lambda.AssetCode('functions/createThumbnailFunc'),
            handler="lambda.handler",
            runtime=aws_lambda.Runtime.PYTHON_3_6,
            layers=[boto3Layer, opencvLayer],
            timeout=core.Duration.seconds(10),
            memory_size=256,
            environment={
                'OUTPUTBUCKET': webBucket.bucket_name,
                'OUTPUTPREFIX': 'images/'
            })

        # Grant permissions to read from the source and write to the destination
        imageBucket.grant_read(createThumbnailFunc)
        webBucket.grant_write(createThumbnailFunc)

        # Store page information
        pageTable = aws_dynamodb.Table(
            self,
            'pageTable',
            partition_key={
                'name': 'pageName',
                'type': aws_dynamodb.AttributeType.STRING
            },
            billing_mode=aws_dynamodb.BillingMode.PAY_PER_REQUEST,
            stream=aws_dynamodb.StreamViewType.NEW_IMAGE)

        # Save page and image information
        updatePageInfoFunc = aws_lambda.Function(
            self,
            "updatePageInfoFunc",
            code=aws_lambda.AssetCode('functions/updatePageInfoFunc'),
            handler="lambda.handler",
            runtime=aws_lambda.Runtime.PYTHON_3_6,
            layers=[boto3Layer],
            environment={
                'PAGETABLE': pageTable.table_name,
                'PAGEPREFIX': 'posts/'
            })

        # Grant permissions to write to the page table
        pageTable.grant_write_data(updatePageInfoFunc)

        imagePipelineDone = aws_stepfunctions.Succeed(self,
                                                      "Done processing image")

        updatePageInfoJob = aws_stepfunctions.Task(
            self,
            'Update page info',
            task=aws_stepfunctions_tasks.InvokeFunction(updatePageInfoFunc))
        updatePageInfoJob.next(imagePipelineDone)

        copyImageJob = aws_stepfunctions.Task(
            self,
            'Copy image',
            task=aws_stepfunctions_tasks.InvokeFunction(copyImageFunc))

        createThumbnailJob = aws_stepfunctions.Task(
            self,
            'Create thumbnail',
            task=aws_stepfunctions_tasks.InvokeFunction(createThumbnailFunc))

        # These tasks can be done in parallel
        processImage = aws_stepfunctions.Parallel(self,
                                                  'Process image',
                                                  result_path="$.images")

        processImage.branch(copyImageJob)
        processImage.branch(createThumbnailJob)
        processImage.next(updatePageInfoJob)

        # Results of file extension check
        notPng = aws_stepfunctions.Succeed(self, "Not a PNG")

        # Verify the file extension
        checkForPng = aws_stepfunctions.Choice(self, 'Is a PNG?')
        checkForPng.when(
            aws_stepfunctions.Condition.string_equals('$.extension', 'png'),
            processImage)
        checkForPng.otherwise(notPng)

        # A single image pipeline job for testing
        getImageInfoJob = aws_stepfunctions.Task(
            self,
            'Get image info',
            task=aws_stepfunctions_tasks.InvokeFunction(getImageInfoFunc))
        getImageInfoJob.next(checkForPng)

        # Configure the image pipeline and starting state
        imagePipeline = aws_stepfunctions.StateMachine(
            self, "imagePipeline", definition=getImageInfoJob)

        # Matching events start the image pipeline
        imageBucketRule.add_target(
            aws_events_targets.SfnStateMachine(
                imagePipeline,
                input=aws_events.RuleTargetInput.from_event_path(
                    "$.detail.requestParameters")))
Example #30
def test_emr_create_cluster_task():
    default_task_json = {
        'End': True,
        'Parameters': {
            'AdditionalInfo.$':
            '$.ClusterConfiguration.Cluster.AdditionalInfo',
            'AmiVersion.$':
            '$.ClusterConfiguration.Cluster.AmiVersion',
            'Applications.$':
            '$.ClusterConfiguration.Cluster.Applications',
            'AutoScalingRole.$':
            '$.ClusterConfiguration.Cluster.AutoScalingRole',
            'BootstrapActions.$':
            '$.ClusterConfiguration.Cluster.BootstrapActions',
            'Configurations.$':
            '$.ClusterConfiguration.Cluster.Configurations',
            'CustomAmiId.$':
            '$.ClusterConfiguration.Cluster.CustomAmiId',
            'EbsRootVolumeSize.$':
            '$.ClusterConfiguration.Cluster.EbsRootVolumeSize',
            'Instances': {
                'AdditionalMasterSecurityGroups.$':
                '$.ClusterConfiguration.Cluster.Instances.AdditionalMasterSecurityGroups',
                'AdditionalSlaveSecurityGroups.$':
                '$.ClusterConfiguration.Cluster.Instances.AdditionalSlaveSecurityGroups',
                'Ec2KeyName.$':
                '$.ClusterConfiguration.Cluster.Instances.Ec2KeyName',
                'Ec2SubnetId.$':
                '$.ClusterConfiguration.Cluster.Instances.Ec2SubnetId',
                'Ec2SubnetIds.$':
                '$.ClusterConfiguration.Cluster.Instances.Ec2SubnetIds',
                'EmrManagedMasterSecurityGroup.$':
                '$.ClusterConfiguration.Cluster.Instances.EmrManagedMasterSecurityGroup',
                'EmrManagedSlaveSecurityGroup.$':
                '$.ClusterConfiguration.Cluster.Instances.EmrManagedSlaveSecurityGroup',
                'HadoopVersion.$':
                '$.ClusterConfiguration.Cluster.Instances.HadoopVersion',
                'InstanceCount.$':
                '$.ClusterConfiguration.Cluster.Instances.InstanceCount',
                'InstanceFleets.$':
                '$.ClusterConfiguration.Cluster.Instances.InstanceFleets',
                'InstanceGroups.$':
                '$.ClusterConfiguration.Cluster.Instances.InstanceGroups',
                'KeepJobFlowAliveWhenNoSteps':
                True,
                'MasterInstanceType.$':
                '$.ClusterConfiguration.Cluster.Instances.MasterInstanceType',
                'Placement.$':
                '$.ClusterConfiguration.Cluster.Instances.Placement',
                'ServiceAccessSecurityGroup.$':
                '$.ClusterConfiguration.Cluster.Instances.ServiceAccessSecurityGroup',
                'SlaveInstanceType.$':
                '$.ClusterConfiguration.Cluster.Instances.SlaveInstanceType',
                'TerminationProtected.$':
                '$.ClusterConfiguration.Cluster.Instances.TerminationProtected'
            },
            'JobFlowRole.$':
            '$.ClusterConfiguration.Cluster.JobFlowRole',
            'KerberosAttributes.$':
            '$.ClusterConfiguration.Cluster.KerberosAttributes',
            'LogUri.$':
            '$.ClusterConfiguration.Cluster.LogUri',
            'ManagedScalingPolicy.$':
            '$.ClusterConfiguration.Cluster.ManagedScalingPolicy',
            'Name.$':
            '$.ClusterConfiguration.Cluster.Name',
            'NewSupportedProducts.$':
            '$.ClusterConfiguration.Cluster.NewSupportedProducts',
            'ReleaseLabel.$':
            '$.ClusterConfiguration.Cluster.ReleaseLabel',
            'RepoUpgradeOnBoot.$':
            '$.ClusterConfiguration.Cluster.RepoUpgradeOnBoot',
            'ScaleDownBehavior.$':
            '$.ClusterConfiguration.Cluster.ScaleDownBehavior',
            'SecurityConfiguration.$':
            '$.ClusterConfiguration.Cluster.SecurityConfiguration',
            'ServiceRole.$':
            '$.ClusterConfiguration.Cluster.ServiceRole',
            'StepConcurrencyLevel.$':
            '$.ClusterConfiguration.Cluster.StepConcurrencyLevel',
            'SupportedProducts.$':
            '$.ClusterConfiguration.Cluster.SupportedProducts',
            'Tags.$':
            '$.ClusterConfiguration.Cluster.Tags',
            'VisibleToAllUsers.$':
            '$.ClusterConfiguration.Cluster.VisibleToAllUsers'
        },
        'Resource': {
            'Fn::Join': [
                '',
                [
                    'arn:', {
                        'Ref': 'AWS::Partition'
                    }, ':states:::elasticmapreduce:createCluster.sync'
                ]
            ]
        },
        'Type': 'Task'
    }

    stack = core.Stack(core.App(), 'test-stack')

    task = sfn.Task(
        stack,
        'test-task',
        task=emr_tasks.EmrCreateClusterTask(
            roles=emr_profile.EMRRoles(stack,
                                       'test-emr-roles',
                                       role_name_prefix='test-roles'),
            cluster_configuration_path='$.ClusterConfiguration.Cluster',
        ))

    print_and_assert(default_task_json, task)