Example #1
def workflow(client):
    workflow = Workflow(name=state_machine_name,
                        definition=definition,
                        role=role_arn,
                        client=client)
    workflow.create()
    return workflow
Example #2
def test_workflow_update_when_statemachinearn_is_none(client):
    workflow = Workflow(name=state_machine_name,
                        definition=definition,
                        role=role_arn,
                        client=client)
    new_definition = steps.Pass('HelloWorld')
    with pytest.raises(WorkflowNotFound):
        workflow.update(definition=new_definition)
Example #3
def get_existing_monitor_pipeline(workflow_arn):
    """
    Create a dummy implementation of getting an existing data pipeline
    """
    data_pipeline = Workflow(
        name="data_pipeline_name",
        definition=Chain([]),
        role="workflow_execution_role",
    )

    return data_pipeline.attach(workflow_arn)
Example #4
def get_existing_training_pipeline(workflow_arn):
    """
    Create a dummy implementation of getting an existing training pipeline
    """
    training_pipeline = Workflow(
        name="training_pipeline_name",
        definition=Chain([]),
        role="workflow_execution_role",
    )

    return training_pipeline.attach(workflow_arn)
Example #5
def get_existing_inference_pipeline(workflow_arn):
    """
    Create a dummy implementation to get an existing inference pipeline

    TODO: This could be a good PR for the SDK.
    """
    inference_pipeline = Workflow(
        name="inference_pipeline_name",
        definition=Chain([]),
        role="workflow_execution_role",
    )

    return inference_pipeline.attach(workflow_arn)
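All three helpers above use Workflow.attach to bind to a state machine that already exists instead of creating a new one. A minimal usage sketch follows; the ARN, input key, and S3 path are placeholders, and it assumes the SDK's Workflow.execute / Execution.get_output interface:

# Hypothetical usage of the attach-style helpers above; values are placeholders.
existing_arn = "arn:aws:states:us-east-1:123456789012:stateMachine:inference_pipeline_name"

pipeline = get_existing_inference_pipeline(existing_arn)   # a Workflow bound to the existing state machine
execution = pipeline.execute(inputs={"InputDataURL": "s3://example-bucket/input.csv"})
result = execution.get_output(wait=True)                   # block until the execution finishes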
Example #6
    def _build_workflow(self):
        """create a step functions workflow from the chain_of_tasks."""
        logger.debug(
            f"creating a chain from all the different steps. \n {self.chain_of_tasks}"
        )
        workflow_definition = steps.Chain(self.chain_of_tasks)
        logger.debug(f"creating a workflow with name {self.unique_name}")
        self.client = boto3.client("stepfunctions")
        self.workflow = Workflow(
            name=self.unique_name,
            definition=workflow_definition,
            role=self.role.role_arn,
            client=self.client,
        )
Example #7
def create_workflow_and_check_definition(workflow_graph, workflow_name,
                                         sfn_client, sfn_role_arn):
    # Create workflow
    workflow = Workflow(name=workflow_name,
                        definition=workflow_graph,
                        role=sfn_role_arn,
                        client=sfn_client)
    state_machine_arn = workflow.create()

    # Check workflow definition
    state_machine_desc = sfn_client.describe_state_machine(
        stateMachineArn=state_machine_arn)
    assert workflow.definition.to_dict() == json.loads(
        state_machine_desc.get('definition'))

    return workflow
Example #8
def workflow(client):
    execution_input = ExecutionInput()

    test_step_01 = Pass(state_id='StateOne',
                        parameters={
                            'ParamA': execution_input['Key02']['Key03'],
                            'ParamD': execution_input['Key01']['Key03'],
                        })

    test_step_02 = Pass(state_id='StateTwo',
                        parameters={
                            'ParamC': execution_input["Key05"],
                            "ParamB": "SampleValueB",
                            "ParamE":
                            test_step_01.output()["Response"]["Key04"]
                        })

    test_step_03 = Pass(state_id='StateThree',
                        parameters={
                            'ParamG': "SampleValueG",
                            "ParamF": execution_input["Key06"],
                            "ParamH": "SampleValueH",
                            "ParamI": test_step_02.output()
                        })

    workflow_definition = Chain([test_step_01, test_step_02, test_step_03])
    workflow = Workflow(name='TestWorkflow',
                        definition=workflow_definition,
                        role='testRoleArn',
                        execution_input=execution_input,
                        client=client)
    return workflow
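The fixture above threads ExecutionInput placeholders and step outputs through the Pass states, so any execution has to supply the referenced keys. A hedged sketch of starting such a workflow (the key values are made up, and it assumes a real Step Functions client rather than the test double used by the fixture):

workflow.create()
execution = workflow.execute(inputs={
    'Key01': {'Key03': 'value-a'},
    'Key02': {'Key03': 'value-b'},
    'Key05': 'value-c',
    'Key06': 'value-d',
})
print(execution.describe()['status'])   # e.g. RUNNING or SUCCEEDED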
Example #9
def main():
    stepfunctions.set_stream_logger(level=logging.INFO)
    workflow_execution_role = 'arn:aws:iam::829044821271:role/StepFunctionsWorkflowExecutionRole'

    # Load job name
    with open('./stepfunctions_name.json', 'r') as f:
        stepfunctions_name = json.load(f)

    with open('./face_clip/aws_batch/batch_names.json', 'r') as f:
        face_clip_name = json.load(f)
        
    with open('./tag_extraction/aws_batch/batch_names.json', 'r') as f:
        tag_extraction_name = json.load(f)

    # Define steps
    face_clip_step = steps.BatchSubmitJobStep(
        state_id = 'Face Clip Step',
        parameters={
            'JobDefinition': face_clip_name['jobDefinition'],
            'JobName': face_clip_name['job'],
            'JobQueue': face_clip_name['jobQueue']
        }
    )

    tag_extraction_step = steps.BatchSubmitJobStep(
        state_id = 'Tag Extraction Step',
        parameters={
            'JobDefinition': tag_extraction_name['jobDefinition'],
            'JobName': tag_extraction_name['job'],
            'JobQueue': tag_extraction_name['jobQueue']
        }
    )

    # Define workflow
    chain_list = [face_clip_step, tag_extraction_step]
    workflow_definition = steps.Chain(chain_list)

    workflow = Workflow(
        name=stepfunctions_name['workflow'],
        definition=workflow_definition,
        role=workflow_execution_role,
    )

    # Create the workflow
    workflow.create()
Example #10
def test_workflow_creation_failure_duplicate_state_ids(client):
    improper_definition = steps.Chain(
        [steps.Pass('HelloWorld'),
         steps.Succeed('HelloWorld')])
    with pytest.raises(ValueError):
        workflow = Workflow(name=state_machine_name,
                            definition=improper_definition,
                            role=role_arn,
                            client=client)
Example #11
def test_catch_state_machine_creation(sfn_client, sfn_role_arn, training_job_parameters):
    catch_state_name = "TaskWithCatchState"
    custom_error = "CustomError"
    task_failed_error = "States.TaskFailed"
    all_fail_error = "States.ALL"
    custom_error_state_name = "Custom Error End"
    task_failed_state_name = "Task Failed End"
    all_error_state_name = "Catch All End"
    catch_state_result = "Catch Result"
    task_resource = "arn:aws:states:::sagemaker:createTrainingJob.sync"

    # change the parameters to cause task state to fail
    training_job_parameters["AlgorithmSpecification"]["TrainingImage"] = "not_an_image"

    asl_state_machine_definition = {
        "StartAt": catch_state_name,
        "States": {
            catch_state_name: {
                "Resource": task_resource,
                "Parameters": training_job_parameters,
                "Type": "Task",
                "End": True,
                "Catch": [
                    {
                        "ErrorEquals": [
                            all_fail_error
                        ],
                        "Next": all_error_state_name
                    }
                ]
            },
            all_error_state_name: {
                "Type": "Pass",
                "Result": catch_state_result,
                "End": True
            }
        }
    }
    task = steps.Task(
        catch_state_name,
        parameters=training_job_parameters,
        resource=task_resource
    )
    task.add_catch(
        steps.Catch(
            error_equals=[all_fail_error], 
            next_step=steps.Pass(all_error_state_name, result=catch_state_result)
        )
    )

    workflow = Workflow(
        'Test_Catch_Workflow',
        definition=task,
        role=sfn_role_arn
    )

    workflow_test_suite(sfn_client, workflow, asl_state_machine_definition, catch_state_result)
Example #12
def test_parallel_state_machine_creation(sfn_client, sfn_role_arn):
    parallel_state_name = "Parallel"
    left_pass_name = "Left Pass"
    right_pass_name = "Right Pass"
    final_state_name = "Final State"
    parallel_state_result = "Parallel Result"

    asl_state_machine_definition = {
        "StartAt": parallel_state_name,
        "States": {
            parallel_state_name: {
                "Type": "Parallel",
                "Next": final_state_name,
                "Branches": [
                    {
                        "StartAt": left_pass_name,
                        "States": {
                            left_pass_name: {
                                "Type": "Pass",
                                "End": True
                            }
                        }
                    },
                    {
                        "StartAt": right_pass_name,
                        "States": {
                            right_pass_name: {
                                "Type": "Pass",
                                "End": True
                            }
                        }
                    }
                ]
            },
            final_state_name: {
                "Type": "Pass",
                "Result": parallel_state_result,
                "End": True
            }
        }
    }
    parallel_waits = steps.Parallel(parallel_state_name)
    parallel_waits.add_branch(steps.Pass(left_pass_name))
    parallel_waits.add_branch(steps.Pass(right_pass_name))

    definition = steps.Chain([
        parallel_waits,
        steps.Pass(final_state_name, result=parallel_state_result)
    ])

    workflow = Workflow(
        'Test_Parallel_Workflow',
        definition=definition,
        role=sfn_role_arn
    )

    workflow_test_suite(sfn_client, workflow, asl_state_machine_definition, parallel_state_result)
Example #13
def test_map_state_machine_creation(sfn_client, sfn_role_arn):
    map_state_name = "Map State"
    iterated_state_name = "Pass State"
    final_state_name = "Final State"
    items_path = "$.array"
    max_concurrency = 0
    map_state_result = "Map Result"
    state_machine_input = {
        "array": [1, 2, 3]
    }

    asl_state_machine_definition = {
        "StartAt": map_state_name,
        "States": {
            map_state_name: {
                "ItemsPath": items_path,
                "Iterator": {
                    "StartAt": iterated_state_name,
                    "States": {
                        iterated_state_name: {
                            "Type": "Pass",
                            "End": True
                        }
                    }
                },
                "MaxConcurrency": max_concurrency,
                "Type": "Map",
                "Next": final_state_name
            },
            final_state_name: {
                "Type": "Pass",
                "Result": map_state_result,
                "End": True
            }
        }
    }

    map_state = steps.Map(
        map_state_name, 
        items_path=items_path,
        iterator=steps.Pass(iterated_state_name), 
        max_concurrency=max_concurrency)

    definition = steps.Chain([
        map_state,
        steps.Pass(final_state_name, result=map_state_result)
    ])

    workflow = Workflow(
        'Test_Map_Workflow',
        definition=definition,
        role=sfn_role_arn
    )

    workflow_test_suite(sfn_client, workflow, asl_state_machine_definition, map_state_result, state_machine_input)
Example #14
def test_retry_state_machine_creation(sfn_client, sfn_role_arn, training_job_parameters):
    retry_state_name = "RetryStateName"
    all_fail_error = "Starts.ALL"
    interval_seconds = 1
    max_attempts = 2
    backoff_rate = 2
    task_resource = "arn:aws:states:::sagemaker:createTrainingJob.sync"

    # change the parameters to cause task state to fail
    training_job_parameters["AlgorithmSpecification"]["TrainingImage"] = "not_an_image"

    asl_state_machine_definition = {
        "StartAt": retry_state_name,
        "States": {
            retry_state_name: {
                "Resource": task_resource,
                "Parameters": training_job_parameters,
                "Type": "Task",
                "End": True,
                "Retry": [
                    {
                        "ErrorEquals": [all_fail_error],
                        "IntervalSeconds": interval_seconds,
                        "MaxAttempts": max_attempts,
                        "BackoffRate": backoff_rate
                    }
                ]
            }
        }
    }

    task = steps.Task(
        retry_state_name,
        parameters=training_job_parameters,
        resource=task_resource
    )

    task.add_retry(
        steps.Retry(
            error_equals=[all_fail_error], 
            interval_seconds=interval_seconds, 
            max_attempts=max_attempts, 
            backoff_rate=backoff_rate
        )
    )

    workflow = Workflow(
        'Test_Retry_Workflow',
        definition=task,
        role=sfn_role_arn
    )

    workflow_test_suite(sfn_client, workflow, asl_state_machine_definition, None)
Example #15
    def __init__(self,
                 preprocessor,
                 estimator,
                 inputs,
                 s3_bucket,
                 role,
                 client=None,
                 **kwargs):
        """
        Args:
            preprocessor (sagemaker.estimator.EstimatorBase): The estimator used to preprocess and transform the training data. 
            estimator (sagemaker.estimator.EstimatorBase): The estimator to use for training. Can be a BYO estimator, Framework estimator or Amazon algorithm estimator.
            role (str): An AWS IAM role (either name or full Amazon Resource Name (ARN)). This role is used to create, manage, and execute the Step Functions workflows.
            inputs: Information about the training data. Please refer to the `fit()` method of the associated estimator, as this can take any of the following forms:

                * (str) - The S3 location where training data is saved.
                * (dict[str, str] or dict[str, `sagemaker.session.s3_input`]) - If using multiple channels for training data, you can specify a dict mapping channel names to strings or `sagemaker.session.s3_input` objects.
                * (`sagemaker.session.s3_input`) - Channel configuration for S3 data sources that can provide additional information about the training dataset. See `sagemaker.session.s3_input` for full details.
                * (`sagemaker.amazon.amazon_estimator.RecordSet`) - A collection of Amazon `Record` objects serialized and stored in S3. For use with an estimator for an Amazon algorithm.
                * (list[`sagemaker.amazon.amazon_estimator.RecordSet`]) - A list of `sagemaker.amazon.amazon_estimator.RecordSet` objects, where each instance is a different channel of training data.
            s3_bucket (str): S3 bucket under which the output artifacts from the training job will be stored. The parent path used is built using the format: ``s3://{s3_bucket}/{pipeline_name}/models/{job_name}/``. In this format, `pipeline_name` refers to the keyword argument provided for TrainingPipeline. If a `pipeline_name` argument was not provided, one is auto-generated by the pipeline as `training-pipeline-<timestamp>`. Also, in the format, `job_name` refers to the job name provided when calling the :meth:`TrainingPipeline.run()` method.
            client (SFN.Client, optional): boto3 client to use for creating and interacting with the inference pipeline in Step Functions. (default: None)

        Keyword Args:
            compression_type (str, optional): Compression type (Gzip/None) of the file for TransformJob. (default:None)
            content_type (str, optional): Content type (MIME) of the document to be used in preprocessing script. See SageMaker documentation for more details. (default:None)
            pipeline_name (str, optional): Name of the pipeline. This name will be used to name jobs (if not provided when calling execute()), models, endpoints, and S3 objects created by the pipeline. If a `pipeline_name` argument was not provided, one is auto-generated by the pipeline as `training-pipeline-<timestamp>`. (default:None)
        """
        self.preprocessor = preprocessor
        self.estimator = estimator
        self.inputs = inputs
        self.s3_bucket = s3_bucket

        for key in self.__class__.__allowed_kwargs:
            setattr(self, key, kwargs.pop(key, None))

        if not self.pipeline_name:
            self.pipeline_name = 'inference-pipeline-{date}'.format(
                date=self._generate_timestamp())

        self.definition = self.build_workflow_definition()
        self.input_template = self._extract_input_template(self.definition)

        workflow = Workflow(name=self.pipeline_name,
                            definition=self.definition,
                            role=role,
                            format_json=True,
                            client=client)

        super(InferencePipeline, self).__init__(s3_bucket=s3_bucket,
                                                workflow=workflow,
                                                role=role,
                                                client=client)
Example #16
def main():
    sagemaker_session = sagemaker.Session()
    stepfunctions.set_stream_logger(level=logging.INFO)

    bucket = 's3://pixiv-image-backet'

    sagemaker_execution_role = 'arn:aws:iam::829044821271:role/service-role/AmazonSageMaker-ExecutionRole-20200412T194702'
    workflow_execution_role = 'arn:aws:iam::829044821271:role/StepFunctionsWorkflowExecutionRole'

    estimator1 = PyTorch(entry_point='train.py',
                         source_dir='projection_discriminator',
                         role=sagemaker_execution_role,
                         framework_version='1.4.0',
                         train_instance_count=2,
                         train_instance_type='ml.m5.2xlarge',
                         hyperparameters={
                             'train_epoch': 1,
                         })

    estimator2 = PyTorch(entry_point='train.py',
                         source_dir='wgan_gp',
                         role=sagemaker_execution_role,
                         framework_version='1.4.0',
                         train_instance_count=2,
                         train_instance_type='ml.m5.2xlarge',
                         hyperparameters={
                             'train_epoch': 1,
                         })

    training_step1 = steps.TrainingStep(state_id='Train Step1',
                                        estimator=estimator1,
                                        data={
                                            'training': bucket,
                                        },
                                        job_name='PD-Train-{0}'.format(
                                            uuid.uuid4()))

    training_step2 = steps.TrainingStep(state_id='Train Step2',
                                        estimator=estimator2,
                                        data={
                                            'training': bucket,
                                        },
                                        job_name='PD-Train-{0}'.format(
                                            uuid.uuid4()))

    parallel_state = steps.Parallel(state_id='Parallel', )

    parallel_state.add_branch(training_step1)
    parallel_state.add_branch(training_step2)

    workflow_definition = steps.Chain([parallel_state])

    workflow = Workflow(
        name='MyTraining-{0}'.format(uuid.uuid4()),
        definition=workflow_definition,
        role=workflow_execution_role,
    )

    workflow.create()
    workflow.execute()
Example #17
    def build_workflow(self):
        """create a step functions workflow from the chain_of_tasks."""
        self.chain_of_tasks = self._construct_toposorted_chain_of_tasks()
        logger.debug("creating a chain from all the different steps.")
        self.chain_of_tasks = self._integrate_notification_in_workflow(
            chain_of_tasks=self.chain_of_tasks)
        logger.debug(f"creating a workflow with name {self.unique_name}")
        sfn_client = boto3.client("stepfunctions")
        self.workflow = Workflow(
            name=self.unique_name,
            definition=self.chain_of_tasks,
            role=self.role.role_arn,
            client=sfn_client,
            **self.kwargs,
        )
Example #18
def test_list_workflows(client):
    paginator = client.get_paginator('list_state_machines')
    paginator.paginate = MagicMock(return_value=[{
        'stateMachines': [{
            'stateMachineArn': state_machine_arn,
            'name': state_machine_name,
            'creationDate': datetime(2019, 1, 1)
        }],
        'NextToken':
        'Token'
    }])

    client.get_paginator = MagicMock(return_value=paginator)
    workflows = Workflow.list_workflows(max_items=999, client=client)

    paginator.paginate.assert_called_with(PaginationConfig={
        'MaxItems': 999,
        'PageSize': 1000
    })
Example #19
def find_state_machine_arn(state_machine: str) -> str:
    """lookup the state machine arn based on the state machine name."""
    workflows = Workflow.list_workflows()
    state_machine_object = [
        workflow for workflow in workflows
        if workflow.get("name") == state_machine
    ]
    if len(state_machine_object) == 1:
        logger.debug(
            f"we have found one statemachine {state_machine_object[0]}")
        return state_machine_object[0].get("stateMachineArn")
    elif len(state_machine_object) == 0:
        logger.error(f"statemachine {state_machine} not found.")
        raise LookupError("no statemachine found.")
    else:
        logger.error(
            f"more than one statemachine found with name {state_machine}.")
        raise Exception(
            "more than one statemachine found. Something strange is going on ..."
        )
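find_state_machine_arn pairs naturally with Workflow.attach when you need to execute an existing state machine by name. A short usage sketch (the name and inputs are placeholders):

arn = find_state_machine_arn("my-existing-state-machine")
workflow = Workflow.attach(arn)
execution = workflow.execute(inputs={"some_key": "some_value"})   # illustrative inputs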
Example #20
def test_pass_state_machine_creation(sfn_client, sfn_role_arn):
    pass_state_name = "Pass"
    pass_state_result = "Pass Result"
    asl_state_machine_definition = {
        "StartAt": pass_state_name,
        "States": {
            pass_state_name: {
                "Result": pass_state_result,
                "Type": "Pass",
                "End": True
            }
        }
    }

    definition = steps.Pass(pass_state_name, result=pass_state_result)
    workflow = Workflow(unique_name_from_base('Test_Pass_Workflow'),
                        definition=definition,
                        role=sfn_role_arn)

    workflow_test_suite(sfn_client, workflow, asl_state_machine_definition,
                        pass_state_result)
Example #21
def test_task_state_machine_creation(sfn_client, sfn_role_arn, training_job_parameters):
    task_state_name = "TaskState"
    final_state_name = "FinalState"
    resource = "arn:aws:states:::sagemaker:createTrainingJob.sync"
    task_state_result = "Task State Result"
    asl_state_machine_definition = { 
        "StartAt": task_state_name,
        "States": { 
            task_state_name: { 
                "Resource": resource,
                "Parameters": training_job_parameters,
                "Type": "Task",
                "Next": final_state_name
            },
            final_state_name: {
                "Type": "Pass",
                "Result" : task_state_result,
                "End": True
            }
        }
    }

    definition = steps.Chain([
        steps.Task(
            task_state_name,
            resource=resource,
            parameters=training_job_parameters
        ),
        steps.Pass(final_state_name, result=task_state_result)
    ])
    
    workflow = Workflow(
        'Test_Task_Workflow',
        definition=definition,
        role=sfn_role_arn
    )

    workflow_test_suite(sfn_client, workflow, asl_state_machine_definition, task_state_result)
Example #22
def create_sfn_workflow(params, steps):
    sfn_workflow_name = params['sfn-workflow-name']
    workflow_execution_role = params['sfn-role-arn']

    workflow_graph = Chain(steps)

    branching_workflow = Workflow(
        name=sfn_workflow_name,
        definition=workflow_graph,
        role=workflow_execution_role,
    )

    branching_workflow.create()
    branching_workflow.update(workflow_graph)

    time.sleep(5)

    return branching_workflow
Example #23
training_step.add_branch(train_step_A)
training_step.add_branch(train_step_B)

# Chain the steps together to generate a full AWS Step Functions workflow definition
workflow_definition = steps.Chain([
    training_step,
    endpoint_create_step,
    endpoint_wait_step,
    model_test_step
])

# Create an AWS Step Functions workflow based on the inputs
workflow = Workflow(
    name=state_machine_name,
    state_machine_arn=state_machine_arn,
    definition=workflow_definition,
    role=workflow_role,
    execution_input=event_input
)

### END WORK FLOW DEFINITION ####

# Manually update some settings that are not generated correctly by the AWS Step Functions Data Science SDK.
jsonDef = workflow.definition.to_json(pretty=True)
jsonDef = jsonDef.replace("TrainingImage\": \"latesta", "TrainingImage.$\": \"$$.Execution.Input['ecrArnA']")
jsonDef = jsonDef.replace("TrainingImage\": \"latestb", "TrainingImage.$\": \"$$.Execution.Input['ecrArnB']")
jsonDef = jsonDef.replace("Image\": \"latesta", "Image.$\": \"$$.Execution.Input['ecrArnA']")
jsonDef = jsonDef.replace("Image\": \"latestb", "Image.$\": \"$$.Execution.Input['ecrArnB']")
jsonDef = jsonDef.replace("ModelDataUrl.$\": \"$['ModelArtifacts']['S3ModelArtifacts']", "ModelDataUrl.$\": \"$['train_step_result']['ModelArtifacts']['S3ModelArtifacts']")
jsonDef = jsonDef.replace("TrainingJobName", "TrainingJobName.$")
Example #24
            }
    )

    ## Step that runs the SageMaker training job
    estimator = create_estimator()
    data_path = {'train': args.data_path}

    training_step = steps.TrainingStep(
        'Train Step', 
        estimator=estimator,
        data=data_path,
        job_name=execution_input['TrainJobName'],  
        wait_for_completion=False  # Since the flow was changed to open a pull request to Bitbucket after running Step Functions, this could probably be True.
    )

    # Chain the steps together
    chain_list = [etl_step, training_step]
    workflow_definition = steps.Chain(chain_list)

    # Create the workflow
    workflow = Workflow(
        name=FLOW_NAME,
        definition=workflow_definition,
        role=WORKFLOW_ROLE,
        execution_input=execution_input
    )
    workflow.create()

    # Execute the workflow
    execution = workflow.execute(inputs=inputs)
Example #25
workflow_definition = steps.Chain([
    etl_step,
    training_step,
    model_step,
    lambda_step,
    check_accuracy_step
])

# This can be used to create a brand new workflow
try:
    # This is used to update the existing workflow. 
    # That way you can still see all the step function run history
    # You could alternatively delete and recreate the workflow
    state_machine_arn = 'arn:aws:states:ap-southeast-2:' + account_id + ':stateMachine:' + workflow_name
    workflow = Workflow.attach(state_machine_arn=state_machine_arn)
    workflow.update(
        definition = workflow_definition,
        role=workflow_execution_role
    )
except:
    workflow = Workflow(
        name=workflow_name,
        definition=workflow_definition,
        role=workflow_execution_role,
        execution_input=execution_input
    )
    workflow.create()


# Documentation states the following:
Example #26
def define_inference_pipeline(
    sm_role,
    workflow_execution_role,
    inference_pipeline_name,
    return_yaml=True,
    dump_yaml_file="templates/sagemaker_inference_pipeline.yaml",
    kms_key_id=None,
):
    """
    Return YAML definition of the inference pipeline, which consists of multiple
    Amazon StepFunction steps

    sm_role:                    ARN of the SageMaker execution role
    workflow_execution_role:    ARN of the StepFunction execution role
    return_yaml:                Return YAML representation or not, if False,
                                it returns an instance of
                                    `stepfunctions.workflow.WorkflowObject`
    dump_yaml_file:             If not None, a YAML file will be generated at
                                    this file location

    """

    # Pass required parameters dynamically for each execution using placeholders.
    execution_input = ExecutionInput(
        schema={
            "InputDataURL": str,
            "PreprocessingJobName": str,
            "InferenceJobName": str,
            "ProcModelS3": str,
            "PreprocessingCodeURL": str,
            "InferenceCodeURL": str,
            "ModelS3": str,
            "PreprocessedTrainDataURL": str,
            "PreprocessedTestDataURL": str,
            "OutputPathURL": str,
        })
    """
    Create Preprocessing Model from model artifact.
    """
    # sagemaker_session = sagemaker.Session()

    sklearn_processor = SKLearnProcessor(
        framework_version="0.20.0",
        role=sm_role,
        instance_type="ml.m5.xlarge",
        instance_count=1,
        max_runtime_in_seconds=1200,
    )
    # Create ProcessingInputs and ProcessingOutputs objects for Inputs and
    # Outputs respectively for the SageMaker Processing Job
    inputs = [
        ProcessingInput(
            source=execution_input["InputDataURL"],
            destination="/opt/ml/processing/input",
            input_name="input-1",
        ),
        ProcessingInput(
            source=execution_input["PreprocessingCodeURL"],
            destination="/opt/ml/processing/input/code",
            input_name="code",
        ),
        ProcessingInput(
            source=execution_input["ProcModelS3"],
            destination="/opt/ml/processing/model",
            input_name="proc_model",
        ),
    ]

    outputs = [
        ProcessingOutput(
            source="/opt/ml/processing/test",
            destination=execution_input["PreprocessedTestDataURL"],
            output_name="test_data",
        ),
    ]

    processing_step = ProcessingStep(
        "SageMaker pre-processing step",
        processor=sklearn_processor,
        job_name=execution_input["PreprocessingJobName"],
        inputs=inputs,
        outputs=outputs,
        container_arguments=["--mode", "infer"],
        container_entrypoint=[
            "python3",
            "/opt/ml/processing/input/code/preprocessing.py",
        ],
        kms_key_id=kms_key_id,
    )
    """
    Create inference with sklearn processing step.

    Inputs are the preprocessed data S3 URL, the inference code S3 URL, and
    the model S3 URL. Output is the inferred data.
    """
    sklearn_processor2 = SKLearnProcessor(
        framework_version="0.20.0",
        role=sm_role,
        instance_type="ml.m5.xlarge",
        instance_count=1,
        max_runtime_in_seconds=1200,
    )
    inputs = [
        ProcessingInput(
            source=execution_input["PreprocessedTestDataURL"],
            destination="/opt/ml/processing/input",
            input_name="input-1",
        ),
        ProcessingInput(
            source=execution_input["InferenceCodeURL"],
            destination="/opt/ml/processing/input/code",
            input_name="code",
        ),
        ProcessingInput(
            source=execution_input["ModelS3"],
            destination="/opt/ml/processing/model",
            input_name="model",
        ),
    ]

    outputs = [
        ProcessingOutput(
            source="/opt/ml/processing/test",
            destination=execution_input["OutputPathURL"],
            output_name="test_data",
        ),
    ]

    inference_step = ProcessingStep(
        "SageMaker inference step",
        processor=sklearn_processor2,
        job_name=execution_input["InferenceJobName"],
        inputs=inputs,
        outputs=outputs,
        container_entrypoint=[
            "python3",
            "/opt/ml/processing/input/code/inference.py",
        ],
        kms_key_id=kms_key_id,
    )

    # Create Fail state to mark the workflow failed in case any of the steps fail.
    failed_state_sagemaker_processing_failure = stepfunctions.steps.states.Fail(
        "ML Workflow failed", cause="SageMakerProcessingJobFailed")

    # Add the Error handling in the workflow
    catch_state_processing = stepfunctions.steps.states.Catch(
        error_equals=["States.TaskFailed"],
        next_step=failed_state_sagemaker_processing_failure,
    )

    processing_step.add_catch(catch_state_processing)
    inference_step.add_catch(catch_state_processing)

    # Create the Workflow
    workflow_graph = Chain([processing_step, inference_step])
    inference_pipeline = Workflow(
        name=inference_pipeline_name,
        definition=workflow_graph,
        role=workflow_execution_role,
    )
    return inference_pipeline
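define_inference_pipeline only builds the Workflow object; creating and starting it is left to the caller. A hedged sketch of that follow-up, with placeholder S3 paths and job names whose keys mirror the ExecutionInput schema above:

pipeline = define_inference_pipeline(sm_role, workflow_execution_role, "example-inference-pipeline")
pipeline.create()
execution = pipeline.execute(inputs={
    "InputDataURL": "s3://example-bucket/input/data.csv",
    "PreprocessingJobName": "preprocess-job-001",
    "InferenceJobName": "inference-job-001",
    "ProcModelS3": "s3://example-bucket/models/proc_model.tar.gz",
    "PreprocessingCodeURL": "s3://example-bucket/code/preprocessing.py",
    "InferenceCodeURL": "s3://example-bucket/code/inference.py",
    "ModelS3": "s3://example-bucket/models/model.tar.gz",
    "PreprocessedTrainDataURL": "s3://example-bucket/preprocessed/train/",
    "PreprocessedTestDataURL": "s3://example-bucket/preprocessed/test/",
    "OutputPathURL": "s3://example-bucket/output/",
})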
Example #27
def test_attach_existing_workflow(client):
    workflow = Workflow.attach(state_machine_arn, client)
    assert workflow.name == state_machine_name
    assert workflow.role == role_arn
    assert workflow.state_machine_arn == state_machine_arn
Example #28
                                                model_name=job_name,
                                                initial_instance_count=1,
                                                instance_type='ml.m5.large')

endpoint_step = steps.EndpointStep(
    "Create or Update Endpoint",
    endpoint_name=execution_input['EndpointName'],
    endpoint_config_name=job_name,
    update=update_endpoint)

workflow_definition = steps.Chain(
    [training_step, model_step, endpoint_config_step, endpoint_step])

# Update the workflow that is already created

workflow = Workflow.attach(workflow_arn)
workflow.update(definition=workflow_definition)
print('Workflow updated: {}'.format(workflow_arn))

# Sleep for 5 seconds then execute after this is applied
time.sleep(5)

execution = workflow.execute(inputs=execution_params)
stepfunction_arn = execution.execution_arn
print('Workflow executed: {}'.format(stepfunction_arn))

# Export environment variables

if not os.path.exists('cloud_formation'):
    os.makedirs('cloud_formation')
Example #29
def define_training_pipeline(
    sm_role,
    workflow_execution_role,
    training_pipeline_name,
    return_yaml=True,
    dump_yaml_file="templates/sagemaker_training_pipeline.yaml",
    kms_key_id=None,
):
    """
    Return YAML definition of the training pipeline, which consists of multiple
    Amazon StepFunction steps

    sm_role:                    ARN of the SageMaker execution role
    workflow_execution_role:    ARN of the StepFunction execution role
    return_yaml:                Return YAML representation or not, if False,
                                it returns an instance of
                                    `stepfunctions.workflow.WorkflowObject`
    dump_yaml_file:             If not None, a YAML file will be generated at
                                    this file location

    """

    # Pass required parameters dynamically for each execution using placeholders.
    execution_input = ExecutionInput(
        schema={
            "InputDataURL": str,
            "PreprocessingJobName": str,
            "PreprocessingCodeURL": str,
            "TrainingJobName": str,
            # Prevent the SageMaker config from hardcoding sagemaker_submit_directory
            # in the workflow definition
            "SMSubmitDirURL": str,
            # Prevent the SageMaker config from hardcoding sagemaker_region in the workflow definition
            "SMRegion": str,
            "EvaluationProcessingJobName": str,
            "EvaluationCodeURL": str,
            "EvaluationResultURL": str,
            "PreprocessedTrainDataURL": str,
            "PreprocessedTestDataURL": str,
            "PreprocessedModelURL": str,
            "SMOutputDataURL": str,
            "SMDebugOutputURL": str,
        })
    """
    Data pre-processing and feature engineering
    """
    sklearn_processor = SKLearnProcessor(
        framework_version="0.20.0",
        role=sm_role,
        instance_type="ml.m5.xlarge",
        instance_count=1,
        max_runtime_in_seconds=1200,
    )

    # Create ProcessingInputs and ProcessingOutputs objects for Inputs and
    # Outputs respectively for the SageMaker Processing Job
    inputs = [
        ProcessingInput(
            source=execution_input["InputDataURL"],
            destination="/opt/ml/processing/input",
            input_name="input-1",
        ),
        ProcessingInput(
            source=execution_input["PreprocessingCodeURL"],
            destination="/opt/ml/processing/input/code",
            input_name="code",
        ),
    ]

    outputs = [
        ProcessingOutput(
            source="/opt/ml/processing/train",
            destination=execution_input["PreprocessedTrainDataURL"],
            output_name="train_data",
        ),
        ProcessingOutput(
            source="/opt/ml/processing/test",
            destination=execution_input["PreprocessedTestDataURL"],
            output_name="test_data",
        ),
        ProcessingOutput(
            source="/opt/ml/processing/model",
            destination=execution_input["PreprocessedModelURL"],
            output_name="proc_model",
        ),
    ]

    processing_step = ProcessingStep(
        "SageMaker pre-processing step",
        processor=sklearn_processor,
        job_name=execution_input["PreprocessingJobName"],
        inputs=inputs,
        outputs=outputs,
        container_arguments=[
            "--train-test-split-ratio", "0.2", "--mode", "train"
        ],
        container_entrypoint=[
            "python3",
            "/opt/ml/processing/input/code/preprocessing.py",
        ],
        kms_key_id=kms_key_id,
    )
    """
    Training using the pre-processed data
    """
    sklearn = SKLearn(
        entry_point="../../src/mlmax/train.py",
        train_instance_type="ml.m5.xlarge",
        role=sm_role,
        py_version="py3",
        framework_version="0.20.0",
        output_kms_key=kms_key_id,
    )

    training_step = MLMaxTrainingStep(
        "SageMaker Training Step",
        estimator=sklearn,
        job_name=execution_input["TrainingJobName"],
        train_data=execution_input["PreprocessedTrainDataURL"],
        test_data=execution_input["PreprocessedTestDataURL"],
        sm_submit_url=execution_input["SMSubmitDirURL"],
        sm_region=execution_input["SMRegion"],
        sm_output_data=execution_input["SMOutputDataURL"],
        sm_debug_output_data=execution_input["SMDebugOutputURL"],
        wait_for_completion=True,
    )
    """
    Model evaluation
    """
    # Create input and output objects for Model Evaluation ProcessingStep.
    inputs_evaluation = [
        ProcessingInput(
            source=execution_input["PreprocessedTestDataURL"],
            destination="/opt/ml/processing/test",
            input_name="input-1",
        ),
        ProcessingInput(
            source=training_step.get_expected_model().model_data,
            destination="/opt/ml/processing/model",
            input_name="input-2",
        ),
        ProcessingInput(
            source=execution_input["EvaluationCodeURL"],
            destination="/opt/ml/processing/input/code",
            input_name="code",
        ),
    ]

    outputs_evaluation = [
        ProcessingOutput(
            source="/opt/ml/processing/evaluation",
            destination=execution_input["EvaluationResultURL"],
            output_name="evaluation",
        ),
    ]

    model_evaluation_processor = SKLearnProcessor(
        framework_version="0.20.0",
        role=sm_role,
        instance_type="ml.m5.xlarge",
        instance_count=1,
        max_runtime_in_seconds=1200,
    )

    processing_evaluation_step = ProcessingStep(
        "SageMaker Processing Model Evaluation step",
        processor=model_evaluation_processor,
        job_name=execution_input["EvaluationProcessingJobName"],
        inputs=inputs_evaluation,
        outputs=outputs_evaluation,
        container_entrypoint=[
            "python3", "/opt/ml/processing/input/code/evaluation.py"
        ],
    )

    # Create Fail state to mark the workflow failed in case any of the steps fail.
    failed_state_sagemaker_processing_failure = stepfunctions.steps.states.Fail(
        "ML Workflow failed", cause="SageMakerProcessingJobFailed")

    # Add the Error handling in the workflow
    catch_state_processing = stepfunctions.steps.states.Catch(
        error_equals=["States.TaskFailed"],
        next_step=failed_state_sagemaker_processing_failure,
    )
    processing_step.add_catch(catch_state_processing)
    processing_evaluation_step.add_catch(catch_state_processing)
    training_step.add_catch(catch_state_processing)

    # Create the Workflow
    workflow_graph = Chain(
        [processing_step, training_step, processing_evaluation_step])
    training_pipeline = Workflow(
        name=training_pipeline_name,
        definition=workflow_graph,
        role=workflow_execution_role,
    )
    return training_pipeline
Example #30
def main(
    git_branch,
    codebuild_id,
    pipeline_name,
    model_name,
    deploy_role,
    sagemaker_role,
    sagemaker_bucket,
    data_dir,
    output_dir,
    ecr_dir,
    kms_key_id,
    workflow_role_arn,
    notification_arn,
    sagemaker_project_id,
    tags,
):
    # Define the function names
    create_experiment_function_name = "mlops-create-experiment"
    query_training_function_name = "mlops-query-training"

    # Get the region
    region = boto3.Session().region_name
    print("region: {}".format(region))

    if ecr_dir:
        # Load the image uri and input data config
        with open(os.path.join(ecr_dir, "imageDetail.json"), "r") as f:
            image_uri = json.load(f)["ImageURI"]
    else:
        # Get the managed image uri for the current region
        image_uri = get_training_image(region)
    print("image uri: {}".format(image_uri))

    with open(os.path.join(data_dir, "inputData.json"), "r") as f:
        input_data = json.load(f)
        print("training uri: {}".format(input_data["TrainingUri"]))
        print("validation uri: {}".format(input_data["ValidationUri"]))
        print("baseline uri: {}".format(input_data["BaselineUri"]))

    # Get the job id and source revisions
    job_id = get_pipeline_execution_id(pipeline_name, codebuild_id)
    revisions = get_pipeline_revisions(pipeline_name, job_id)
    git_commit_id = revisions["ModelSourceOutput"]
    data_verison_id = revisions["DataSourceOutput"]
    print("job id: {}".format(job_id))
    print("git commit: {}".format(git_commit_id))
    print("data version: {}".format(data_verison_id))

    # Set the output Data
    output_data = {
        "ModelOutputUri":
        "s3://{}/{}".format(sagemaker_bucket, model_name),
        "BaselineOutputUri":
        f"s3://{sagemaker_bucket}/{model_name}/monitoring/baseline/{model_name}-pbl-{job_id}",
    }
    print("model output uri: {}".format(output_data["ModelOutputUri"]))

    # Pass these into the training method
    hyperparameters = {}
    if os.path.exists(os.path.join(data_dir, "hyperparameters.json")):
        with open(os.path.join(data_dir, "hyperparameters.json"), "r") as f:
            hyperparameters = json.load(f)
            for i in hyperparameters:
                hyperparameters[i] = str(hyperparameters[i])

    # Define the step functions execution input schema
    execution_input = ExecutionInput(
        schema={
            "GitBranch": str,
            "GitCommitHash": str,
            "DataVersionId": str,
            "ExperimentName": str,
            "TrialName": str,
            "BaselineJobName": str,
            "BaselineOutputUri": str,
            "TrainingJobName": str,
        })

    # Create experiment step
    experiment_step = create_experiment_step(create_experiment_function_name)
    baseline_step = create_baseline_step(input_data, execution_input, region,
                                         sagemaker_role)
    training_step = create_training_step(
        image_uri,
        hyperparameters,
        input_data,
        output_data,
        execution_input,
        query_training_function_name,
        region,
        sagemaker_role,
    )
    workflow_definition = create_graph(experiment_step, baseline_step,
                                       training_step)

    # Create the workflow as the model name
    workflow = Workflow(model_name, workflow_definition, workflow_role_arn)
    print("Creating workflow: {0}-{1}".format(model_name,
                                              sagemaker_project_id))

    # Create output directory
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    # Write the workflow graph to json
    with open(os.path.join(output_dir, "workflow-graph.json"), "w") as f:
        f.write(workflow.definition.to_json(pretty=True))

    # Write the workflow graph to yml
    with open(os.path.join(output_dir, "workflow-graph.yml"), "w") as f:
        f.write(workflow.get_cloudformation_template())

    # Write the workflow inputs to file
    with open(os.path.join(output_dir, "workflow-input.json"), "w") as f:
        workflow_inputs = {
            "ExperimentName": "{}".format(model_name),
            "TrialName": "{}-{}".format(model_name, job_id),
            "GitBranch": git_branch,
            "GitCommitHash": git_commit_id,
            "DataVersionId": data_verison_id,
            "BaselineJobName": "{}-pbl-{}".format(model_name, job_id),
            "BaselineOutputUri": output_data["BaselineOutputUri"],
            "TrainingJobName": "{}-{}".format(model_name, job_id),
        }
        json.dump(workflow_inputs, f)

    # Write the dev & prod params for CFN
    with open(os.path.join(output_dir, "deploy-model-dev.json"), "w") as f:
        config = get_dev_config(model_name, job_id, deploy_role, image_uri,
                                kms_key_id, sagemaker_project_id)
        json.dump(config, f)
    with open(os.path.join(output_dir, "deploy-model-prd.json"), "w") as f:
        config = get_prd_config(
            model_name,
            job_id,
            deploy_role,
            image_uri,
            kms_key_id,
            notification_arn,
            sagemaker_project_id,
        )
        json.dump(config, f)
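Instead of calling workflow.create() directly, main() exports the state machine as a CloudFormation template (workflow-graph.yml). A hedged sketch of deploying that template with boto3, reusing output_dir from main and a placeholder stack name:

import boto3

with open(os.path.join(output_dir, "workflow-graph.yml"), "r") as f:
    template_body = f.read()

cfn = boto3.client("cloudformation")
cfn.create_stack(
    StackName="mlops-workflow-example",   # placeholder stack name
    TemplateBody=template_body,
)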