def test_workflow_input_placeholder():
    workflow_input = ExecutionInput()

    test_step = Pass(
        state_id="StateOne",
        parameters={
            "ParamA": "SampleValueA",
            "ParamB": workflow_input,
            "ParamC": workflow_input["Key01"],
            "ParamD": workflow_input["Key02"]["Key03"],
            "ParamE": workflow_input["Key01"]["Key03"],
        }
    )

    expected_repr = {
        "Type": "Pass",
        "Parameters": {
            "ParamA": "SampleValueA",
            "ParamB.$": "$$.Execution.Input",
            "ParamC.$": "$$.Execution.Input['Key01']",
            "ParamD.$": "$$.Execution.Input['Key02']['Key03']",
            "ParamE.$": "$$.Execution.Input['Key01']['Key03']"
        },
        "End": True
    }

    assert test_step.to_dict() == expected_repr
def test_step_input_order_validation():
    workflow_input = ExecutionInput()

    test_step_01 = Pass(
        state_id='StateOne',
        parameters={
            'ParamA': workflow_input['Key02']['Key03'],
            'ParamD': workflow_input['Key01']['Key03'],
        }
    )

    test_step_02 = Pass(
        state_id='StateTwo',
        parameters={
            'ParamC': workflow_input["Key05"],
            "ParamB": "SampleValueB",
            "ParamE": test_step_01.output()["Response"]["Key04"]
        }
    )

    test_step_03 = Pass(
        state_id='StateThree',
        parameters={
            'ParamG': "SampleValueG",
            "ParamF": workflow_input["Key06"],
            "ParamH": "SampleValueH"
        }
    )

    workflow_definition = Chain([test_step_01, test_step_03, test_step_02])

    with pytest.raises(ValueError):
        result = Graph(workflow_definition).to_dict()
def workflow(client):
    execution_input = ExecutionInput()

    test_step_01 = Pass(
        state_id='StateOne',
        parameters={
            'ParamA': execution_input['Key02']['Key03'],
            'ParamD': execution_input['Key01']['Key03'],
        }
    )

    test_step_02 = Pass(
        state_id='StateTwo',
        parameters={
            'ParamC': execution_input["Key05"],
            "ParamB": "SampleValueB",
            "ParamE": test_step_01.output()["Response"]["Key04"]
        }
    )

    test_step_03 = Pass(
        state_id='StateThree',
        parameters={
            'ParamG': "SampleValueG",
            "ParamF": execution_input["Key06"],
            "ParamH": "SampleValueH",
            "ParamI": test_step_02.output()
        }
    )

    workflow_definition = Chain([test_step_01, test_step_02, test_step_03])

    workflow = Workflow(
        name='TestWorkflow',
        definition=workflow_definition,
        role='testRoleArn',
        execution_input=execution_input,
        client=client
    )

    return workflow
def add_execution_input(self, unique_name: str) -> None:
    logger.debug(f"adding execution input for {unique_name}")
    if unique_name in self.execution_input_schema:
        raise DataJobSagemakerException(
            f"The entry {unique_name} already exists in the execution input."
        )
    self.execution_input_schema[unique_name] = str
    self.execution_input = ExecutionInput(schema=self.execution_input_schema)
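
# Hypothetical usage sketch for add_execution_input above; `datajob_stack` and the
# entry names are illustrative assumptions, not from the original source. The
# grounded behavior: each unique name becomes a `str` entry in the ExecutionInput
# schema, and registering the same name twice raises DataJobSagemakerException.
datajob_stack.add_execution_input("my-training-job-name")
datajob_stack.add_execution_input("my-model-name")

# Each registered name can then be wired into a step as a placeholder:
job_name_placeholder = datajob_stack.execution_input["my-training-job-name"]

# Registering a duplicate name raises DataJobSagemakerException:
# datajob_stack.add_execution_input("my-model-name")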
def test_model_step_with_placeholders(trained_estimator, sfn_client, sagemaker_session, sfn_role_arn):
    # Build workflow definition
    execution_input = ExecutionInput(schema={
        'ModelName': str,
        'Mode': str,
        'Tags': list
    })

    parameters = {
        'PrimaryContainer': {
            'Mode': execution_input['Mode']
        },
        'Tags': execution_input['Tags']
    }

    model_step = ModelStep(
        'create_model_step',
        model=trained_estimator.create_model(),
        model_name=execution_input['ModelName'],
        parameters=parameters
    )
    model_step.add_retry(SAGEMAKER_RETRY_STRATEGY)
    workflow_graph = Chain([model_step])

    with timeout(minutes=DEFAULT_TIMEOUT_MINUTES):
        # Create workflow and check definition
        workflow = create_workflow_and_check_definition(
            workflow_graph=workflow_graph,
            workflow_name=unique_name_from_base("integ-test-model-step-workflow"),
            sfn_client=sfn_client,
            sfn_role_arn=sfn_role_arn
        )

        inputs = {
            'ModelName': generate_job_name(),
            'Mode': 'SingleModel',
            'Tags': [{'Key': 'Environment', 'Value': 'test'}]
        }

        # Execute workflow
        execution = workflow.execute(inputs=inputs)
        execution_output = execution.get_output(wait=True)

        # Check workflow output
        assert execution_output.get("ModelArn") is not None
        assert execution_output["SdkHttpMetadata"]["HttpStatusCode"] == 200

        # Cleanup
        state_machine_delete_wait(sfn_client, workflow.state_machine_arn)
        model_name = get_resource_name_from_arn(execution_output.get("ModelArn")).split("/")[1]
        delete_sagemaker_model(model_name, sagemaker_session)
def test_placeholder_make_immutable():
    workflow_input = ExecutionInput()
    workflow_input["A"]["b"].get("C", float)
    workflow_input["Message"]
    workflow_input["Key01"]["Key02"]
    workflow_input["Key03"]
    workflow_input["Key03"]["Key04"]

    assert check_immutable(workflow_input) == False

    workflow_input._make_immutable()
    assert check_immutable(workflow_input) == True
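
# A minimal sketch of the check_immutable helper the test above relies on. It
# assumes each placeholder node exposes an `immutable` flag and a `store` dict of
# child placeholders; those attribute names are an assumption about the SDK
# internals, not taken from the original source.
def check_immutable(placeholder):
    # A node counts as immutable only if it and all of its children are frozen.
    if not placeholder.immutable:
        return False
    return all(check_immutable(child) for child in placeholder.store.values())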
def test_map_state_with_placeholders():
    workflow_input = ExecutionInput()
    step_result = StepResult()

    map_state = Map(
        state_id="MapState01",
        result_selector={
            "foo": step_result["foo"],
            "bar": step_result["bar1"]["bar2"]
        }
    )
    iterator_state = Pass(
        "TrainIterator",
        parameters={
            "ParamA": map_state.output()["X"]["Y"],
            "ParamB": workflow_input["Key01"]["Key02"]["Key03"]
        }
    )

    map_state.attach_iterator(iterator_state)
    workflow_definition = Chain([map_state])

    expected_repr = {
        "StartAt": "MapState01",
        "States": {
            "MapState01": {
                "Type": "Map",
                "ResultSelector": {
                    "foo.$": "$['foo']",
                    "bar.$": "$['bar1']['bar2']"
                },
                "End": True,
                "Iterator": {
                    "StartAt": "TrainIterator",
                    "States": {
                        "TrainIterator": {
                            "Parameters": {
                                "ParamA.$": "$['X']['Y']",
                                "ParamB.$": "$$.Execution.Input['Key01']['Key02']['Key03']"
                            },
                            "Type": "Pass",
                            "End": True
                        }
                    }
                }
            }
        }
    }

    result = Graph(workflow_definition).to_dict()
    assert result == expected_repr
def test_map_state_with_placeholders():
    workflow_input = ExecutionInput()

    map_state = Map('MapState01')
    iterator_state = Pass(
        'TrainIterator',
        parameters={
            'ParamA': map_state.output()['X']["Y"],
            'ParamB': workflow_input["Key01"]["Key02"]["Key03"]
        }
    )

    map_state.attach_iterator(iterator_state)
    workflow_definition = Chain([map_state])

    expected_repr = {
        "StartAt": "MapState01",
        "States": {
            "MapState01": {
                "Type": "Map",
                "End": True,
                "Iterator": {
                    "StartAt": "TrainIterator",
                    "States": {
                        "TrainIterator": {
                            "Parameters": {
                                "ParamA.$": "$['X']['Y']",
                                "ParamB.$": "$$.Execution.Input['Key01']['Key02']['Key03']"
                            },
                            "Type": "Pass",
                            "End": True
                        }
                    }
                }
            }
        }
    }

    result = Graph(workflow_definition).to_dict()
    assert result == expected_repr
def test_placeholder_with_schema():
    test_schema = {
        "A": {
            "B": {
                "C": int
            }
        },
        "Request": {
            "Status": str
        },
        "Hello": float
    }
    workflow_input = ExecutionInput(schema=test_schema)
    assert workflow_input.get_schema_as_dict() == test_schema
    assert workflow_input.immutable == True

    with pytest.raises(ValueError):
        workflow_input["A"]["B"]["D"]

    with pytest.raises(ValueError):
        workflow_input["A"]["B"].get("C", float)
def test_placeholder_schema_as_dict():
    workflow_input = ExecutionInput()
    workflow_input["A"]["b"].get("C", float)
    workflow_input["Message"]
    workflow_input["Key01"]["Key02"]
    workflow_input["Key03"]
    workflow_input["Key03"]["Key04"]

    expected_schema = {
        "A": {
            "b": {
                "C": float
            }
        },
        "Message": str,
        "Key01": {
            "Key02": str
        },
        "Key03": {
            "Key04": str
        }
    }

    assert workflow_input.get_schema_as_dict() == expected_schema
                    output_path='s3://{}/{}/output'.format(bucket, project_name))

xgb.set_hyperparameters(max_depth=5,
                        eta=0.2,
                        gamma=4,
                        min_child_weight=6,
                        subsample=0.8,
                        silent=0,
                        objective='binary:logistic',
                        eval_metric='error',
                        num_round=100)

# Build out the workflow
execution_input = ExecutionInput(schema={
    'TrainingJobName': str,
    'ModelName': str
})

etl_step = steps.GlueStartJobRunStep(
    'Extract, Transform, Load',
    parameters={
        "JobName": job_name,
        "Arguments": {
            '--S3_SOURCE': data_source,
            '--S3_DEST': 's3a://{}/{}/'.format(bucket, project_name),
            '--TRAIN_KEY': train_prefix + '/',
            '--VAL_KEY': val_prefix + '/'
        }
    }
)

training_step = steps.TrainingStep(
parser.add_argument('--batch_job_definition', type=str, default=os.environ['BATCH_JOB_DEFINITION'])
parser.add_argument('--batch_job_name', type=str, default=os.environ['BATCH_JOB_NAME'])
parser.add_argument('--batch_job_queue', type=str, default=os.environ['BATCH_JOB_QUEUE'])
parser.add_argument('--train_url', type=str, default=os.environ['TRAIN_URL'])
parser.add_argument('--data_path', type=str, default=os.environ['DATA_PATH'])
parser.add_argument('--batch_size', type=str, default=os.environ['BATCH_SIZE'])
parser.add_argument('--epoch', type=str, default=os.environ['EPOCH'])
args = parser.parse_args()

# Define the schema for the inputs passed to the Step Functions execution
execution_input = ExecutionInput(schema={
    # AWS Batch
    'BatchJobDefinition': str,
    'BatchJobName': str,
    'BatchJobQueue': str,
    # SageMaker
    'TrainJobName': str,
})

# Define the Step Functions workflow
inputs = {
    # AWS Batch
    'BatchJobDefinition': args.batch_job_definition,
    'BatchJobName': args.batch_job_name,
    'BatchJobQueue': args.batch_job_queue,
    # SageMaker Training
    'TrainJobName': TRAINING_JOB_NAME
}
from stepfunctions.inputs import ExecutionInput
from stepfunctions.workflow import Workflow

stepfunctions.set_stream_logger(level=logging.INFO)

id = uuid.uuid4().hex
FLOW_NAME = 'active_learning_flow_{}'.format(id)
WORKFLOW_ROLE = 'ROLE ARN'

if __name__ == '__main__':
    # Define the schema for the inputs passed to the Step Functions execution
    execution_input = ExecutionInput(
        schema={
            # AWS Batch
            'BatchJobDefinition': str,
            'BatchJobName': str,
            'BatchJobQueue': str,
            # AWS Lambda
            'LambdaFunctionName': str,
        })

    # Define the Step Functions workflow
    inputs = {
        # AWS Batch
        'BatchJobDefinition': 'active-learning-job_run:1',
        'BatchJobName': 'active-learning-inference',
        'BatchJobQueue': 'active-learning-inference',
        # AWS Lambda
        'LambdaFunctionName': 'create_labeling_job'
    }
def setup_workflow(project, purpose, workflow_execution_role, script_dir, ecr_repository):
    """
    Set up everything needed for a Step Functions workflow that runs a SageMaker Processing job.

    arg:
        project: project name under sagemaker
        purpose: subproject
        workflow_execution_role: arn to execute step functions
        script_dir: processing file name, like a .py file
        ecr_repository: ecr repository name

    return:
        workflow: a stepfunctions.workflow.Workflow instance

    example:
        PROJECT = '[dpt-proj-2022]'
        PURPOSE = '[processing]'
        WORKFLOW_EXECUTION_ROLE = "arn:aws-cn:iam::[*********]:role/[**************]"
        SCRIPT_DIR = "[processing].py"
        ECR_REPOSITORY = '[ecr-2022]'
    """

    # SageMaker Session setup
    # ========================================================================================
    # SageMaker Session
    # ====================================
    account_id = boto3.client('sts').get_caller_identity().get('Account')
    role = sagemaker.get_execution_role()

    # Storage
    # ====================================
    session = sagemaker.Session()
    region = session.boto_region_name
    s3_output = session.default_bucket()

    # Code storage
    # ==================
    s3_prefix = '{}/{}'.format(project, purpose)
    s3_prefix_code = '{}/code'.format(s3_prefix)
    s3CodePath = 's3://{}/{}/code'.format(s3_output, s3_prefix)

    ## preprocess & prediction
    script_list = [script_dir]
    for script in script_list:
        session.upload_data(script,
                            bucket=session.default_bucket(),
                            key_prefix=s3_prefix_code)

    # ECR environment
    # ====================================
    uri_suffix = 'amazonaws.com.cn'
    tag = ':latest'
    ecr_repository_uri = '{}.dkr.ecr.{}.{}/{}'.format(account_id, region, uri_suffix,
                                                      ecr_repository + tag)

    # SageMaker Experiments setup
    # ========================================================================================
    experiment = Experiment.create(
        experiment_name="{}-{}".format(project, int(time.time())),
        description="machine learning project",
        sagemaker_boto_client=boto3.client('sagemaker'))
    print(experiment)

    execution_input = ExecutionInput(schema={
        "ProcessingJobName": str,
        "ResultPath": str,
    })

    # Set up the script processor
    script_processor = ScriptProcessor(command=['python3'],
                                       image_uri=ecr_repository_uri,
                                       role=role,
                                       instance_count=1,
                                       instance_type='ml.m5.4xlarge')

    # Step
    # ========================================================================================
    optimizing_step = steps.ProcessingStep(
        "Processing Step",
        processor=script_processor,
        job_name=execution_input["ProcessingJobName"],
        inputs=[
            ProcessingInput(source=s3CodePath,
                            destination='/opt/ml/processing/input/code',
                            input_name='code')
        ],
        outputs=[
            ProcessingOutput(output_name=purpose,
                             destination=execution_input["ResultPath"],
                             source='/opt/ml/processing/{}'.format(purpose))
        ],
        container_entrypoint=["python3", "/opt/ml/processing/input/code/" + script_dir],
    )

    # Fail State
    # ========================================================================================
    failed_state = steps.states.Fail("Processing Workflow failed",
                                     cause="SageMakerProcessingJobFailed")

    catch_state_processing = steps.states.Catch(
        error_equals=["States.TaskFailed"],
        next_step=failed_state)

    # Create Workflow
    # ========================================================================================
    optimizing_step.add_catch(catch_state_processing)

    workflow_name = "workflow-{}-{}".format(project, purpose).upper()
    workflow_graph = steps.Chain([optimizing_step])

    workflow = Workflow(
        name=workflow_name,
        definition=workflow_graph,
        role=workflow_execution_role
    )

    workflow.create()

    return workflow
from stepfunctions.template.utils import replace_parameters_with_jsonpath

stepfunctions.set_stream_logger(level=logging.INFO)

region = boto3.Session().region_name

model_namea = f"DEMO-decision-tree-pred-{datetime.now():%Y-%m-%d-%H-%M-%S}"
model_nameb = f"DEMO-random-forest-pred-{datetime.now():%Y-%m-%d-%H-%M-%S}"

# Create a schema for input
event_input = ExecutionInput(schema={
    'BuildId': str,
    'ModelA': str,
    'ModelB': str,
    'Endpoint': str,
    'ecrArnA': str,
    'ecrArnB': str,
    'dataBucketPath': str,
    'authorDate': str,
    'triggerSource': str,
    'commitId': str,
})

# Define static variables determined by appsec
sagemaker_role = 'arn:aws:iam::860660749434:role/qls-28583-e80f1ff13e6e273a-SageMakerRole-ND1XCTEJG4JM'
workflow_role = 'arn:aws:iam::860660749434:role/qls-28583-e80f1ff13e6e273a-StepFunctionsRole-1873OQ5BK2E8U'
ecr_ArnA = 'latesta'
ecr_ArnB = 'latestb'
state_machine_arn = 'arn:aws:states:us-west-2:860660749434:stateMachine:trainingStateMachine-qxyULJR6C733'
state_machine_name = 'trainingStateMachine-qxyULJR6C733'
dynamoDBTable = 'qls-28583-e80f1ff13e6e273a-DynamoDBTable-13I0WGPSJZZVI'
endpoint_wait_lambda = 'arn:aws:lambda:us-west-2:860660749434:function:qls-28583-e80f1ff13e6e273a-endpointWaitLambda-dpyAW80Wkrh3'
def define_training_pipeline(
    sm_role,
    workflow_execution_role,
    training_pipeline_name,
    return_yaml=True,
    dump_yaml_file="templates/sagemaker_training_pipeline.yaml",
    kms_key_id=None,
):
    """
    Return a YAML definition of the training pipeline, which consists of multiple
    AWS Step Functions steps.

    sm_role: ARN of the SageMaker execution role
    workflow_execution_role: ARN of the StepFunction execution role
    return_yaml: Return YAML representation or not; if False, it returns an instance
        of `stepfunctions.workflow.WorkflowObject`
    dump_yaml_file: If not None, a YAML file will be generated at this file location
    """
    # Pass required parameters dynamically for each execution using placeholders.
    execution_input = ExecutionInput(
        schema={
            "InputDataURL": str,
            "PreprocessingJobName": str,
            "PreprocessingCodeURL": str,
            "TrainingJobName": str,
            # Prevent sagemaker config from hardcoding sagemaker_submit_directory in
            # the workflow definition
            "SMSubmitDirURL": str,
            # Prevent sagemaker config from hardcoding sagemaker_region in the
            # workflow definition
            "SMRegion": str,
            "EvaluationProcessingJobName": str,
            "EvaluationCodeURL": str,
            "EvaluationResultURL": str,
            "PreprocessedTrainDataURL": str,
            "PreprocessedTestDataURL": str,
            "PreprocessedModelURL": str,
            "SMOutputDataURL": str,
            "SMDebugOutputURL": str,
        })

    """
    Data pre-processing and feature engineering
    """
    sklearn_processor = SKLearnProcessor(
        framework_version="0.20.0",
        role=sm_role,
        instance_type="ml.m5.xlarge",
        instance_count=1,
        max_runtime_in_seconds=1200,
    )

    # Create ProcessingInputs and ProcessingOutputs objects for Inputs and
    # Outputs respectively for the SageMaker Processing Job
    inputs = [
        ProcessingInput(
            source=execution_input["InputDataURL"],
            destination="/opt/ml/processing/input",
            input_name="input-1",
        ),
        ProcessingInput(
            source=execution_input["PreprocessingCodeURL"],
            destination="/opt/ml/processing/input/code",
            input_name="code",
        ),
    ]

    outputs = [
        ProcessingOutput(
            source="/opt/ml/processing/train",
            destination=execution_input["PreprocessedTrainDataURL"],
            output_name="train_data",
        ),
        ProcessingOutput(
            source="/opt/ml/processing/test",
            destination=execution_input["PreprocessedTestDataURL"],
            output_name="test_data",
        ),
        ProcessingOutput(
            source="/opt/ml/processing/model",
            destination=execution_input["PreprocessedModelURL"],
            output_name="proc_model",
        ),
    ]

    processing_step = ProcessingStep(
        "SageMaker pre-processing step",
        processor=sklearn_processor,
        job_name=execution_input["PreprocessingJobName"],
        inputs=inputs,
        outputs=outputs,
        container_arguments=["--train-test-split-ratio", "0.2", "--mode", "train"],
        container_entrypoint=[
            "python3",
            "/opt/ml/processing/input/code/preprocessing.py",
        ],
        kms_key_id=kms_key_id,
    )

    """
    Training using the pre-processed data
    """
    sklearn = SKLearn(
        entry_point="../../src/mlmax/train.py",
        train_instance_type="ml.m5.xlarge",
        role=sm_role,
        py_version="py3",
        framework_version="0.20.0",
        output_kms_key=kms_key_id,
    )

    training_step = MLMaxTrainingStep(
        "SageMaker Training Step",
        estimator=sklearn,
        job_name=execution_input["TrainingJobName"],
        train_data=execution_input["PreprocessedTrainDataURL"],
        test_data=execution_input["PreprocessedTestDataURL"],
        sm_submit_url=execution_input["SMSubmitDirURL"],
        sm_region=execution_input["SMRegion"],
        sm_output_data=execution_input["SMOutputDataURL"],
        sm_debug_output_data=execution_input["SMDebugOutputURL"],
        wait_for_completion=True,
    )

    """
    Model evaluation
    """
    # Create input and output objects for the Model Evaluation ProcessingStep.
    inputs_evaluation = [
        ProcessingInput(
            source=execution_input["PreprocessedTestDataURL"],
            destination="/opt/ml/processing/test",
            input_name="input-1",
        ),
        ProcessingInput(
            source=training_step.get_expected_model().model_data,
            destination="/opt/ml/processing/model",
            input_name="input-2",
        ),
        ProcessingInput(
            source=execution_input["EvaluationCodeURL"],
            destination="/opt/ml/processing/input/code",
            input_name="code",
        ),
    ]

    outputs_evaluation = [
        ProcessingOutput(
            source="/opt/ml/processing/evaluation",
            destination=execution_input["EvaluationResultURL"],
            output_name="evaluation",
        ),
    ]

    model_evaluation_processor = SKLearnProcessor(
        framework_version="0.20.0",
        role=sm_role,
        instance_type="ml.m5.xlarge",
        instance_count=1,
        max_runtime_in_seconds=1200,
    )

    processing_evaluation_step = ProcessingStep(
        "SageMaker Processing Model Evaluation step",
        processor=model_evaluation_processor,
        job_name=execution_input["EvaluationProcessingJobName"],
        inputs=inputs_evaluation,
        outputs=outputs_evaluation,
        container_entrypoint=["python3", "/opt/ml/processing/input/code/evaluation.py"],
    )

    # Create Fail state to mark the workflow failed in case any of the steps fail.
    failed_state_sagemaker_processing_failure = stepfunctions.steps.states.Fail(
        "ML Workflow failed", cause="SageMakerProcessingJobFailed")

    # Add the Error handling in the workflow
    catch_state_processing = stepfunctions.steps.states.Catch(
        error_equals=["States.TaskFailed"],
        next_step=failed_state_sagemaker_processing_failure,
    )
    processing_step.add_catch(catch_state_processing)
    processing_evaluation_step.add_catch(catch_state_processing)
    training_step.add_catch(catch_state_processing)

    # Create the Workflow
    workflow_graph = Chain([processing_step, training_step, processing_evaluation_step])
    training_pipeline = Workflow(
        name=training_pipeline_name,
        definition=workflow_graph,
        role=workflow_execution_role,
    )
    return training_pipeline
def test_placeholder_path():
    workflow_input = ExecutionInput()
    placeholder_variable = workflow_input["A"]["b"]["C"]
    expected_path = ["A", "b", "C"]
    assert placeholder_variable._get_path() == expected_path
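
# Hedged companion example (not from the original source): the same path, when the
# placeholder is used in a state's parameters, serializes to the
# "$$.Execution.Input[...]" JSONPath shown in the other tests in this collection.
def example_placeholder_path_serialization():
    workflow_input = ExecutionInput()
    step = Pass("PathDemo", parameters={"Value": workflow_input["A"]["b"]["C"]})
    assert step.to_dict() == {
        "Type": "Pass",
        "Parameters": {"Value.$": "$$.Execution.Input['A']['b']['C']"},
        "End": True
    }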
from stepfunctions.inputs import ExecutionInput
from stepfunctions.workflow import Workflow
from time import gmtime, strftime
from sagemaker.model_monitor import DataCaptureConfig

import utils

sagemaker_session = sagemaker.Session()
sagemaker_exec_role = utils.get_sagemaker_execution_role()
sfn_client = client('stepfunctions')

# define execution input
execution_input = ExecutionInput(
    schema={
        'AutoMLJobName': str,
        'ModelName': str,
        'S3InputData': str,
        'IamRole': str,
        'TargetColumnName': str,
        'S3OutputData': str,
        'Tags': dict,
        'EndpointName': str,
        'EndpointConfigName': str
    })

# TODO: make this a notification
workflow_failure = Fail('WorkflowFailed')

# create autopilot lambda step
create_autopilot_job_step = LambdaStep(
    'StartAutopilotJob',
    parameters={
        'FunctionName': 'CreateAutopilotJob',
        'Payload': {
def define_data_pipeline(
    sm_role,
    workflow_execution_role,
    data_pipeline_name,
    return_yaml=True,
    dump_yaml_file="templates/sagemaker_data_pipeline.yaml",
):
    """
    Return a YAML definition of the data pipeline, which consists of multiple
    AWS Step Functions steps.

    sm_role: ARN of the SageMaker execution role
    workflow_execution_role: ARN of the StepFunction execution role
    return_yaml: Return YAML representation or not; if False, it returns an instance
        of `stepfunctions.workflow.WorkflowObject`
    dump_yaml_file: If not None, a YAML file will be generated at this file location
    """
    # Pass required parameters dynamically for each execution using placeholders.
    execution_input = ExecutionInput(
        schema={
            "PreprocessingJobName": str,
            "PreprocessingCodeURL": str,
            "PreprocessedOutputDataURL": str,
            "S3InputPath": str,
            "S3OutputPath": str,
        }
    )

    """
    Data pre-processing and feature engineering
    """
    # processor = PySparkProcessor(
    region = "ap-southeast-1"
    image = "sagemaker-spark-processing"
    img_uri = f"759080221371.dkr.ecr.{region}.amazonaws.com/{image}:2.4-cpu"
    processor = ScriptProcessor(
        image_uri=img_uri,
        role=sm_role,
        instance_count=16,
        instance_type="ml.m5.2xlarge",
        command=["/opt/program/submit"],
        max_runtime_in_seconds=3600,
        env={"mode": "python"},
    )

    # Create ProcessingInputs and ProcessingOutputs objects for Inputs and
    # Outputs respectively for the SageMaker Processing Job
    inputs = [
        ProcessingInput(
            source=execution_input["PreprocessingCodeURL"],
            destination="/opt/ml/processing/input/code",
            input_name="code",
        ),
    ]
    outputs = [
        ProcessingOutput(
            source="/opt/ml/processing/output",
            destination=execution_input["PreprocessedOutputDataURL"],
            output_name="processed_data",
        ),
    ]

    processing_step = MLMAXProcessingStep(
        "SageMaker pre-processing step",
        processor=processor,
        job_name=execution_input["PreprocessingJobName"],
        inputs=inputs,
        outputs=outputs,
        environment={
            "S3InputPath": execution_input["S3InputPath"],
            "S3OutputPath": execution_input["S3OutputPath"],
        },
        container_entrypoint=[
            "smspark-submit",
            "/opt/ml/processing/input/code/preprocessing.py",
        ],
    )

    # Create Fail state to mark the workflow failed in case any of the steps fail.
    failed_state_sagemaker_processing_failure = stepfunctions.steps.states.Fail(
        "ML Workflow failed", cause="SageMakerProcessingJobFailed"
    )

    # Add the Error handling in the workflow
    catch_state_processing = stepfunctions.steps.states.Catch(
        error_equals=["States.TaskFailed"],
        next_step=failed_state_sagemaker_processing_failure,
    )
    processing_step.add_catch(catch_state_processing)

    # Create the Workflow
    workflow_graph = Chain([processing_step])
    data_pipeline = Workflow(
        name=data_pipeline_name,
        definition=workflow_graph,
        role=workflow_execution_role,
    )
    return data_pipeline
def test_parallel_state_with_placeholders():
    workflow_input = ExecutionInput()
    step_result = StepResult()

    parallel_state = Parallel(
        state_id="ParallelState01",
        result_selector={
            "foo": step_result["foo"],
            "bar": step_result["bar1"]["bar2"]
        }
    )

    branch_A = Pass(
        "Branch_A",
        parameters={
            "ParamA": parallel_state.output()["A"]["B"],
            "ParamB": workflow_input["Key01"]
        }
    )
    branch_B = Pass(
        "Branch_B",
        parameters={
            "ParamA": "TestValue",
            "ParamB": parallel_state.output()["Response"]["Key"]["State"]
        }
    )
    branch_C = Pass(
        "Branch_C",
        parameters={
            "ParamA": parallel_state.output()["A"]["B"].get("C", float),
            "ParamB": "HelloWorld"
        }
    )

    parallel_state.add_branch(branch_A)
    parallel_state.add_branch(branch_B)
    parallel_state.add_branch(branch_C)

    workflow_definition = Chain([parallel_state])
    result = Graph(workflow_definition).to_dict()

    expected_repr = {
        "StartAt": "ParallelState01",
        "States": {
            "ParallelState01": {
                "Type": "Parallel",
                "ResultSelector": {
                    "foo.$": "$['foo']",
                    "bar.$": "$['bar1']['bar2']"
                },
                "End": True,
                "Branches": [
                    {
                        "StartAt": "Branch_A",
                        "States": {
                            "Branch_A": {
                                "Parameters": {
                                    "ParamA.$": "$['A']['B']",
                                    "ParamB.$": "$$.Execution.Input['Key01']"
                                },
                                "Type": "Pass",
                                "End": True
                            }
                        }
                    },
                    {
                        "StartAt": "Branch_B",
                        "States": {
                            "Branch_B": {
                                "Parameters": {
                                    "ParamA": "TestValue",
                                    "ParamB.$": "$['Response']['Key']['State']"
                                },
                                "Type": "Pass",
                                "End": True
                            }
                        }
                    },
                    {
                        "StartAt": "Branch_C",
                        "States": {
                            "Branch_C": {
                                "Parameters": {
                                    "ParamA.$": "$['A']['B']['C']",
                                    "ParamB": "HelloWorld"
                                },
                                "Type": "Pass",
                                "End": True
                            }
                        }
                    }
                ]
            }
        }
    }

    assert result == expected_repr
def test_training_step_creation_with_placeholders(pca_estimator):
    execution_input = ExecutionInput(schema={
        'Data': str,
        'OutputPath': str,
    })

    step_input = StepInput(schema={
        'JobName': str,
    })

    step = TrainingStep(
        'Training',
        estimator=pca_estimator,
        job_name=step_input['JobName'],
        data=execution_input['Data'],
        output_data_config_path=execution_input['OutputPath'],
        experiment_config={
            'ExperimentName': 'pca_experiment',
            'TrialName': 'pca_trial',
            'TrialComponentDisplayName': 'Training'
        },
        tags=DEFAULT_TAGS,
    )

    assert step.to_dict() == {
        'Type': 'Task',
        'Parameters': {
            'AlgorithmSpecification': {
                'TrainingImage': PCA_IMAGE,
                'TrainingInputMode': 'File'
            },
            'OutputDataConfig': {
                'S3OutputPath.$': "$$.Execution.Input['OutputPath']"
            },
            'StoppingCondition': {
                'MaxRuntimeInSeconds': 86400
            },
            'ResourceConfig': {
                'InstanceCount': 1,
                'InstanceType': 'ml.c4.xlarge',
                'VolumeSizeInGB': 30
            },
            'RoleArn': EXECUTION_ROLE,
            'HyperParameters': {
                'feature_dim': '50000',
                'num_components': '10',
                'subtract_mean': 'True',
                'algorithm_mode': 'randomized',
                'mini_batch_size': '200'
            },
            'InputDataConfig': [{
                'ChannelName': 'training',
                'DataSource': {
                    'S3DataSource': {
                        'S3DataDistributionType': 'FullyReplicated',
                        'S3DataType': 'S3Prefix',
                        'S3Uri.$': "$$.Execution.Input['Data']"
                    }
                }
            }],
            'ExperimentConfig': {
                'ExperimentName': 'pca_experiment',
                'TrialName': 'pca_trial',
                'TrialComponentDisplayName': 'Training'
            },
            'TrainingJobName.$': "$['JobName']",
            'Tags': DEFAULT_TAGS_LIST
        },
        'Resource': 'arn:aws:states:::sagemaker:createTrainingJob.sync',
        'End': True
    }
def define_inference_pipeline(
    sm_role,
    workflow_execution_role,
    inference_pipeline_name,
    return_yaml=True,
    dump_yaml_file="templates/sagemaker_inference_pipeline.yaml",
    kms_key_id=None,
):
    """
    Return a YAML definition of the inference pipeline, which consists of multiple
    AWS Step Functions steps.

    sm_role: ARN of the SageMaker execution role
    workflow_execution_role: ARN of the StepFunction execution role
    return_yaml: Return YAML representation or not; if False, it returns an instance
        of `stepfunctions.workflow.WorkflowObject`
    dump_yaml_file: If not None, a YAML file will be generated at this file location
    """
    # Pass required parameters dynamically for each execution using placeholders.
    execution_input = ExecutionInput(
        schema={
            "InputDataURL": str,
            "PreprocessingJobName": str,
            "InferenceJobName": str,
            "ProcModelS3": str,
            "PreprocessingCodeURL": str,
            "InferenceCodeURL": str,
            "ModelS3": str,
            "PreprocessedTrainDataURL": str,
            "PreprocessedTestDataURL": str,
            "OutputPathURL": str,
        })

    """
    Create Preprocessing Model from model artifact.
    """
    # sagemaker_session = sagemaker.Session()
    sklearn_processor = SKLearnProcessor(
        framework_version="0.20.0",
        role=sm_role,
        instance_type="ml.m5.xlarge",
        instance_count=1,
        max_runtime_in_seconds=1200,
    )

    # Create ProcessingInputs and ProcessingOutputs objects for Inputs and
    # Outputs respectively for the SageMaker Processing Job
    inputs = [
        ProcessingInput(
            source=execution_input["InputDataURL"],
            destination="/opt/ml/processing/input",
            input_name="input-1",
        ),
        ProcessingInput(
            source=execution_input["PreprocessingCodeURL"],
            destination="/opt/ml/processing/input/code",
            input_name="code",
        ),
        ProcessingInput(
            source=execution_input["ProcModelS3"],
            destination="/opt/ml/processing/model",
            input_name="proc_model",
        ),
    ]
    outputs = [
        ProcessingOutput(
            source="/opt/ml/processing/test",
            destination=execution_input["PreprocessedTestDataURL"],
            output_name="test_data",
        ),
    ]

    processing_step = ProcessingStep(
        "SageMaker pre-processing step",
        processor=sklearn_processor,
        job_name=execution_input["PreprocessingJobName"],
        inputs=inputs,
        outputs=outputs,
        container_arguments=["--mode", "infer"],
        container_entrypoint=[
            "python3",
            "/opt/ml/processing/input/code/preprocessing.py",
        ],
        kms_key_id=kms_key_id,
    )

    """
    Create inference with sklearn processing step.

    Inputs are the preprocessed data S3 URL, the inference code S3 URL, and the
    model S3 URL. Output is the inferred data.
    """
    sklearn_processor2 = SKLearnProcessor(
        framework_version="0.20.0",
        role=sm_role,
        instance_type="ml.m5.xlarge",
        instance_count=1,
        max_runtime_in_seconds=1200,
    )
    inputs = [
        ProcessingInput(
            source=execution_input["PreprocessedTestDataURL"],
            destination="/opt/ml/processing/input",
            input_name="input-1",
        ),
        ProcessingInput(
            source=execution_input["InferenceCodeURL"],
            destination="/opt/ml/processing/input/code",
            input_name="code",
        ),
        ProcessingInput(
            source=execution_input["ModelS3"],
            destination="/opt/ml/processing/model",
            input_name="model",
        ),
    ]
    outputs = [
        ProcessingOutput(
            source="/opt/ml/processing/test",
            destination=execution_input["OutputPathURL"],
            output_name="test_data",
        ),
    ]

    inference_step = ProcessingStep(
        "SageMaker inference step",
        processor=sklearn_processor2,
        job_name=execution_input["InferenceJobName"],
        inputs=inputs,
        outputs=outputs,
        container_entrypoint=[
            "python3",
            "/opt/ml/processing/input/code/inference.py",
        ],
        kms_key_id=kms_key_id,
    )

    # Create Fail state to mark the workflow failed in case any of the steps fail.
    failed_state_sagemaker_processing_failure = stepfunctions.steps.states.Fail(
        "ML Workflow failed", cause="SageMakerProcessingJobFailed")

    # Add the Error handling in the workflow
    catch_state_processing = stepfunctions.steps.states.Catch(
        error_equals=["States.TaskFailed"],
        next_step=failed_state_sagemaker_processing_failure,
    )
    processing_step.add_catch(catch_state_processing)
    inference_step.add_catch(catch_state_processing)

    # Create the Workflow
    workflow_graph = Chain([processing_step, inference_step])
    inference_pipeline = Workflow(
        name=inference_pipeline_name,
        definition=workflow_graph,
        role=workflow_execution_role,
    )
    return inference_pipeline
def test_workflow_with_placeholders():
    workflow_input = ExecutionInput()

    test_step_01 = Pass(
        state_id='StateOne',
        parameters={
            'ParamA': workflow_input['Key02']['Key03'],
            'ParamD': workflow_input['Key01']['Key03'],
        }
    )

    test_step_02 = Pass(
        state_id='StateTwo',
        parameters={
            'ParamC': workflow_input["Key05"],
            "ParamB": "SampleValueB",
            "ParamE": test_step_01.output()["Response"]["Key04"]
        }
    )

    test_step_03 = Pass(
        state_id='StateThree',
        parameters={
            'ParamG': "SampleValueG",
            "ParamF": workflow_input["Key06"],
            "ParamH": "SampleValueH"
        }
    )

    workflow_definition = Chain([test_step_01, test_step_02, test_step_03])
    result = Graph(workflow_definition).to_dict()

    expected_workflow_repr = {
        "StartAt": "StateOne",
        "States": {
            "StateOne": {
                "Type": "Pass",
                "Parameters": {
                    "ParamA.$": "$$.Execution.Input['Key02']['Key03']",
                    "ParamD.$": "$$.Execution.Input['Key01']['Key03']"
                },
                "Next": "StateTwo"
            },
            "StateTwo": {
                "Type": "Pass",
                "Parameters": {
                    "ParamC.$": "$$.Execution.Input['Key05']",
                    "ParamB": "SampleValueB",
                    "ParamE.$": "$['Response']['Key04']"
                },
                "Next": "StateThree"
            },
            "StateThree": {
                "Type": "Pass",
                "Parameters": {
                    "ParamG": "SampleValueG",
                    "ParamF.$": "$$.Execution.Input['Key06']",
                    "ParamH": "SampleValueH"
                },
                "End": True
            }
        }
    }

    assert result == expected_workflow_repr
def main(
    git_branch,
    codebuild_id,
    pipeline_name,
    model_name,
    deploy_role,
    sagemaker_role,
    sagemaker_bucket,
    data_dir,
    output_dir,
    ecr_dir,
    kms_key_id,
    workflow_role_arn,
    notification_arn,
    sagemaker_project_id,
    tags,
):
    # Define the function names
    create_experiment_function_name = "mlops-create-experiment"
    query_training_function_name = "mlops-query-training"

    # Get the region
    region = boto3.Session().region_name
    print("region: {}".format(region))

    if ecr_dir:
        # Load the image uri and input data config
        with open(os.path.join(ecr_dir, "imageDetail.json"), "r") as f:
            image_uri = json.load(f)["ImageURI"]
    else:
        # Get the managed image uri for the current region
        image_uri = get_training_image(region)
    print("image uri: {}".format(image_uri))

    with open(os.path.join(data_dir, "inputData.json"), "r") as f:
        input_data = json.load(f)
    print("training uri: {}".format(input_data["TrainingUri"]))
    print("validation uri: {}".format(input_data["ValidationUri"]))
    print("baseline uri: {}".format(input_data["BaselineUri"]))

    # Get the job id and source revisions
    job_id = get_pipeline_execution_id(pipeline_name, codebuild_id)
    revisions = get_pipeline_revisions(pipeline_name, job_id)
    git_commit_id = revisions["ModelSourceOutput"]
    data_version_id = revisions["DataSourceOutput"]
    print("job id: {}".format(job_id))
    print("git commit: {}".format(git_commit_id))
    print("data version: {}".format(data_version_id))

    # Set the output data
    output_data = {
        "ModelOutputUri": "s3://{}/{}".format(sagemaker_bucket, model_name),
        "BaselineOutputUri": f"s3://{sagemaker_bucket}/{model_name}/monitoring/baseline/{model_name}-pbl-{job_id}",
    }
    print("model output uri: {}".format(output_data["ModelOutputUri"]))

    # Pass these into the training method
    hyperparameters = {}
    if os.path.exists(os.path.join(data_dir, "hyperparameters.json")):
        with open(os.path.join(data_dir, "hyperparameters.json"), "r") as f:
            hyperparameters = json.load(f)
        for i in hyperparameters:
            hyperparameters[i] = str(hyperparameters[i])

    # Define the step functions execution input schema
    execution_input = ExecutionInput(
        schema={
            "GitBranch": str,
            "GitCommitHash": str,
            "DataVersionId": str,
            "ExperimentName": str,
            "TrialName": str,
            "BaselineJobName": str,
            "BaselineOutputUri": str,
            "TrainingJobName": str,
        })

    # Create experiment step
    experiment_step = create_experiment_step(create_experiment_function_name)
    baseline_step = create_baseline_step(input_data, execution_input, region, sagemaker_role)
    training_step = create_training_step(
        image_uri,
        hyperparameters,
        input_data,
        output_data,
        execution_input,
        query_training_function_name,
        region,
        sagemaker_role,
    )
    workflow_definition = create_graph(experiment_step, baseline_step, training_step)

    # Create the workflow as the model name
    workflow = Workflow(model_name, workflow_definition, workflow_role_arn)
    print("Creating workflow: {0}-{1}".format(model_name, sagemaker_project_id))

    # Create output directory
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    # Write the workflow graph to json
    with open(os.path.join(output_dir, "workflow-graph.json"), "w") as f:
        f.write(workflow.definition.to_json(pretty=True))

    # Write the workflow graph to yml
    with open(os.path.join(output_dir, "workflow-graph.yml"), "w") as f:
        f.write(workflow.get_cloudformation_template())

    # Write the workflow inputs to file
    with open(os.path.join(output_dir, "workflow-input.json"), "w") as f:
        workflow_inputs = {
            "ExperimentName": "{}".format(model_name),
            "TrialName": "{}-{}".format(model_name, job_id),
            "GitBranch": git_branch,
            "GitCommitHash": git_commit_id,
"DataVersionId": data_verison_id, "BaselineJobName": "{}-pbl-{}".format(model_name, job_id), "BaselineOutputUri": output_data["BaselineOutputUri"], "TrainingJobName": "{}-{}".format(model_name, job_id), } json.dump(workflow_inputs, f) # Write the dev & prod params for CFN with open(os.path.join(output_dir, "deploy-model-dev.json"), "w") as f: config = get_dev_config(model_name, job_id, deploy_role, image_uri, kms_key_id, sagemaker_project_id) json.dump(config, f) with open(os.path.join(output_dir, "deploy-model-prd.json"), "w") as f: config = get_prd_config( model_name, job_id, deploy_role, image_uri, kms_key_id, notification_arn, sagemaker_project_id, ) json.dump(config, f)
def define_monitor_pipeline(
    account,
    region,
    sm_role,
    workflow_execution_role,
    data_pipeline_name,
    return_yaml=True,
    dump_yaml_file="templates/sagemaker_data_pipeline.yaml",
):
    """
    Return a YAML definition of the monitoring pipeline, which consists of multiple
    AWS Step Functions steps.

    sm_role: ARN of the SageMaker execution role
    workflow_execution_role: ARN of the StepFunction execution role
    return_yaml: Return YAML representation or not; if False, it returns an instance
        of `stepfunctions.workflow.WorkflowObject`
    dump_yaml_file: If not None, a YAML file will be generated at this file location
    """
    # Pass required parameters dynamically for each execution using placeholders.
    execution_input = ExecutionInput(
        schema={
            "PreprocessingJobName": str,
            "PreprocessingInferJobName": str,
            "PreprocessingCodeURL": str,
            "MonitorTrainOutputURL": str,
            "MonitorInferOutputURL": str,
            "InputDataURL": str,
            "InferDataURL": str,
        })

    """
    Custom container for monitoring
    """
    image = "mlmax-processing-monitor"
    img_uri = f"{account}.dkr.ecr.{region}.amazonaws.com/{image}:latest"
    processor = ScriptProcessor(
        image_uri=img_uri,
        role=sm_role,
        instance_count=16,
        instance_type="ml.m5.2xlarge",
        command=["/opt/program/submit"],
        max_runtime_in_seconds=3600,
        env={"mode": "python"},
    )

    #############################
    # Baseline
    #############################
    # Create ProcessingInputs and ProcessingOutputs objects for Inputs and
    # Outputs respectively for the SageMaker Processing Job
    inputs = [
        ProcessingInput(
            source=execution_input["InputDataURL"],
            destination="/opt/ml/processing/train_input",
            input_name="train-input-data",
        ),
        ProcessingInput(
            source=execution_input["PreprocessingCodeURL"],
            destination="/opt/ml/processing/input/code",
            input_name="code",
        ),
    ]
    outputs = [
        ProcessingOutput(
            source="/opt/ml/processing/profiling/inference",
            destination=execution_input["MonitorTrainOutputURL"],
            output_name="baseline-data",
        )
    ]

    processing_step = ProcessingStep(
        "SageMaker pre-processing Baseline",
        processor=processor,
        job_name=execution_input["PreprocessingJobName"],
        inputs=inputs,
        outputs=outputs,
        container_arguments=["--train-test-split-ratio", "0.2", "--mode", "train"],
        container_entrypoint=[
            "python3",
            "/opt/ml/processing/input/code/monitoring.py",
        ],
    )

    #############################
    # Inference
    #############################
    inputs = [
        ProcessingInput(
            source=execution_input["InferDataURL"],
            destination="/opt/ml/processing/infer_input",
            input_name="infer-input-data",
        ),
        ProcessingInput(
            source=execution_input["MonitorTrainOutputURL"],
            destination="/opt/ml/processing/profiling",
            input_name="baseline-data",
        ),
        ProcessingInput(
            source=execution_input["PreprocessingCodeURL"],
            destination="/opt/ml/processing/input/code",
            input_name="code",
        ),
    ]
    outputs = [
        ProcessingOutput(
            source="/opt/ml/processing/profiling/inference",
            destination=execution_input["MonitorInferOutputURL"],
            output_name="monitor-output",
        )
    ]

    processing_step_inference = ProcessingStep(
        "SageMaker pre-processing Inference",
        processor=processor,
        job_name=execution_input["PreprocessingInferJobName"],
        inputs=inputs,
        outputs=outputs,
        container_arguments=["--mode", "infer"],
        container_entrypoint=[
            "python3",
            "/opt/ml/processing/input/code/monitoring.py",
        ],
    )

    # Create Fail state to mark the workflow failed in case any of the steps fail.
    failed_state_sagemaker_processing_failure = stepfunctions.steps.states.Fail(
        "ML Workflow failed", cause="SageMakerProcessingJobFailed")

    # Add the Error handling in the workflow
    catch_state_processing = stepfunctions.steps.states.Catch(
        error_equals=["States.TaskFailed"],
        next_step=failed_state_sagemaker_processing_failure,
    )
    processing_step.add_catch(catch_state_processing)
    processing_step_inference.add_catch(catch_state_processing)

    # Create the Workflow
    workflow_graph = Chain([processing_step, processing_step_inference])
    data_pipeline = Workflow(
        name=data_pipeline_name,
        definition=workflow_graph,
        role=workflow_execution_role,
    )
    return data_pipeline
job_name_prefix = params['job-name-prefix']
# job_name = job_name_prefix + '-' + timestamp
sagemaker_role = params['sagemaker-role-arn']

# prepro_job_name = 'prepro-' + job_name
# train_job_name = 'train-' + job_name
# eval_job_name = 'eval-' + job_name
prepro_job_name = params['prep-job-name']
train_job_name = params['train-job-name']
eval_job_name = params['eval-job-name']

execution_input = ExecutionInput(
    schema={
        "PreprocessingJobName": str,
        "TrainingJobName": str,
        "EvaluationJobName": str,
    }
)

pre_processor = create_prepro_processing(params, prepro_job_name, sagemaker_role)
processing_step = create_prepro_step(params, pre_processor, execution_input)

estimator = create_estimator(params, sagemaker_role)
training_step = create_training_step(params, estimator, execution_input)

model_evaluation_processor = create_evaluation_processor(params, sagemaker_role)
evaluation_step = create_evaluation_step(
from stepfunctions.workflow import Workflow
from stepfunctions.template import TrainingPipeline
from stepfunctions.template.utils import replace_parameters_with_jsonpath

stepfunctions.set_stream_logger(level=logging.INFO)

region = boto3.Session().region_name

# Create a schema for input
event_input = ExecutionInput(
    schema={
        'BuildId': str,
        'Job': str,
        'Model': str,
        'Endpoint': str,
        'ecrArn': str,
        'dataBucketPath': str,
        'authorDate': str,
        'DynamoDBTable': str,
        'triggerSource': str,
        'commitId': str,
    })

# Define static variables determined by appsec
sagemaker_role = 'arn:aws:iam::029186701721:role/qls-28580-acffd3aac73526af-SageMakerRole-R671IS83H4LJ'
workflow_role = 'arn:aws:iam::029186701721:role/qls-28580-acffd3aac73526af-StepFunctionsRole-13AGQ50ASU7XJ'
ecr_Arn = 'latest'
state_machine_arn = 'arn:aws:states:us-west-2:029186701721:stateMachine:trainingStateMachine-Z9vntGZ6ypil'
state_machine_name = 'trainingStateMachine-Z9vntGZ6ypil'
dynamoDBTable = 'qls-28580-acffd3aac73526af-DynamoDBTable-460366LPOX1P'
endpoint_wait_lambda = 'arn:aws:lambda:us-west-2:029186701721:function:qls-28580-acffd3aac73526af-endpointWaitLambda-W5QtjVqVyuJB'
framework_version="0.90-2", py_version="py3", role=sagemaker_execution_role, debugger_hook_config=debug_hook_config, rules=debug_rules) # Upload model code to s3 xgb.prepare_workflow_for_training(job_name) print('uploaded code to: {}'.format(xgb.uploaded_code.s3_prefix)) # Create Workflow steps execution_input = ExecutionInput(schema={ 'TrainLocation': str, 'ValidationLocation': str, 'EndpointName': str }) execution_params = { 'TrainLocation': input_train_path, 'ValidationLocation': input_validation_path, 'EndpointName': endpoint_name } training_step = steps.TrainingStep( 'Train Step', estimator=xgb, data={ 'train': sagemaker.s3_input(execution_input['TrainLocation'], content_type='libsvm'),
def test_parallel_state_with_placeholders():
    workflow_input = ExecutionInput()

    parallel_state = Parallel('ParallelState01')

    branch_A = Pass(
        'Branch_A',
        parameters={
            'ParamA': parallel_state.output()['A']["B"],
            'ParamB': workflow_input["Key01"]
        }
    )
    branch_B = Pass(
        'Branch_B',
        parameters={
            'ParamA': "TestValue",
            'ParamB': parallel_state.output()["Response"]["Key"]["State"]
        }
    )
    branch_C = Pass(
        'Branch_C',
        parameters={
            'ParamA': parallel_state.output()['A']["B"].get("C", float),
            'ParamB': "HelloWorld"
        }
    )

    parallel_state.add_branch(branch_A)
    parallel_state.add_branch(branch_B)
    parallel_state.add_branch(branch_C)

    workflow_definition = Chain([parallel_state])
    result = Graph(workflow_definition).to_dict()

    expected_repr = {
        "StartAt": "ParallelState01",
        "States": {
            "ParallelState01": {
                "Type": "Parallel",
                "End": True,
                "Branches": [
                    {
                        "StartAt": "Branch_A",
                        "States": {
                            "Branch_A": {
                                "Parameters": {
                                    "ParamA.$": "$['A']['B']",
                                    "ParamB.$": "$$.Execution.Input['Key01']"
                                },
                                "Type": "Pass",
                                "End": True
                            }
                        }
                    },
                    {
                        "StartAt": "Branch_B",
                        "States": {
                            "Branch_B": {
                                "Parameters": {
                                    "ParamA": "TestValue",
                                    "ParamB.$": "$['Response']['Key']['State']"
                                },
                                "Type": "Pass",
                                "End": True
                            }
                        }
                    },
                    {
                        "StartAt": "Branch_C",
                        "States": {
                            "Branch_C": {
                                "Parameters": {
                                    "ParamA.$": "$['A']['B']['C']",
                                    "ParamB": "HelloWorld"
                                },
                                "Type": "Pass",
                                "End": True
                            }
                        }
                    }
                ]
            }
        }
    }

    assert result == expected_repr