Example #1
def test_pipeline_override_experiment_config():
    pipeline = Pipeline(
        name="MyPipeline",
        pipeline_experiment_config=PipelineExperimentConfig(
            "MyExperiment", "MyTrial"),
        steps=[CustomStep(name="MyStep", input_data="input")],
        sagemaker_session=sagemaker_session_mock,
    )
    assert ordered(json.loads(pipeline.definition())) == ordered({
        "Version": "2020-12-01",
        "Metadata": {},
        "Parameters": [],
        "PipelineExperimentConfig": {
            "ExperimentName": "MyExperiment",
            "TrialName": "MyTrial",
        },
        "Steps": [{
            "Name": "MyStep",
            "Type": "Training",
            "Arguments": {"input_data": "input"},
        }],
    })
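These excerpts share some scaffolding that is not shown: `json`, the SageMaker workflow imports, and an `ordered` helper that makes the dict comparisons order-insensitive. A minimal sketch of the helper (the real one lives in the test suite's shared helpers module; the `key=repr` trick is an assumption added here to keep mixed-type lists sortable in Python 3):

import json

def ordered(obj):
    # Recursively sort mappings and lists so two JSON-like structures
    # can be compared without caring about key or element order.
    if isinstance(obj, dict):
        return sorted((k, ordered(v)) for k, v in obj.items())
    if isinstance(obj, list):
        return sorted((ordered(x) for x in obj), key=repr)
    return obj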
Example #2
def test_processing_step_with_framework_processor(
    framework_processor, pipeline_session, processing_input, network_config
):

    processor, run_inputs = framework_processor
    processor.sagemaker_session = pipeline_session
    processor.role = sagemaker.get_execution_role()

    processor.volume_kms_key = "volume-kms-key"
    processor.network_config = network_config

    run_inputs["inputs"] = processing_input

    step_args = processor.run(**run_inputs)

    step = ProcessingStep(
        name="MyProcessingStep",
        step_args=step_args,
    )
    pipeline = Pipeline(
        name="MyPipeline",
        steps=[step],
        sagemaker_session=pipeline_session,
    )

    assert json.loads(pipeline.definition())["Steps"][0] == {
        "Name": "MyProcessingStep",
        "Type": "Processing",
        "Arguments": step_args,
    }
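The `pipeline_session` fixture used above is a `PipelineSession` from `sagemaker.workflow.pipeline_context`: under it, calls such as `processor.run(...)` and `estimator.fit(...)` do not launch jobs but return the step arguments used to build the pipeline definition. A minimal sketch of such a fixture, assuming default AWS credentials are available:

import pytest
from sagemaker.workflow.pipeline_context import PipelineSession

@pytest.fixture
def pipeline_session():
    # Captures .run()/.fit()/.transform() requests as step arguments
    # instead of invoking the SageMaker API immediately.
    return PipelineSession()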
Example #3
def test_pipeline_execution_basics(sagemaker_session_mock):
    sagemaker_session_mock.sagemaker_client.start_pipeline_execution.return_value = {
        "PipelineExecutionArn": "my:arn"
    }
    sagemaker_session_mock.sagemaker_client.list_pipeline_execution_steps.return_value = {
        "PipelineExecutionSteps": [Mock()]
    }
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[ParameterString("alpha", "beta"), ParameterString("gamma", "delta")],
        steps=[],
        sagemaker_session=sagemaker_session_mock,
    )
    execution = pipeline.start()
    execution.stop()
    sagemaker_session_mock.sagemaker_client.stop_pipeline_execution.assert_called_with(
        PipelineExecutionArn="my:arn"
    )
    execution.describe()
    sagemaker_session_mock.sagemaker_client.describe_pipeline_execution.assert_called_with(
        PipelineExecutionArn="my:arn"
    )
    steps = execution.list_steps()
    sagemaker_session_mock.sagemaker_client.list_pipeline_execution_steps.assert_called_with(
        PipelineExecutionArn="my:arn"
    )
    assert len(steps) == 1
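The `sagemaker_session_mock` fixture is also omitted from these excerpts; a plain `Mock` is enough for this style of test, since every `sagemaker_client.*` attribute is auto-created as a child mock that records its call arguments. A possible sketch:

import pytest
from unittest.mock import Mock

@pytest.fixture
def sagemaker_session_mock():
    session = Mock()
    # Newer SDK versions branch on this flag when creating pipelines,
    # and a bare Mock attribute would be truthy.
    session.local_mode = False
    return session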
Example #4
def test_fail_step_with_join_fn_in_error_message():
    param = ParameterInteger(name="MyInt", default_value=2)
    cond = ConditionEquals(left=param, right=1)
    step_cond = ConditionStep(
        name="CondStep",
        conditions=[cond],
        if_steps=[],
        else_steps=[],
    )
    step_fail = FailStep(
        name="FailStep",
        error_message=Join(
            on=": ",
            values=[
                "Failed due to xxx == yyy returns",
                step_cond.properties.Outcome,
            ],
        ),
    )
    pipeline = Pipeline(
        name="MyPipeline",
        steps=[step_cond, step_fail],
        parameters=[param],
    )

    _expected_dsl = [
        {
            "Name": "CondStep",
            "Type": "Condition",
            "Arguments": {
                "Conditions": [{
                    "Type": "Equals",
                    "LeftValue": {
                        "Get": "Parameters.MyInt"
                    },
                    "RightValue": 1
                }],
                "IfSteps": [],
                "ElseSteps": [],
            },
        },
        {
            "Name": "FailStep",
            "Type": "Fail",
            "Arguments": {
                "ErrorMessage": {
                    "Std:Join": {
                        "On":
                        ": ",
                        "Values": [
                            "Failed due to xxx == yyy returns",
                            {
                                "Get": "Steps.CondStep.Outcome"
                            },
                        ],
                    }
                }
            },
        },
    ]

    assert json.loads(pipeline.definition())["Steps"] == _expected_dsl
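`Join` renders to the `Std:Join` node seen in the expected DSL above. A quick self-contained check of that serialization (assuming current SDK behavior):

from sagemaker.workflow.functions import Join

join = Join(on=": ", values=["a", "b"])
assert join.expr == {"Std:Join": {"On": ": ", "Values": ["a", "b"]}}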
Example #5
def test_training_step_with_framework_estimator(estimator, pipeline_session,
                                                training_input,
                                                hyperparameters):
    estimator.source_dir = DUMMY_S3_SOURCE_DIR
    estimator.set_hyperparameters(**hyperparameters)
    estimator.volume_kms_key = "volume-kms-key"
    estimator.output_kms_key = "output-kms-key"
    estimator.dependencies = ["dep-1", "dep-2"]

    estimator.sagemaker_session = pipeline_session
    step_args = estimator.fit(inputs=training_input)

    step = TrainingStep(
        name="MyTrainingStep",
        step_args=step_args,
    )
    pipeline = Pipeline(
        name="MyPipeline",
        steps=[step],
        sagemaker_session=pipeline_session,
    )

    assert json.loads(pipeline.definition())["Steps"][0] == {
        "Name": "MyTrainingStep",
        "Type": "Training",
        "Arguments": step_args,
    }
Example #6
def test_pipeline_interpolates_lambda_outputs(sagemaker_session):
    parameter = ParameterString("MyStr")
    output_param1 = LambdaOutput(output_name="output1", output_type=LambdaOutputTypeEnum.String)
    output_param2 = LambdaOutput(output_name="output2", output_type=LambdaOutputTypeEnum.String)
    lambda_step1 = LambdaStep(
        name="MyLambdaStep1",
        depends_on=["TestStep"],
        lambda_func=Lambda(
            function_arn="arn:aws:lambda:us-west-2:123456789012:function:sagemaker_test_lambda",
            session=sagemaker_session,
        ),
        inputs={"arg1": "foo"},
        outputs=[output_param1],
    )
    lambda_step2 = LambdaStep(
        name="MyLambdaStep2",
        depends_on=["TestStep"],
        lambda_func=Lambda(
            function_arn="arn:aws:lambda:us-west-2:123456789012:function:sagemaker_test_lambda",
            session=sagemaker_session,
        ),
        inputs={"arg1": output_param1},
        outputs=[output_param2],
    )

    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[parameter],
        steps=[lambda_step1, lambda_step2],
        sagemaker_session=sagemaker_session,
    )

    assert json.loads(pipeline.definition()) == {
        "Version": "2020-12-01",
        "Metadata": {},
        "Parameters": [{"Name": "MyStr", "Type": "String"}],
        "PipelineExperimentConfig": {
            "ExperimentName": {"Get": "Execution.PipelineName"},
            "TrialName": {"Get": "Execution.PipelineExecutionId"},
        },
        "Steps": [
            {
                "Name": "MyLambdaStep1",
                "Type": "Lambda",
                "Arguments": {"arg1": "foo"},
                "DependsOn": ["TestStep"],
                "FunctionArn": "arn:aws:lambda:us-west-2:123456789012:function:sagemaker_test_lambda",
                "OutputParameters": [{"OutputName": "output1", "OutputType": "String"}],
            },
            {
                "Name": "MyLambdaStep2",
                "Type": "Lambda",
                "Arguments": {"arg1": {"Get": "Steps.MyLambdaStep1.OutputParameters['output1']"}},
                "DependsOn": ["TestStep"],
                "FunctionArn": "arn:aws:lambda:us-west-2:123456789012:function:sagemaker_test_lambda",
                "OutputParameters": [{"OutputName": "output2", "OutputType": "String"}],
            },
        ],
    }
Example #7
def test_tuning_step_with_single_algo_tuner(pipeline_session, entry_point):
    inputs = TrainingInput(
        s3_data=f"s3://{pipeline_session.default_bucket()}/training-data")

    pytorch_estimator = PyTorch(
        entry_point=entry_point,
        role=sagemaker.get_execution_role(),
        framework_version="1.5.0",
        py_version="py3",
        instance_count=1,
        instance_type="ml.m5.xlarge",
        sagemaker_session=pipeline_session,
        enable_sagemaker_metrics=True,
        max_retry_attempts=3,
    )

    hyperparameter_ranges = {
        "batch-size": IntegerParameter(64, 128),
    }

    tuner = HyperparameterTuner(
        estimator=pytorch_estimator,
        objective_metric_name="test:acc",
        objective_type="Maximize",
        hyperparameter_ranges=hyperparameter_ranges,
        metric_definitions=[{
            "Name": "test:acc",
            "Regex": "Overall test accuracy: (.*?);"
        }],
        max_jobs=2,
        max_parallel_jobs=2,
    )

    with warnings.catch_warnings(record=True) as w:
        step_args = tuner.fit(inputs=inputs)
        assert len(w) == 1
        assert issubclass(w[-1].category, UserWarning)
        assert "Running within a PipelineSession" in str(w[-1].message)

    with warnings.catch_warnings(record=True) as w:
        step = TuningStep(
            name="MyTuningStep",
            step_args=step_args,
        )
        assert len(w) == 0

    pipeline = Pipeline(
        name="MyPipeline",
        steps=[step],
        sagemaker_session=pipeline_session,
    )

    assert json.loads(pipeline.definition())["Steps"][0] == {
        "Name": "MyTuningStep",
        "Type": "Tuning",
        "Arguments": step_args,
    }
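One caveat about the `warnings.catch_warnings(record=True)` pattern used in these tests: Python deduplicates identical warnings per call site, so a second run in the same process can record nothing. Resetting the filter inside the context avoids that; a self-contained illustration:

import warnings

def emit():
    warnings.warn("Running within a PipelineSession ...", UserWarning)

with warnings.catch_warnings(record=True) as w:
    warnings.simplefilter("always")  # defeat per-call-site deduplication
    emit()
    emit()
    assert len(w) == 2  # without simplefilter("always"), the repeat may be dropped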
Example #8
def test_processing_step_with_processor(pipeline_session, processing_input):
    processor = Processor(
        image_uri=IMAGE_URI,
        role=sagemaker.get_execution_role(),
        instance_count=1,
        instance_type=INSTANCE_TYPE,
        sagemaker_session=pipeline_session,
    )

    with warnings.catch_warnings(record=True) as w:
        step_args = processor.run(inputs=processing_input)
        assert len(w) == 1
        assert issubclass(w[-1].category, UserWarning)
        assert "Running within a PipelineSession" in str(w[-1].message)

    cache_config = CacheConfig(enable_caching=True, expire_after="PT1H")
    evaluation_report = PropertyFile(
        name="EvaluationReport", output_name="evaluation", path="evaluation.json"
    )

    with warnings.catch_warnings(record=True) as w:
        step = ProcessingStep(
            name="MyProcessingStep",
            step_args=step_args,
            description="ProcessingStep description",
            display_name="MyProcessingStep",
            depends_on=["TestStep", "SecondTestStep"],
            cache_config=cache_config,
            property_files=[evaluation_report],
        )
        assert len(w) == 0

    pipeline = Pipeline(
        name="MyPipeline",
        steps=[step],
        sagemaker_session=pipeline_session,
    )
    assert json.loads(pipeline.definition())["Steps"][0] == {
        "Name": "MyProcessingStep",
        "Description": "ProcessingStep description",
        "DisplayName": "MyProcessingStep",
        "Type": "Processing",
        "DependsOn": ["TestStep", "SecondTestStep"],
        "Arguments": step_args,
        "CacheConfig": {"Enabled": True, "ExpireAfter": "PT1H"},
        "PropertyFiles": [
            {
                "FilePath": "evaluation.json",
                "OutputName": "evaluation",
                "PropertyFileName": "EvaluationReport",
            }
        ],
    }
    assert step.properties.ProcessingJobName.expr == {
        "Get": "Steps.MyProcessingStep.ProcessingJobName"
    }
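`IMAGE_URI` and `INSTANCE_TYPE` are module-level constants omitted from the excerpt; any syntactically valid values will render a definition. Hypothetical placeholders:

# Hypothetical values; the real test module defines its own constants.
IMAGE_URI = "123456789012.dkr.ecr.us-west-2.amazonaws.com/my-image:latest"
INSTANCE_TYPE = "ml.m5.xlarge"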
Example #9
def test_pipeline_describe(sagemaker_session_mock):
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[],
        steps=[],
        sagemaker_session=sagemaker_session_mock,
    )
    pipeline.describe()
    sagemaker_session_mock.sagemaker_client.describe_pipeline.assert_called_with(
        PipelineName="MyPipeline"
    )
Example #10
def test_pipeline_start_before_creation(sagemaker_session_mock):
    sagemaker_session_mock.sagemaker_client.describe_pipeline.side_effect = ClientError({}, "bar")
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[ParameterString("alpha", "beta"), ParameterString("gamma", "delta")],
        steps=[],
        sagemaker_session=sagemaker_session_mock,
    )
    with pytest.raises(ValueError):
        pipeline.start()
Example #11
def test_pipeline_basic():
    parameter = ParameterString("MyStr")
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[parameter],
        steps=[CustomStep(name="MyStep", input_data=parameter)],
        sagemaker_session=sagemaker_session_mock,
    )
    assert pipeline.to_request() == {
        "Version": "2020-12-01",
        "Metadata": {},
        "Parameters": [{"Name": "MyStr", "Type": "String"}],
        "PipelineExperimentConfig": {
            "ExperimentName": ExecutionVariables.PIPELINE_NAME,
            "TrialName": ExecutionVariables.PIPELINE_EXECUTION_ID,
        },
        "Steps": [{
            "Name": "MyStep",
            "Type": "Training",
            "Arguments": {"input_data": parameter},
        }],
    }
    assert ordered(json.loads(pipeline.definition())) == ordered({
        "Version": "2020-12-01",
        "Metadata": {},
        "Parameters": [{"Name": "MyStr", "Type": "String"}],
        "PipelineExperimentConfig": {
            "ExperimentName": {"Get": "Execution.PipelineName"},
            "TrialName": {"Get": "Execution.PipelineExecutionId"},
        },
        "Steps": [{
            "Name": "MyStep",
            "Type": "Training",
            "Arguments": {"input_data": {"Get": "Parameters.MyStr"}},
        }],
    })
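The two assertions differ only in the serialization boundary: `to_request()` keeps Python objects such as `ParameterString`, while `definition()` renders them to `{"Get": ...}` expressions. The rendered form is exposed directly on the parameter:

from sagemaker.workflow.parameters import ParameterString

assert ParameterString("MyStr").expr == {"Get": "Parameters.MyStr"}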
Example #12
def test_pipeline_create(sagemaker_session_mock, role_arn):
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[],
        steps=[],
        sagemaker_session=sagemaker_session_mock,
    )
    pipeline.create(role_arn=role_arn)
    sagemaker_session_mock.sagemaker_client.create_pipeline.assert_called_with(
        PipelineName="MyPipeline", PipelineDefinition=pipeline.definition(), RoleArn=role_arn
    )
Example #13
def test_pipeline_interpolates_callback_outputs():
    parameter = ParameterString("MyStr")
    outputParam1 = CallbackOutput(output_name="output1", output_type=CallbackOutputTypeEnum.String)
    outputParam2 = CallbackOutput(output_name="output2", output_type=CallbackOutputTypeEnum.String)
    cb_step1 = CallbackStep(
        name="MyCallbackStep1",
        depends_on=["TestStep"],
        sqs_queue_url="https://sqs.us-east-2.amazonaws.com/123456789012/MyQueue",
        inputs={"arg1": "foo"},
        outputs=[outputParam1],
    )
    cb_step2 = CallbackStep(
        name="MyCallbackStep2",
        depends_on=["TestStep"],
        sqs_queue_url="https://sqs.us-east-2.amazonaws.com/123456789012/MyQueue",
        inputs={"arg1": outputParam1},
        outputs=[outputParam2],
    )

    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[parameter],
        steps=[cb_step1, cb_step2],
        sagemaker_session=sagemaker_session_mock,
    )

    assert json.loads(pipeline.definition()) == {
        "Version": "2020-12-01",
        "Metadata": {},
        "Parameters": [{"Name": "MyStr", "Type": "String"}],
        "PipelineExperimentConfig": {
            "ExperimentName": {"Get": "Execution.PipelineName"},
            "TrialName": {"Get": "Execution.PipelineExecutionId"},
        },
        "Steps": [
            {
                "Name": "MyCallbackStep1",
                "Type": "Callback",
                "Arguments": {"arg1": "foo"},
                "DependsOn": ["TestStep"],
                "SqsQueueUrl": "https://sqs.us-east-2.amazonaws.com/123456789012/MyQueue",
                "OutputParameters": [{"OutputName": "output1", "OutputType": "String"}],
            },
            {
                "Name": "MyCallbackStep2",
                "Type": "Callback",
                "Arguments": {"arg1": {"Get": "Steps.MyCallbackStep1.OutputParameters['output1']"}},
                "DependsOn": ["TestStep"],
                "SqsQueueUrl": "https://sqs.us-east-2.amazonaws.com/123456789012/MyQueue",
                "OutputParameters": [{"OutputName": "output2", "OutputType": "String"}],
            },
        ],
    }
Example #14
def test_pipeline_start(sagemaker_session_mock):
    sagemaker_session_mock.sagemaker_client.start_pipeline_execution.return_value = {
        "PipelineExecutionArn": "my:arn"
    }
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[
            ParameterString("alpha", "beta"),
            ParameterString("gamma", "delta")
        ],
        steps=[],
        sagemaker_session=sagemaker_session_mock,
    )
    pipeline.start()
    sagemaker_session_mock.sagemaker_client.start_pipeline_execution.assert_called_with(
        PipelineName="MyPipeline"
    )

    pipeline.start(execution_display_name="pipeline-execution")
    sagemaker_session_mock.sagemaker_client.start_pipeline_execution.assert_called_with(
        PipelineName="MyPipeline", PipelineExecutionDisplayName="pipeline-execution"
    )

    pipeline.start(parameters=dict(alpha="epsilon"))
    sagemaker_session_mock.sagemaker_client.start_pipeline_execution.assert_called_with(
        PipelineName="MyPipeline",
        PipelineParameters=[{"Name": "alpha", "Value": "epsilon"}],
    )
Example #15
def test_transform_step_with_transformer(pipeline_session):
    model_name = ParameterString("ModelName")
    transformer = Transformer(
        model_name=model_name,
        instance_type="ml.m5.xlarge",
        instance_count=1,
        output_path=f"s3://{pipeline_session.default_bucket()}/Transform",
        sagemaker_session=pipeline_session,
    )

    transform_inputs = TransformInput(
        data=f"s3://{pipeline_session.default_bucket()}/batch-data",
    )

    with warnings.catch_warnings(record=True) as w:
        step_args = transformer.transform(
            data=transform_inputs.data,
            data_type=transform_inputs.data_type,
            content_type=transform_inputs.content_type,
            compression_type=transform_inputs.compression_type,
            split_type=transform_inputs.split_type,
            input_filter=transform_inputs.input_filter,
            output_filter=transform_inputs.output_filter,
            join_source=transform_inputs.join_source,
            model_client_config=transform_inputs.model_client_config,
        )
        assert len(w) == 1
        assert issubclass(w[-1].category, UserWarning)
        assert "Running within a PipelineSession" in str(w[-1].message)

    with warnings.catch_warnings(record=True) as w:
        step = TransformStep(
            name="MyTransformStep",
            step_args=step_args,
        )
        assert len(w) == 0

    pipeline = Pipeline(
        name="MyPipeline",
        steps=[step],
        parameters=[model_name],
        sagemaker_session=pipeline_session,
    )
    step_args["ModelName"] = model_name.expr
    assert json.loads(pipeline.definition())["Steps"][0] == {
        "Name": "MyTransformStep",
        "Type": "Transform",
        "Arguments": step_args,
    }
Example #16
def test_two_step_lambda_pipeline_with_output_reference(
    sagemaker_session, role, pipeline_name, region_name
):
    instance_count = ParameterInteger(name="InstanceCount", default_value=2)

    outputParam1 = LambdaOutput(output_name="output1", output_type=LambdaOutputTypeEnum.String)
    step_lambda1 = LambdaStep(
        name="lambda-step1",
        lambda_func=Lambda(
            function_arn="arn:aws:lambda:us-west-2:123456789012:function:sagemaker_test_lambda",
            session=sagemaker_session,
        ),
        inputs={"arg1": "foo"},
        outputs=[outputParam1],
    )

    step_lambda2 = LambdaStep(
        name="lambda-step2",
        lambda_func=Lambda(
            function_arn="arn:aws:lambda:us-west-2:123456789012:function:sagemaker_test_lambda",
            session=sagemaker_session,
        ),
        inputs={"arg1": outputParam1},
        outputs=[],
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[instance_count],
        steps=[step_lambda1, step_lambda2],
        sagemaker_session=sagemaker_session,
    )

    try:
        response = pipeline.create(role)
        create_arn = response["PipelineArn"]
        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
            create_arn,
        )
    finally:
        try:
            pipeline.delete()
        except Exception:
            pass
Example #17
def verify(step_args):
    # Nested helper from a larger parametrized test; `pipeline_session`
    # comes from the enclosing scope.
    step = ProcessingStep(
        name="MyProcessingStep",
        step_args=step_args,
    )
    pipeline = Pipeline(
        name="MyPipeline",
        steps=[step],
        sagemaker_session=pipeline_session,
    )
    assert json.loads(pipeline.definition())["Steps"][0] == {
        "Name": "MyProcessingStep",
        "Type": "Processing",
        "Arguments": step_args,
    }
Example #18
def test_pipeline_create_with_parallelism_config(sagemaker_session_mock, role_arn):
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[],
        steps=[],
        sagemaker_session=sagemaker_session_mock,
    )
    # The parallelism config belongs to the create call, not to the
    # pipeline_experiment_config argument.
    pipeline.create(
        role_arn=role_arn,
        parallelism_config=dict(MaxParallelExecutionSteps=10),
    )
    sagemaker_session_mock.sagemaker_client.create_pipeline.assert_called_with(
        PipelineName="MyPipeline",
        PipelineDefinition=pipeline.definition(),
        RoleArn=role_arn,
        ParallelismConfiguration={"MaxParallelExecutionSteps": 10},
    )
Example #19
def test_two_steps_emr_pipeline(sagemaker_session, role, pipeline_name, region_name):
    instance_count = ParameterInteger(name="InstanceCount", default_value=2)

    emr_step_config = EMRStepConfig(
        jar="s3://us-west-2.elasticmapreduce/libs/script-runner/script-runner.jar",
        args=["dummy_emr_script_path"],
    )

    step_emr_1 = EMRStep(
        name="emr-step-1",
        cluster_id="j-1YONHTCP3YZKC",
        display_name="emr_step_1",
        description="MyEMRStepDescription",
        step_config=emr_step_config,
    )

    step_emr_2 = EMRStep(
        name="emr-step-2",
        cluster_id=step_emr_1.properties.ClusterId,
        display_name="emr_step_2",
        description="MyEMRStepDescription",
        step_config=emr_step_config,
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[instance_count],
        steps=[step_emr_1, step_emr_2],
        sagemaker_session=sagemaker_session,
    )

    try:
        response = pipeline.create(role)
        create_arn = response["PipelineArn"]
        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
            create_arn,
        )
    finally:
        try:
            pipeline.delete()
        except Exception:
            pass
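`step_emr_1.properties.ClusterId` is a property reference, which is what lets `emr-step-2` target the cluster created by `emr-step-1`. Continuing from the example above, it presumably renders following the `Steps.<name>.<property>` convention seen in the other tests (an assumption, not verified against the SDK):

# Assumed rendering of the property reference passed as cluster_id above.
assert step_emr_1.properties.ClusterId.expr == {"Get": "Steps.emr-step-1.ClusterId"}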
Example #20
def test_create_and_update_with_parallelism_config(
    sagemaker_session, role, pipeline_name, region_name
):
    instance_count = ParameterInteger(name="InstanceCount", default_value=2)

    outputParam = CallbackOutput(output_name="output", output_type=CallbackOutputTypeEnum.String)

    callback_steps = [
        CallbackStep(
            name=f"callback-step{count}",
            sqs_queue_url="https://sqs.us-east-2.amazonaws.com/123456789012/MyQueue",
            inputs={"arg1": "foo"},
            outputs=[outputParam],
        )
        for count in range(500)
    ]
    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[instance_count],
        steps=callback_steps,
        sagemaker_session=sagemaker_session,
    )

    try:
        response = pipeline.create(role, parallelism_config={"MaxParallelExecutionSteps": 50})
        create_arn = response["PipelineArn"]
        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
            create_arn,
        )
        response = pipeline.describe()
        assert response["ParallelismConfiguration"]["MaxParallelExecutionSteps"] == 50

        pipeline.parameters = [ParameterInteger(name="InstanceCount", default_value=1)]
        response = pipeline.update(role, parallelism_config={"MaxParallelExecutionSteps": 55})
        update_arn = response["PipelineArn"]
        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
            update_arn,
        )

        response = pipeline.describe()
        assert response["ParallelismConfiguration"]["MaxParallelExecutionSteps"] == 55

    finally:
        try:
            pipeline.delete()
        except Exception:
            pass
Example #21
def test_training_step_with_estimator(pipeline_session, training_input,
                                      hyperparameters):
    estimator = Estimator(
        role=sagemaker.get_execution_role(),
        instance_count=1,
        instance_type=INSTANCE_TYPE,
        sagemaker_session=pipeline_session,
        image_uri=IMAGE_URI,
        hyperparameters=hyperparameters,
    )

    with warnings.catch_warnings(record=True) as w:
        step_args = estimator.fit(inputs=training_input)
        assert len(w) == 1
        assert issubclass(w[-1].category, UserWarning)
        assert "Running within a PipelineSession" in str(w[-1].message)

    with warnings.catch_warnings(record=True) as w:
        step = TrainingStep(
            name="MyTrainingStep",
            step_args=step_args,
            description="TrainingStep description",
            display_name="MyTrainingStep",
            depends_on=["TestStep", "SecondTestStep"],
        )
        assert len(w) == 0

    pipeline = Pipeline(
        name="MyPipeline",
        steps=[step],
        sagemaker_session=pipeline_session,
    )
    assert json.loads(pipeline.definition())["Steps"][0] == {
        "Name": "MyTrainingStep",
        "Description": "TrainingStep description",
        "DisplayName": "MyTrainingStep",
        "Type": "Training",
        "DependsOn": ["TestStep", "SecondTestStep"],
        "Arguments": step_args,
    }
    assert step.properties.TrainingJobName.expr == {
        "Get": "Steps.MyTrainingStep.TrainingJobName"
    }
Example #22
def test_training_step_with_algorithm_base(algo_estimator, pipeline_session):
    estimator = algo_estimator(
        role=sagemaker.get_execution_role(),
        instance_type=INSTANCE_TYPE,
        instance_count=1,
        sagemaker_session=pipeline_session,
    )
    data = RecordSet(
        "s3://{}/{}".format(pipeline_session.default_bucket(), "dummy"),
        num_records=1000,
        feature_dim=128,
        channel="train",
    )

    with warnings.catch_warnings(record=True) as w:
        step_args = estimator.fit(
            records=data,
            mini_batch_size=1000,
        )
        assert len(w) == 1
        assert issubclass(w[-1].category, UserWarning)
        assert "Running within a PipelineSession" in str(w[-1].message)

    with warnings.catch_warnings(record=True) as w:
        step = TrainingStep(
            name="MyTrainingStep",
            step_args=step_args,
        )
        assert len(w) == 0

    pipeline = Pipeline(
        name="MyPipeline",
        steps=[step],
        sagemaker_session=pipeline_session,
    )
    assert json.loads(pipeline.definition())["Steps"][0] == {
        "Name": "MyTrainingStep",
        "Type": "Training",
        "Arguments": step_args,
    }
    assert step.properties.TrainingJobName.expr == {
        "Get": "Steps.MyTrainingStep.TrainingJobName"
    }
Example #23
def get_pipeline(
    region,
    default_bucket="sagemaker-us-east-1-376337229415",
    model_package_group_name="AbaloneExample",
    pipeline_name="AbaloneExample",
    base_job_prefix="abalone",
):
    """Gets a SageMaker ML Pipeline instance working on Abalone data.

    Args:
        region: AWS region to create and run the pipeline.
        default_bucket: the bucket to use for storing the artifacts.

    Returns:
        an instance of a pipeline
    """
    sagemaker_session = get_session(region, default_bucket)
    role = sagemaker.session.get_execution_role(sagemaker_session)

    # Pipeline parameters. The original snippet referenced these names
    # without defining them; the definitions below are assumed defaults.
    processing_instance_type = ParameterString(
        name="ProcessingInstanceType", default_value="ml.m5.xlarge")
    processing_instance_count = ParameterInteger(
        name="ProcessingInstanceCount", default_value=1)
    training_instance_type = ParameterString(
        name="TrainingInstanceType", default_value="ml.m5.xlarge")
    training_instance_count = ParameterInteger(
        name="TrainingInstanceCount", default_value=1)
    model_approval_status = ParameterString(
        name="ModelApprovalStatus", default_value="PendingManualApproval")
    input_data = ParameterString(
        name="InputDataUrl",
        default_value="s3://{}/abalone/abalone-dataset.csv".format(default_bucket),
    )

    # Processing step for feature engineering
    step_process = processing_job(processing_instance_type,
                                  processing_instance_count,
                                  sagemaker_session,
                                  role)

    # Training step for generating model artifacts
    model_path = f"s3://{sagemaker_session.default_bucket()}/{base_job_prefix}/Abalonetrain"
    image_uri = get_image_uri(framework="xgboost",
                              region=region,
                              version="1.0-1",
                              py_version="py3",
                              training_instance_type=training_instance_type)
    step_train = training_job(image_uri,
                              training_instance_type,
                              training_instance_count,
                              model_path,
                              sagemaker_session,
                              role)

    # Pipeline instance
    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            processing_instance_type,
            processing_instance_count,
            training_instance_type,
            model_approval_status,
            input_data,
        ],
        steps=[step_process, step_train],
        sagemaker_session=sagemaker_session,
    )
    return pipeline
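A possible driver for `get_pipeline` (hypothetical usage, not part of the original module; `role` must be an IAM role ARN with SageMaker permissions):

# Hypothetical usage sketch.
pipeline = get_pipeline(region="us-east-1")
pipeline.upsert(role_arn=role)  # create the pipeline, or update it if it exists
print(pipeline.definition())    # inspect the rendered JSON DSL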
Example #24
def test_pipeline_basic():
    parameter = ParameterString("MyStr")
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[parameter],
        steps=[CustomStep(name="MyStep", input_data=parameter)],
        sagemaker_session=sagemaker_session_mock,
    )
    assert pipeline.to_request() == {
        "Version": "2020-12-01",
        "Metadata": {},
        "Parameters": [{"Name": "MyStr", "Type": "String"}],
        "Steps": [{
            "Name": "MyStep",
            "Type": "Training",
            "Arguments": {"input_data": parameter},
        }],
    }
    assert ordered(json.loads(pipeline.definition())) == ordered({
        "Version": "2020-12-01",
        "Metadata": {},
        "Parameters": [{"Name": "MyStr", "Type": "String"}],
        "Steps": [{
            "Name": "MyStep",
            "Type": "Training",
            "Arguments": {"input_data": {"Get": "Parameters.MyStr"}},
        }],
    })
Example #25
def test_model_bias_check_step(
    sagemaker_session,
    check_job_config,
    model_package_group_name,
    data_config,
    bias_config,
    model_config,
    predictions_config,
):
    model_bias_check_config = ModelBiasCheckConfig(
        data_config=data_config,
        data_bias_config=bias_config,
        model_config=model_config,
        model_predicted_label_config=predictions_config,
        methods="all",
    )
    model_bias_check_step = ClarifyCheckStep(
        name="ModelBiasCheckStep",
        clarify_check_config=model_bias_check_config,
        check_job_config=check_job_config,
        skip_check=False,
        register_new_baseline=False,
        model_package_group_name=model_package_group_name,
        supplied_baseline_constraints="supplied_baseline_constraints",
    )
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[model_package_group_name],
        steps=[model_bias_check_step],
        sagemaker_session=sagemaker_session,
    )

    assert json.loads(pipeline.definition())["Steps"][0] == _expected_model_bias_dsl
    assert re.match(
        f"s3://{_DEFAULT_BUCKET}/{_MODEL_MONITOR_S3_PATH}"
        f"/{_BIAS_MONITORING_CFG_BASE_NAME}-configuration"
        f"/{_BIAS_MONITORING_CFG_BASE_NAME}-config.*/.*/analysis_config.json",
        model_bias_check_config.monitoring_analysis_config_uri,
    )
Example #26
def test_two_step_fail_pipeline_with_str_err_msg(sagemaker_session, role,
                                                 pipeline_name):
    param = ParameterInteger(name="MyInt", default_value=2)
    cond = ConditionEquals(left=param, right=1)
    step_fail = FailStep(
        name="FailStep",
        error_message="Failed due to hitting in else branch",
    )
    step_cond = ConditionStep(
        name="CondStep",
        conditions=[cond],
        if_steps=[],
        else_steps=[step_fail],
    )
    pipeline = Pipeline(
        name=pipeline_name,
        steps=[step_cond],
        sagemaker_session=sagemaker_session,
        parameters=[param],
    )

    try:
        response = pipeline.create(role)
        pipeline_arn = response["PipelineArn"]
        execution = pipeline.start(parameters={})
        response = execution.describe()
        assert response["PipelineArn"] == pipeline_arn

        try:
            execution.wait(delay=30, max_attempts=60)
        except WaiterError:
            pass
        execution_steps = execution.list_steps()

        assert len(execution_steps) == 2
        for execution_step in execution_steps:
            if execution_step["StepName"] == "CondStep":
                assert execution_step["StepStatus"] == "Succeeded"
                continue
            assert execution_step["StepName"] == "FailStep"
            assert execution_step["StepStatus"] == "Failed"
            assert execution_step["FailureReason"] == "Failed due to hitting in else branch"
            metadata = execution_steps[0]["Metadata"]["Fail"]
            assert metadata["ErrorMessage"] == "Failed due to hitting in else branch"

        # Check FailureReason field in ListPipelineExecutions
        executions = sagemaker_session.sagemaker_client.list_pipeline_executions(
            PipelineName=pipeline.name)["PipelineExecutionSummaries"]

        assert len(executions) == 1
        assert executions[0]["PipelineExecutionStatus"] == "Failed"
        assert ("Step failure: One or multiple steps failed"
                in executions[0]["PipelineExecutionFailureReason"])
    finally:
        try:
            pipeline.delete()
        except Exception:
            pass
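`execution.wait(delay=30, max_attempts=60)` polls the execution until a terminal state; because this pipeline is expected to end in `Failed`, the underlying waiter raises, which is why the test swallows `WaiterError` before inspecting the steps. The import it relies on:

from botocore.exceptions import WaiterError  # raised when the waiter observes a failure state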
Example #27
def test_invalid_pipeline_depended_on_fail_step(sagemaker_session, role,
                                                pipeline_name):
    param = ParameterInteger(name="MyInt", default_value=2)
    cond = ConditionEquals(left=param, right=1)
    step_fail = FailStep(
        name="FailStep",
        error_message="Failed pipeline execution",
    )
    step_cond = ConditionStep(
        name="CondStep",
        conditions=[cond],
        if_steps=[],
        else_steps=[],
        depends_on=["FailStep"],
    )
    pipeline = Pipeline(
        name=pipeline_name,
        steps=[step_cond, step_fail],
        sagemaker_session=sagemaker_session,
        parameters=[param],
    )

    try:
        with pytest.raises(Exception) as error:
            pipeline.create(role)

        assert "CondStep can not depends on FailStep" in str(error.value)
    finally:
        try:
            pipeline.delete()
        except Exception:
            pass
Example #28
def test_pipeline_upsert(sagemaker_session_mock, role_arn):
    # The first create attempt fails because the pipeline already exists,
    # so upsert falls back to update.
    sagemaker_session_mock.sagemaker_client.create_pipeline.side_effect = ClientError(
        operation_name="CreatePipeline",
        error_response={
            "Error": {
                "Code": "ValidationException",
                "Message": "Pipeline names must be unique within ...",
            }
        },
    )
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[],
        steps=[],
        sagemaker_session=sagemaker_session_mock,
    )
    pipeline.upsert(role_arn=role_arn)
    sagemaker_session_mock.sagemaker_client.create_pipeline.assert_called_with(
        PipelineName="MyPipeline", PipelineDefinition=pipeline.definition(), RoleArn=role_arn
    )
    sagemaker_session_mock.sagemaker_client.update_pipeline.assert_called_with(
        PipelineName="MyPipeline", PipelineDefinition=pipeline.definition(), RoleArn=role_arn
    )
Example #29
def test_data_bias_check_step(
    sagemaker_session, check_job_config, model_package_group_name, bias_config
):
    data_bias_data_config = DataConfig(
        s3_data_input_path=_S3_INPUT_PATH,
        s3_output_path=_S3_OUTPUT_PATH,
        s3_analysis_config_output_path=_S3_ANALYSIS_CONFIG_OUTPUT_PATH,
        label="fraud",
        dataset_type="text/csv",
    )
    data_bias_check_config = DataBiasCheckConfig(
        data_config=data_bias_data_config,
        data_bias_config=bias_config,
        methods="all",
        kms_key="kms_key",
    )
    data_bias_check_step = ClarifyCheckStep(
        name="DataBiasCheckStep",
        clarify_check_config=data_bias_check_config,
        check_job_config=check_job_config,
        skip_check=False,
        register_new_baseline=False,
        model_package_group_name=model_package_group_name,
        supplied_baseline_constraints="supplied_baseline_constraints",
        cache_config=CacheConfig(enable_caching=True, expire_after="PT1H"),
    )
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[model_package_group_name],
        steps=[data_bias_check_step],
        sagemaker_session=sagemaker_session,
    )

    assert json.loads(pipeline.definition())["Steps"][0] == _expected_data_bias_dsl
    assert re.match(
        f"{_S3_ANALYSIS_CONFIG_OUTPUT_PATH}/{_BIAS_MONITORING_CFG_BASE_NAME}-configuration"
        f"/{_BIAS_MONITORING_CFG_BASE_NAME}-config.*/.*/analysis_config.json",
        data_bias_check_config.monitoring_analysis_config_uri,
    )
Example #30
def test_lambda_step(sagemaker_session):
    param = ParameterInteger(name="MyInt")
    output_param1 = LambdaOutput(output_name="output1", output_type=LambdaOutputTypeEnum.String)
    output_param2 = LambdaOutput(output_name="output2", output_type=LambdaOutputTypeEnum.Boolean)
    cache_config = CacheConfig(enable_caching=True, expire_after="PT1H")
    lambda_step = LambdaStep(
        name="MyLambdaStep",
        depends_on=["TestStep"],
        lambda_func=Lambda(
            function_arn="arn:aws:lambda:us-west-2:123456789012:function:sagemaker_test_lambda",
            session=sagemaker_session,
        ),
        display_name="MyLambdaStep",
        description="MyLambdaStepDescription",
        inputs={"arg1": "foo", "arg2": 5, "arg3": param},
        outputs=[output_param1, output_param2],
        cache_config=cache_config,
    )
    lambda_step.add_depends_on(["SecondTestStep"])
    pipeline = Pipeline(
        name="MyPipeline",
        parameters=[param],
        steps=[lambda_step],
        sagemaker_session=sagemaker_session,
    )
    assert json.loads(pipeline.definition())["Steps"][0] == {
        "Name": "MyLambdaStep",
        "Type": "Lambda",
        "DependsOn": ["TestStep", "SecondTestStep"],
        "DisplayName": "MyLambdaStep",
        "Description": "MyLambdaStepDescription",
        "FunctionArn": "arn:aws:lambda:us-west-2:123456789012:function:sagemaker_test_lambda",
        "OutputParameters": [
            {"OutputName": "output1", "OutputType": "String"},
            {"OutputName": "output2", "OutputType": "Boolean"},
        ],
        "Arguments": {"arg1": "foo", "arg2": 5, "arg3": {"Get": "Parameters.MyInt"}},
        "CacheConfig": {"Enabled": True, "ExpireAfter": "PT1H"},
    }