Exemplo n.º 1
0
    def _prepare_data(
        self,
        data_name: str,
        x_data: DataFrame,
        y_data: Optional[DataFrame] = None,
        s3_input_type: bool = True,
    ) -> Union[TrainingInput, str]:
        """
        Prepares the data to use in the learner.

        Arguments:
            x_data: the features of the data
            y_data: (optional) the output of the data. Don't provide for predictions

        Returns:
            The s3 input or s3 location of the data
        """
        LOGGER.info("Preparing data for usage")

        # Try to get from cache
        return_data = None
        if data_name != "predict":
            return_data = self.data.get_from_cache("s3", data_name)
        if return_data is not None:
            LOGGER.info("Found s3 data in cache.")
            if s3_input_type:
                return TrainingInput(return_data, content_type="text/csv")
            return return_data

        # Try to get local data from cache
        temp_location = None
        if data_name != "predict":
            temp_location = self.data.get_from_cache("local", data_name)
        # If not available, save to local
        if temp_location is None:
            if not os.path.exists(self.local_save_folder):
                os.makedirs(self.local_save_folder)
            temp_location = f"{self.local_save_folder}/{data_name}.csv"
            if y_data is not None:
                data = pd.concat([y_data, x_data], axis=1)
            else:
                data = x_data
            LOGGER.info("Writing data to local machine")
            data.to_csv(temp_location, index=False, header=False)
        else:
            # Log if present
            LOGGER.info("Found local data location in cache.")

        # Upload to S3
        return_data = self.executor.upload_data(temp_location,
                                                prefix=self.input_data_prefix)
        # Put in cache
        if data_name != "predict":
            self.data.add_to_cache("s3", data_name, return_data)

        if s3_input_type:
            return_data = TrainingInput(return_data, content_type="text/csv")

        return return_data
Exemplo n.º 2
0
def test_dict_of_mixed_input_types():
    input_list = _Job._format_inputs_to_input_config({
        "a":
        "s3://foo/bar",
        "b":
        TrainingInput("s3://whizz/bang")
    })

    expected = [
        {
            "ChannelName": "a",
            "DataSource": {
                "S3DataSource": {
                    "S3DataDistributionType": "FullyReplicated",
                    "S3DataType": "S3Prefix",
                    "S3Uri": "s3://foo/bar",
                }
            },
        },
        {
            "ChannelName": "b",
            "DataSource": {
                "S3DataSource": {
                    "S3DataDistributionType": "FullyReplicated",
                    "S3DataType": "S3Prefix",
                    "S3Uri": "s3://whizz/bang",
                }
            },
        },
    ]

    # convert back into map for comparison so list order (which is arbitrary) is ignored
    assert {c["ChannelName"]: c
            for c in input_list} == {c["ChannelName"]: c
                                     for c in expected}
def test_training_input_all_arguments():
    prefix = "pre"
    distribution = "FullyReplicated"
    compression = "Gzip"
    content_type = "text/csv"
    record_wrapping = "RecordIO"
    s3_data_type = "Manifestfile"
    input_mode = "Pipe"
    result = TrainingInput(
        s3_data=prefix,
        distribution=distribution,
        compression=compression,
        input_mode=input_mode,
        content_type=content_type,
        record_wrapping=record_wrapping,
        s3_data_type=s3_data_type,
    )
    expected = {
        "DataSource": {
            "S3DataSource": {
                "S3DataDistributionType": distribution,
                "S3DataType": s3_data_type,
                "S3Uri": prefix,
            }
        },
        "CompressionType": compression,
        "ContentType": content_type,
        "RecordWrapperType": record_wrapping,
        "InputMode": input_mode,
    }

    assert result.config == expected
Exemplo n.º 4
0
def test_format_inputs_to_input_config_training_input():
    inputs = TrainingInput(BUCKET_NAME)

    channels = _Job._format_inputs_to_input_config(inputs)

    assert (channels[0]["DataSource"]["S3DataSource"]["S3Uri"] ==
            inputs.config["DataSource"]["S3DataSource"]["S3Uri"])
Exemplo n.º 5
0
def test_format_string_uri_input():
    inputs = TrainingInput(BUCKET_NAME)

    s3_uri_input = _Job._format_string_uri_input(inputs)

    assert (s3_uri_input.config["DataSource"]["S3DataSource"]["S3Uri"] ==
            inputs.config["DataSource"]["S3DataSource"]["S3Uri"])
def main():
    print('Starting model training.')
    print('Note: if launching for the first time in local mode, container image download might take a few minutes to complete.')

    hyperparameters = {
        "max_depth": "5",
        "eta": "0.2",
        "gamma": "4",
        "min_child_weight": "6",
        "subsample": "0.7",
        "objective": "reg:squarederror",
        "num_round": "50",
        "verbosity": "2",
    }

    xgb_script_mode_estimator = XGBoost(
        entry_point="./code/abalone.py",
        hyperparameters=hyperparameters,
        role=DUMMY_IAM_ROLE,
        instance_count=1,
        instance_type='local',
        framework_version="1.2-1"
    )

    train_input = TrainingInput("file://./data/train/abalone", content_type="text/libsvm")

    xgb_script_mode_estimator.fit({"train": train_input, "validation": train_input})

    print('Completed model training')

    model_data = xgb_script_mode_estimator.model_data
    print(model_data)

    xgb_inference_model = XGBoostModel(
        model_data=model_data,
        role=DUMMY_IAM_ROLE,
        entry_point="./code/inference.py",
        framework_version="1.2-1",
    )

    print('Deploying endpoint in local mode')
    predictor = xgb_inference_model.deploy(
        initial_instance_count=1,
        instance_type="local",
    )

    a_young_abalone = "6 1:3 2:0.37 3:0.29 4:0.095 5:0.249 6:0.1045 7:0.058 8:0.067"
    do_inference_on_local_endpoint(predictor, a_young_abalone)

    an_old_abalone = "15 1:1 2:0.655 3:0.53 4:0.175 5:1.2635 6:0.486 7:0.2635 8:0.415"
    do_inference_on_local_endpoint(predictor, an_old_abalone)

    print('About to delete the endpoint to stop paying (if in cloud mode).')
    predictor.delete_endpoint(predictor.endpoint_name)
def test_training_input_all_defaults(caplog):
    prefix = "pre"
    actual = TrainingInput(s3_data=prefix)
    expected = {
        "DataSource": {
            "S3DataSource": {
                "S3DataDistributionType": "FullyReplicated",
                "S3DataType": "S3Prefix",
                "S3Uri": prefix,
            }
        }
    }
    assert actual.config == expected
Exemplo n.º 8
0
def test_load_config(estimator):
    inputs = TrainingInput(BUCKET_NAME)

    config = _Job._load_config(inputs, estimator)

    assert config["input_config"][0]["DataSource"]["S3DataSource"][
        "S3Uri"] == BUCKET_NAME
    assert config["role"] == ROLE
    assert config["output_config"]["S3OutputPath"] == S3_OUTPUT_PATH
    assert "KmsKeyId" not in config["output_config"]
    assert config["resource_config"]["InstanceCount"] == INSTANCE_COUNT
    assert config["resource_config"]["InstanceType"] == INSTANCE_TYPE
    assert config["resource_config"]["VolumeSizeInGB"] == VOLUME_SIZE
    assert config["stop_condition"]["MaxRuntimeInSeconds"] == MAX_RUNTIME
Exemplo n.º 9
0
def test_load_config_with_code_channel_no_code_uri(framework):
    inputs = TrainingInput(BUCKET_NAME)

    framework.model_uri = MODEL_URI
    framework.model_channel_name = MODEL_CHANNEL_NAME
    framework._enable_network_isolation = True
    config = _Job._load_config(inputs, framework)

    assert len(config["input_config"]) == 2
    assert config["input_config"][0]["DataSource"]["S3DataSource"][
        "S3Uri"] == BUCKET_NAME
    assert config["role"] == ROLE
    assert config["output_config"]["S3OutputPath"] == S3_OUTPUT_PATH
    assert "KmsKeyId" not in config["output_config"]
    assert config["resource_config"]["InstanceCount"] == INSTANCE_COUNT
    assert config["resource_config"]["InstanceType"] == INSTANCE_TYPE
def run_test(ecr_image, sagemaker_session, instance_type, framework_version, test_data,
             record_wrapper_type=None):
    source_path = os.path.join(os.path.dirname(__file__), '..', '..', 'resources', 'pipemode')
    script = os.path.join(source_path, 'pipemode.py')
    estimator = TensorFlow(entry_point=script,
                           role='SageMakerRole',
                           instance_type=instance_type,
                           instance_count=1,
                           sagemaker_session=sagemaker_session,
                           image_uri=ecr_image,
                           framework_version=framework_version,
                           input_mode='Pipe',
                           hyperparameters={'dimension': DIMENSION})
    input = TrainingInput(s3_data=test_data(sagemaker_session),
                          distribution='FullyReplicated',
                          record_wrapping=record_wrapper_type,
                          input_mode='Pipe')
    with timeout(minutes=20):
        estimator.fit({'elizabeth': input},
                      job_name=unique_name_from_base('test-sagemaker-pipemode'))
Exemplo n.º 11
0
def test_format_input_training_input():
    input_dict = _Job._format_inputs_to_input_config(
        TrainingInput(
            "s3://foo/bar",
            distribution="ShardedByS3Key",
            compression="gzip",
            content_type="whizz",
            record_wrapping="bang",
        ))
    assert input_dict == [{
        "CompressionType": "gzip",
        "ChannelName": "training",
        "ContentType": "whizz",
        "DataSource": {
            "S3DataSource": {
                "S3DataType": "S3Prefix",
                "S3DataDistributionType": "ShardedByS3Key",
                "S3Uri": "s3://foo/bar",
            }
        },
        "RecordWrapperType": "bang",
    }]
def test_conditional_pytorch_training_model_registration(
    sagemaker_session,
    role,
    cpu_instance_type,
    pipeline_name,
    region_name,
):
    base_dir = os.path.join(DATA_DIR, "pytorch_mnist")
    entry_point = os.path.join(base_dir, "mnist.py")
    input_path = sagemaker_session.upload_data(
        path=os.path.join(base_dir, "training"),
        key_prefix="integ-test-data/pytorch_mnist/training",
    )
    inputs = TrainingInput(s3_data=input_path)

    instance_count = ParameterInteger(name="InstanceCount", default_value=1)
    instance_type = ParameterString(name="InstanceType",
                                    default_value="ml.m5.xlarge")
    good_enough_input = ParameterInteger(name="GoodEnoughInput",
                                         default_value=1)
    in_condition_input = ParameterString(name="Foo", default_value="Foo")

    pytorch_estimator = PyTorch(
        entry_point=entry_point,
        role=role,
        framework_version="1.5.0",
        py_version="py3",
        instance_count=instance_count,
        instance_type=instance_type,
        sagemaker_session=sagemaker_session,
    )
    step_train = TrainingStep(
        name="pytorch-train",
        estimator=pytorch_estimator,
        inputs=inputs,
    )

    step_register = RegisterModel(
        name="pytorch-register-model",
        estimator=pytorch_estimator,
        model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
        content_types=["*"],
        response_types=["*"],
        inference_instances=["*"],
        transform_instances=["*"],
        description="test-description",
    )

    model = Model(
        image_uri=pytorch_estimator.training_image_uri(),
        model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
        sagemaker_session=sagemaker_session,
        role=role,
    )
    model_inputs = CreateModelInput(
        instance_type="ml.m5.large",
        accelerator_type="ml.eia1.medium",
    )
    step_model = CreateModelStep(
        name="pytorch-model",
        model=model,
        inputs=model_inputs,
    )

    step_cond = ConditionStep(
        name="cond-good-enough",
        conditions=[
            ConditionGreaterThanOrEqualTo(left=good_enough_input, right=1),
            ConditionIn(value=in_condition_input, in_values=["foo", "bar"]),
        ],
        if_steps=[step_train, step_register],
        else_steps=[step_model],
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            in_condition_input,
            good_enough_input,
            instance_count,
            instance_type,
        ],
        steps=[step_cond],
        sagemaker_session=sagemaker_session,
    )

    try:
        response = pipeline.create(role)
        create_arn = response["PipelineArn"]
        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
            create_arn,
        )

        execution = pipeline.start(parameters={})
        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}/execution/",
            execution.arn,
        )

        execution = pipeline.start(parameters={"GoodEnoughInput": 0})
        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}/execution/",
            execution.arn,
        )
    finally:
        try:
            pipeline.delete()
        except Exception:
            pass
Exemplo n.º 13
0
def test_tuning_multi_algos(
    sagemaker_session,
    role,
    cpu_instance_type,
    pipeline_name,
    region_name,
    script_dir,
    athena_dataset_definition,
):
    base_dir = os.path.join(DATA_DIR, "pytorch_mnist")
    entry_point = os.path.join(base_dir, "mnist.py")
    input_path = sagemaker_session.upload_data(
        path=os.path.join(base_dir, "training"),
        key_prefix="integ-test-data/pytorch_mnist/training",
    )

    instance_count = ParameterInteger(name="InstanceCount", default_value=1)
    instance_type = ParameterString(name="InstanceType",
                                    default_value="ml.m5.xlarge")

    input_data = f"s3://sagemaker-sample-data-{region_name}/processing/census/census-income.csv"

    sklearn_processor = SKLearnProcessor(
        framework_version="0.20.0",
        instance_type=instance_type,
        instance_count=instance_count,
        base_job_name="test-sklearn",
        sagemaker_session=sagemaker_session,
        role=role,
    )

    property_file = PropertyFile(name="DataAttributes",
                                 output_name="attributes",
                                 path="attributes.json")

    step_process = ProcessingStep(
        name="my-process",
        display_name="ProcessingStep",
        description="description for Processing step",
        processor=sklearn_processor,
        inputs=[
            ProcessingInput(source=input_data,
                            destination="/opt/ml/processing/input"),
            ProcessingInput(dataset_definition=athena_dataset_definition),
        ],
        outputs=[
            ProcessingOutput(output_name="train_data",
                             source="/opt/ml/processing/train"),
            ProcessingOutput(output_name="attributes",
                             source="/opt/ml/processing/attributes.json"),
        ],
        property_files=[property_file],
        code=os.path.join(script_dir, "preprocessing.py"),
    )

    static_hp_1 = ParameterString(name="InstanceType",
                                  default_value="ml.m5.xlarge")
    json_get_hp = JsonGet(step_name=step_process.name,
                          property_file=property_file,
                          json_path="train_size")
    pytorch_estimator = PyTorch(
        entry_point=entry_point,
        role=role,
        framework_version="1.5.0",
        py_version="py3",
        instance_count=instance_count,
        instance_type=instance_type,
        sagemaker_session=sagemaker_session,
        enable_sagemaker_metrics=True,
        max_retry_attempts=3,
        hyperparameters={
            "static-hp": static_hp_1,
            "train_size": json_get_hp
        },
    )

    min_batch_size = ParameterString(name="MinBatchSize", default_value="64")
    max_batch_size = json_get_hp

    tuner = HyperparameterTuner.create(
        estimator_dict={
            "estimator-1": pytorch_estimator,
            "estimator-2": pytorch_estimator,
        },
        objective_metric_name_dict={
            "estimator-1": "test:acc",
            "estimator-2": "test:acc",
        },
        hyperparameter_ranges_dict={
            "estimator-1": {
                "batch-size": IntegerParameter(min_batch_size, max_batch_size)
            },
            "estimator-2": {
                "batch-size": IntegerParameter(min_batch_size, max_batch_size)
            },
        },
        metric_definitions_dict={
            "estimator-1": [{
                "Name": "test:acc",
                "Regex": "Overall test accuracy: (.*?);"
            }],
            "estimator-2": [{
                "Name": "test:acc",
                "Regex": "Overall test accuracy: (.*?);"
            }],
        },
    )

    inputs = {
        "estimator-1": TrainingInput(s3_data=input_path),
        "estimator-2": TrainingInput(s3_data=input_path),
    }

    step_tune = TuningStep(
        name="my-tuning-step",
        tuner=tuner,
        inputs=inputs,
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            instance_count, instance_type, min_batch_size, max_batch_size
        ],
        steps=[step_process, step_tune],
        sagemaker_session=sagemaker_session,
    )

    try:
        response = pipeline.create(role)
        create_arn = response["PipelineArn"]
        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
            create_arn,
        )

        execution = pipeline.start(parameters={})
        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}/execution/",
            execution.arn,
        )
    finally:
        try:
            pipeline.delete()
        except Exception:
            pass
Exemplo n.º 14
0
 def s3_test_in(self):
     return TrainingInput(s3_data=s3_test_loc.as_uri(), content_type='csv')
Exemplo n.º 15
0
def test_tuning_single_algo(
    sagemaker_session,
    role,
    cpu_instance_type,
    pipeline_name,
    region_name,
):
    base_dir = os.path.join(DATA_DIR, "pytorch_mnist")
    entry_point = os.path.join(base_dir, "mnist.py")
    input_path = sagemaker_session.upload_data(
        path=os.path.join(base_dir, "training"),
        key_prefix="integ-test-data/pytorch_mnist/training",
    )
    inputs = TrainingInput(s3_data=input_path)

    instance_count = ParameterInteger(name="InstanceCount", default_value=1)
    instance_type = ParameterString(name="InstanceType",
                                    default_value="ml.m5.xlarge")

    pytorch_estimator = PyTorch(
        entry_point=entry_point,
        role=role,
        framework_version="1.5.0",
        py_version="py3",
        instance_count=instance_count,
        instance_type=instance_type,
        sagemaker_session=sagemaker_session,
        enable_sagemaker_metrics=True,
        max_retry_attempts=3,
    )

    min_batch_size = ParameterInteger(name="MinBatchSize", default_value=64)
    max_batch_size = ParameterInteger(name="MaxBatchSize", default_value=128)
    hyperparameter_ranges = {
        "batch-size": IntegerParameter(min_batch_size, max_batch_size),
    }

    tuner = HyperparameterTuner(
        estimator=pytorch_estimator,
        objective_metric_name="test:acc",
        objective_type="Maximize",
        hyperparameter_ranges=hyperparameter_ranges,
        metric_definitions=[{
            "Name": "test:acc",
            "Regex": "Overall test accuracy: (.*?);"
        }],
        max_jobs=2,
        max_parallel_jobs=2,
    )

    step_tune = TuningStep(
        name="my-tuning-step",
        tuner=tuner,
        inputs=inputs,
    )

    best_model = Model(
        image_uri=pytorch_estimator.training_image_uri(),
        model_data=step_tune.get_top_model_s3_uri(
            top_k=0,
            s3_bucket=sagemaker_session.default_bucket(),
        ),
        sagemaker_session=sagemaker_session,
        role=role,
    )
    model_inputs = CreateModelInput(
        instance_type="ml.m5.large",
        accelerator_type="ml.eia1.medium",
    )
    step_best_model = CreateModelStep(
        name="1st-model",
        model=best_model,
        inputs=model_inputs,
    )

    second_best_model = Model(
        image_uri=pytorch_estimator.training_image_uri(),
        model_data=step_tune.get_top_model_s3_uri(
            top_k=1,
            s3_bucket=sagemaker_session.default_bucket(),
        ),
        sagemaker_session=sagemaker_session,
        role=role,
        entry_point=entry_point,
        source_dir=base_dir,
    )

    step_second_best_model = CreateModelStep(
        name="2nd-best-model",
        model=second_best_model,
        inputs=model_inputs,
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            instance_count, instance_type, min_batch_size, max_batch_size
        ],
        steps=[step_tune, step_best_model, step_second_best_model],
        sagemaker_session=sagemaker_session,
    )

    try:
        response = pipeline.create(role)
        create_arn = response["PipelineArn"]
        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
            create_arn,
        )

        for _ in retries(
                max_retry_count=5,
                exception_message_prefix=
                "Waiting for a successful execution of pipeline",
                seconds_to_sleep=10,
        ):
            execution = pipeline.start(parameters={})
            assert re.match(
                rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}/execution/",
                execution.arn,
            )
            try:
                execution.wait(delay=30, max_attempts=60)
            except WaiterError:
                pass
            execution_steps = execution.list_steps()

            assert len(execution_steps) == 3
            for step in execution_steps:
                assert step["StepStatus"] == "Succeeded"
            break
    finally:
        try:
            pipeline.delete()
        except Exception:
            pass
Exemplo n.º 16
0
 def s3_val_in(self):
     return TrainingInput("s3://{}/{}".format(s3_data_loc, "/val.csv"),
                          content_type='csv')
classifier.set_hyperparameters(num_layers=152,
                               use_pretrained_model=0,
                               image_shape='3,224,224',
                               num_classes=2,
                               mini_batch_size=32,
                               epochs=30,
                               learning_rate=0.01,
                               num_training_samples=963,
                               precision_dtype='float32')

#Training channels

from sagemaker import s3train

train_data = TrainingInput(s3train,
                           distribution='FullyReplicated',
                           content_type='application/x-image',
                           s3_data_type='S3Prefix')
validation_data = TrainingInput(s3validation,
                                distribution='FullyReplicated',
                                content_type='application/x-image',
                                s3_data_type='S3Prefix')
train_data_lst = TrainingInput(s3train_lst,
                               distribution='FullyReplicated',
                               content_type='application/x-image',
                               s3_data_type='S3Prefix')
validation_data_lst = TrainingInput(s3validation_lst,
                                    distribution='FullyReplicated',
                                    content_type='application/x-image',
                                    s3_data_type='S3Prefix')

data_channels = {
def test_sklearn_xgboost_sip_model_registration(sagemaker_session, role,
                                                pipeline_name, region_name):
    prefix = "sip"
    bucket_name = sagemaker_session.default_bucket()
    instance_count = ParameterInteger(name="InstanceCount", default_value=1)
    instance_type = ParameterString(name="InstanceType",
                                    default_value="ml.m5.xlarge")

    sklearn_processor = SKLearnProcessor(
        role=role,
        instance_type=instance_type,
        instance_count=instance_count,
        framework_version="0.20.0",
        sagemaker_session=sagemaker_session,
    )

    # The path to the raw data.
    raw_data_path = "s3://{0}/{1}/data/raw/".format(bucket_name, prefix)
    raw_data_path_param = ParameterString(name="raw_data_path",
                                          default_value=raw_data_path)

    # The output path to the training data.
    train_data_path = "s3://{0}/{1}/data/preprocessed/train/".format(
        bucket_name, prefix)
    train_data_path_param = ParameterString(name="train_data_path",
                                            default_value=train_data_path)

    # The output path to the validation data.
    val_data_path = "s3://{0}/{1}/data/preprocessed/val/".format(
        bucket_name, prefix)
    val_data_path_param = ParameterString(name="val_data_path",
                                          default_value=val_data_path)

    # The training output path for the model.
    output_path = "s3://{0}/{1}/output/".format(bucket_name, prefix)
    output_path_param = ParameterString(name="output_path",
                                        default_value=output_path)

    # The output path to the featurizer model.
    model_path = "s3://{0}/{1}/output/sklearn/".format(bucket_name, prefix)
    model_path_param = ParameterString(name="model_path",
                                       default_value=model_path)

    inputs = [
        ProcessingInput(
            input_name="raw_data",
            source=raw_data_path_param,
            destination="/opt/ml/processing/input",
        )
    ]

    outputs = [
        ProcessingOutput(
            output_name="train_data",
            source="/opt/ml/processing/train",
            destination=train_data_path_param,
        ),
        ProcessingOutput(
            output_name="val_data",
            source="/opt/ml/processing/val",
            destination=val_data_path_param,
        ),
        ProcessingOutput(
            output_name="model",
            source="/opt/ml/processing/model",
            destination=model_path_param,
        ),
    ]

    base_dir = os.path.join(DATA_DIR, "sip")
    code_path = os.path.join(base_dir, "preprocessor.py")

    processing_step = ProcessingStep(
        name="Processing",
        code=code_path,
        processor=sklearn_processor,
        inputs=inputs,
        outputs=outputs,
        job_arguments=["--train-test-split-ratio", "0.2"],
    )

    entry_point = "training.py"
    source_dir = base_dir
    code_location = "s3://{0}/{1}/code".format(bucket_name, prefix)

    estimator = XGBoost(
        entry_point=entry_point,
        source_dir=source_dir,
        output_path=output_path_param,
        code_location=code_location,
        instance_type=instance_type,
        instance_count=instance_count,
        framework_version="0.90-2",
        sagemaker_session=sagemaker_session,
        py_version="py3",
        role=role,
    )

    training_step = TrainingStep(
        name="Training",
        estimator=estimator,
        inputs={
            "train":
            TrainingInput(
                s3_data=processing_step.properties.ProcessingOutputConfig.
                Outputs["train_data"].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation":
            TrainingInput(
                s3_data=processing_step.properties.ProcessingOutputConfig.
                Outputs["val_data"].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
    )

    code_location = "s3://{0}/{1}/code".format(bucket_name, prefix)
    source_dir = os.path.join(base_dir, "sklearn_source_dir")

    sklearn_model = SKLearnModel(
        name="sklearn-model",
        model_data=processing_step.properties.ProcessingOutputConfig.
        Outputs["model"].S3Output.S3Uri,
        entry_point="inference.py",
        source_dir=source_dir,
        code_location=code_location,
        role=role,
        sagemaker_session=sagemaker_session,
        framework_version="0.20.0",
        py_version="py3",
    )

    code_location = "s3://{0}/{1}/code".format(bucket_name, prefix)
    source_dir = os.path.join(base_dir, "xgboost_source_dir")

    xgboost_model = XGBoostModel(
        name="xgboost-model",
        model_data=training_step.properties.ModelArtifacts.S3ModelArtifacts,
        entry_point="inference.py",
        source_dir=source_dir,
        code_location=code_location,
        framework_version="0.90-2",
        py_version="py3",
        role=role,
        sagemaker_session=sagemaker_session,
    )

    pipeline_model = PipelineModel([xgboost_model, sklearn_model],
                                   role,
                                   sagemaker_session=sagemaker_session)

    step_register = RegisterModel(
        name="AbaloneRegisterModel",
        model=pipeline_model,
        content_types=["application/json"],
        response_types=["application/json"],
        inference_instances=["ml.t2.medium", "ml.m5.xlarge"],
        transform_instances=["ml.m5.xlarge"],
        model_package_group_name="windturbine",
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            raw_data_path_param,
            train_data_path_param,
            val_data_path_param,
            model_path_param,
            instance_type,
            instance_count,
            output_path_param,
        ],
        steps=[processing_step, training_step, step_register],
        sagemaker_session=sagemaker_session,
    )

    try:
        response = pipeline.upsert(role_arn=role)
        create_arn = response["PipelineArn"]
        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
            create_arn,
        )

        execution = pipeline.start(parameters={})
        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}/execution/",
            execution.arn,
        )

        execution = pipeline.start()
        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}/execution/",
            execution.arn,
        )
    finally:
        try:
            pipeline.delete()
        except Exception:
            pass
def test_training_job_with_debugger_and_profiler(
    sagemaker_session,
    pipeline_name,
    role,
    pytorch_training_latest_version,
    pytorch_training_latest_py_version,
):
    instance_count = ParameterInteger(name="InstanceCount", default_value=1)
    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")

    rules = [
        Rule.sagemaker(rule_configs.vanishing_gradient()),
        Rule.sagemaker(base_config=rule_configs.all_zero(), rule_parameters={"tensor_regex": ".*"}),
        Rule.sagemaker(rule_configs.loss_not_decreasing()),
    ]
    debugger_hook_config = DebuggerHookConfig(
        s3_output_path=(f"s3://{sagemaker_session.default_bucket()}/{uuid.uuid4()}/tensors")
    )

    base_dir = os.path.join(DATA_DIR, "pytorch_mnist")
    script_path = os.path.join(base_dir, "mnist.py")
    input_path = sagemaker_session.upload_data(
        path=os.path.join(base_dir, "training"),
        key_prefix="integ-test-data/pytorch_mnist/training",
    )
    inputs = TrainingInput(s3_data=input_path)

    pytorch_estimator = PyTorch(
        entry_point=script_path,
        role="SageMakerRole",
        framework_version=pytorch_training_latest_version,
        py_version=pytorch_training_latest_py_version,
        instance_count=instance_count,
        instance_type=instance_type,
        sagemaker_session=sagemaker_session,
        rules=rules,
        debugger_hook_config=debugger_hook_config,
    )

    step_train = TrainingStep(
        name="pytorch-train",
        estimator=pytorch_estimator,
        inputs=inputs,
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[instance_count, instance_type],
        steps=[step_train],
        sagemaker_session=sagemaker_session,
    )

    for _ in retries(
        max_retry_count=5,
        exception_message_prefix="Waiting for a successful execution of pipeline",
        seconds_to_sleep=10,
    ):
        try:
            response = pipeline.create(role)
            create_arn = response["PipelineArn"]

            execution = pipeline.start()
            response = execution.describe()
            assert response["PipelineArn"] == create_arn

            try:
                execution.wait(delay=10, max_attempts=60)
            except WaiterError:
                pass
            execution_steps = execution.list_steps()

            assert len(execution_steps) == 1
            failure_reason = execution_steps[0].get("FailureReason", "")
            if failure_reason != "":
                logging.error(f"Pipeline execution failed with error: {failure_reason}.Retrying..")
                continue
            assert execution_steps[0]["StepName"] == "pytorch-train"
            assert execution_steps[0]["StepStatus"] == "Succeeded"

            training_job_arn = execution_steps[0]["Metadata"]["TrainingJob"]["Arn"]
            job_description = sagemaker_session.sagemaker_client.describe_training_job(
                TrainingJobName=training_job_arn.split("/")[1]
            )

            for index, rule in enumerate(rules):
                config = job_description["DebugRuleConfigurations"][index]
                assert config["RuleConfigurationName"] == rule.name
                assert config["RuleEvaluatorImage"] == rule.image_uri
                assert config["VolumeSizeInGB"] == 0
                assert (
                    config["RuleParameters"]["rule_to_invoke"]
                    == rule.rule_parameters["rule_to_invoke"]
                )
            assert job_description["DebugHookConfig"] == debugger_hook_config._to_request_dict()

            assert job_description["ProfilingStatus"] == "Enabled"
            assert job_description["ProfilerConfig"]["ProfilingIntervalInMilliseconds"] == 500
            break
        finally:
            try:
                pipeline.delete()
            except Exception:
                pass
def test_model_registration_with_tensorflow_model_with_pipeline_model(
        sagemaker_session, role, tf_full_version, tf_full_py_version,
        pipeline_name, region_name):
    base_dir = os.path.join(DATA_DIR, "tensorflow_mnist")
    entry_point = os.path.join(base_dir, "mnist_v2.py")
    input_path = sagemaker_session.upload_data(
        path=os.path.join(base_dir, "data"),
        key_prefix="integ-test-data/tf-scriptmode/mnist/training",
    )
    inputs = TrainingInput(s3_data=input_path)

    instance_count = ParameterInteger(name="InstanceCount", default_value=1)
    instance_type = ParameterString(name="InstanceType",
                                    default_value="ml.m5.xlarge")

    tensorflow_estimator = TensorFlow(
        entry_point=entry_point,
        role=role,
        instance_count=instance_count,
        instance_type=instance_type,
        framework_version=tf_full_version,
        py_version=tf_full_py_version,
        sagemaker_session=sagemaker_session,
    )
    step_train = TrainingStep(
        name="MyTrain",
        estimator=tensorflow_estimator,
        inputs=inputs,
    )

    model = TensorFlowModel(
        entry_point=entry_point,
        framework_version="2.4",
        model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
        role=role,
        sagemaker_session=sagemaker_session,
    )

    pipeline_model = PipelineModel(name="MyModelPipeline",
                                   models=[model],
                                   role=role,
                                   sagemaker_session=sagemaker_session)

    step_register_model = RegisterModel(
        name="MyRegisterModel",
        model=pipeline_model,
        model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
        content_types=["application/json"],
        response_types=["application/json"],
        inference_instances=["ml.t2.medium", "ml.m5.large"],
        transform_instances=["ml.m5.large"],
        model_package_group_name=f"{pipeline_name}TestModelPackageGroup",
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            instance_count,
            instance_type,
        ],
        steps=[step_train, step_register_model],
        sagemaker_session=sagemaker_session,
    )

    try:
        response = pipeline.create(role)
        create_arn = response["PipelineArn"]

        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
            create_arn,
        )

        for _ in retries(
                max_retry_count=5,
                exception_message_prefix=
                "Waiting for a successful execution of pipeline",
                seconds_to_sleep=10,
        ):
            execution = pipeline.start(parameters={})
            assert re.match(
                rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}/execution/",
                execution.arn,
            )
            try:
                execution.wait(delay=30, max_attempts=60)
            except WaiterError:
                pass
            execution_steps = execution.list_steps()

            assert len(execution_steps) == 3
            for step in execution_steps:
                assert step["StepStatus"] == "Succeeded"
            break
    finally:
        try:
            pipeline.delete()
        except Exception:
            pass
def test_training_step_with_output_path_as_join(
    sagemaker_session, role, tf_full_version, tf_full_py_version, pipeline_name, region_name
):
    base_dir = os.path.join(DATA_DIR, "dummy_tensor")
    input_path = sagemaker_session.upload_data(
        path=base_dir, key_prefix="integ-test-data/estimator/training"
    )
    inputs = TrainingInput(s3_data=input_path)

    instance_count = ParameterInteger(name="InstanceCount", default_value=1)
    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")
    output_path = Join(
        on="/", values=["s3:/", f"{sagemaker_session.default_bucket()}", f"{pipeline_name}Train"]
    )

    image_uri = image_uris.retrieve("factorization-machines", sagemaker_session.boto_region_name)
    estimator = Estimator(
        image_uri=image_uri,
        role=role,
        instance_count=instance_count,
        instance_type=instance_type,
        sagemaker_session=sagemaker_session,
        output_path=output_path,
    )
    estimator.set_hyperparameters(
        num_factors=10, feature_dim=784, mini_batch_size=100, predictor_type="binary_classifier"
    )
    step_train = TrainingStep(
        name="MyTrain",
        estimator=estimator,
        inputs=inputs,
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[instance_count, instance_type],
        steps=[step_train],
        sagemaker_session=sagemaker_session,
    )

    try:
        response = pipeline.create(role)
        create_arn = response["PipelineArn"]

        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
            create_arn,
        )

        execution = pipeline.start(parameters={})
        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}/execution/",
            execution.arn,
        )
        try:
            execution.wait(delay=30, max_attempts=60)
        except WaiterError:
            pass
        execution_steps = execution.list_steps()

        assert len(execution_steps) == 1
        assert execution_steps[0]["StepName"] == "MyTrain"
    finally:
        try:
            pipeline.delete()
        except Exception:
            pass