Пример #1
0
def test_algorithm_hyperparameter_categorical_range(sagemaker_session):
    """Check that a categorical hyperparameter only accepts its listed values.

    The mocked algorithm description declares ``hp1`` with the values ``TF``
    and ``MXNet``. Setting either of them must succeed, while an unlisted
    value or a differently-cased spelling is expected to raise ``ValueError``.
    """
    categorical_spec = {
        'Description': 'A continuous hyperparameter',
        'Type': 'Categorical',
        'Name': 'hp1',
        'Range': {'CategoricalParameterRangeSpecification': {'Values': ['TF', 'MXNet']}},
        'IsTunable': True,
        'IsRequired': False,
        'DefaultValue': '100',
    }

    # Work on a private copy so the shared response fixture stays untouched.
    algo_description = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    algo_description['TrainingSpecification']['SupportedHyperParameters'] = [categorical_spec]
    sagemaker_session.sagemaker_client.describe_algorithm = Mock(return_value=algo_description)

    algo_estimator = AlgorithmEstimator(
        algorithm_arn='arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees',
        role='SageMakerRole',
        train_instance_type='ml.m4.2xlarge',
        train_instance_count=1,
        sagemaker_session=sagemaker_session,
    )

    # Both declared values are accepted.
    for accepted in ('MXNet', 'TF'):
        algo_estimator.set_hyperparameters(hp1=accepted)

    # An unlisted value and a case variant of a listed value are rejected.
    for rejected in ('Chainer', 'MxNET'):
        with pytest.raises(ValueError):
            algo_estimator.set_hyperparameters(hp1=rejected)
Пример #2
0
def test_algorithm_create_transformer(create_model, sagemaker_session):
    """Check that ``transformer()`` yields a Transformer for the created model.

    ``create_model`` is configured to return a mock model named ``my-model``;
    the transformer produced by the estimator is expected to be a
    ``Transformer`` carrying that model name.
    """
    sagemaker_session.sagemaker_client.describe_algorithm = Mock(
        return_value=DESCRIBE_ALGORITHM_RESPONSE
    )

    estimator_kwargs = {
        'algorithm_arn': 'arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees',
        'role': 'SageMakerRole',
        'train_instance_type': 'ml.m4.xlarge',
        'train_instance_count': 1,
        'sagemaker_session': sagemaker_session,
    }
    algo_estimator = AlgorithmEstimator(**estimator_kwargs)

    # Attach a training job to the estimator before asking for a transformer.
    algo_estimator.latest_training_job = _TrainingJob(sagemaker_session, 'some-job-name')

    fake_model = Mock()
    fake_model.name = 'my-model'
    create_model.return_value = fake_model

    result = algo_estimator.transformer(instance_count=1, instance_type='ml.m4.xlarge')

    assert isinstance(result, Transformer)
    create_model.assert_called()
    assert result.model_name == 'my-model'
Пример #3
0
def test_algorithm_hyperparameter_categorical_range(session):
    """Verify value validation for a categorical hyperparameter.

    ``hp1`` is declared with the allowed values ``TF`` and ``MXNet``. The
    test sets each allowed value, then expects ``ValueError`` for a value
    outside the list and for a case-mismatched spelling.
    """
    allowed_values = ["TF", "MXNet"]
    hp_definitions = [
        {
            "Description": "A continuous hyperparameter",
            "Type": "Categorical",
            "Name": "hp1",
            "Range": {"CategoricalParameterRangeSpecification": {"Values": allowed_values}},
            "IsTunable": True,
            "IsRequired": False,
            "DefaultValue": "100",
        }
    ]

    described_algo = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described_algo["TrainingSpecification"]["SupportedHyperParameters"] = hp_definitions
    session.sagemaker_client.describe_algorithm = Mock(return_value=described_algo)

    estimator_kwargs = {
        "algorithm_arn": "arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees",
        "role": "SageMakerRole",
        "train_instance_type": "ml.m4.2xlarge",
        "train_instance_count": 1,
        "sagemaker_session": session,
    }
    algo_estimator = AlgorithmEstimator(**estimator_kwargs)

    # Allowed values, in the same order the scenario exercises them.
    algo_estimator.set_hyperparameters(hp1="MXNet")
    algo_estimator.set_hyperparameters(hp1="TF")

    # Values outside the declared list (including wrong casing) must fail.
    for invalid_value in ("Chainer", "MxNET"):
        with pytest.raises(ValueError):
            algo_estimator.set_hyperparameters(hp1=invalid_value)
Пример #4
0
def test_algorithm_create_transformer_with_product_id(create_model,
                                                      sagemaker_session):
    """Check the transformer's ``env`` when the algorithm has a ``ProductId``.

    The algorithm description is extended with a ``ProductId`` entry and the
    transformer built from the estimator is expected to have ``env`` set to
    ``None``.
    """
    described_algo = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described_algo['ProductId'] = 'some-product-id'
    sagemaker_session.sagemaker_client.describe_algorithm = Mock(return_value=described_algo)

    estimator_kwargs = {
        'algorithm_arn': 'arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees',
        'role': 'SageMakerRole',
        'train_instance_type': 'ml.m4.xlarge',
        'train_instance_count': 1,
        'sagemaker_session': sagemaker_session,
    }
    algo_estimator = AlgorithmEstimator(**estimator_kwargs)

    # Attach a training job to the estimator before asking for a transformer.
    algo_estimator.latest_training_job = _TrainingJob(sagemaker_session, 'some-job-name')

    fake_model = Mock()
    fake_model.name = 'my-model'
    create_model.return_value = fake_model

    result = algo_estimator.transformer(instance_count=1, instance_type='ml.m4.xlarge')
    assert result.env is None
Пример #5
0
def test_algorithm_train_instance_types_valid_instance_types(session):
    """Construct an estimator with each supported training instance type.

    The mocked description lists ``ml.m4.xlarge`` and ``ml.m5.2xlarge`` as
    supported; building an estimator with either one must not raise.
    """
    supported_types = ["ml.m4.xlarge", "ml.m5.2xlarge"]

    described_algo = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    training_spec = described_algo["TrainingSpecification"]
    training_spec["SupportedTrainingInstanceTypes"] = supported_types

    session.sagemaker_client.describe_algorithm = Mock(return_value=described_algo)

    # One estimator per supported type; construction alone is the check.
    for chosen_type in ("ml.m4.xlarge", "ml.m5.2xlarge"):
        AlgorithmEstimator(
            algorithm_arn="arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees",
            role="SageMakerRole",
            instance_type=chosen_type,
            instance_count=1,
            sagemaker_session=session,
        )
Пример #6
0
def test_algorithm_train_instance_types_valid_instance_types(sagemaker_session):
    """Build estimators for every instance type the algorithm supports.

    ``SupportedTrainingInstanceTypes`` is overridden with two instance types
    and an estimator is created for each; no exception is expected.
    """
    supported_types = ['ml.m4.xlarge', 'ml.m5.2xlarge']

    described_algo = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described_algo['TrainingSpecification']['SupportedTrainingInstanceTypes'] = supported_types
    sagemaker_session.sagemaker_client.describe_algorithm = Mock(return_value=described_algo)

    common_kwargs = {
        'algorithm_arn': 'arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees',
        'role': 'SageMakerRole',
        'train_instance_count': 1,
        'sagemaker_session': sagemaker_session,
    }

    # Construction is the assertion: an invalid type would raise here.
    AlgorithmEstimator(train_instance_type='ml.m4.xlarge', **common_kwargs)
    AlgorithmEstimator(train_instance_type='ml.m5.2xlarge', **common_kwargs)
def test_prepare_for_training_with_name_based_on_algorithm(sagemaker_session):
    """Check that the prepared job name contains the algorithm name.

    After ``_prepare_for_training()`` the estimator's ``_current_job_name``
    is expected to include the last path segment of the algorithm ARN.
    """
    expected_fragment = 'scikit-decision-trees-1542410022'

    algo_estimator = AlgorithmEstimator(
        algorithm_arn='arn:aws:sagemaker:us-west-2:1234:algorithm/scikit-decision-trees-1542410022',
        role='some_image',
        train_instance_count=1,
        train_instance_type='ml.m4.xlarge',
        sagemaker_session=sagemaker_session,
    )
    algo_estimator._prepare_for_training()

    job_name = algo_estimator._current_job_name
    assert expected_fragment in job_name
Пример #8
0
def test_algorithm_enable_network_isolation_no_product_id(session):
    """Check that network isolation is off when using the default description.

    The estimator is built against the unmodified ``DESCRIBE_ALGORITHM_RESPONSE``
    and ``enable_network_isolation()`` is expected to return ``False``.
    """
    session.sagemaker_client.describe_algorithm = Mock(return_value=DESCRIBE_ALGORITHM_RESPONSE)

    estimator_kwargs = {
        "algorithm_arn": "arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees",
        "role": "SageMakerRole",
        "train_instance_type": "ml.m4.xlarge",
        "train_instance_count": 1,
        "sagemaker_session": session,
    }
    algo_estimator = AlgorithmEstimator(**estimator_kwargs)

    assert algo_estimator.enable_network_isolation() is False
Пример #9
0
def test_algorithm_create_transformer_without_completed_training_job(session):
    """Check that ``transformer()`` fails when no training job is attached.

    The estimator is created but never given a training job, so requesting
    a transformer is expected to raise ``RuntimeError`` with a message
    explaining that no finished training job was found.
    """
    session.sagemaker_client.describe_algorithm = Mock(return_value=DESCRIBE_ALGORITHM_RESPONSE)

    estimator = AlgorithmEstimator(
        algorithm_arn="arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees",
        role="SageMakerRole",
        train_instance_type="ml.m4.xlarge",
        train_instance_count=1,
        sagemaker_session=session,
    )

    with pytest.raises(RuntimeError) as error:
        estimator.transformer(instance_count=1, instance_type="ml.m4.xlarge")

    # The message check must live outside the ``with`` block: any statement
    # placed after the raising call inside the block is never executed.
    # ``error.value`` is the raised exception; ``str(error)`` would describe
    # the ExceptionInfo wrapper rather than the exception message.
    assert "No finished training job found associated with this estimator" in str(error.value)
Пример #10
0
def test_algorithm_hyperparameter_continuous_range_valid_range(session):
    """Set a continuous hyperparameter to values inside its declared range.

    ``max_leaf_nodes`` is declared as Continuous with bounds ``0.0`` and
    ``1.0``. Both bounds (as int and float) and an interior value must be
    accepted without raising.
    """
    continuous_spec = {
        "Description": "A continuous hyperparameter",
        "Type": "Continuous",
        "Name": "max_leaf_nodes",
        "Range": {
            "ContinuousParameterRangeSpecification": {"MinValue": "0.0", "MaxValue": "1.0"}
        },
        "IsTunable": True,
        "IsRequired": False,
        "DefaultValue": "100",
    }

    described_algo = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described_algo["TrainingSpecification"]["SupportedHyperParameters"] = [continuous_spec]
    session.sagemaker_client.describe_algorithm = Mock(return_value=described_algo)

    algo_estimator = AlgorithmEstimator(
        algorithm_arn="arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees",
        role="SageMakerRole",
        train_instance_type="ml.m4.2xlarge",
        train_instance_count=1,
        sagemaker_session=session,
    )

    # Lower bound, upper bound (float), midpoint, upper bound (int).
    for in_range_value in (0, 1.0, 0.5, 1):
        algo_estimator.set_hyperparameters(max_leaf_nodes=in_range_value)
Пример #11
0
def test_algorithm_hyperparameter_continuous_range_valid_range(sagemaker_session):
    """Verify that in-range values are accepted for a continuous hyperparameter.

    The description declares ``max_leaf_nodes`` with ``MinValue`` ``0.0`` and
    ``MaxValue`` ``1.0``; the test sets the bounds and a midpoint and expects
    no exception.
    """
    value_range = {'ContinuousParameterRangeSpecification': {'MinValue': '0.0', 'MaxValue': '1.0'}}
    hp_definitions = [
        {
            'Description': 'A continuous hyperparameter',
            'Type': 'Continuous',
            'Name': 'max_leaf_nodes',
            'Range': value_range,
            'IsTunable': True,
            'IsRequired': False,
            'DefaultValue': '100',
        }
    ]

    described_algo = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described_algo['TrainingSpecification']['SupportedHyperParameters'] = hp_definitions
    sagemaker_session.sagemaker_client.describe_algorithm = Mock(return_value=described_algo)

    estimator_kwargs = {
        'algorithm_arn': 'arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees',
        'role': 'SageMakerRole',
        'train_instance_type': 'ml.m4.2xlarge',
        'train_instance_count': 1,
        'sagemaker_session': sagemaker_session,
    }
    algo_estimator = AlgorithmEstimator(**estimator_kwargs)

    # Each call must succeed; the values stay within [0.0, 1.0].
    for in_range_value in (0, 1.0, 0.5, 1):
        algo_estimator.set_hyperparameters(max_leaf_nodes=in_range_value)
Пример #12
0
def test_algorithm_enable_network_isolation_with_product_id(session):
    """Check that network isolation is on when the algorithm has a ``ProductId``.

    A ``ProductId`` entry is added to a copy of the algorithm description and
    ``enable_network_isolation()`` is expected to return ``True``.
    """
    described_algo = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described_algo["ProductId"] = "some-product-id"
    session.sagemaker_client.describe_algorithm = Mock(return_value=described_algo)

    estimator_kwargs = {
        "algorithm_arn": "arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees",
        "role": "SageMakerRole",
        "train_instance_type": "ml.m4.xlarge",
        "train_instance_count": 1,
        "sagemaker_session": session,
    }
    algo_estimator = AlgorithmEstimator(**estimator_kwargs)

    assert algo_estimator.enable_network_isolation() is True
Пример #13
0
def test_algorithm_required_hyperparameters_are_provided(session):
    """Set every required hyperparameter in one call and expect success.

    The description declares three required hyperparameters (``hp1``,
    ``hp2`` and ``free_text_hp1``); supplying all of them to
    ``set_hyperparameters`` must not raise.
    """
    ranged_categorical = {
        "Description": "A categorical hyperparameter",
        "Type": "Categorical",
        "Name": "hp1",
        "Range": {"CategoricalParameterRangeSpecification": {"Values": ["TF", "MXNet"]}},
        "IsTunable": True,
        "IsRequired": True,
    }
    unranged_categorical = {
        "Name": "hp2",
        "Description": "A categorical hyperparameter",
        "Type": "Categorical",
        "IsTunable": False,
        "IsRequired": True,
    }
    free_text = {
        "Name": "free_text_hp1",
        "Description": "You can write anything here",
        "Type": "FreeText",
        "IsTunable": False,
        "IsRequired": True,
    }

    described_algo = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described_algo["TrainingSpecification"]["SupportedHyperParameters"] = [
        ranged_categorical,
        unranged_categorical,
        free_text,
    ]
    session.sagemaker_client.describe_algorithm = Mock(return_value=described_algo)

    algo_estimator = AlgorithmEstimator(
        algorithm_arn="arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees",
        role="SageMakerRole",
        train_instance_type="ml.m4.2xlarge",
        train_instance_count=1,
        sagemaker_session=session,
    )

    # Supply all three required hyperparameters at once.
    provided = {"hp1": "TF", "hp2": "TF2", "free_text_hp1": "Hello!"}
    algo_estimator.set_hyperparameters(**provided)
Пример #14
0
def test_algorithm_distributed_training_validation(session):
    """Validate instance-count checks against ``SupportsDistributedTraining``.

    With an algorithm that supports distributed training, estimators with
    one and with two instances must be constructible. With an algorithm that
    does not, requesting two instances is expected to raise ``ValueError``.
    """
    algo_arn = "arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees"

    supports_distributed = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    supports_distributed["TrainingSpecification"]["SupportsDistributedTraining"] = True

    single_instance_only = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    single_instance_only["TrainingSpecification"]["SupportsDistributedTraining"] = False

    session.sagemaker_client.describe_algorithm = Mock(return_value=supports_distributed)

    # A distributed-capable algorithm accepts both one and two instances.
    for instance_total in (1, 2):
        AlgorithmEstimator(
            algorithm_arn=algo_arn,
            role="SageMakerRole",
            train_instance_type="ml.m4.xlarge",
            train_instance_count=instance_total,
            sagemaker_session=session,
        )

    session.sagemaker_client.describe_algorithm = Mock(return_value=single_instance_only)

    # Asking for two instances of a single-instance algorithm must fail.
    with pytest.raises(ValueError):
        AlgorithmEstimator(
            algorithm_arn=algo_arn,
            role="SageMakerRole",
            train_instance_type="ml.m5.2xlarge",
            train_instance_count=2,
            sagemaker_session=session,
        )
Пример #15
0
def test_algorithm_required_hyperparameters_are_provided(sagemaker_session):
    """Check that supplying all required hyperparameters is accepted.

    Three hyperparameters are marked ``IsRequired``; a single
    ``set_hyperparameters`` call that provides all of them must not raise.
    """
    hp_definitions = [
        {
            'Description': 'A categorical hyperparameter',
            'Type': 'Categorical',
            'Name': 'hp1',
            'Range': {'CategoricalParameterRangeSpecification': {'Values': ['TF', 'MXNet']}},
            'IsTunable': True,
            'IsRequired': True,
        },
        {
            'Name': 'hp2',
            'Description': 'A categorical hyperparameter',
            'Type': 'Categorical',
            'IsTunable': False,
            'IsRequired': True,
        },
        {
            'Name': 'free_text_hp1',
            'Description': 'You can write anything here',
            'Type': 'FreeText',
            'IsTunable': False,
            'IsRequired': True,
        },
    ]

    described_algo = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described_algo['TrainingSpecification']['SupportedHyperParameters'] = hp_definitions
    sagemaker_session.sagemaker_client.describe_algorithm = Mock(return_value=described_algo)

    estimator_kwargs = {
        'algorithm_arn': 'arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees',
        'role': 'SageMakerRole',
        'train_instance_type': 'ml.m4.2xlarge',
        'train_instance_count': 1,
        'sagemaker_session': sagemaker_session,
    }
    algo_estimator = AlgorithmEstimator(**estimator_kwargs)

    # Every required hyperparameter is present in this call.
    algo_estimator.set_hyperparameters(hp1='TF', hp2='TF2', free_text_hp1='Hello!')
Пример #16
0
def test_algorithm_supported_with_spot_instances(session):
    """Check that an estimator can be built with spot-instance settings.

    The estimator is constructed with ``use_spot_instances=True`` and a
    ``max_wait`` of 500, and the resulting object is asserted to be truthy.
    """
    session.sagemaker_client.describe_algorithm = Mock(return_value=DESCRIBE_ALGORITHM_RESPONSE)

    spot_estimator = AlgorithmEstimator(
        algorithm_arn="arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees",
        role="SageMakerRole",
        instance_type="ml.m4.xlarge",
        instance_count=1,
        use_spot_instances=True,
        max_wait=500,
        sagemaker_session=session,
    )

    assert spot_estimator
Пример #17
0
def test_algorithm_hyperparameter_integer_range_invalid_range(session):
    """Check that out-of-range integer hyperparameter values are rejected.

    ``max_leaf_nodes`` is declared as Integer with bounds ``1`` and
    ``100000``. One value just below and one just above that range are each
    expected to raise ``ValueError``.
    """
    integer_spec = {
        "Description": "Grow a tree with max_leaf_nodes in best-first fashion.",
        "Type": "Integer",
        "Name": "max_leaf_nodes",
        "Range": {
            "IntegerParameterRangeSpecification": {"MinValue": "1", "MaxValue": "100000"}
        },
        "IsTunable": True,
        "IsRequired": False,
        "DefaultValue": "100",
    }

    described_algo = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described_algo["TrainingSpecification"]["SupportedHyperParameters"] = [integer_spec]
    session.sagemaker_client.describe_algorithm = Mock(return_value=described_algo)

    algo_estimator = AlgorithmEstimator(
        algorithm_arn="arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees",
        role="SageMakerRole",
        instance_type="ml.m4.2xlarge",
        instance_count=1,
        sagemaker_session=session,
    )

    # One below the minimum, then one above the maximum.
    for out_of_range in (0, 100001):
        with pytest.raises(ValueError):
            algo_estimator.set_hyperparameters(max_leaf_nodes=out_of_range)
Пример #18
0
def test_algorithm_no_required_hyperparameters(sagemaker_session):
    """Build an estimator when the description lists no hyperparameters.

    The ``SupportedHyperParameters`` entry is removed from a copy of the
    algorithm description; constructing the estimator must still succeed
    and produce a truthy object.
    """
    described_algo = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    del described_algo['TrainingSpecification']['SupportedHyperParameters']

    sagemaker_session.sagemaker_client.describe_algorithm = Mock(return_value=described_algo)

    # With no hyperparameters declared there is nothing required to set,
    # so plain construction is expected to work.
    algo_estimator = AlgorithmEstimator(
        algorithm_arn='arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees',
        role='SageMakerRole',
        train_instance_type='ml.m4.2xlarge',
        train_instance_count=1,
        sagemaker_session=sagemaker_session,
    )
    assert algo_estimator
def test_algorithm_encrypt_inter_container_traffic(session):
    """Check that ``encrypt_inter_container_traffic`` is kept on the estimator.

    The estimator is created with ``encrypt_inter_container_traffic=True``
    and the attribute of the same name is expected to be ``True``.
    """
    described_algo = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described_algo['encrypt_inter_container_traffic'] = True
    session.sagemaker_client.describe_algorithm = Mock(return_value=described_algo)

    estimator_kwargs = {
        'algorithm_arn': 'arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees',
        'role': 'SageMakerRole',
        'train_instance_type': 'ml.m4.xlarge',
        'train_instance_count': 1,
        'sagemaker_session': session,
        'encrypt_inter_container_traffic': True,
    }
    algo_estimator = AlgorithmEstimator(**estimator_kwargs)

    assert algo_estimator.encrypt_inter_container_traffic is True
Пример #20
0
def test_algorithm_required_hyperparameters_not_provided(session):
    """Check that missing required hyperparameters are reported as errors.

    Both ``hp1`` and ``hp2`` are marked required. Setting only ``hp2`` is
    expected to raise ``ValueError``, and so is calling ``fit`` afterwards.
    """
    required_hp1 = {
        "Description": "A continuous hyperparameter",
        "Type": "Categorical",
        "Name": "hp1",
        "Range": {"CategoricalParameterRangeSpecification": {"Values": ["TF", "MXNet"]}},
        "IsTunable": True,
        "IsRequired": True,
    }
    required_hp2 = {
        "Name": "hp2",
        "Description": "A continuous hyperparameter",
        "Type": "Categorical",
        "IsTunable": False,
        "IsRequired": True,
    }

    described_algo = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described_algo["TrainingSpecification"]["SupportedHyperParameters"] = [
        required_hp1,
        required_hp2,
    ]
    session.sagemaker_client.describe_algorithm = Mock(return_value=described_algo)

    algo_estimator = AlgorithmEstimator(
        algorithm_arn="arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees",
        role="SageMakerRole",
        train_instance_type="ml.m4.2xlarge",
        train_instance_count=1,
        sagemaker_session=session,
    )

    # Only hp2 is supplied, so the required hp1 is missing.
    with pytest.raises(ValueError):
        algo_estimator.set_hyperparameters(hp2="TF2")

    # fit() must fail as well; this covers callers that never invoke
    # set_hyperparameters() explicitly.
    training_inputs = {"training": "s3://some/place"}
    with pytest.raises(ValueError):
        algo_estimator.fit(training_inputs)
Пример #21
0
def test_algorithm_trainining_channels_with_invalid_channels(
        sagemaker_session):
    """Check that ``fit`` rejects channel sets that do not match the spec.

    The description declares a required ``training`` channel and an optional
    ``validation`` channel. Omitting ``training`` and passing an undeclared
    channel are each expected to raise ``ValueError``.
    """
    required_training = {
        'Name': 'training',
        'Description': 'Input channel that provides training data',
        'IsRequired': True,
        'SupportedContentTypes': ['text/csv'],
        'SupportedCompressionTypes': ['None'],
        'SupportedInputModes': ['File'],
    }
    optional_validation = {
        'Name': 'validation',
        'Description': 'Input channel that provides validation data',
        'IsRequired': False,
        'SupportedContentTypes': ['text/csv'],
        'SupportedCompressionTypes': ['None'],
        'SupportedInputModes': ['File'],
    }

    described_algo = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described_algo['TrainingSpecification']['TrainingChannels'] = [
        required_training,
        optional_validation,
    ]
    sagemaker_session.sagemaker_client.describe_algorithm = Mock(return_value=described_algo)

    algo_estimator = AlgorithmEstimator(
        algorithm_arn='arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees',
        role='SageMakerRole',
        train_instance_type='ml.m4.xlarge',
        train_instance_count=1,
        sagemaker_session=sagemaker_session,
    )

    # The required training channel is missing.
    validation_only = {'validation': 's3://some/thing'}
    with pytest.raises(ValueError):
        algo_estimator.fit(validation_only)

    # 'training2' is not a channel declared by the algorithm.
    with_unknown_channel = {
        'training': 's3://some/data',
        'training2': 's3://some/other/data',
    }
    with pytest.raises(ValueError):
        algo_estimator.fit(with_unknown_channel)
Пример #22
0
def test_algorithm_required_hyperparameters_not_provided(sagemaker_session):
    """Verify errors when a required hyperparameter is left unset.

    ``hp1`` and ``hp2`` are both required. Providing only ``hp2`` must raise
    ``ValueError``, and a subsequent ``fit`` call must raise it too.
    """
    hp_definitions = [
        {
            'Description': 'A continuous hyperparameter',
            'Type': 'Categorical',
            'Name': 'hp1',
            'Range': {'CategoricalParameterRangeSpecification': {'Values': ['TF', 'MXNet']}},
            'IsTunable': True,
            'IsRequired': True,
        },
        {
            'Name': 'hp2',
            'Description': 'A continuous hyperparameter',
            'Type': 'Categorical',
            'IsTunable': False,
            'IsRequired': True,
        },
    ]

    described_algo = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described_algo['TrainingSpecification']['SupportedHyperParameters'] = hp_definitions
    sagemaker_session.sagemaker_client.describe_algorithm = Mock(return_value=described_algo)

    estimator_kwargs = {
        'algorithm_arn': 'arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees',
        'role': 'SageMakerRole',
        'train_instance_type': 'ml.m4.2xlarge',
        'train_instance_count': 1,
        'sagemaker_session': sagemaker_session,
    }
    algo_estimator = AlgorithmEstimator(**estimator_kwargs)

    # hp1 is required but absent from this call.
    with pytest.raises(ValueError):
        algo_estimator.set_hyperparameters(hp2='TF2')

    # Fitting with required hyperparameters still unset must fail too,
    # which covers users who skip set_hyperparameters() entirely.
    with pytest.raises(ValueError):
        algo_estimator.fit({'training': 's3://some/place'})
Пример #23
0
def test_algorithm_trainining_channels_with_invalid_channels(session):
    """Verify that ``fit`` raises for missing or undeclared input channels.

    ``training`` is required and ``validation`` is optional. The test expects
    ``ValueError`` when only ``validation`` is passed and when an extra,
    undeclared ``training2`` channel is passed.
    """
    channel_specs = [
        {
            "Name": "training",
            "Description": "Input channel that provides training data",
            "IsRequired": True,
            "SupportedContentTypes": ["text/csv"],
            "SupportedCompressionTypes": ["None"],
            "SupportedInputModes": ["File"],
        },
        {
            "Name": "validation",
            "Description": "Input channel that provides validation data",
            "IsRequired": False,
            "SupportedContentTypes": ["text/csv"],
            "SupportedCompressionTypes": ["None"],
            "SupportedInputModes": ["File"],
        },
    ]

    described_algo = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described_algo["TrainingSpecification"]["TrainingChannels"] = channel_specs
    session.sagemaker_client.describe_algorithm = Mock(return_value=described_algo)

    estimator_kwargs = {
        "algorithm_arn": "arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees",
        "role": "SageMakerRole",
        "train_instance_type": "ml.m4.xlarge",
        "train_instance_count": 1,
        "sagemaker_session": session,
    }
    algo_estimator = AlgorithmEstimator(**estimator_kwargs)

    # Without the required training channel the call must fail.
    with pytest.raises(ValueError):
        algo_estimator.fit({"validation": "s3://some/thing"})

    # An undeclared channel name next to a valid one must fail as well.
    with pytest.raises(ValueError):
        algo_estimator.fit(
            {"training": "s3://some/data", "training2": "s3://some/other/data"}
        )
Пример #24
0
def test_algorithm_trainining_channels_with_expected_channels(
        sagemaker_session):
    """Check that ``fit`` accepts channel sets that match the spec.

    With a required ``training`` channel and an optional ``validation``
    channel declared, fitting with both channels and fitting with only
    ``training`` must not raise.
    """
    required_training = {
        'Name': 'training',
        'Description': 'Input channel that provides training data',
        'IsRequired': True,
        'SupportedContentTypes': ['text/csv'],
        'SupportedCompressionTypes': ['None'],
        'SupportedInputModes': ['File'],
    }
    optional_validation = {
        'Name': 'validation',
        'Description': 'Input channel that provides validation data',
        'IsRequired': False,
        'SupportedContentTypes': ['text/csv'],
        'SupportedCompressionTypes': ['None'],
        'SupportedInputModes': ['File'],
    }

    described_algo = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described_algo['TrainingSpecification']['TrainingChannels'] = [
        required_training,
        optional_validation,
    ]
    sagemaker_session.sagemaker_client.describe_algorithm = Mock(return_value=described_algo)

    algo_estimator = AlgorithmEstimator(
        algorithm_arn='arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees',
        role='SageMakerRole',
        train_instance_type='ml.m4.xlarge',
        train_instance_count=1,
        sagemaker_session=sagemaker_session,
    )

    # Both declared channels supplied.
    both_channels = {'training': 's3://some/place', 'validation': 's3://some/other'}
    algo_estimator.fit(both_channels)

    # Only the required channel supplied; validation is optional.
    training_only = {'training': 's3://some/place'}
    algo_estimator.fit(training_only)
Пример #25
0
def test_algorithm_trainining_channels_with_expected_channels(session):
    """Verify that valid channel combinations are accepted by ``fit``.

    The algorithm declares ``training`` (required) and ``validation``
    (optional). Passing both, or ``training`` alone, must succeed.
    """
    channel_specs = [
        {
            "Name": "training",
            "Description": "Input channel that provides training data",
            "IsRequired": True,
            "SupportedContentTypes": ["text/csv"],
            "SupportedCompressionTypes": ["None"],
            "SupportedInputModes": ["File"],
        },
        {
            "Name": "validation",
            "Description": "Input channel that provides validation data",
            "IsRequired": False,
            "SupportedContentTypes": ["text/csv"],
            "SupportedCompressionTypes": ["None"],
            "SupportedInputModes": ["File"],
        },
    ]

    described_algo = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described_algo["TrainingSpecification"]["TrainingChannels"] = channel_specs
    session.sagemaker_client.describe_algorithm = Mock(return_value=described_algo)

    estimator_kwargs = {
        "algorithm_arn": "arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees",
        "role": "SageMakerRole",
        "train_instance_type": "ml.m4.xlarge",
        "train_instance_count": 1,
        "sagemaker_session": session,
    }
    algo_estimator = AlgorithmEstimator(**estimator_kwargs)

    # First with training and validation, then with training alone
    # (validation is optional, so the second call must work too).
    accepted_inputs = (
        {"training": "s3://some/place", "validation": "s3://some/other"},
        {"training": "s3://some/place"},
    )
    for channel_inputs in accepted_inputs:
        algo_estimator.fit(channel_inputs)
def test_algorithm_hyperparameter_integer_range_invalid_range(session):
    """Verify that integer hyperparameter values outside the range fail.

    The declared range for ``max_leaf_nodes`` is ``1`` to ``100000``; the
    values ``0`` and ``100001`` are each expected to raise ``ValueError``.
    """
    value_range = {'IntegerParameterRangeSpecification': {'MinValue': '1', 'MaxValue': '100000'}}
    hp_definitions = [
        {
            'Description': 'Grow a tree with max_leaf_nodes in best-first fashion.',
            'Type': 'Integer',
            'Name': 'max_leaf_nodes',
            'Range': value_range,
            'IsTunable': True,
            'IsRequired': False,
            'DefaultValue': '100',
        }
    ]

    described_algo = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
    described_algo['TrainingSpecification']['SupportedHyperParameters'] = hp_definitions
    session.sagemaker_client.describe_algorithm = Mock(return_value=described_algo)

    estimator_kwargs = {
        'algorithm_arn': 'arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees',
        'role': 'SageMakerRole',
        'train_instance_type': 'ml.m4.2xlarge',
        'train_instance_count': 1,
        'sagemaker_session': session,
    }
    algo_estimator = AlgorithmEstimator(**estimator_kwargs)

    # Below the minimum.
    with pytest.raises(ValueError):
        algo_estimator.set_hyperparameters(max_leaf_nodes=0)

    # Above the maximum.
    with pytest.raises(ValueError):
        algo_estimator.set_hyperparameters(max_leaf_nodes=100001)
Пример #27
0
def test_algorithm_supported_input_mode_with_bad_input_types(sagemaker_session):
    """Check that a mismatched input mode fails at estimator construction.

    Two scenarios are exercised, each expected to raise ``ValueError``:
    an estimator requesting ``Pipe`` mode against an algorithm whose
    ``training`` channel only supports ``File``, and an estimator built
    without an explicit ``input_mode`` against an algorithm whose
    ``training`` channel only supports ``Pipe``.
    """

    def _channels(training_input_modes):
        # Channel list where only the training channel's modes vary.
        return [
            {
                'Name': 'training',
                'Description': 'Input channel that provides training data',
                'IsRequired': True,
                'SupportedContentTypes': ['text/csv'],
                'SupportedCompressionTypes': ['None'],
                'SupportedInputModes': training_input_modes,
            },
            {
                'Name': 'validation',
                'Description': 'Input channel that provides validation data',
                'IsRequired': False,
                'SupportedContentTypes': ['text/csv'],
                'SupportedCompressionTypes': ['None'],
                'SupportedInputModes': ['File', 'Pipe'],
            },
        ]

    scenarios = (
        # File-only algorithm, estimator explicitly asks for Pipe mode.
        (['File'], {'input_mode': 'Pipe'}),
        # Pipe-only algorithm, estimator does not pass an input mode.
        (['Pipe'], {}),
    )

    for training_modes, mode_kwargs in scenarios:
        described_algo = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
        described_algo['TrainingSpecification']['TrainingChannels'] = _channels(training_modes)
        sagemaker_session.sagemaker_client.describe_algorithm = Mock(return_value=described_algo)

        with pytest.raises(ValueError):
            AlgorithmEstimator(
                algorithm_arn='arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees',
                role='SageMakerRole',
                train_instance_type='ml.m4.xlarge',
                train_instance_count=1,
                sagemaker_session=sagemaker_session,
                **mode_kwargs
            )
Пример #28
0
def test_algorithm_attach_from_hyperparameter_tuning():
    """Attach to a training job launched by a tuning job and check the estimator.

    The mocked ``describe_training_job`` response carries a ``TuningJobArn``
    and a ``_tuning_objective_metric`` hyperparameter. The assertions expect
    that hyperparameter to be absent from ``estimator.hyperparameters()``
    while the remaining job settings are reflected on the estimator.
    """
    tuned_job_name = "training-job-that-is-part-of-a-tuning-job"
    expected_algo_arn = "arn:aws:sagemaker:us-east-2:000000000000:algorithm/scikit-decision-trees"
    expected_role = "arn:aws:iam::123412341234:role/SageMakerRole"
    expected_count = 1
    expected_instance = "ml.m4.xlarge"
    expected_volume_gb = 30
    expected_mode = "File"

    # Hyperparameters as reported by the service, including the entry that
    # the tuning job adds for its objective metric.
    reported_hyperparameters = {
        "_tuning_objective_metric": "validation:accuracy",
        "max_leaf_nodes": 1,
        "free_text_hp1": "foo",
    }

    training_channel = {
        "ChannelName": "training",
        "DataSource": {
            "S3DataSource": {
                "S3DataType": "S3Prefix",
                "S3Uri": "s3://sagemaker-us-east-2-123412341234/input/training.csv",
                "S3DataDistributionType": "FullyReplicated",
            }
        },
        "CompressionType": "None",
        "RecordWrapperType": "None",
    }

    described_job = {
        "TrainingJobName": tuned_job_name,
        "TrainingJobArn": "arn:aws:sagemaker:us-east-2:123412341234:training-job/%s"
        % tuned_job_name,
        "TuningJobArn": "arn:aws:sagemaker:us-east-2:123412341234:hyper-parameter-tuning-job/%s"
        % tuned_job_name,
        "ModelArtifacts": {
            "S3ModelArtifacts": "s3://sagemaker-us-east-2-123412341234/output/model.tar.gz"
        },
        "TrainingJobOutput": {
            "S3TrainingJobOutput": "s3://sagemaker-us-east-2-123412341234/output/output.tar.gz"
        },
        "TrainingJobStatus": "Succeeded",
        "HyperParameters": reported_hyperparameters,
        "AlgorithmSpecification": {
            "AlgorithmName": expected_algo_arn,
            "TrainingInputMode": expected_mode,
        },
        "MetricDefinitions": [
            {"Name": "validation:accuracy", "Regex": "validation-accuracy: (\\S+)"}
        ],
        "RoleArn": expected_role,
        "InputDataConfig": [training_channel],
        "OutputDataConfig": {
            "KmsKeyId": "",
            "S3OutputPath": "s3://sagemaker-us-east-2-123412341234/output",
            "RemoveJobNameFromS3OutputPath": False,
        },
        "ResourceConfig": {
            "InstanceType": expected_instance,
            "InstanceCount": expected_count,
            "VolumeSizeInGB": expected_volume_gb,
        },
        "StoppingCondition": {"MaxRuntimeInSeconds": 86400},
    }

    # Wire the canned responses into a fully mocked session.
    mock_session = Mock()
    client = mock_session.sagemaker_client
    client.list_tags.return_value = {"Tags": []}
    client.describe_algorithm.return_value = DESCRIBE_ALGORITHM_RESPONSE
    client.describe_training_job.return_value = described_job

    attached = AlgorithmEstimator.attach(tuned_job_name, sagemaker_session=mock_session)

    # Only the user-facing hyperparameters are expected to remain.
    expected_hyperparameters = {"max_leaf_nodes": 1, "free_text_hp1": "foo"}
    assert attached.hyperparameters() == expected_hyperparameters
    assert attached.algorithm_arn == expected_algo_arn
    assert attached.role == expected_role
    assert attached.train_instance_count == expected_count
    assert attached.train_instance_type == expected_instance
    assert attached.train_volume_size == expected_volume_gb
    assert attached.input_mode == expected_mode
    assert attached.sagemaker_session == mock_session
Пример #29
0
def test_algorithm_supported_input_mode_with_bad_input_types(session):
    """Check that AlgorithmEstimator rejects an unsupported input mode.

    Two mismatches are exercised, each expected to raise ``ValueError`` when
    the estimator is constructed:

    * a Pipe mode estimator for an algorithm whose training channel only
      supports File mode;
    * an estimator created without ``input_mode`` (treated by this test as a
      File mode estimator) for an algorithm whose training channel only
      supports Pipe mode.
    """

    def _algo_with_training_modes(training_modes):
        # Work on a deep copy so the shared module-level response is untouched.
        described = copy.deepcopy(DESCRIBE_ALGORITHM_RESPONSE)
        described["TrainingSpecification"]["TrainingChannels"] = [
            {
                "Name": "training",
                "Description": "Input channel that provides training data",
                "IsRequired": True,
                "SupportedContentTypes": ["text/csv"],
                "SupportedCompressionTypes": ["None"],
                "SupportedInputModes": training_modes,
            },
            {
                "Name": "validation",
                "Description": "Input channel that provides validation data",
                "IsRequired": False,
                "SupportedContentTypes": ["text/csv"],
                "SupportedCompressionTypes": ["None"],
                "SupportedInputModes": ["File", "Pipe"],
            },
        ]
        return described

    # Constructor arguments shared by both failing cases.
    base_kwargs = {
        "algorithm_arn": "arn:aws:sagemaker:us-east-2:1234:algorithm/scikit-decision-trees",
        "role": "SageMakerRole",
        "train_instance_type": "ml.m4.xlarge",
        "train_instance_count": 1,
        "sagemaker_session": session,
    }

    # Case 1: File-only algorithm, Pipe mode estimator -> must fail.
    file_only_algo = _algo_with_training_modes(["File"])
    session.sagemaker_client.describe_algorithm = Mock(return_value=file_only_algo)
    with pytest.raises(ValueError):
        AlgorithmEstimator(input_mode="Pipe", **base_kwargs)

    # Case 2: Pipe-only algorithm, estimator without an explicit input_mode
    # (i.e. the File mode estimator of the original test) -> must fail.
    pipe_only_algo = _algo_with_training_modes(["Pipe"])
    session.sagemaker_client.describe_algorithm = Mock(return_value=pipe_only_algo)
    with pytest.raises(ValueError):
        AlgorithmEstimator(**base_kwargs)