def test_continuous_parameter_ranges():
    cont_param = ContinuousParameter(0.1, 1e-2)
    ranges = cont_param.as_tuning_range('some')
    assert len(ranges.keys()) == 3
    assert ranges['Name'] == 'some'
    assert ranges['MinValue'] == '0.1'
    assert ranges['MaxValue'] == '0.01'
Example #2
def test_continuous_parameter_ranges():
    cont_param = ContinuousParameter(0.1, 1e-2)
    ranges = cont_param.as_tuning_range("some")
    assert len(ranges.keys()) == 4
    assert ranges["Name"] == "some"
    assert ranges["MinValue"] == "0.1"
    assert ranges["MaxValue"] == "0.01"
    assert ranges["ScalingType"] == "Auto"
Example #3
class AwsLinearLearner(AwsEstimator):
    container_name: str = "linear-learner"
    name: str = "linear_learner"
    default_hyperparameter_tuning: Dict[str, Any] = {
        "learning_rate": ContinuousParameter(0.01, 0.2),
        "mini_batch_size": IntegerParameter(250, 5000),
        "use_bias": CategoricalParameter([True, False]),
    }
    default_tuning_job_config = {
        "max_jobs": 20,
        "max_parallel_jobs": 3,
        "objective_metric_name": "validation:objective_loss",
        "objective_type": "Minimize",
    }

    def _load_results(self, file_name: str) -> DataFrame:
        """
        Extension of the results to remove the score dict
        Arguments and return value the same as superclass
        """
        initial_df = super()._load_results(file_name)
        for _, row in initial_df.iterrows():
            try:
                row[0] = row[0].replace('{"score":', "").replace("}", "")
            except IndexError:
                pass
        initial_df = initial_df.astype("float32")
        return initial_df
Example #4
def test_hyperparameter_optimization_happy_case():
    with patch(
            'boto3.Session'
    ):
        with patch(
                'sagemaker.Session'
        ) as mocked_sagemaker_session:
            sagemaker_session_instance = mocked_sagemaker_session.return_value

            with patch(
                    'sagemaker.get_execution_role',
                    return_value='arn_role'
            ):
                with patch(
                        'sagemaker.estimator.Estimator'
                ) as mocked_sagemaker_estimator:
                    with patch(
                            'sagify.sagemaker.sagemaker.SageMakerClient._construct_image_location',
                            return_value='image-full-name'
                    ):
                        with patch(
                                'sagemaker.tuner.HyperparameterTuner'
                        ) as mocked_sagemaker_tuner:
                            sage_maker_client = sagemaker.SageMakerClient('sagemaker', 'us-east-1')
                            sage_maker_client.hyperparameter_optimization(
                                image_name='image',
                                input_s3_data_location='s3://bucket/input',
                                instance_count=1,
                                instance_type='m1.xlarge',
                                volume_size=30,
                                max_run=60,
                                max_jobs=3,
                                max_parallel_jobs=2,
                                output_path='s3://bucket/output',
                                objective_type='Maximize',
                                objective_metric_name='Precision',
                                hyperparams_ranges_dict={
                                    'lr': ContinuousParameter(0.001, 0.1),
                                    'batch-size': CategoricalParameter([32, 64, 128, 256, 512])
                                },
                                base_job_name="Some-job-name-prefix",
                                job_name="some job name"
                            )
                            mocked_sagemaker_estimator.assert_called_with(
                                image_name='image-full-name',
                                role='arn_role',
                                train_instance_count=1,
                                train_instance_type='m1.xlarge',
                                train_volume_size=30,
                                train_max_run=60,
                                input_mode='File',
                                output_path='s3://bucket/output',
                                sagemaker_session=sagemaker_session_instance
                            )

                            mocked_sagemaker_tuner_instance = mocked_sagemaker_tuner.return_value
                            assert mocked_sagemaker_tuner_instance.fit.call_count == 1
                            mocked_sagemaker_tuner_instance.fit.assert_called_with(
                                's3://bucket/input', job_name='some job name'
                            )
Example #5
def test_estimator_metric_definitions():
    estimator = DummyEstimator(model="hello",
                               job="world",
                               hyperparameters=dict())

    hyperparameter_ranges = {"sample": ContinuousParameter(1.0, 2.0)}
    tuner = estimator.get_sagemaker_tuner(
        hyperparameter_ranges=hyperparameter_ranges)

    assert tuner.metric_definitions == estimator.get_tuner_metric_definitions()
Example #6
def test_estimator_get_sagemaker_tuner_default_values():
    estimator = DummyEstimator(model="hello",
                               job="world",
                               hyperparameters=dict())

    hyperparameter_ranges = {"sample": ContinuousParameter(1.0, 2.0)}
    tuner = estimator.get_sagemaker_tuner(
        hyperparameter_ranges=hyperparameter_ranges)

    assert tuner.objective_type == "Minimize"
    assert tuner.max_jobs == 1
    assert tuner.max_parallel_jobs == 1
Example #7
    def _prepare_parameter_ranges(cls, parameter_ranges):
        ranges = {}

        for parameter in parameter_ranges['CategoricalParameterRanges']:
            ranges[parameter['Name']] = CategoricalParameter(parameter['Values'])

        for parameter in parameter_ranges['ContinuousParameterRanges']:
            ranges[parameter['Name']] = ContinuousParameter(float(parameter['MinValue']), float(parameter['MaxValue']))

        for parameter in parameter_ranges['IntegerParameterRanges']:
            ranges[parameter['Name']] = IntegerParameter(int(parameter['MinValue']), int(parameter['MaxValue']))

        return ranges
Example #8
    def _prepare_parameter_ranges(cls, parameter_ranges):
        ranges = {}

        for parameter in parameter_ranges["CategoricalParameterRanges"]:
            ranges[parameter["Name"]] = CategoricalParameter(
                parameter["Values"])

        for parameter in parameter_ranges["ContinuousParameterRanges"]:
            ranges[parameter["Name"]] = ContinuousParameter(
                float(parameter["MinValue"]), float(parameter["MaxValue"]))

        for parameter in parameter_ranges["IntegerParameterRanges"]:
            ranges[parameter["Name"]] = IntegerParameter(
                int(parameter["MinValue"]), int(parameter["MaxValue"]))

        return ranges
Example #9
    def test_sagemaker_transform_step_successfully(self, m_default_bucket):

        m_default_bucket.return_value = "sagemaker-bucket-name"

        with DataJobStack(scope=self.app, id="some-stack", stage="stg") as djs:
            transformer = Transformer(
                model_name="some-model",
                instance_count=1,
                instance_type="ml.t2.medium",
                sagemaker_session=self.sagemaker_session,
            )

            transform_step = TransformStep(
                datajob_stack=djs,
                name="transform-job",
                transformer=transformer,
                data="s3://some-bucket/some-data.csv",
            )

            estimator = SKLearn(
                entry_point=str(
                    pathlib.Path(current_dir, "resources", "train.py")),
                train_instance_type="ml.m5.xlarge",
                role=self.role,
                framework_version="0.20.0",
                py_version="py3",
                sagemaker_session=self.sagemaker_session,
            )

            tuner = HyperparameterTuner(
                estimator=estimator,
                hyperparameter_ranges={
                    "alpha": ContinuousParameter(0.0001, 0.05)
                },
                objective_metric_name="rmse",
            )

            tuner_step = TuningStep(
                datajob_stack=djs,
                name="tuning-step",
                tuner=tuner,
                data="s3://some-bucket/some-data.csv",
            )

            with StepfunctionsWorkflow(djs, "sequential") as sfn_workflow:
                transform_step >> tuner_step
Example #10
def _read_hyperparams_ranges_config(hyperparams_config_file_path):
    if not os.path.isfile(hyperparams_config_file_path):
        raise ValueError("The given hyperparams file {} doens't exist".format(
            hyperparams_config_file_path))

    with open(hyperparams_config_file_path) as _in_file:
        hyperparams_config_dict = json.load(_in_file)

    if 'ParameterRanges' not in hyperparams_config_dict:
        raise ValueError("ParameterRanges not in the hyperparams file")

    parameter_ranges_dict = hyperparams_config_dict['ParameterRanges']

    if not parameter_ranges_dict:
        raise ValueError("Empty ParameterRanges in the hyperparams file")

    if 'ObjectiveMetric' not in hyperparams_config_dict or 'Name' not in hyperparams_config_dict[
            'ObjectiveMetric']:
        raise ValueError("ObjectiveMetric not in the hyperparams file")

    objective_name = hyperparams_config_dict['ObjectiveMetric']['Name']
    objective_type = hyperparams_config_dict['ObjectiveMetric']['Type']

    hyperparameter_ranges = {}

    categorical_param_ranges_dict = parameter_ranges_dict[
        'CategoricalParameterRanges']
    for _dict in categorical_param_ranges_dict:
        hyperparameter_ranges[_dict['Name']] = CategoricalParameter(
            _dict['Values'])

    integer_param_ranges_dict = parameter_ranges_dict['IntegerParameterRanges']
    for _dict in integer_param_ranges_dict:
        hyperparameter_ranges[_dict['Name']] = IntegerParameter(
            _dict['MinValue'], _dict['MaxValue'])

    continuous_param_ranges_dict = parameter_ranges_dict[
        'ContinuousParameterRanges']
    for _dict in continuous_param_ranges_dict:
        hyperparameter_ranges[_dict['Name']] = ContinuousParameter(
            _dict['MinValue'], _dict['MaxValue'])

    return objective_name, objective_type, hyperparameter_ranges
Example #11
def test_continuous_parameter():
    cont_param = ContinuousParameter(0.1, 1e-2)
    assert isinstance(cont_param, ParameterRange)
    assert cont_param.__name__ == "Continuous"
Example #12
REGION = "us-west-2"
BUCKET_NAME = "Some-Bucket"
ROLE = "myrole"
IMAGE_NAME = "image"

TRAIN_INSTANCE_COUNT = 1
TRAIN_INSTANCE_TYPE = "ml.c4.xlarge"
NUM_COMPONENTS = 5

SCRIPT_NAME = "my_script.py"
FRAMEWORK_VERSION = "1.0.0"

INPUTS = "s3://mybucket/train"
OBJECTIVE_METRIC_NAME = "mock_metric"
HYPERPARAMETER_RANGES = {
    "validated": ContinuousParameter(0, 5),
    "elizabeth": IntegerParameter(0, 5),
    "blank": CategoricalParameter([0, 5]),
}
METRIC_DEFINITIONS = "mock_metric_definitions"

TUNING_JOB_DETAILS = {
    "HyperParameterTuningJobConfig": {
        "ResourceLimits": {"MaxParallelTrainingJobs": 1, "MaxNumberOfTrainingJobs": 1},
        "HyperParameterTuningJobObjective": {
            "MetricName": OBJECTIVE_METRIC_NAME,
            "Type": "Minimize",
        },
        "Strategy": "Bayesian",
        "ParameterRanges": {
            "CategoricalParameterRanges": [],
Example #13

JOB_NAME = 'tuning_job'
REGION = 'us-west-2'
BUCKET_NAME = 'Some-Bucket'
ROLE = 'myrole'
IMAGE_NAME = 'image'

TRAIN_INSTANCE_COUNT = 1
TRAIN_INSTANCE_TYPE = 'ml.c4.xlarge'
NUM_COMPONENTS = 5

SCRIPT_NAME = 'my_script.py'
FRAMEWORK_VERSION = '1.0.0'

INPUTS = 's3://mybucket/train'
OBJECTIVE_METRIC_NAME = 'mock_metric'
HYPERPARAMETER_RANGES = {'validated': ContinuousParameter(0, 5),
                         'elizabeth': IntegerParameter(0, 5),
                         'blank': CategoricalParameter([0, 5])}
METRIC_DEFINITIONS = 'mock_metric_definitions'

TUNING_JOB_DETAILS = {
    'HyperParameterTuningJobConfig': {
        'ResourceLimits': {
            'MaxParallelTrainingJobs': 1,
            'MaxNumberOfTrainingJobs': 1
        },
        'HyperParameterTuningJobObjective': {
            'MetricName': OBJECTIVE_METRIC_NAME,
            'Type': 'Minimize'
        },
        'Strategy': 'Bayesian',
Example #14
def test_continuous_parameter_scaling_type():
    cont_param = ContinuousParameter(0.1, 2, scaling_type='ReverseLogarithmic')
    cont_range = cont_param.as_tuning_range('range')
    assert cont_range['ScalingType'] == 'ReverseLogarithmic'
Example #15

# In[ ]:

from sagemaker.parameter import (
    CategoricalParameter,
    ContinuousParameter,
    IntegerParameter,
    ParameterRange,
)
from sagemaker.amazon.hyperparameter import Hyperparameter
from sagemaker.tuner import HyperparameterTuner
import sagemaker

hyperparameter_ranges = {
    'learning_rate': ContinuousParameter(0.0001,
                                         0.1,
                                         scaling_type='Logarithmic'),
    'use_bias': CategoricalParameter([True, False])
}

# Next, you'll specify the objective metric that you'd like to tune and its definition, which includes the regular expression (regex) needed to extract that metric from the Amazon CloudWatch logs of the training job.
#
# Because you are using the built-in linear learner algorithm, it emits two predefined metrics that you have used before: **test:mse** and **test:absolute_loss**. You will elect to monitor **test:mse**. In this case, you only need to specify the metric name and do not need to provide a regex. If you bring your own algorithm, it emits its own metrics; in that case you would need to add a metric definition object that defines the format of those metrics through a regex, so that Amazon SageMaker knows how to extract them from your CloudWatch logs.

# In[ ]:

objective_metric_name = 'test:mse'
objective_type = 'Minimize'
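
# The built-in metric above needs no regex. If you bring your own algorithm, you would also pass a
# metric_definitions list so that SageMaker can parse the metric values out of the CloudWatch logs.
# A minimal sketch, assuming the training script prints a line such as "test mse: 0.42"
# (the log format and regex below are illustrative, not part of the original notebook):

# In[ ]:

custom_metric_definitions = [
    {'Name': 'test:mse', 'Regex': 'test mse: ([0-9\\.]+)'}
]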

# Now, create a HyperparameterTuner object, to which you will pass the following:
# - The Linear_model estimator created previously
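
# A minimal sketch of how the HyperparameterTuner described above could be assembled from the
# pieces defined earlier, assuming the linear learner estimator from the preceding cells is named
# linear_model (that variable name and the job limits below are illustrative assumptions):

# In[ ]:

tuner = HyperparameterTuner(
    estimator=linear_model,                        # hypothetical estimator from earlier cells
    objective_metric_name=objective_metric_name,   # 'test:mse', defined above
    objective_type=objective_type,                 # 'Minimize', defined above
    hyperparameter_ranges=hyperparameter_ranges,   # ranges defined above
    max_jobs=10,
    max_parallel_jobs=2,
)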
Example #16
def test_continuous_parameter_scaling_type():
    cont_param = ContinuousParameter(0.1, 2, scaling_type="ReverseLogarithmic")
    cont_range = cont_param.as_tuning_range("range")
    assert cont_range["ScalingType"] == "ReverseLogarithmic"
Example #17

REGION = 'us-west-2'
BUCKET_NAME = 'Some-Bucket'
ROLE = 'myrole'
IMAGE_NAME = 'image'

TRAIN_INSTANCE_COUNT = 1
TRAIN_INSTANCE_TYPE = 'ml.c4.xlarge'
NUM_COMPONENTS = 5

SCRIPT_NAME = 'my_script.py'
FRAMEWORK_VERSION = '1.0.0'

INPUTS = 's3://mybucket/train'
OBJECTIVE_METRIC_NAME = 'mock_metric'
HYPERPARAMETER_RANGES = {
    'validated': ContinuousParameter(0, 5),
    'elizabeth': IntegerParameter(0, 5),
    'blank': CategoricalParameter([0, 5])
}
METRIC_DEFINITIONS = 'mock_metric_definitions'

TUNING_JOB_DETAILS = {
    'HyperParameterTuningJobConfig': {
        'ResourceLimits': {
            'MaxParallelTrainingJobs': 1,
            'MaxNumberOfTrainingJobs': 1
        },
        'HyperParameterTuningJobObjective': {
            'MetricName': OBJECTIVE_METRIC_NAME,
            'Type': 'Minimize'
        },