def test_integer_parameter_ranges():
    int_param = IntegerParameter(1, 2)
    ranges = int_param.as_tuning_range('some')
    assert len(ranges.keys()) == 3
    assert ranges['Name'] == 'some'
    assert ranges['MinValue'] == '1'
    assert ranges['MaxValue'] == '2'
def test_integer_parameter_ranges():
    int_param = IntegerParameter(1, 2)
    ranges = int_param.as_tuning_range("some")
    assert len(ranges.keys()) == 4
    assert ranges["Name"] == "some"
    assert ranges["MinValue"] == "1"
    assert ranges["MaxValue"] == "2"
    assert ranges["ScalingType"] == "Auto"
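# A minimal standalone sketch of the behavior asserted above, assuming the
# newer SDK version that emits ScalingType. Note that the integer bounds are
# serialized as strings, matching the SageMaker API's tuning-range format:
from sagemaker.tuner import IntegerParameter

int_param = IntegerParameter(1, 2)
print(int_param.as_tuning_range("some"))
# -> {'Name': 'some', 'MinValue': '1', 'MaxValue': '2', 'ScalingType': 'Auto'}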
def test_tuning_kmeans_fsx(efs_fsx_setup, sagemaker_session, cpu_instance_type):
    subnets = [efs_fsx_setup.subnet_id]
    security_group_ids = efs_fsx_setup.security_group_ids
    role = efs_fsx_setup.role_name
    kmeans = KMeans(
        role=role,
        train_instance_count=TRAIN_INSTANCE_COUNT,
        train_instance_type=cpu_instance_type,
        k=K,
        sagemaker_session=sagemaker_session,
        subnets=subnets,
        security_group_ids=security_group_ids,
    )

    hyperparameter_ranges = {
        "extra_center_factor": IntegerParameter(4, 10),
        "mini_batch_size": IntegerParameter(10, 100),
        "epochs": IntegerParameter(1, 2),
        "init_method": CategoricalParameter(["kmeans++", "random"]),
    }

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        tuner = HyperparameterTuner(
            estimator=kmeans,
            objective_metric_name=OBJECTIVE_METRIC_NAME,
            hyperparameter_ranges=hyperparameter_ranges,
            objective_type="Minimize",
            max_jobs=MAX_JOBS,
            max_parallel_jobs=MAX_PARALLEL_JOBS,
        )

        file_system_fsx_id = efs_fsx_setup.file_system_fsx_id
        train_records = FileSystemRecordSet(
            file_system_id=file_system_fsx_id,
            file_system_type="FSxLustre",
            directory_path=FSX_DIR_PATH,
            num_records=NUM_RECORDS,
            feature_dim=FEATURE_DIM,
        )
        test_records = FileSystemRecordSet(
            file_system_id=file_system_fsx_id,
            file_system_type="FSxLustre",
            directory_path=FSX_DIR_PATH,
            num_records=NUM_RECORDS,
            feature_dim=FEATURE_DIM,
            channel="test",
        )

        job_name = unique_name_from_base("tune-kmeans-fsx")
        tuner.fit([train_records, test_records], job_name=job_name)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    assert best_training_job
def test_tuning_step(sfn_client, record_set_for_hyperparameter_tuning, sagemaker_role_arn, sfn_role_arn):
    job_name = generate_job_name()

    kmeans = KMeans(role=sagemaker_role_arn, instance_count=1, instance_type=INSTANCE_TYPE, k=10)

    hyperparameter_ranges = {
        "extra_center_factor": IntegerParameter(4, 10),
        "mini_batch_size": IntegerParameter(10, 100),
        "epochs": IntegerParameter(1, 2),
        "init_method": CategoricalParameter(["kmeans++", "random"]),
    }

    tuner = HyperparameterTuner(
        estimator=kmeans,
        objective_metric_name="test:msd",
        hyperparameter_ranges=hyperparameter_ranges,
        objective_type="Minimize",
        max_jobs=2,
        max_parallel_jobs=2,
    )

    # Build workflow definition
    tuning_step = TuningStep(
        'Tuning',
        tuner=tuner,
        job_name=job_name,
        data=record_set_for_hyperparameter_tuning,
    )
    tuning_step.add_retry(SAGEMAKER_RETRY_STRATEGY)
    workflow_graph = Chain([tuning_step])

    with timeout(minutes=DEFAULT_TIMEOUT_MINUTES):
        # Create workflow and check definition
        workflow = create_workflow_and_check_definition(
            workflow_graph=workflow_graph,
            workflow_name=unique_name_from_base("integ-test-tuning-step-workflow"),
            sfn_client=sfn_client,
            sfn_role_arn=sfn_role_arn,
        )

        # Execute workflow
        execution = workflow.execute()
        execution_output = execution.get_output(wait=True)

        # Check workflow output
        assert execution_output.get("HyperParameterTuningJobStatus") == "Completed"

        # Cleanup
        state_machine_delete_wait(sfn_client, workflow.state_machine_arn)
def test_fit_pca(sagemaker_session, tuner):
    pca = PCA(ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE, NUM_COMPONENTS,
              base_job_name='pca', sagemaker_session=sagemaker_session)
    pca.algorithm_mode = 'randomized'
    pca.subtract_mean = True
    pca.extra_components = 5
    tuner.estimator = pca

    tags = [{'Name': 'some-tag-without-a-value'}]
    tuner.tags = tags

    hyperparameter_ranges = {'num_components': IntegerParameter(2, 4),
                             'algorithm_mode': CategoricalParameter(['regular', 'randomized'])}
    tuner._hyperparameter_ranges = hyperparameter_ranges

    records = RecordSet(s3_data=INPUTS, num_records=1, feature_dim=1)
    tuner.fit(records, mini_batch_size=9999)

    _, _, tune_kwargs = sagemaker_session.tune.mock_calls[0]

    assert len(tune_kwargs['static_hyperparameters']) == 4
    assert tune_kwargs['static_hyperparameters']['extra_components'] == '5'
    assert len(tune_kwargs['parameter_ranges']['IntegerParameterRanges']) == 1
    assert tune_kwargs['job_name'].startswith('pca')
    assert tune_kwargs['tags'] == tags
    assert tune_kwargs['early_stopping_type'] == 'Off'
    assert tuner.estimator.mini_batch_size == 9999
def test_s3_input_mode(sagemaker_session, tuner):
    expected_input_mode = 'Pipe'
    script_path = os.path.join(DATA_DIR, 'mxnet_mnist', 'failure_script.py')
    mxnet = MXNet(entry_point=script_path,
                  role=ROLE,
                  framework_version=FRAMEWORK_VERSION,
                  train_instance_count=TRAIN_INSTANCE_COUNT,
                  train_instance_type=TRAIN_INSTANCE_TYPE,
                  sagemaker_session=sagemaker_session)
    tuner.estimator = mxnet

    tags = [{'Name': 'some-tag-without-a-value'}]
    tuner.tags = tags

    hyperparameter_ranges = {
        'num_components': IntegerParameter(2, 4),
        'algorithm_mode': CategoricalParameter(['regular', 'randomized'])
    }
    tuner._hyperparameter_ranges = hyperparameter_ranges

    tuner.fit(inputs=s3_input('s3://mybucket/train_manifest', input_mode=expected_input_mode))

    actual_input_mode = sagemaker_session.method_calls[1][2]['input_mode']
    assert actual_input_mode == expected_input_mode
def test_s3_input_mode(sagemaker_session, tuner):
    expected_input_mode = "Pipe"
    script_path = os.path.join(DATA_DIR, "mxnet_mnist", "failure_script.py")
    mxnet = MXNet(
        entry_point=script_path,
        role=ROLE,
        framework_version=FRAMEWORK_VERSION,
        train_instance_count=TRAIN_INSTANCE_COUNT,
        train_instance_type=TRAIN_INSTANCE_TYPE,
        sagemaker_session=sagemaker_session,
    )
    tuner.estimator = mxnet

    tags = [{"Name": "some-tag-without-a-value"}]
    tuner.tags = tags

    hyperparameter_ranges = {
        "num_components": IntegerParameter(2, 4),
        "algorithm_mode": CategoricalParameter(["regular", "randomized"]),
    }
    tuner._hyperparameter_ranges = hyperparameter_ranges

    tuner.fit(inputs=s3_input("s3://mybucket/train_manifest", input_mode=expected_input_mode))

    actual_input_mode = sagemaker_session.method_calls[1][2]["input_mode"]
    assert actual_input_mode == expected_input_mode
class AwsLinearLearner(AwsEstimator):
    container_name: str = "linear-learner"
    name: str = "linear_learner"
    default_hyperparameter_tuning: Dict[str, Any] = {
        "learning_rate": ContinuousParameter(0.01, 0.2),
        "mini_batch_size": IntegerParameter(250, 5000),
        "use_bias": CategoricalParameter([True, False]),
    }
    default_tuning_job_config = {
        "max_jobs": 20,
        "max_parallel_jobs": 3,
        "objective_metric_name": "validation:objective_loss",
        "objective_type": "Minimize",
    }

    def _load_results(self, file_name: str) -> DataFrame:
        """Extension of the results loading to remove the score dict.

        Arguments and return value are the same as in the superclass.
        """
        initial_df = super()._load_results(file_name)
        for _, row in initial_df.iterrows():
            try:
                row[0] = row[0].replace('{"score":', "").replace("}", "")
            except IndexError:
                pass
        initial_df = initial_df.astype("float32")
        return initial_df
def test_validate_parameter_ranges_number_validation_error(sagemaker_session):
    pca = PCA(ROLE, TRAIN_INSTANCE_COUNT, TRAIN_INSTANCE_TYPE, NUM_COMPONENTS,
              base_job_name='pca', sagemaker_session=sagemaker_session)

    invalid_hyperparameter_ranges = {'num_components': IntegerParameter(-1, 2)}

    with pytest.raises(ValueError) as e:
        HyperparameterTuner(estimator=pca,
                            objective_metric_name=OBJECTIVE_METRIC_NAME,
                            hyperparameter_ranges=invalid_hyperparameter_ranges,
                            metric_definitions=METRIC_DEFINITIONS)

    assert 'Value must be an integer greater than zero' in str(e)
def _prepare_parameter_ranges(cls, parameter_ranges):
    ranges = {}

    for parameter in parameter_ranges['CategoricalParameterRanges']:
        ranges[parameter['Name']] = CategoricalParameter(parameter['Values'])

    for parameter in parameter_ranges['ContinuousParameterRanges']:
        ranges[parameter['Name']] = ContinuousParameter(float(parameter['MinValue']),
                                                        float(parameter['MaxValue']))

    for parameter in parameter_ranges['IntegerParameterRanges']:
        ranges[parameter['Name']] = IntegerParameter(int(parameter['MinValue']),
                                                     int(parameter['MaxValue']))

    return ranges
def test_tuning_tf_lustre(
    efs_fsx_setup,
    sagemaker_session,
    cpu_instance_type,
    tensorflow_training_latest_version,
    tensorflow_training_latest_py_version,
):
    role = efs_fsx_setup["role_name"]
    subnets = [efs_fsx_setup["subnet_id"]]
    security_group_ids = efs_fsx_setup["security_group_ids"]

    estimator = TensorFlow(
        entry_point=SCRIPT,
        role=role,
        instance_count=1,
        instance_type=cpu_instance_type,
        sagemaker_session=sagemaker_session,
        framework_version=tensorflow_training_latest_version,
        py_version=tensorflow_training_latest_py_version,
        subnets=subnets,
        security_group_ids=security_group_ids,
    )

    hyperparameter_ranges = {"epochs": IntegerParameter(1, 2)}
    objective_metric_name = "accuracy"
    metric_definitions = [{"Name": objective_metric_name, "Regex": "accuracy = ([0-9\\.]+)"}]

    tuner = HyperparameterTuner(
        estimator,
        objective_metric_name,
        hyperparameter_ranges,
        metric_definitions,
        max_jobs=MAX_JOBS,
        max_parallel_jobs=MAX_PARALLEL_JOBS,
    )

    file_system_fsx_id = efs_fsx_setup["file_system_fsx_id"]
    file_system_input = FileSystemInput(
        file_system_id=file_system_fsx_id,
        file_system_type="FSxLustre",
        directory_path=FSX_DIR_PATH,
    )

    with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
        tuning_job_name = unique_name_from_base("test-tuning-tf-script-mode-lustre", max_length=32)
        tuner.fit(file_system_input, job_name=tuning_job_name)
        time.sleep(15)
        tuner.wait()

    best_training_job = tuner.best_training_job()
    assert best_training_job
def _prepare_parameter_ranges(cls, parameter_ranges):
    ranges = {}

    for parameter in parameter_ranges["CategoricalParameterRanges"]:
        ranges[parameter["Name"]] = CategoricalParameter(parameter["Values"])

    for parameter in parameter_ranges["ContinuousParameterRanges"]:
        ranges[parameter["Name"]] = ContinuousParameter(
            float(parameter["MinValue"]), float(parameter["MaxValue"])
        )

    for parameter in parameter_ranges["IntegerParameterRanges"]:
        ranges[parameter["Name"]] = IntegerParameter(
            int(parameter["MinValue"]), int(parameter["MaxValue"])
        )

    return ranges
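# For illustration, a sketch of the input shape the helper above consumes --
# the ParameterRanges structure returned by DescribeHyperParameterTuningJob
# (parameter names and bounds here are made up):
parameter_ranges = {
    "CategoricalParameterRanges": [{"Name": "init_method", "Values": ["kmeans++", "random"]}],
    "ContinuousParameterRanges": [{"Name": "learning_rate", "MinValue": "0.01", "MaxValue": "0.2"}],
    "IntegerParameterRanges": [{"Name": "epochs", "MinValue": "1", "MaxValue": "2"}],
}
# The helper would map this back to SDK objects, roughly:
# {"init_method": CategoricalParameter(["kmeans++", "random"]),
#  "learning_rate": ContinuousParameter(0.01, 0.2),
#  "epochs": IntegerParameter(1, 2)}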
def _read_hyperparams_ranges_config(hyperparams_config_file_path):
    if not os.path.isfile(hyperparams_config_file_path):
        raise ValueError("The given hyperparams file {} doesn't exist".format(
            hyperparams_config_file_path))
    with open(hyperparams_config_file_path) as _in_file:
        hyperparams_config_dict = json.load(_in_file)

    if 'ParameterRanges' not in hyperparams_config_dict:
        raise ValueError("ParameterRanges not in the hyperparams file")
    parameter_ranges_dict = hyperparams_config_dict['ParameterRanges']
    if not parameter_ranges_dict:
        raise ValueError("Empty ParameterRanges in the hyperparams file")

    if 'ObjectiveMetric' not in hyperparams_config_dict or 'Name' not in hyperparams_config_dict[
            'ObjectiveMetric']:
        raise ValueError("ObjectiveMetric not in the hyperparams file")
    objective_name = hyperparams_config_dict['ObjectiveMetric']['Name']
    objective_type = hyperparams_config_dict['ObjectiveMetric']['Type']

    hyperparameter_ranges = {}
    categorical_param_ranges_dict = parameter_ranges_dict['CategoricalParameterRanges']
    for _dict in categorical_param_ranges_dict:
        hyperparameter_ranges[_dict['Name']] = CategoricalParameter(_dict['Values'])

    integer_param_ranges_dict = parameter_ranges_dict['IntegerParameterRanges']
    for _dict in integer_param_ranges_dict:
        hyperparameter_ranges[_dict['Name']] = IntegerParameter(
            _dict['MinValue'], _dict['MaxValue'])

    continuous_param_ranges_dict = parameter_ranges_dict['ContinuousParameterRanges']
    for _dict in continuous_param_ranges_dict:
        hyperparameter_ranges[_dict['Name']] = ContinuousParameter(
            _dict['MinValue'], _dict['MaxValue'])

    return objective_name, objective_type, hyperparameter_ranges
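# A minimal config file that the reader above would accept, written out for
# illustration (the file name, parameter names, and bounds are hypothetical):
import json

example_config = {
    "ObjectiveMetric": {"Name": "validation:accuracy", "Type": "Maximize"},
    "ParameterRanges": {
        "CategoricalParameterRanges": [{"Name": "optimizer", "Values": ["sgd", "adam"]}],
        "IntegerParameterRanges": [{"Name": "epochs", "MinValue": 1, "MaxValue": 10}],
        "ContinuousParameterRanges": [{"Name": "learning_rate", "MinValue": 0.01, "MaxValue": 0.2}],
    },
}
with open("hyperparams.json", "w") as _out_file:
    json.dump(example_config, _out_file)
# _read_hyperparams_ranges_config("hyperparams.json") would then return
# ("validation:accuracy", "Maximize", {...ranges keyed by parameter name...})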
def test_fit_pca(sagemaker_session, tuner):
    pca = PCA(
        ROLE,
        TRAIN_INSTANCE_COUNT,
        TRAIN_INSTANCE_TYPE,
        NUM_COMPONENTS,
        base_job_name="pca",
        sagemaker_session=sagemaker_session,
    )
    pca.algorithm_mode = "randomized"
    pca.subtract_mean = True
    pca.extra_components = 5
    tuner.estimator = pca

    tags = [{"Name": "some-tag-without-a-value"}]
    tuner.tags = tags

    hyperparameter_ranges = {
        "num_components": IntegerParameter(2, 4),
        "algorithm_mode": CategoricalParameter(["regular", "randomized"]),
    }
    tuner._hyperparameter_ranges = hyperparameter_ranges

    records = RecordSet(s3_data=INPUTS, num_records=1, feature_dim=1)
    tuner.fit(records, mini_batch_size=9999)

    _, _, tune_kwargs = sagemaker_session.tune.mock_calls[0]

    assert len(tune_kwargs["static_hyperparameters"]) == 4
    assert tune_kwargs["static_hyperparameters"]["extra_components"] == "5"
    assert len(tune_kwargs["parameter_ranges"]["IntegerParameterRanges"]) == 1
    assert tune_kwargs["job_name"].startswith("pca")
    assert tune_kwargs["tags"] == tags
    assert tune_kwargs["early_stopping_type"] == "Off"
    assert tuner.estimator.mini_batch_size == 9999
def test_integer_parameter():
    int_param = IntegerParameter(1, 2)
    assert isinstance(int_param, ParameterRange)
    assert int_param.__name__ == "Integer"
def test_model_registration_with_tuning_model(
    sagemaker_session,
    role,
    cpu_instance_type,
    pipeline_name,
    region_name,
):
    base_dir = os.path.join(DATA_DIR, "pytorch_mnist")
    entry_point = os.path.join(base_dir, "mnist.py")
    input_path = sagemaker_session.upload_data(
        path=os.path.join(base_dir, "training"),
        key_prefix="integ-test-data/pytorch_mnist/training",
    )
    inputs = TrainingInput(s3_data=input_path)

    instance_count = ParameterInteger(name="InstanceCount", default_value=1)
    instance_type = ParameterString(name="InstanceType", default_value="ml.m5.xlarge")

    pytorch_estimator = PyTorch(
        entry_point=entry_point,
        role=role,
        framework_version="1.5.0",
        py_version="py3",
        instance_count=instance_count,
        instance_type=instance_type,
        sagemaker_session=sagemaker_session,
        enable_sagemaker_metrics=True,
        max_retry_attempts=3,
    )

    min_batch_size = ParameterString(name="MinBatchSize", default_value="64")
    max_batch_size = ParameterString(name="MaxBatchSize", default_value="128")
    hyperparameter_ranges = {
        "batch-size": IntegerParameter(min_batch_size, max_batch_size),
    }

    tuner = HyperparameterTuner(
        estimator=pytorch_estimator,
        objective_metric_name="test:acc",
        objective_type="Maximize",
        hyperparameter_ranges=hyperparameter_ranges,
        metric_definitions=[{"Name": "test:acc", "Regex": "Overall test accuracy: (.*?);"}],
        max_jobs=2,
        max_parallel_jobs=2,
    )
    step_tune = TuningStep(
        name="my-tuning-step",
        tuner=tuner,
        inputs=inputs,
    )

    step_register_best = RegisterModel(
        name="my-model-regis",
        estimator=pytorch_estimator,
        model_data=step_tune.get_top_model_s3_uri(
            top_k=0,
            s3_bucket=sagemaker_session.default_bucket(),
        ),
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.t2.medium", "ml.m5.large"],
        transform_instances=["ml.m5.large"],
        entry_point=entry_point,
    )

    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[instance_count, instance_type, min_batch_size, max_batch_size],
        steps=[step_tune, step_register_best],
        sagemaker_session=sagemaker_session,
    )

    try:
        response = pipeline.create(role)
        create_arn = response["PipelineArn"]
        assert re.match(
            rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}",
            create_arn,
        )

        for _ in retries(
            max_retry_count=5,
            exception_message_prefix="Waiting for a successful execution of pipeline",
            seconds_to_sleep=10,
        ):
            execution = pipeline.start(parameters={})
            assert re.match(
                rf"arn:aws:sagemaker:{region_name}:\d{{12}}:pipeline/{pipeline_name}/execution/",
                execution.arn,
            )
            try:
                execution.wait(delay=30, max_attempts=60)
            except WaiterError:
                pass

            execution_steps = execution.list_steps()
            assert len(execution_steps) == 3
            for step in execution_steps:
                assert step["StepStatus"] == "Succeeded"
            break
    finally:
        try:
            pipeline.delete()
        except Exception:
            pass
BUCKET_NAME = "Some-Bucket" ROLE = "myrole" IMAGE_NAME = "image" TRAIN_INSTANCE_COUNT = 1 TRAIN_INSTANCE_TYPE = "ml.c4.xlarge" NUM_COMPONENTS = 5 SCRIPT_NAME = "my_script.py" FRAMEWORK_VERSION = "1.0.0" INPUTS = "s3://mybucket/train" OBJECTIVE_METRIC_NAME = "mock_metric" HYPERPARAMETER_RANGES = { "validated": ContinuousParameter(0, 5), "elizabeth": IntegerParameter(0, 5), "blank": CategoricalParameter([0, 5]), } METRIC_DEFINITIONS = "mock_metric_definitions" TUNING_JOB_DETAILS = { "HyperParameterTuningJobConfig": { "ResourceLimits": {"MaxParallelTrainingJobs": 1, "MaxNumberOfTrainingJobs": 1}, "HyperParameterTuningJobObjective": { "MetricName": OBJECTIVE_METRIC_NAME, "Type": "Minimize", }, "Strategy": "Bayesian", "ParameterRanges": { "CategoricalParameterRanges": [], "ContinuousParameterRanges": [],
def test_integer_parameter_scaling_type():
    int_param = IntegerParameter(2, 3, scaling_type='Linear')
    int_range = int_param.as_tuning_range('range')
    assert int_range['ScalingType'] == 'Linear'
REGION = 'us-west-2'
BUCKET_NAME = 'Some-Bucket'
ROLE = 'myrole'
IMAGE_NAME = 'image'
TRAIN_INSTANCE_COUNT = 1
TRAIN_INSTANCE_TYPE = 'ml.c4.xlarge'
NUM_COMPONENTS = 5
SCRIPT_NAME = 'my_script.py'
FRAMEWORK_VERSION = '1.0.0'
INPUTS = 's3://mybucket/train'
OBJECTIVE_METRIC_NAME = 'mock_metric'
HYPERPARAMETER_RANGES = {'validated': ContinuousParameter(0, 5),
                         'elizabeth': IntegerParameter(0, 5),
                         'blank': CategoricalParameter([0, 5])}
METRIC_DEFINITIONS = 'mock_metric_definitions'
TUNING_JOB_DETAILS = {
    'HyperParameterTuningJobConfig': {
        'ResourceLimits': {
            'MaxParallelTrainingJobs': 1,
            'MaxNumberOfTrainingJobs': 1
        },
        'HyperParameterTuningJobObjective': {
            'MetricName': OBJECTIVE_METRIC_NAME,
            'Type': 'Minimize'
        },
        'Strategy': 'Bayesian',
        'ParameterRanges': {
SCRIPT_NAME = "my_script.py" FRAMEWORK_VERSION = "1.0.0" INPUTS = "s3://mybucket/train" STRATEGY = ("Bayesian",) OBJECTIVE_TYPE = "Minimize" EARLY_STOPPING_TYPE = "Auto" OBJECTIVE_METRIC_NAME = "mock_metric" OBJECTIVE_METRIC_NAME_TWO = "mock_metric_two" HYPERPARAMETER_RANGES = { "validated": ContinuousParameter(0, 5), "elizabeth": IntegerParameter(0, 5), "blank": CategoricalParameter([0, 5]), } HYPERPARAMETER_RANGES_TWO = { "num_components": IntegerParameter(2, 4), "algorithm_mode": CategoricalParameter(["regular", "randomized"]), } METRIC_DEFINITIONS = "mock_metric_definitions" MAX_JOBS = 10 MAX_PARALLEL_JOBS = 5 TAGS = [{"key1": "value1"}] LIST_TAGS_RESULT = {"Tags": [{"Key": "key1", "Value": "value1"}]}
def test_integer_parameter_scaling_type():
    int_param = IntegerParameter(2, 3, scaling_type="Linear")
    int_range = int_param.as_tuning_range("range")
    assert int_range["ScalingType"] == "Linear"
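# For reference, SageMaker's documented scaling types for numeric ranges are
# "Auto", "Linear", and "Logarithmic" (plus "ReverseLogarithmic" for continuous
# ranges within [0, 1)). A sketch of a log-scaled integer range, using the same
# API as the tests above (the parameter name is hypothetical):
from sagemaker.tuner import IntegerParameter

log_param = IntegerParameter(1, 10000, scaling_type="Logarithmic")
assert log_param.as_tuning_range("num_steps")["ScalingType"] == "Logarithmic"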
def test_tuning_step_with_placeholders(sfn_client, record_set_for_hyperparameter_tuning,
                                       sagemaker_role_arn, sfn_role_arn):
    kmeans = KMeans(role=sagemaker_role_arn, instance_count=1, instance_type=INSTANCE_TYPE, k=10)

    hyperparameter_ranges = {
        "extra_center_factor": IntegerParameter(4, 10),
        "mini_batch_size": IntegerParameter(10, 100),
        "epochs": IntegerParameter(1, 2),
        "init_method": CategoricalParameter(["kmeans++", "random"]),
    }

    tuner = HyperparameterTuner(
        estimator=kmeans,
        objective_metric_name="test:msd",
        hyperparameter_ranges=hyperparameter_ranges,
        objective_type="Maximize",
        max_jobs=2,
        max_parallel_jobs=1,
    )

    execution_input = ExecutionInput(schema={
        'job_name': str,
        'objective_metric_name': str,
        'objective_type': str,
        'max_jobs': int,
        'max_parallel_jobs': int,
        'early_stopping_type': str,
        'strategy': str,
    })

    parameters = {
        'HyperParameterTuningJobConfig': {
            'HyperParameterTuningJobObjective': {
                'MetricName': execution_input['objective_metric_name'],
                'Type': execution_input['objective_type']
            },
            'ResourceLimits': {
                'MaxNumberOfTrainingJobs': execution_input['max_jobs'],
                'MaxParallelTrainingJobs': execution_input['max_parallel_jobs']
            },
            'Strategy': execution_input['strategy'],
            'TrainingJobEarlyStoppingType': execution_input['early_stopping_type']
        },
        'TrainingJobDefinition': {
            'AlgorithmSpecification': {
                'TrainingInputMode': 'File'
            }
        }
    }

    # Build workflow definition
    tuning_step = TuningStep(
        'Tuning',
        tuner=tuner,
        job_name=execution_input['job_name'],
        data=record_set_for_hyperparameter_tuning,
        parameters=parameters,
    )
    tuning_step.add_retry(SAGEMAKER_RETRY_STRATEGY)
    workflow_graph = Chain([tuning_step])

    with timeout(minutes=DEFAULT_TIMEOUT_MINUTES):
        # Create workflow and check definition
        workflow = create_workflow_and_check_definition(
            workflow_graph=workflow_graph,
            workflow_name=unique_name_from_base("integ-test-tuning-step-workflow"),
            sfn_client=sfn_client,
            sfn_role_arn=sfn_role_arn,
        )

        job_name = generate_job_name()
        inputs = {
            'job_name': job_name,
            'objective_metric_name': 'test:msd',
            'objective_type': 'Minimize',
            'max_jobs': 2,
            'max_parallel_jobs': 2,
            'early_stopping_type': 'Off',
            'strategy': 'Bayesian',
        }

        # Execute workflow
        execution = workflow.execute(inputs=inputs)
        execution_output = execution.get_output(wait=True)

        # Check workflow output
        assert execution_output.get("HyperParameterTuningJobStatus") == "Completed"

        # Cleanup
        state_machine_delete_wait(sfn_client, workflow.state_machine_arn)