def main():

    print('Starting model training.')
    print('Note: if launching for the first time in local mode, container image download might take a few minutes to complete.')

    sklearn = SKLearn(
        entry_point="scikit_boston_housing.py",
        source_dir='code',
        framework_version="0.23-1",
        instance_type="local",
        role=DUMMY_IAM_ROLE
    )

    delta_lake_profile_file = "file://./profile/open-datasets.share"

    sklearn.fit({"train": delta_lake_profile_file})
    print('Completed model training')

    print('Deploying endpoint in local mode')
    predictor = sklearn.deploy(initial_instance_count=1, instance_type='local')

    test_sample = [[0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98]]
    prediction = predictor.predict(test_sample)
    print(f'Prediction: {prediction}')

    print('About to delete the endpoint to stop paying (if in cloud mode).')
    # Predictor.delete_endpoint() takes no endpoint-name argument
    predictor.delete_endpoint()
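# A minimal sketch (not part of the original example) of the script-mode
# contract that scikit_boston_housing.py is assumed to follow: SageMaker
# passes data and output locations via SM_* environment variables, and the
# serving container loads the model through model_fn(). The real script reads
# the data through the Delta Sharing profile on the train channel; a plain
# CSV read stands in here for illustration.
import argparse
import os

import joblib
import pandas as pd
from sklearn.linear_model import LinearRegression

def model_fn(model_dir):
    return joblib.load(os.path.join(model_dir, 'model.joblib'))

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--model-dir', default=os.environ.get('SM_MODEL_DIR'))
    parser.add_argument('--train', default=os.environ.get('SM_CHANNEL_TRAIN'))
    args = parser.parse_args()

    df = pd.read_csv(os.path.join(args.train, 'train.csv'))  # hypothetical file name
    model = LinearRegression().fit(df.iloc[:, :-1], df.iloc[:, -1])
    joblib.dump(model, os.path.join(args.model_dir, 'model.joblib'))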
Example #2
def test_training_with_network_isolation(sagemaker_session,
                                         sklearn_full_version,
                                         cpu_instance_type):
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, "sklearn_mnist", "mnist.py")
        data_path = os.path.join(DATA_DIR, "sklearn_mnist")

        sklearn = SKLearn(
            entry_point=script_path,
            role="SageMakerRole",
            train_instance_type=cpu_instance_type,
            framework_version=sklearn_full_version,
            py_version=PYTHON_VERSION,
            sagemaker_session=sagemaker_session,
            hyperparameters={"epochs": 1},
            enable_network_isolation=True,
        )

        train_input = sklearn.sagemaker_session.upload_data(
            path=os.path.join(data_path, "train"),
            key_prefix="integ-test-data/sklearn_mnist/train")
        test_input = sklearn.sagemaker_session.upload_data(
            path=os.path.join(data_path, "test"),
            key_prefix="integ-test-data/sklearn_mnist/test")
        job_name = unique_name_from_base("test-sklearn-hp")

        sklearn.fit({"train": train_input, "test": test_input},
                    job_name=job_name)
        assert sagemaker_session.sagemaker_client.describe_training_job(
            TrainingJobName=job_name)["EnableNetworkIsolation"]
        return sklearn.latest_training_job.name
Example #3
def main(args):
    print("args.local=", args.local)
    # Initialise SDK
    sklearn_estimator = SKLearn(
        entry_point='src/train_and_deploy.py',
        role=CLOUD_CONFIG['sagemaker_role_id']['value'],
        train_instance_type='local' if args.local else 'ml.m4.xlarge',
        hyperparameters={
            'sagemaker_submit_directory':
            f"s3://{CLOUD_CONFIG['s3bucket']['value']}",
        },
        framework_version='0.23-1',
        metric_definitions=[{
            'Name': 'train:score',
            'Regex': r'train:score=(\S+)'
        }],
    )
    # Run model training job
    sklearn_estimator.fit({
        'train': "file://./data/data.csv" if args.local
        else f"s3://{CLOUD_CONFIG['s3bucket']['value']}/data.csv"
    })

    # Deploy trained model to an endpoint
    sklearn_estimator.deploy(
        instance_type='local' if args.local else 'ml.t2.medium',
        initial_instance_count=1,
        endpoint_name='demo-endpoint',
    )
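# For the metric_definitions regex above to capture anything, the training
# script must print a matching line to stdout; a hedged sketch of the emitting
# side (the model and data are stand-ins for whatever train_and_deploy.py builds):
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

X, y = load_iris(return_X_y=True)
clf = LogisticRegression(max_iter=1000).fit(X, y)
print(f'train:score={clf.score(X, y)}')  # matched by r'train:score=(\S+)'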
Example #4
def test_failed_training_job(
    sagemaker_session,
    sklearn_latest_version,
    sklearn_latest_py_version,
    cpu_instance_type,
):
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, "sklearn_mnist",
                                   "failure_script.py")
        data_path = os.path.join(DATA_DIR, "sklearn_mnist")

        sklearn = SKLearn(
            entry_point=script_path,
            role="SageMakerRole",
            framework_version=sklearn_latest_version,
            py_version=sklearn_latest_py_version,
            instance_count=1,
            instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
        )

        train_input = sklearn.sagemaker_session.upload_data(
            path=os.path.join(data_path, "train"),
            key_prefix="integ-test-data/sklearn_mnist/train")
        job_name = unique_name_from_base("test-sklearn-failed")

        with pytest.raises(ValueError):
            sklearn.fit(train_input, job_name=job_name)
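# A hedged guess at failure_script.py: any unhandled exception or nonzero exit
# code fails the training job, which fit() then surfaces as an exception.
import sys

if __name__ == '__main__':
    sys.exit('intentionally failing this training job')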
Example #5
def test_training_with_additional_hyperparameters(
    sagemaker_session,
    sklearn_latest_version,
    sklearn_latest_py_version,
    cpu_instance_type,
):
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, "sklearn_mnist", "mnist.py")
        data_path = os.path.join(DATA_DIR, "sklearn_mnist")

        sklearn = SKLearn(
            entry_point=script_path,
            role="SageMakerRole",
            instance_type=cpu_instance_type,
            framework_version=sklearn_latest_version,
            py_version=sklearn_latest_py_version,
            sagemaker_session=sagemaker_session,
            hyperparameters={"epochs": 1},
        )

        train_input = sklearn.sagemaker_session.upload_data(
            path=os.path.join(data_path, "train"),
            key_prefix="integ-test-data/sklearn_mnist/train")
        test_input = sklearn.sagemaker_session.upload_data(
            path=os.path.join(data_path, "test"),
            key_prefix="integ-test-data/sklearn_mnist/test")
        job_name = unique_name_from_base("test-sklearn-hp")

        sklearn.fit({"train": train_input, "test": test_input},
                    job_name=job_name)
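# Hyperparameters passed to the estimator arrive inside the container as
# command-line flags; a minimal sketch of how mnist.py presumably reads epochs:
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--epochs', type=int, default=1)  # receives {"epochs": 1} from the estimator
args, _ = parser.parse_known_args()
print(f'training for {args.epochs} epoch(s)')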
Example #6
def main():
    download_training_and_eval_data()

    print('Starting model training.')
    print(
        'Note: if launching for the first time in local mode, container image download might take a few minutes to complete.'
    )

    sklearn = SKLearn(
        entry_point="scikit_learn_iris.py",
        source_dir='code',
        framework_version="0.23-1",
        instance_type="local",
        role=DUMMY_IAM_ROLE,
        hyperparameters={"max_leaf_nodes": 30},
    )

    train_input = "file://./data/iris.csv"

    sklearn.fit({"train": train_input})
    print('Completed model training')

    print('Deploying endpoint in local mode')
    predictor = sklearn.deploy(initial_instance_count=1, instance_type='local')

    do_inference_on_local_endpoint(predictor)

    print('About to delete the endpoint to stop paying (if in cloud mode).')
    predictor.delete_endpoint()
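# do_inference_on_local_endpoint() is defined outside this excerpt; a
# hypothetical version that sends one iris sample to the local endpoint:
def do_inference_on_local_endpoint(predictor):
    test_sample = [[5.1, 3.5, 1.4, 0.2]]  # hypothetical iris measurements
    print(f'Prediction: {predictor.predict(test_sample)}')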
Example #7
def test_create_model_with_optional_params(sagemaker_session):
    container_log_level = '"logging.INFO"'
    source_dir = "s3://mybucket/source"
    enable_cloudwatch_metrics = "true"
    sklearn = SKLearn(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        train_instance_type=INSTANCE_TYPE,
        container_log_level=container_log_level,
        py_version=PYTHON_VERSION,
        base_job_name="job",
        source_dir=source_dir,
        enable_cloudwatch_metrics=enable_cloudwatch_metrics,
    )

    sklearn.fit(inputs="s3://mybucket/train", job_name="new_name")

    new_role = "role"
    model_server_workers = 2
    vpc_config = {"Subnets": ["foo"], "SecurityGroupIds": ["bar"]}
    model = sklearn.create_model(role=new_role,
                                 model_server_workers=model_server_workers,
                                 vpc_config_override=vpc_config)

    assert model.role == new_role
    assert model.model_server_workers == model_server_workers
    assert model.vpc_config == vpc_config
Example #8
def test_create_model_from_estimator(sagemaker_session, sklearn_version):
    container_log_level = '"logging.INFO"'
    source_dir = 's3://mybucket/source'
    sklearn = SKLearn(entry_point=SCRIPT_PATH,
                      role=ROLE,
                      sagemaker_session=sagemaker_session,
                      train_instance_type=INSTANCE_TYPE,
                      framework_version=sklearn_version,
                      container_log_level=container_log_level,
                      py_version=PYTHON_VERSION,
                      base_job_name='job',
                      source_dir=source_dir)

    job_name = 'new_name'
    sklearn.fit(inputs='s3://mybucket/train', job_name=job_name)
    model = sklearn.create_model()

    assert model.sagemaker_session == sagemaker_session
    assert model.framework_version == sklearn_version
    assert model.py_version == sklearn.py_version
    assert model.entry_point == SCRIPT_PATH
    assert model.role == ROLE
    assert model.name == job_name
    assert model.container_log_level == container_log_level
    assert model.source_dir == source_dir
    assert model.vpc_config is None
Example #9
def main():
    download_training_and_eval_data()

    print('Starting model training.')
    print(
        'Note: if launching for the first time in local mode, container image download might take a few minutes to complete.'
    )

    sklearn = SKLearn(
        entry_point="catboost_train_deploy.py",
        source_dir='code',
        framework_version="0.23-1",
        instance_type="local",
        role=DUMMY_IAM_ROLE,
    )

    train_location = 'file://' + local_train
    validation_location = 'file://' + local_validation

    sklearn.fit({'train': train_location, 'validation': validation_location})
    print('Completed model training')

    print('Deploying endpoint in local mode')
    # SDK v2 spelling: CSVSerializer (from sagemaker.serializers) replaces the v1 csv_serializer
    predictor = sklearn.deploy(initial_instance_count=1, instance_type='local',
                               serializer=CSVSerializer())

    with open(local_test, 'r') as f:
        payload = f.read().strip()

    predictions = predictor.predict(payload)
    print('predictions: {}'.format(predictions))

    predictor.delete_endpoint()
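# local_train, local_validation and local_test are defined elsewhere in the
# original script; hypothetical values, for illustration only:
local_train = './data/train/train.csv'
local_validation = './data/validation/validation.csv'
local_test = './data/test/test.csv'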
Example #10
def test_create_model_with_optional_params(sagemaker_session):
    container_log_level = '"logging.INFO"'
    source_dir = 's3://mybucket/source'
    enable_cloudwatch_metrics = 'true'
    sklearn = SKLearn(entry_point=SCRIPT_PATH,
                      role=ROLE,
                      sagemaker_session=sagemaker_session,
                      train_instance_type=INSTANCE_TYPE,
                      container_log_level=container_log_level,
                      py_version=PYTHON_VERSION,
                      base_job_name='job',
                      source_dir=source_dir,
                      enable_cloudwatch_metrics=enable_cloudwatch_metrics)

    sklearn.fit(inputs='s3://mybucket/train', job_name='new_name')

    new_role = 'role'
    model_server_workers = 2
    vpc_config = {'Subnets': ['foo'], 'SecurityGroupIds': ['bar']}
    model = sklearn.create_model(role=new_role,
                                 model_server_workers=model_server_workers,
                                 vpc_config_override=vpc_config)

    assert model.role == new_role
    assert model.model_server_workers == model_server_workers
    assert model.vpc_config == vpc_config
Example #11
def _run_mnist_training_job(sagemaker_session,
                            instance_type,
                            sklearn_full_version,
                            wait=True):
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):

        script_path = os.path.join(DATA_DIR, "sklearn_mnist", "mnist.py")

        data_path = os.path.join(DATA_DIR, "sklearn_mnist")

        sklearn = SKLearn(
            entry_point=script_path,
            role="SageMakerRole",
            framework_version=sklearn_full_version,
            py_version=PYTHON_VERSION,
            train_instance_type=instance_type,
            sagemaker_session=sagemaker_session,
            hyperparameters={"epochs": 1},
        )

        train_input = sklearn.sagemaker_session.upload_data(
            path=os.path.join(data_path, "train"),
            key_prefix="integ-test-data/sklearn_mnist/train")
        test_input = sklearn.sagemaker_session.upload_data(
            path=os.path.join(data_path, "test"),
            key_prefix="integ-test-data/sklearn_mnist/test")
        job_name = unique_name_from_base("test-sklearn-mnist")

        sklearn.fit({"train": train_input, "test": test_input},
                    wait=wait, job_name=job_name)
        return sklearn.latest_training_job.name
Example #12
def _run_mnist_training_job(sagemaker_session,
                            instance_type,
                            sklearn_full_version,
                            wait=True):
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):

        script_path = os.path.join(DATA_DIR, 'sklearn_mnist', 'mnist.py')

        data_path = os.path.join(DATA_DIR, 'sklearn_mnist')

        sklearn = SKLearn(entry_point=script_path,
                          role='SageMakerRole',
                          framework_version=sklearn_full_version,
                          py_version=PYTHON_VERSION,
                          train_instance_type=instance_type,
                          sagemaker_session=sagemaker_session,
                          hyperparameters={'epochs': 1})

        train_input = sklearn.sagemaker_session.upload_data(
            path=os.path.join(data_path, 'train'),
            key_prefix='integ-test-data/sklearn_mnist/train')
        test_input = sklearn.sagemaker_session.upload_data(
            path=os.path.join(data_path, 'test'),
            key_prefix='integ-test-data/sklearn_mnist/test')

        sklearn.fit({'train': train_input, 'test': test_input}, wait=wait)
        return sklearn.latest_training_job.name
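# With wait=False the helper returns before training finishes; a hedged sketch
# of picking the job back up later from its name (attach blocks and streams
# logs until the job completes):
job_name = _run_mnist_training_job(sagemaker_session, 'ml.c4.xlarge',
                                   sklearn_full_version, wait=False)
estimator = SKLearn.attach(job_name, sagemaker_session=sagemaker_session)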
Example #13
def test_sklearn(strftime, sagemaker_session, sklearn_version):
    sklearn = SKLearn(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        train_instance_type=INSTANCE_TYPE,
        py_version=PYTHON_VERSION,
        framework_version=sklearn_version,
    )

    inputs = "s3://mybucket/train"

    sklearn.fit(inputs=inputs, experiment_config=EXPERIMENT_CONFIG)

    sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
    assert sagemaker_call_names == ["train", "logs_for_job"]
    boto_call_names = [
        c[0] for c in sagemaker_session.boto_session.method_calls
    ]
    assert boto_call_names == ["resource"]

    expected_train_args = _create_train_job(sklearn_version)
    expected_train_args["input_config"][0]["DataSource"]["S3DataSource"][
        "S3Uri"] = inputs
    expected_train_args["experiment_config"] = EXPERIMENT_CONFIG

    actual_train_args = sagemaker_session.method_calls[0][2]
    assert actual_train_args == expected_train_args

    model = sklearn.create_model()

    expected_image_base = (
        "246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:{}-cpu-{}"
    )
    assert {
        "Environment": {
            "SAGEMAKER_SUBMIT_DIRECTORY":
            "s3://mybucket/sagemaker-scikit-learn-{}/source/sourcedir.tar.gz".
            format(TIMESTAMP),
            "SAGEMAKER_PROGRAM":
            "dummy_script.py",
            "SAGEMAKER_ENABLE_CLOUDWATCH_METRICS":
            "false",
            "SAGEMAKER_REGION":
            "us-west-2",
            "SAGEMAKER_CONTAINER_LOG_LEVEL":
            "20",
        },
        "Image": expected_image_base.format(sklearn_version, PYTHON_VERSION),
        "ModelDataUrl": "s3://m/m.tar.gz",
    } == model.prepare_container_def(CPU)

    assert "cpu" in model.prepare_container_def(CPU)["Image"]
    predictor = sklearn.deploy(1, CPU)
    assert isinstance(predictor, SKLearnPredictor)
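# EXPERIMENT_CONFIG is a fixture defined outside this excerpt; a plausible
# shape, since experiment_config accepts these keys:
EXPERIMENT_CONFIG = {
    "ExperimentName": "my-experiment",
    "TrialName": "my-trial",
    "TrialComponentDisplayName": "Training",
}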
Example #14
def test_sklearn(strftime, sagemaker_session, sklearn_version):
    sklearn = SKLearn(entry_point=SCRIPT_PATH,
                      role=ROLE,
                      sagemaker_session=sagemaker_session,
                      train_instance_type=INSTANCE_TYPE,
                      py_version=PYTHON_VERSION,
                      framework_version=sklearn_version)

    inputs = 's3://mybucket/train'

    sklearn.fit(inputs=inputs)

    sagemaker_call_names = [c[0] for c in sagemaker_session.method_calls]
    assert sagemaker_call_names == ['train', 'logs_for_job']
    boto_call_names = [
        c[0] for c in sagemaker_session.boto_session.method_calls
    ]
    assert boto_call_names == ['resource']

    expected_train_args = _create_train_job(sklearn_version)
    expected_train_args['input_config'][0]['DataSource']['S3DataSource'][
        'S3Uri'] = inputs

    actual_train_args = sagemaker_session.method_calls[0][2]
    assert actual_train_args == expected_train_args

    model = sklearn.create_model()

    expected_image_base = '246618743249.dkr.ecr.us-west-2.amazonaws.com/sagemaker-scikit-learn:{}-cpu-{}'
    assert {
        'Environment': {
            'SAGEMAKER_SUBMIT_DIRECTORY':
            's3://mybucket/sagemaker-scikit-learn-{}/source/sourcedir.tar.gz'.
            format(TIMESTAMP),
            'SAGEMAKER_PROGRAM':
            'dummy_script.py',
            'SAGEMAKER_ENABLE_CLOUDWATCH_METRICS':
            'false',
            'SAGEMAKER_REGION':
            'us-west-2',
            'SAGEMAKER_CONTAINER_LOG_LEVEL':
            '20'
        },
        'Image': expected_image_base.format(sklearn_version, PYTHON_VERSION),
        'ModelDataUrl': 's3://m/m.tar.gz'
    } == model.prepare_container_def(CPU)

    assert 'cpu' in model.prepare_container_def(CPU)['Image']
    predictor = sklearn.deploy(1, CPU)
    assert isinstance(predictor, SKLearnPredictor)
Example #15
def build_and_train_estimator(data_desc: str,
                              classifier: str,
                              count: int = 1,
                              wait: bool = False,
                              **hyperparams: object) -> Tuple[SKLearn, str]:
    """
    Creates or returns an existing sagemaker training job

    :param data_desc: name of data to use (unique)
    :param classifier: name of sklearn classifier
    :param count: cache buster
    :param wait: waits on job, useful for debugging
    :param hyperparams: hyperparameters for the model
    :return: (estimator, model_name) tuple
    """
    model_name = build_model_name(data_desc, classifier, hyperparams, count)
    print('model_name', model_name)

    # check if model has already been built on this data
    #  if it has check if it's finished and attach
    try:
        import boto3
        from botocore.exceptions import ClientError  # needed for the except clause below
        client = boto3.client('sagemaker')
        response = client.describe_training_job(TrainingJobName=model_name)
        if wait or response['TrainingJobStatus'] in ['Completed', 'Failed']:
            return SKLearn.attach(model_name), model_name
        else:
            raise Warning(f'{model_name} isn\'t finished training yet')
    except ClientError:
        pass

    output_location = f's3://{bucket}/{S3_MODEL_DIR / data_desc}'
    estimator = SKLearn('train_and_deploy.py',
                        source_dir='sagemaker_container',
                        code_location=output_location,
                        output_path=output_location,
                        train_instance_type=TRAIN_INSTANCE,
                        framework_version='0.23-1',
                        role=role,
                        hyperparameters={
                            'classifier': classifier,
                            **hyperparams
                        })

    estimator.fit(f's3://{bucket}/{S3_FEATURE_DIR / data_desc}',
                  wait=wait,
                  job_name=model_name)

    return estimator, model_name
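# Hypothetical call to the helper above; keyword arguments beyond wait flow
# through as model hyperparameters:
estimator, model_name = build_and_train_estimator(
    'census-2020', 'RandomForestClassifier', n_estimators=100, max_depth=8)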
Example #16
def test_failed_training_job(sagemaker_session, sklearn_full_version):
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, 'sklearn_mnist', 'failure_script.py')
        data_path = os.path.join(DATA_DIR, 'sklearn_mnist')

        sklearn = SKLearn(entry_point=script_path, role='SageMakerRole',
                          framework_version=sklearn_full_version, py_version=PYTHON_VERSION,
                          train_instance_count=1, train_instance_type='ml.c4.xlarge',
                          sagemaker_session=sagemaker_session)

        train_input = sklearn.sagemaker_session.upload_data(path=os.path.join(data_path, 'train'),
                                                            key_prefix='integ-test-data/sklearn_mnist/train')
        job_name = unique_name_from_base('test-sklearn-failed')

        with pytest.raises(ValueError):
            sklearn.fit(train_input, job_name=job_name)
Example #17
def test_create_model_with_custom_image(sagemaker_session):
    container_log_level = '"logging.INFO"'
    source_dir = 's3://mybucket/source'
    custom_image = 'ubuntu:latest'
    sklearn = SKLearn(entry_point=SCRIPT_PATH,
                      role=ROLE,
                      sagemaker_session=sagemaker_session,
                      train_instance_type=INSTANCE_TYPE,
                      image_name=custom_image,
                      container_log_level=container_log_level,
                      py_version=PYTHON_VERSION,
                      base_job_name='job',
                      source_dir=source_dir)

    sklearn.fit(inputs='s3://mybucket/train', job_name='new_name')
    model = sklearn.create_model()

    assert model.image == custom_image
Example #18
def test_transform_multiple_values_for_entry_point_issue(sagemaker_session, sklearn_version):
    # https://github.com/aws/sagemaker-python-sdk/issues/974
    sklearn = SKLearn(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        instance_type=INSTANCE_TYPE,
        py_version=PYTHON_VERSION,
        framework_version=sklearn_version,
    )

    inputs = "s3://mybucket/train"

    sklearn.fit(inputs=inputs)

    transformer = sklearn.transformer(instance_count=1, instance_type="ml.m4.xlarge")
    # if we got here, we didn't get a "multiple values" error
    assert transformer is not None
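# A hedged follow-up: the transformer can then run a batch job over an S3
# prefix (the bucket and path below are placeholders):
transformer.transform("s3://mybucket/batch-input", content_type="text/csv")
transformer.wait()
print(transformer.output_path)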
Example #19
def test_create_model_with_optional_params(sagemaker_session):
    container_log_level = '"logging.INFO"'
    source_dir = "s3://mybucket/source"
    enable_cloudwatch_metrics = "true"
    sklearn = SKLearn(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        train_instance_type=INSTANCE_TYPE,
        container_log_level=container_log_level,
        py_version=PYTHON_VERSION,
        base_job_name="job",
        source_dir=source_dir,
        enable_cloudwatch_metrics=enable_cloudwatch_metrics,
    )

    sklearn.fit(inputs="s3://mybucket/train", job_name="new_name")

    custom_image = "ubuntu:latest"
    new_role = "role"
    model_server_workers = 2
    vpc_config = {"Subnets": ["foo"], "SecurityGroupIds": ["bar"]}
    new_source_dir = "s3://myotherbucket/source"
    dependencies = ["/directory/a", "/directory/b"]
    model_name = "model-name"
    model = sklearn.create_model(
        image=custom_image,
        role=new_role,
        model_server_workers=model_server_workers,
        vpc_config_override=vpc_config,
        entry_point=SERVING_SCRIPT_FILE,
        source_dir=new_source_dir,
        dependencies=dependencies,
        name=model_name,
    )

    assert model.image == custom_image
    assert model.role == new_role
    assert model.model_server_workers == model_server_workers
    assert model.vpc_config == vpc_config
    assert model.entry_point == SERVING_SCRIPT_FILE
    assert model.source_dir == new_source_dir
    assert model.dependencies == dependencies
    assert model.name == model_name
Example #20
def test_create_model_with_custom_image(sagemaker_session):
    container_log_level = '"logging.INFO"'
    source_dir = "s3://mybucket/source"
    custom_image = "ubuntu:latest"
    sklearn = SKLearn(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        instance_type=INSTANCE_TYPE,
        image_uri=custom_image,
        container_log_level=container_log_level,
        py_version=PYTHON_VERSION,
        base_job_name="job",
        source_dir=source_dir,
    )

    sklearn.fit(inputs="s3://mybucket/train", job_name="new_name")
    model = sklearn.create_model()

    assert model.image_uri == custom_image
Example #21
def test_training_with_additional_hyperparameters(sagemaker_session, sklearn_full_version):
    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        script_path = os.path.join(DATA_DIR, 'sklearn_mnist', 'mnist.py')
        data_path = os.path.join(DATA_DIR, 'sklearn_mnist')

        sklearn = SKLearn(entry_point=script_path,
                          role='SageMakerRole',
                          train_instance_type="ml.c4.xlarge",
                          framework_version=sklearn_full_version,
                          py_version=PYTHON_VERSION,
                          sagemaker_session=sagemaker_session,
                          hyperparameters={'epochs': 1})

        train_input = sklearn.sagemaker_session.upload_data(path=os.path.join(data_path, 'train'),
                                                            key_prefix='integ-test-data/sklearn_mnist/train')
        test_input = sklearn.sagemaker_session.upload_data(path=os.path.join(data_path, 'test'),
                                                           key_prefix='integ-test-data/sklearn_mnist/test')
        job_name = unique_name_from_base('test-sklearn-hp')

        sklearn.fit({'train': train_input, 'test': test_input}, job_name=job_name)
        return sklearn.latest_training_job.name
Example #22
def test_create_model_from_estimator(name_from_base, sagemaker_session,
                                     sklearn_version):
    container_log_level = '"logging.INFO"'
    source_dir = "s3://mybucket/source"
    base_job_name = "job"

    sklearn = SKLearn(
        entry_point=SCRIPT_PATH,
        role=ROLE,
        sagemaker_session=sagemaker_session,
        instance_type=INSTANCE_TYPE,
        framework_version=sklearn_version,
        container_log_level=container_log_level,
        py_version=PYTHON_VERSION,
        base_job_name=base_job_name,
        source_dir=source_dir,
        enable_network_isolation=True,
    )

    sklearn.fit(inputs="s3://mybucket/train", job_name="new_name")

    model_name = "model_name"
    name_from_base.return_value = model_name
    model = sklearn.create_model()

    assert model.sagemaker_session == sagemaker_session
    assert model.framework_version == sklearn_version
    assert model.py_version == sklearn.py_version
    assert model.entry_point == SCRIPT_PATH
    assert model.role == ROLE
    assert model.name == model_name
    assert model.container_log_level == container_log_level
    assert model.source_dir == source_dir
    assert model.vpc_config is None
    assert model.enable_network_isolation()

    name_from_base.assert_called_with(base_job_name)
Example #23
inters_df.consultant.portfolio = sub_port(
    consultant_processing(list(inters_df.consultant)).portfolio)

inters_df.consultant = cons_predictor(
    consultant_processing(list(inters_df.consultant)))

inters_df = pd.concat(
    [inters_df.drop(["client", "duration", "ongoing", "n_transactions"], axis=1),
     client_processing(list(inters_df.client))],
    axis=1)

inters_df.to_csv(key + "interactions.csv")

upload_file(key + "interactions.csv")

models = {}

for name, df in inters_df.groupby("consultant"):

    model = SKLearn(entry_point="training_scripts.py",
                    train_instance_type="ml.c4.xlarge",
                    role=role,
                    sagemaker_session=sagemaker_session,
                    hyperparameters={"normalize": True})

    # Note: fit() expects an S3 URI or file:// path for each channel rather
    # than a DataFrame, and it returns None, so deploy from the estimator itself.
    model.fit({"train": df})
    models[name] = model.deploy(initial_instance_count=1,
                                instance_type="ml.m4.xlarge")
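# Each loop iteration above leaves a live endpoint running; a hedged cleanup
# sketch for when the per-consultant predictors are no longer needed:
for name, predictor in models.items():
    predictor.delete_endpoint()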
Example #24
from sagemaker.sklearn import SKLearn
# Initialise SDK
sklearn_estimator = SKLearn(
    entry_point='train_and_deploy.py',
    role='arn:aws:iam::<your-sagemaker-role>',
    # train_instance_type='ml.m4.xlarge',
    train_instance_type='local',
    output_path='s3://<path-to-output-dir>/',
    hyperparameters={
        'sagemaker_submit_directory':
        's3://<path-to-sagemaker_submit_directory>'
    },
    code_location='s3://<path-to-code_location>',
    framework_version='0.20.0')
# Run model training job
sklearn_estimator.fit({'train': 's3://<path-to-training-data-dir>'})
# Deploy trained model to an endpoint
predictor = sklearn_estimator.deploy(
    # instance_type='ml.t2.medium',
    instance_type='local',
    initial_instance_count=1,
    endpoint_name='<your-end-point-name>',
)
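# Hedged usage sketch: call the endpoint once, then tear it down when finished.
print(predictor.predict([[5.1, 3.5, 1.4, 0.2]]))  # hypothetical feature vector
predictor.delete_endpoint()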