Example #1
def create_baseline():
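    # NOTE: `my_default_monitor`, `baseline_data_uri`, and `baseline_results_uri`
    # are presumably module-level globals defined earlier in the source file.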
    print(f'Baseline data uri: {baseline_data_uri}')
    print(f'Baseline results uri: {baseline_results_uri}')

    my_default_monitor.suggest_baseline(
        baseline_dataset=baseline_data_uri,
        dataset_format=DatasetFormat.csv(header=False),
        output_s3_uri=baseline_results_uri,
        wait=True
    )
Example #2
def test_default_model_monitor_suggest_baseline(sagemaker_session):
    my_default_monitor = DefaultModelMonitor(
        role=ROLE,
        instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        volume_size_in_gb=VOLUME_SIZE_IN_GB,
        volume_kms_key=VOLUME_KMS_KEY,
        output_kms_key=OUTPUT_KMS_KEY,
        max_runtime_in_seconds=MAX_RUNTIME_IN_SECONDS,
        base_job_name=BASE_JOB_NAME,
        sagemaker_session=sagemaker_session,
        env=ENVIRONMENT,
        tags=TAGS,
        network_config=NETWORK_CONFIG,
    )

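    # Launch the baselining job without blocking or streaming logs.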
    my_default_monitor.suggest_baseline(
        baseline_dataset=BASELINE_DATASET_PATH,
        dataset_format=DatasetFormat.csv(header=False),
        record_preprocessor_script=PREPROCESSOR_PATH,
        post_analytics_processor_script=POSTPROCESSOR_PATH,
        output_s3_uri=OUTPUT_S3_URI,
        wait=False,
        logs=False,
    )

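    # Constructor arguments should be stored verbatim on the monitor instance.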
    assert my_default_monitor.role == ROLE
    assert my_default_monitor.instance_count == INSTANCE_COUNT
    assert my_default_monitor.instance_type == INSTANCE_TYPE
    assert my_default_monitor.volume_size_in_gb == VOLUME_SIZE_IN_GB
    assert my_default_monitor.volume_kms_key == VOLUME_KMS_KEY
    assert my_default_monitor.output_kms_key == OUTPUT_KMS_KEY
    assert my_default_monitor.max_runtime_in_seconds == MAX_RUNTIME_IN_SECONDS
    assert my_default_monitor.base_job_name == BASE_JOB_NAME
    assert my_default_monitor.sagemaker_session == sagemaker_session
    assert my_default_monitor.tags == TAGS
    assert my_default_monitor.network_config == NETWORK_CONFIG
    assert my_default_monitor.image_uri == DEFAULT_IMAGE_URI

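    # suggest_baseline() derives the job name from base_job_name plus a
    # timestamp suffix, so the two names match only as a prefix.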
    assert BASE_JOB_NAME in my_default_monitor.latest_baselining_job_name
    assert my_default_monitor.latest_baselining_job_name != BASE_JOB_NAME

    assert my_default_monitor.env[ENV_KEY_1] == ENV_VALUE_1
Example #3
def main(resources, train_data):

    # configuration
    AWS_DEFAULT_REGION = os.getenv('AWS_DEFAULT_REGION', 'eu-west-1')
    AWS_PROFILE = os.getenv('AWS_PROFILE', 'default')
    AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID', None)
    AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY', None)
    b3_session, sm_client, sm_runtime, sm_session = get_sm_session(
        region=AWS_DEFAULT_REGION,
        profile_name=AWS_PROFILE,
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY)
    BASE_JOB_PREFIX = os.getenv('BASE_JOB_PREFIX', 'sts')
    ROLE_ARN = os.getenv('AWS_ROLE', sagemaker.get_execution_role())
    outputs = resources

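    # default_bucket() resolves the session's default S3 bucket
    # (sagemaker-{region}-{account_id}), creating it if needed.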
    bucket = sm_session.default_bucket()
    prefix = "{}/{}".format(BASE_JOB_PREFIX, resources['endpoint']['name'])
    if 'monitor' not in resources:
        raise ValueError("Monitoring not enabled")

    if 's3_capture_upload_path' not in resources['monitor']:
        raise ValueError("Data capture upload path not configured")

    baseline_prefix = prefix + "/baselining"
    baseline_data_prefix = baseline_prefix + "/data"
    baseline_results_prefix = baseline_prefix + "/results"
    baseline_data_uri = "s3://{}/{}".format(bucket, baseline_data_prefix)
    baseline_results_uri = "s3://{}/{}".format(bucket, baseline_results_prefix)
    outputs['monitor'].update({
        'baseline': {
            'data_uri': baseline_data_uri,
            'results_uri': baseline_results_uri
        }
    })
    _l.info("Baseline data uri: {}".format(baseline_data_uri))
    _l.info("Baseline results uri: {}".format(baseline_results_uri))

    ground_truth_upload_path = f"s3://{bucket}/{prefix}/ground_truth_data"
    _l.info(f"Ground truth uri: {ground_truth_upload_path}")
    outputs['monitor'].update({'ground truth uri': ground_truth_upload_path})

    # Create a baselining job with training dataset
    _l.info("Executing a baselining job with training dataset")
    _l.info(f"baseline_data_uri: {train_data['baseline']['validate']}")
    my_monitor = ModelQualityMonitor(
        role=ROLE_ARN,
        sagemaker_session=sm_session,
        max_runtime_in_seconds=1800  # 30 minutes
    )
    my_monitor.suggest_baseline(
        baseline_dataset=train_data['baseline']['validate'] + "/baseline.csv",
        dataset_format=DatasetFormat.csv(header=True),
        problem_type="Regression",
        inference_attribute="prediction",
        ground_truth_attribute="label",
        output_s3_uri=baseline_results_uri,
        wait=True)
    baseline_job = my_monitor.latest_baselining_job
    _l.info("suggested baseline contrains")
    _l.info(
        pprint.pformat(baseline_job.suggested_constraints().
                       body_dict["regression_constraints"]))
    _l.info("suggested baseline statistics")
    _l.info(
        pprint.pformat(baseline_job.baseline_statistics().
                       body_dict["regression_metrics"]))

    monitor_schedule_name = (
        f"{BASE_JOB_PREFIX}-mq-sch-{datetime.datetime.utcnow():%Y-%m-%d-%H%M}")
    _l.info(f"Monitoring schedule name: {monitor_schedule_name}")
    outputs['monitor'].update({'schedule_name': monitor_schedule_name})
    endpointInput = EndpointInput(
        resources['endpoint']['name'],
        "/opt/ml/processing/input_data",
        inference_attribute='0'  # REVIEW: index of the prediction column in the captured CSV
    )

    my_monitor.create_monitoring_schedule(
        monitor_schedule_name=monitor_schedule_name,
        endpoint_input=endpointInput,
        output_s3_uri=baseline_results_uri,
        problem_type="Regression",
        ground_truth_input=ground_truth_upload_path,
        constraints=baseline_job.suggested_constraints(),
        # run the scheduler hourly
        schedule_cron_expression=CronExpressionGenerator.hourly(),
        enable_cloudwatch_metrics=True,
    )
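    # Poll until the schedule leaves the Pending state.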
    mq_schedule_details = my_monitor.describe_schedule()
    while mq_schedule_details['MonitoringScheduleStatus'] == 'Pending':
        _l.info(f'Waiting for {monitor_schedule_name}')
        time.sleep(3)
        mq_schedule_details = my_monitor.describe_schedule()
    _l.debug(
        f"Model Quality Monitor - schedule details: {pprint.pformat(mq_schedule_details)}"
    )
    _l.info(
        f"Model Quality Monitor - schedule status: {mq_schedule_details['MonitoringScheduleStatus']}"
    )

    # save outputs to a file
    with open('deploymodel_out.json', 'w') as f:
        json.dump(outputs, f, default=json_default)
Example #4
def create_baseline_step(input_data, execution_input, region, role):
    # Define the environment
    dataset_format = DatasetFormat.csv()
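    # DatasetFormat.csv() returns a plain dict describing the format, so it
    # can be JSON-encoded into the container environment below.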
    env = {
        "dataset_format": json.dumps(dataset_format),
        "dataset_source": "/opt/ml/processing/input/baseline_dataset_input",
        "output_path": "/opt/ml/processing/output",
        "publish_cloudwatch_metrics":
        "Disabled",  # Have to be disabled from processing job?
    }

    # Define the inputs and outputs
    inputs = [
        ProcessingInput(
            source=input_data["BaselineUri"],
            destination="/opt/ml/processing/input/baseline_dataset_input",
            input_name="baseline_dataset_input",
        ),
    ]
    outputs = [
        ProcessingOutput(
            source="/opt/ml/processing/output",
            destination=execution_input["BaselineOutputUri"],
            output_name="monitoring_output",
        ),
    ]

    # Get the default model monitor container
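    # (retrieve() is presumably sagemaker.image_uris.retrieve; the
    # model-monitor image is requested at its "latest" version)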
    model_monitor_container_uri = retrieve(region=region,
                                           framework="model-monitor",
                                           version="latest")

    # Create the processor
    monitor_analyzer = Processor(
        image_uri=model_monitor_container_uri,
        role=role,
        instance_count=1,
        instance_type="ml.m5.xlarge",
        max_runtime_in_seconds=1800,
        env=env,
    )

    # Create the processing step
    baseline_step = steps.sagemaker.ProcessingStep(
        "Baseline Job",
        processor=monitor_analyzer,
        job_name=execution_input["BaselineJobName"],
        inputs=inputs,
        outputs=outputs,
        experiment_config={
            "ExperimentName":
            execution_input["ExperimentName"],  # '$.ExperimentName',
            "TrialName": execution_input["TrialName"],
            "TrialComponentDisplayName": "Baseline",
        },
        tags={
            "GitBranch": execution_input["GitBranch"],
            "GitCommitHash": execution_input["GitCommitHash"],
            "DataVersionId": execution_input["DataVersionId"],
        },
    )

    # Add the catch
    baseline_step.add_catch(
        steps.states.Catch(
            error_equals=["States.TaskFailed"],
            next_step=steps.states.Fail(
                "Baseline failed", cause="SageMakerBaselineJobFailed"),
        ))
    return baseline_step
Example #5
baseline_data_path = 's3://{0}/{1}/monitoring/baselining/data'.format(bucket_name, prefix)
baseline_results_path = 's3://{0}/{1}/monitoring/baselining/results'.format(bucket_name, prefix)

print(baseline_data_path)
print(baseline_results_path)

my_default_monitor = DefaultModelMonitor(
    role=execution_role,
    instance_count=1,
    instance_type='ml.c5.4xlarge',
    volume_size_in_gb=20,
    max_runtime_in_seconds=3600,
)

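# Processing job names must be unique per account and region, so `job_name`
# (defined earlier, not shown) should carry a timestamp or similar suffix.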
my_default_monitor.suggest_baseline(
    job_name=job_name, 
    baseline_dataset=baseline_data_path,
    dataset_format=DatasetFormat.csv(header=True),
    output_s3_uri=baseline_results_path,
    logs=False, # Disable to avoid noisy logging, only meaningful when wait=True
    wait=True
)

# save environment variables

with open('./cloud_formation/suggest_baseline.vars', 'w') as f:
    f.write("export PROCESSING_JOB_NAME={0}\n".format(job_name))

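# `start` is assumed to have been set with time.time() earlier in the notebook.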
end = time.time()
print('Monitor baseline completed in: {:.1f} seconds'.format(end - start))
Example #6
print('Baseline data is at {}'.format(baseline_data_uri))

my_default_monitor = DefaultModelMonitor(
    role=get_execution_role(sagemaker_session=sagemaker_session),
    sagemaker_session=sagemaker_session,
    instance_count=2,
    instance_type='ml.m5.4xlarge',
    volume_size_in_gb=60,
    max_runtime_in_seconds=1800,
)


my_default_monitor.suggest_baseline(
    baseline_dataset=baseline_data_uri,
    dataset_format=DatasetFormat.csv(header=False),
    output_s3_uri=baseline_results_uri,
    wait=True
)

print('Model data baseline suggested at {}'.format(baseline_results_uri))

import datetime
from time import gmtime, strftime

mon_schedule_name = '{}-{}'.format(mon_schedule_name_base, datetime.datetime.now().strftime("%Y-%m-%d-%H%M%S"))

s3_report_path = f's3://{bucket}/{prefix}/monitor/report'

# Set up a daily cron schedule
print(f"Attempting to create monitoring schedule as {mon_schedule_name} \n")