def monitor_frequency(interval=frequency, hour_interval=None, starting_hour=None):
    """Build a SageMaker cron expression for data-drift monitoring.

    Args:
        interval: One of "daily", "hourly", or "others". Defaults to the
            module-level ``frequency`` value.
        hour_interval: Every-N-hours spacing; used only when interval == "others".
        starting_hour: First hour of the day; used only when interval == "others".

    Returns:
        str: The generated cron expression.

    Raises:
        ValueError: If ``interval`` is not one of the supported values.
    """
    if interval == "daily":
        return CronExpressionGenerator.daily()
    if interval == "hourly":
        return CronExpressionGenerator.hourly()
    if interval == "others":
        return CronExpressionGenerator.daily_every_x_hours(
            hour_interval, starting_hour
        )
    # Fix: the original fell through and raised UnboundLocalError on any
    # unrecognized interval; fail loudly with a clear message instead.
    raise ValueError(f"Unsupported monitoring interval: {interval!r}")
from tests.integ.retry import retries

# ---- Constants for the model-monitor integration tests ----

# Local fixture data for the XGBoost model under test.
XGBOOST_DATA_PATH = os.path.join(DATA_DIR, "xgboost_model")
# Container path where captured endpoint data is mounted for the processing job.
ENDPOINT_INPUT_LOCAL_PATH = "/opt/ml/processing/input/endpoint"
PROBABILITY_THRESHOLD = 0.5005
PROBLEM_TYPE = "Regression"
INFERENCE_ATTRIBUTE = "0"
# Column names for the label, the model prediction, and the feature columns.
HEADER_OF_LABEL = "Label"
HEADER_OF_PREDICTED_LABEL = "Prediction"
HEADERS_OF_FEATURES = ["F1", "F2", "F3", "F4", "F5", "F6", "F7"]
ALL_HEADERS = [*HEADERS_OF_FEATURES, HEADER_OF_LABEL, HEADER_OF_PREDICTED_LABEL]
# Initial schedule expression; UPDATED_CRON is the daily expression the
# schedule is switched to later in the test.
CRON = "cron(0 * * * ? *)"
UPDATED_CRON = CronExpressionGenerator.daily()
MAX_RUNTIME_IN_SECONDS = 30 * 60          # 30 minutes
UPDATED_MAX_RUNTIME_IN_SECONDS = 25 * 60  # 25 minutes
ROLE = "SageMakerRole"
INSTANCE_COUNT = 1
INSTANCE_TYPE = "ml.c5.xlarge"
VOLUME_SIZE_IN_GB = 100
# ISO-8601 duration offsets bounding the monitoring analysis window.
START_TIME_OFFSET = "-PT1H"
END_TIME_OFFSET = "-PT0H"
TEST_TAGS = [{"Key": "integration", "Value": "test"}]
# TODO: Remove this workaround once the API service fix is deployed to Prod
TEST_ENV = {"problem_type": PROBLEM_TYPE}


@pytest.yield_fixture(scope="module")
def endpoint_name(sagemaker_session):
    # NOTE(review): fixture body continues beyond this chunk.
def main(resources, train_data):
    """Create a ModelQualityMonitor baseline and an hourly monitoring schedule.

    Suggests a model-quality baseline from the validation dataset, then
    creates a monitoring schedule for the already-deployed endpoint and
    waits until the schedule leaves the 'Pending' state. Results and
    schedule metadata are written into ``resources`` (aliased as
    ``outputs``) and dumped to ``deploymodel_out.json``.

    Args:
        resources: Deployment metadata dict; must contain
            ``resources['endpoint']['name']`` and a ``'monitor'`` section
            with ``'s3_capture_upload_path'`` (data capture enabled).
        train_data: Dict with ``train_data['baseline']['validate']``, the
            S3 prefix of the validation dataset used for baselining.

    Raises:
        ValueError: If monitoring/data capture was not enabled beforehand.
    """
    # configuration
    AWS_DEFAULT_REGION = os.getenv('AWS_DEFAULT_REGION', 'eu-west-1')
    AWS_PROFILE = os.getenv('AWS_PROFILE', 'default')
    AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID', None)
    AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY', None)
    b3_session, sm_client, sm_runtime, sm_session = get_sm_session(
        region=AWS_DEFAULT_REGION,
        profile_name=AWS_PROFILE,
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY)
    BASE_JOB_PREFIX = os.getenv('BASE_JOB_PREFIX', 'sts')
    # Fix: os.getenv evaluates its default eagerly, so the original
    # os.getenv('AWS_ROLE', sagemaker.get_execution_role()) called
    # get_execution_role() even when AWS_ROLE was set — and that call fails
    # outside a SageMaker context. Resolve the fallback lazily instead.
    ROLE_ARN = os.getenv('AWS_ROLE') or sagemaker.get_execution_role()

    outputs = resources
    bucket = sm_session.default_bucket()
    prefix = "{}/{}".format(BASE_JOB_PREFIX, resources['endpoint']['name'])

    # Data capture must already be enabled on the endpoint.
    if 'monitor' not in resources:
        raise ValueError("Monitoring not enabled")
    if 's3_capture_upload_path' not in resources['monitor']:
        raise ValueError("Monitoring not enabled")

    # S3 layout for baselining inputs and results.
    baseline_prefix = prefix + "/baselining"
    baseline_data_prefix = baseline_prefix + "/data"
    baseline_results_prefix = baseline_prefix + "/results"
    baseline_data_uri = "s3://{}/{}".format(bucket, baseline_data_prefix)
    baseline_results_uri = "s3://{}/{}".format(bucket, baseline_results_prefix)
    outputs['monitor'].update({
        'baseline': {
            'data_uri': baseline_data_uri,
            'results_uri': baseline_results_uri
        }
    })
    _l.info("Baseline data uri: {}".format(baseline_data_uri))
    _l.info("Baseline results uri: {}".format(baseline_results_uri))

    ground_truth_upload_path = f"s3://{bucket}/{prefix}/ground_truth_data"
    _l.info(f"Ground truth uri: {ground_truth_upload_path}")
    outputs['monitor'].update({'ground truth uri': ground_truth_upload_path})

    # Create a baselining job with training dataset
    _l.info("Executing a baselining job with training dataset")
    _l.info(f"baseline_data_uri: {train_data['baseline']['validate']}")
    my_monitor = ModelQualityMonitor(
        role=ROLE_ARN,
        sagemaker_session=sm_session,
        max_runtime_in_seconds=1800  # 30 minutes
    )
    # wait=True blocks until the baselining processing job finishes.
    my_monitor.suggest_baseline(
        baseline_dataset=train_data['baseline']['validate'] + "/baseline.csv",
        dataset_format=DatasetFormat.csv(header=True),
        problem_type="Regression",
        inference_attribute="prediction",
        ground_truth_attribute="label",
        output_s3_uri=baseline_results_uri,
        wait=True)

    baseline_job = my_monitor.latest_baselining_job
    _l.info("suggested baseline contrains")
    _l.info(
        pprint.pformat(baseline_job.suggested_constraints().
                       body_dict["regression_constraints"]))
    _l.info("suggested baseline statistics")
    _l.info(
        pprint.pformat(baseline_job.baseline_statistics().
                       body_dict["regression_metrics"]))

    # Timestamped schedule name so repeated runs do not collide.
    monitor_schedule_name = (
        f"{BASE_JOB_PREFIX}-mq-sch-{datetime.datetime.utcnow():%Y-%m-%d-%H%M}")
    _l.info(f"Monitoring schedule name: {monitor_schedule_name}")
    outputs['monitor'].update({'schedule_name': monitor_schedule_name})

    endpointInput = EndpointInput(
        resources['endpoint']['name'],
        "/opt/ml/processing/input_data",
        inference_attribute='0'  # REVIEW:
    )
    my_monitor.create_monitoring_schedule(
        monitor_schedule_name=monitor_schedule_name,
        endpoint_input=endpointInput,
        output_s3_uri=baseline_results_uri,
        problem_type="Regression",
        ground_truth_input=ground_truth_upload_path,
        constraints=baseline_job.suggested_constraints(),
        # run the scheduler hourly
        schedule_cron_expression=CronExpressionGenerator.hourly(),
        enable_cloudwatch_metrics=True,
    )

    # Poll until the schedule leaves the 'Pending' state.
    mq_schedule_details = my_monitor.describe_schedule()
    while mq_schedule_details['MonitoringScheduleStatus'] == 'Pending':
        _l.info(f'Waiting for {monitor_schedule_name}')
        time.sleep(3)
        mq_schedule_details = my_monitor.describe_schedule()
    _l.debug(
        f"Model Quality Monitor - schedule details: {pprint.pformat(mq_schedule_details)}"
    )
    _l.info(
        f"Model Quality Monitor - schedule status: {mq_schedule_details['MonitoringScheduleStatus']}"
    )

    # save outputs to a file
    with open('deploymodel_out.json', 'w') as f:
        json.dump(outputs, f, default=json_default)
print('Model data baseline suggested at {}'.format(baseline_results_uri))

import datetime as datetime
from time import gmtime, strftime

# Timestamp suffix keeps the schedule name unique across runs.
mon_schedule_name = '{}-{}'.format(
    mon_schedule_name_base,
    datetime.datetime.now().strftime("%Y-%m-%d-%H%M%S"))
s3_report_path = f's3://{bucket}/{prefix}/monitor/report'

# Setup daily Cron job schedule
print(f"Attempting to create monitoring schedule as {mon_schedule_name} \n")
try:
    my_default_monitor.create_monitoring_schedule(
        monitor_schedule_name=mon_schedule_name,
        endpoint_input=endpoint_name,
        output_s3_uri=s3_report_path,
        statistics=my_default_monitor.baseline_statistics(),
        constraints=my_default_monitor.suggested_constraints(),
        schedule_cron_expression=CronExpressionGenerator.daily(),
        enable_cloudwatch_metrics=True,
    )
    desc_schedule_result = my_default_monitor.describe_schedule()
    print('Created monitoring schedule. Schedule status: {}'.format(
        desc_schedule_result['MonitoringScheduleStatus']))
except Exception:
    # Fix: the original bare `except:` also swallowed SystemExit and
    # KeyboardInterrupt. Creation typically fails because a schedule
    # already exists for this endpoint, so fall back to updating it.
    my_default_monitor.update_monitoring_schedule(
        endpoint_input=endpoint_name,
        schedule_cron_expression=CronExpressionGenerator.daily()
    )
    print("Monitoring schedule already exists for endpoint. Updating schedule.")
def test_cron_expression_generator_daily_every_x_hours_returns_expected_value_when_called_with_customizations():
    """Custom hour_interval and starting_hour appear in the cron expression."""
    expected = "cron(0 8/7 ? * * *)"
    actual = CronExpressionGenerator.daily_every_x_hours(hour_interval=7, starting_hour=8)
    assert actual == expected
def test_cron_expression_generator_daily_every_x_hours_returns_expected_value_when_called_without_customizations():
    """Without a starting_hour, the expression starts from hour 0."""
    generated = CronExpressionGenerator.daily_every_x_hours(hour_interval=6)
    assert generated == "cron(0 0/6 ? * * *)"
def test_cron_expression_generator_daily_returns_expected_value_when_called_with_parameters():
    """An explicit hour is embedded in the daily cron expression."""
    cron = CronExpressionGenerator.daily(hour=5)
    assert cron == "cron(0 5 ? * * *)"
def test_cron_expression_generator_hourly_returns_expected_value():
    """The hourly generator fires at minute 0 of every hour."""
    result = CronExpressionGenerator.hourly()
    assert result == "cron(0 * ? * * *)"
max_runtime_in_seconds=3600,  # cap the monitor's processing jobs at 1 hour
)  # NOTE(review): closes a constructor call that begins before this chunk


def create_baseline():
    """Run a baselining job that suggests statistics/constraints from the baseline dataset."""
    print(f'Baseline data uri: {baseline_data_uri}')
    print(f'Baseline results uri: {baseline_results_uri}')
    # wait=True blocks until the baselining processing job completes.
    my_default_monitor.suggest_baseline(
        baseline_dataset=baseline_data_uri,
        dataset_format=DatasetFormat.csv(header=False),  # dataset has no header row
        output_s3_uri=baseline_results_uri,
        wait=True
    )


# Timestamp suffix keeps the schedule name unique across runs.
mon_schedule_name = 'xgb-boston-pred-model-monitor-schedule-' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
my_default_monitor.create_monitoring_schedule(
    monitor_schedule_name=mon_schedule_name,
    endpoint_input=endpoint_name,
    # Monitor reports go to a sibling prefix of the baseline results.
    output_s3_uri=baseline_results_uri.replace('baseline_results', 'monitor_reports'),
    statistics=baseline_results_uri + '/statistics.json',
    constraints=baseline_results_uri + '/constraints.json',
    schedule_cron_expression=CronExpressionGenerator.hourly(),
    enable_cloudwatch_metrics=True,
)