Example #1
# Imports assumed by this snippet (and by Example #2): Constraints lives in the SageMaker
# Python SDK's model_monitor module, and tests.integ.DATA_DIR points at the SDK's
# integration-test data directory.
import os

import tests.integ
from sagemaker.model_monitor import Constraints


def test_constraints_object_creation_from_file_path_without_customizations():
    constraints = Constraints.from_file_path(
        constraints_file_path=os.path.join(tests.integ.DATA_DIR, "monitor/constraints.json")
    )

    assert constraints.file_s3_uri.startswith("s3://")
    assert constraints.file_s3_uri.endswith("constraints.json")

    assert constraints.body_dict["monitoring_config"]["evaluate_constraints"] == "Enabled"
Example #2
def test_constraints_object_creation_from_file_path_with_customizations(
    sagemaker_session, monitoring_files_kms_key
):
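    # sagemaker_session and monitoring_files_kms_key are pytest fixtures supplied by the
    # SageMaker Python SDK integration test suite.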
    constraints = Constraints.from_file_path(
        constraints_file_path=os.path.join(tests.integ.DATA_DIR, "monitor/constraints.json"),
        kms_key=monitoring_files_kms_key,
        sagemaker_session=sagemaker_session,
    )

    assert constraints.file_s3_uri.startswith("s3://")
    assert constraints.file_s3_uri.endswith("constraints.json")

    assert constraints.body_dict["monitoring_config"]["evaluate_constraints"] == "Enabled"

    # set_monitoring toggles the top-level evaluate_constraints flag; when a feature name is
    # passed, the override is applied under that feature's monitoring_config_overrides instead.
    constraints.set_monitoring(False)

    assert constraints.body_dict["monitoring_config"]["evaluate_constraints"] == "Disabled"

    constraints.set_monitoring(True, "message")

    assert (
        constraints.body_dict["features"][0]["string_constraints"]["monitoring_config_overrides"][
            "evaluate_constraints"
        ]
        == "Enabled"
    )

    constraints.set_monitoring(True, "second_message")

    assert (
        constraints.body_dict["features"][0]["string_constraints"]["monitoring_config_overrides"][
            "evaluate_constraints"
        ]
        == "Enabled"
    )

    constraints.save()

    new_constraints = Constraints.from_s3_uri(constraints.file_s3_uri)

    assert new_constraints.body_dict["monitoring_config"]["evaluate_constraints"] == "Disabled"
Example #3
# Shared imports assumed by Examples #3-#5 (pipeline-step integration tests from the SageMaker
# Python SDK test suite). _CHECK_FAIL_ERROR_MSG is a module-level constant in the source test
# file holding the expected check-failure message; its value is not reproduced here.
import json
import logging
import os

from botocore.exceptions import WaiterError

from sagemaker.model_monitor import Constraints
from sagemaker.s3 import S3Downloader
from sagemaker.workflow.clarify_check_step import ClarifyCheckStep
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.quality_check_step import QualityCheckStep
from tests.integ import DATA_DIR
from tests.integ.retry import retries


def test_one_step_data_bias_pipeline_constraint_violation(
    sagemaker_session,
    role,
    pipeline_name,
    check_job_config,
    data_bias_check_config,
    supplied_baseline_constraints_uri_param,
):
    data_bias_supplied_baseline_constraints = Constraints.from_file_path(
        constraints_file_path=os.path.join(
            DATA_DIR, "pipeline/clarify_check_step/data_bias/bad_cases/analysis.json"
        ),
        sagemaker_session=sagemaker_session,
    ).file_s3_uri
    data_bias_check_step = ClarifyCheckStep(
        name="DataBiasCheckStep",
        clarify_check_config=data_bias_check_config,
        check_job_config=check_job_config,
        skip_check=False,
        register_new_baseline=False,
        supplied_baseline_constraints=supplied_baseline_constraints_uri_param,
    )
    pipeline = Pipeline(
        name=pipeline_name,
        steps=[data_bias_check_step],
        parameters=[supplied_baseline_constraints_uri_param],
        sagemaker_session=sagemaker_session,
    )

    try:
        response = pipeline.create(role)
        create_arn = response["PipelineArn"]
        monitoring_analysis_cfg_json = S3Downloader.read_file(
            data_bias_check_config.monitoring_analysis_config_uri,
            sagemaker_session,
        )
        monitoring_analysis_cfg = json.loads(monitoring_analysis_cfg_json)

        assert monitoring_analysis_cfg is not None and len(monitoring_analysis_cfg) > 0

        # Retry the full execution a few times so transient pipeline failures do not fail the test.
        for _ in retries(
            max_retry_count=5,
            exception_message_prefix="Waiting for a successful execution of pipeline",
            seconds_to_sleep=10,
        ):
            execution = pipeline.start(
                parameters={
                    "SuppliedBaselineConstraintsUri": data_bias_supplied_baseline_constraints,
                }
            )
            response = execution.describe()

            assert response["PipelineArn"] == create_arn

            try:
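                # The bad baseline is expected to make the check step fail, so a WaiterError
                # (failed or timed-out execution) is tolerated; the step status is asserted below.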
                execution.wait(delay=30, max_attempts=60)
            except WaiterError:
                pass
            execution_steps = execution.list_steps()

            assert len(execution_steps) == 1
            failure_reason = execution_steps[0].get("FailureReason", "")
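            # Only a failure caused by the expected constraint-violation message counts; any other
            # failure is treated as transient and the execution is retried.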
            if _CHECK_FAIL_ERROR_MSG not in failure_reason:
                logging.error(
                    f"Pipeline execution failed with error: {failure_reason}. Retrying.."
                )
                continue
            assert execution_steps[0]["StepName"] == "DataBiasCheckStep"
            assert execution_steps[0]["StepStatus"] == "Failed"
            break
    finally:
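        # Best-effort cleanup: ignore errors if the pipeline was never created or is already gone.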
        try:
            pipeline.delete()
        except Exception:
            pass
Example #4
def test_one_step_model_quality_pipeline_constraint_violation(
    sagemaker_session,
    role,
    pipeline_name,
    check_job_config,
    supplied_baseline_statistics_uri_param,
    supplied_baseline_constraints_uri_param,
    model_quality_check_config,
    model_quality_supplied_baseline_statistics,
):
    model_quality_supplied_baseline_constraints = Constraints.from_file_path(
        constraints_file_path=os.path.join(
            DATA_DIR, "pipeline/quality_check_step/model_quality/bad_cases/constraints.json"
        ),
        sagemaker_session=sagemaker_session,
    ).file_s3_uri
    model_quality_check_step = QualityCheckStep(
        name="ModelQualityCheckStep",
        register_new_baseline=False,
        skip_check=False,
        quality_check_config=model_quality_check_config,
        check_job_config=check_job_config,
        supplied_baseline_statistics=supplied_baseline_statistics_uri_param,
        supplied_baseline_constraints=supplied_baseline_constraints_uri_param,
    )
    pipeline = Pipeline(
        name=pipeline_name,
        steps=[model_quality_check_step],
        parameters=[
            supplied_baseline_statistics_uri_param,
            supplied_baseline_constraints_uri_param,
        ],
        sagemaker_session=sagemaker_session,
    )

    try:
        response = pipeline.create(role)
        create_arn = response["PipelineArn"]

        for _ in retries(
            max_retry_count=5,
            exception_message_prefix="Waiting for a successful execution of pipeline",
            seconds_to_sleep=10,
        ):
            execution = pipeline.start(
                parameters={
                    "SuppliedBaselineStatisticsUri": model_quality_supplied_baseline_statistics,
                    "SuppliedBaselineConstraintsUri": model_quality_supplied_baseline_constraints,
                }
            )
            response = execution.describe()

            assert response["PipelineArn"] == create_arn

            try:
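                # As in Example #3, the constraint violation is expected to fail the execution,
                # so a WaiterError is tolerated here.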
                execution.wait(delay=30, max_attempts=60)
            except WaiterError:
                pass
            execution_steps = execution.list_steps()

            assert len(execution_steps) == 1
            failure_reason = execution_steps[0].get("FailureReason", "")
            if _CHECK_FAIL_ERROR_MSG not in failure_reason:
                logging.error(f"Pipeline execution failed with error: {failure_reason}. Retrying..")
                continue
            assert execution_steps[0]["StepName"] == "ModelQualityCheckStep"
            assert execution_steps[0]["StepStatus"] == "Failed"
            break
    finally:
        try:
            pipeline.delete()
        except Exception:
            pass
Example #5
def test_one_step_data_quality_pipeline_happycase(
    sagemaker_session,
    role,
    pipeline_name,
    check_job_config,
    supplied_baseline_statistics_uri_param,
    supplied_baseline_constraints_uri_param,
    data_quality_check_config,
    data_quality_supplied_baseline_statistics,
):
    data_quality_supplied_baseline_constraints = Constraints.from_file_path(
        constraints_file_path=os.path.join(
            DATA_DIR, "pipeline/quality_check_step/data_quality/good_cases/constraints.json"
        ),
        sagemaker_session=sagemaker_session,
    ).file_s3_uri
    data_quality_check_step = QualityCheckStep(
        name="DataQualityCheckStep",
        skip_check=False,
        register_new_baseline=False,
        quality_check_config=data_quality_check_config,
        check_job_config=check_job_config,
        supplied_baseline_statistics=supplied_baseline_statistics_uri_param,
        supplied_baseline_constraints=supplied_baseline_constraints_uri_param,
    )
    pipeline = Pipeline(
        name=pipeline_name,
        steps=[data_quality_check_step],
        parameters=[
            supplied_baseline_statistics_uri_param,
            supplied_baseline_constraints_uri_param,
        ],
        sagemaker_session=sagemaker_session,
    )
    try:
        response = pipeline.create(role)
        create_arn = response["PipelineArn"]

        for _ in retries(
            max_retry_count=5,
            exception_message_prefix="Waiting for a successful execution of pipeline",
            seconds_to_sleep=10,
        ):
            execution = pipeline.start(
                parameters={
                    "SuppliedBaselineStatisticsUri": data_quality_supplied_baseline_statistics,
                    "SuppliedBaselineConstraintsUri": data_quality_supplied_baseline_constraints,
                }
            )
            response = execution.describe()

            assert response["PipelineArn"] == create_arn

            try:
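                # A WaiterError here means the execution failed or timed out; fall through to
                # inspect the step status instead of raising.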
                execution.wait(delay=30, max_attempts=60)
            except WaiterError:
                pass
            execution_steps = execution.list_steps()

            assert len(execution_steps) == 1
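            # Any non-empty failure reason in the happy-path case is treated as transient and retried.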
            failure_reason = execution_steps[0].get("FailureReason", "")
            if failure_reason != "":
                logging.error(f"Pipeline execution failed with error: {failure_reason}. Retrying..")
                continue
            assert execution_steps[0]["StepName"] == "DataQualityCheckStep"
            assert execution_steps[0]["StepStatus"] == "Succeeded"
            data_qual_metadata = execution_steps[0]["Metadata"]["QualityCheck"]
            assert not data_qual_metadata["SkipCheck"]
            assert not data_qual_metadata["RegisterNewBaseline"]
            assert not data_qual_metadata.get("ViolationReport", "")
            assert (
                data_qual_metadata["BaselineUsedForDriftCheckConstraints"]
                == data_quality_supplied_baseline_constraints
            )
            assert (
                data_qual_metadata["BaselineUsedForDriftCheckStatistics"]
                == data_quality_supplied_baseline_statistics
            )
            assert (
                data_qual_metadata["BaselineUsedForDriftCheckConstraints"]
                != data_qual_metadata["CalculatedBaselineConstraints"]
            )
            assert (
                data_qual_metadata["BaselineUsedForDriftCheckStatistics"]
                != data_qual_metadata["CalculatedBaselineStatistics"]
            )
            break
    finally:
        try:
            pipeline.delete()
        except Exception:
            pass