def test_completed(self, xgboost_training_job):
        """Check the training job resource through completion and deletion.

        Uses the ``(reference, resource)`` pair from the
        ``xgboost_training_job`` fixture and asserts, in order, that:

        * the custom resource exists and its ARN equals the
          ``TrainingJobArn`` in the SageMaker description;
        * the described status equals ``cfg.JOB_STATUS_INPROGRESS`` while the
          ``ACK.ResourceSynced`` condition is ``"False"``;
        * the status reaches ``cfg.JOB_STATUS_COMPLETED`` and the condition
          becomes ``"True"``;
        * the refreshed resource carries a non-None
          ``status.modelArtifacts.s3ModelArtifacts``;
        * the spec tags pass ``assert_tags_in_sync``;
        * the custom resource can be deleted.
        """
        (reference, resource) = xgboost_training_job
        assert k8s.get_resource_exists(reference)

        training_job_name = resource["spec"].get("trainingJobName", None)
        assert training_job_name is not None

        training_job_desc = get_sagemaker_training_job(training_job_name)
        training_job_arn = training_job_desc["TrainingJobArn"]
        assert k8s.get_resource_arn(resource) == training_job_arn

        assert training_job_desc[
            "TrainingJobStatus"] == cfg.JOB_STATUS_INPROGRESS
        assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

        assert_training_status_in_sync(training_job_name, reference,
                                       cfg.JOB_STATUS_COMPLETED)
        assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

        # Model artifact URL is populated. Re-read the resource so the check
        # runs against the latest status rather than the fixture's snapshot.
        resource = k8s.get_resource(reference)
        # This comparison used to be a bare expression (no `assert`), so it
        # was evaluated and discarded and could never fail the test.
        assert (
            resource["status"]["modelArtifacts"]["s3ModelArtifacts"]
            is not None
        )

        resource_tags = resource["spec"].get("tags", None)
        assert_tags_in_sync(training_job_arn, resource_tags)

        # Check that you can delete a completed resource from k8s
        _, deleted = k8s.delete_custom_resource(reference,
                                                cfg.JOB_DELETE_WAIT_PERIODS,
                                                cfg.JOB_DELETE_WAIT_LENGTH)
        assert deleted is True
Пример #2
0
    def test_completed(self, xgboost_hpojob):
        """Follow the hyperparameter tuning job resource to completion.

        Takes the ``(reference, resource)`` pair from the ``xgboost_hpojob``
        fixture, compares the resource ARN with the SageMaker description,
        checks the in-progress and completed phases together with the
        ``ACK.ResourceSynced`` condition, checks the tags, and finally
        deletes the custom resource.
        """
        reference, resource = xgboost_hpojob
        assert k8s.get_resource_exists(reference)

        job_name = resource["spec"].get("hyperParameterTuningJobName", None)
        assert job_name is not None

        # The ARN recorded on the resource must equal the one SageMaker reports.
        sagemaker_desc = get_sagemaker_hpo_job(job_name)
        expected_arn = sagemaker_desc["HyperParameterTuningJobArn"]
        actual_arn = k8s.get_resource_arn(resource)
        assert actual_arn == expected_arn

        # In-progress phase: the condition is expected to be "False".
        initial_status = sagemaker_desc["HyperParameterTuningJobStatus"]
        assert initial_status == cfg.JOB_STATUS_INPROGRESS
        pending_sync = k8s.wait_on_condition(
            reference, "ACK.ResourceSynced", "False"
        )
        assert pending_sync

        # Completed phase: the condition is expected to be "True".
        self._assert_hpo_status_in_sync(
            job_name, reference, cfg.JOB_STATUS_COMPLETED
        )
        final_sync = k8s.wait_on_condition(
            reference, "ACK.ResourceSynced", "True"
        )
        assert final_sync

        spec_tags = resource["spec"].get("tags", None)
        assert_tags_in_sync(expected_arn, spec_tags)

        # A completed resource must be deletable from k8s.
        _unused, was_deleted = k8s.delete_custom_resource(
            reference,
            cfg.JOB_DELETE_WAIT_PERIODS,
            cfg.JOB_DELETE_WAIT_LENGTH,
        )
        assert was_deleted is True
Пример #3
0
    def test_completed(self, kmeans_processing_job):
        """Follow the processing job resource to completion.

        Takes the ``(reference, resource)`` pair from the
        ``kmeans_processing_job`` fixture, compares the resource ARN with the
        SageMaker description, checks the in-progress and completed phases
        together with the ``ACK.ResourceSynced`` condition, checks the tags,
        and finally deletes the custom resource.
        """
        reference, resource = kmeans_processing_job
        assert k8s.get_resource_exists(reference)

        job_name = resource["spec"].get("processingJobName", None)
        assert job_name is not None

        # The ARN recorded on the resource must equal the one SageMaker reports.
        sagemaker_desc = get_sagemaker_processing_job(job_name)
        expected_arn = sagemaker_desc["ProcessingJobArn"]
        actual_arn = k8s.get_resource_arn(resource)
        assert actual_arn == expected_arn

        # In-progress phase: the condition is expected to be "False".
        initial_status = sagemaker_desc["ProcessingJobStatus"]
        assert initial_status == cfg.JOB_STATUS_INPROGRESS
        pending_sync = k8s.wait_on_condition(
            reference, "ACK.ResourceSynced", "False"
        )
        assert pending_sync

        # Completed phase: the condition is expected to be "True".
        self._assert_processing_status_in_sync(
            job_name, reference, cfg.JOB_STATUS_COMPLETED
        )
        final_sync = k8s.wait_on_condition(
            reference, "ACK.ResourceSynced", "True"
        )
        assert final_sync

        spec_tags = resource["spec"].get("tags", None)
        assert_tags_in_sync(expected_arn, spec_tags)

        # A completed resource must be deletable from k8s.
        _unused, was_deleted = k8s.delete_custom_resource(
            reference,
            cfg.JOB_DELETE_WAIT_PERIODS,
            cfg.JOB_DELETE_WAIT_LENGTH,
        )
        assert was_deleted is True
    def test_create_feature_group(self, feature_group):
        """Exercise creation and deletion of a feature group resource.

        Takes the ``(reference, resource)`` pair from the ``feature_group``
        fixture, compares the resource ARN with the SageMaker description,
        checks the creating and created phases together with the
        ``ACK.ResourceSynced`` condition, checks the tags, deletes the custom
        resource and asserts that ``get_sagemaker_feature_group`` returns
        ``None`` for the name afterwards.
        """
        reference, resource = feature_group
        assert k8s.get_resource_exists(reference)

        group_name = resource["spec"].get("featureGroupName", None)
        assert group_name is not None

        # The ARN recorded on the resource must equal the one SageMaker reports.
        sagemaker_desc = get_sagemaker_feature_group(group_name)
        expected_arn = sagemaker_desc["FeatureGroupArn"]
        actual_arn = k8s.get_resource_arn(resource)
        assert actual_arn == expected_arn

        # Creating phase: the condition is expected to be "False".
        initial_status = sagemaker_desc["FeatureGroupStatus"]
        assert initial_status == FEATURE_GROUP_STATUS_CREATING
        pending_sync = k8s.wait_on_condition(
            reference, "ACK.ResourceSynced", "False"
        )
        assert pending_sync

        # Created phase: the condition is expected to be "True".
        self._assert_feature_group_status_in_sync(
            group_name, reference, FEATURE_GROUP_STATUS_CREATED
        )
        final_sync = k8s.wait_on_condition(
            reference, "ACK.ResourceSynced", "True"
        )
        assert final_sync

        spec_tags = resource["spec"].get("tags", None)
        assert_tags_in_sync(expected_arn, spec_tags)

        # Remove the k8s resource, then look the feature group up again.
        _unused, was_deleted = k8s.delete_custom_resource(
            reference, WAIT_PERIOD_COUNT, WAIT_PERIOD_LENGTH
        )
        assert was_deleted

        leftover = get_sagemaker_feature_group(group_name)
        assert leftover is None
    def test_smoke(
        self, sagemaker_client, xgboost_churn_model_explainability_job_definition
    ):
        """Smoke-test the model explainability job definition resource.

        Compares the resource ARN with the ``JobDefinitionArn`` from the
        SageMaker description, checks the tags after a short pause, deletes
        the custom resource and asserts that the job definition lookup
        returns ``None`` afterwards.
        """
        reference, resource = xgboost_churn_model_explainability_job_definition
        assert k8s.get_resource_exists(reference)

        definition_name = resource["spec"].get("jobDefinitionName")
        sagemaker_desc = get_sagemaker_model_explainability_job_definition(
            sagemaker_client, definition_name
        )
        expected_arn = sagemaker_desc["JobDefinitionArn"]
        actual_arn = k8s.get_resource_arn(resource)
        assert actual_arn == expected_arn

        # Pause for cfg.TAG_DELAY_SLEEP before the tag check to reduce
        # test flakiness.
        time.sleep(cfg.TAG_DELAY_SLEEP)
        spec_tags = resource["spec"].get("tags", None)
        assert_tags_in_sync(expected_arn, spec_tags)

        # Remove the k8s resource, then look the job definition up again.
        _unused, was_deleted = k8s.delete_custom_resource(reference, 3, 10)
        assert was_deleted

        leftover = get_sagemaker_model_explainability_job_definition(
            sagemaker_client, definition_name
        )
        assert leftover is None
    def test_create_model_package_group(self, xgboost_model_package_group):
        """Exercise creation and deletion of a model package group resource.

        Takes the ``(reference, resource)`` pair from the
        ``xgboost_model_package_group`` fixture, compares the resource ARN
        with the SageMaker description, waits for the completed status and a
        ``"True"`` ``ACK.ResourceSynced`` condition, checks the tags, deletes
        the custom resource and asserts that the group lookup returns
        ``None`` afterwards.
        """
        reference, resource = xgboost_model_package_group
        assert k8s.get_resource_exists(reference)

        group_name = resource["spec"].get("modelPackageGroupName", None)
        assert group_name is not None

        # The ARN recorded on the resource must equal the one SageMaker reports.
        sagemaker_desc = get_sagemaker_model_package_group(group_name)
        expected_arn = sagemaker_desc["ModelPackageGroupArn"]
        actual_arn = k8s.get_resource_arn(resource)
        assert actual_arn == expected_arn

        self._assert_model_package_group_status_in_sync(
            group_name, reference, cfg.JOB_STATUS_COMPLETED
        )
        final_sync = k8s.wait_on_condition(
            reference, "ACK.ResourceSynced", "True"
        )
        assert final_sync

        spec_tags = resource["spec"].get("tags", None)
        assert_tags_in_sync(expected_arn, spec_tags)

        # A completed resource must be deletable from k8s.
        _unused, was_deleted = k8s.delete_custom_resource(
            reference, cfg.DELETE_WAIT_PERIOD, cfg.DELETE_WAIT_LENGTH
        )
        assert was_deleted is True

        leftover = get_sagemaker_model_package_group(group_name)
        assert leftover is None
Пример #7
0
    def test_create_endpoint_config(self, single_variant_config):
        """Exercise creation and deletion of an endpoint config resource.

        Compares the resource ARN with the ``EndpointConfigArn`` from the
        SageMaker description, checks the tags after a short pause, deletes
        the custom resource and asserts that the endpoint config lookup
        returns ``None`` afterwards.
        """
        reference, resource = single_variant_config
        assert k8s.get_resource_exists(reference)

        endpoint_config_name = resource["spec"].get("endpointConfigName", None)
        sagemaker_desc = get_sagemaker_endpoint_config(endpoint_config_name)
        expected_arn = sagemaker_desc["EndpointConfigArn"]
        actual_arn = k8s.get_resource_arn(resource)
        assert actual_arn == expected_arn

        # Pause for cfg.TAG_DELAY_SLEEP before the tag check to reduce
        # test flakiness.
        time.sleep(cfg.TAG_DELAY_SLEEP)
        spec_tags = resource["spec"].get("tags", None)
        assert_tags_in_sync(expected_arn, spec_tags)

        # Remove the k8s resource, then look the endpoint config up again.
        _unused, was_deleted = k8s.delete_custom_resource(reference, 3, 10)
        assert was_deleted

        leftover = get_sagemaker_endpoint_config(endpoint_config_name)
        assert leftover is None
Пример #8
0
    def test_completed(self, xgboost_training_job_debugger):
        """Follow the debugger-enabled training job resource to completion.

        Takes the ``(reference, resource)`` pair from the
        ``xgboost_training_job_debugger`` fixture, compares the resource ARN
        with the SageMaker description (logging the resource status when the
        ARN is missing), checks the in-progress and completed job phases,
        then the ``DebugRule`` and ``ProfilerRule`` evaluation statuses,
        checks the tags, and finally deletes the custom resource.
        """
        reference, resource = xgboost_training_job_debugger
        assert k8s.get_resource_exists(reference)

        job_name = resource["spec"].get("trainingJobName", None)
        assert job_name is not None

        sagemaker_desc = get_sagemaker_training_job(job_name)
        expected_arn = sagemaker_desc["TrainingJobArn"]

        # Log the resource status first so a missing ARN is easier to diagnose
        # when the equality assertion below fails.
        actual_arn = k8s.get_resource_arn(resource)
        if actual_arn is None:
            logging.error(
                f"ARN for this resource is None, resource status is: {resource['status']}"
            )
        assert actual_arn == expected_arn

        # In-progress phase: the condition is expected to be "False".
        initial_status = sagemaker_desc["TrainingJobStatus"]
        assert initial_status == cfg.JOB_STATUS_INPROGRESS
        pending_sync = k8s.wait_on_condition(
            reference, "ACK.ResourceSynced", "False"
        )
        assert pending_sync

        # Job completed: the condition is still expected to be "False" at
        # this point, before the rule evaluation checks below.
        assert_training_status_in_sync(
            job_name, reference, cfg.JOB_STATUS_COMPLETED
        )
        still_pending = k8s.wait_on_condition(
            reference, "ACK.ResourceSynced", "False"
        )
        assert still_pending

        # Debugger rule evaluation completed.
        self._assert_training_rule_eval_status_in_sync(
            job_name, "DebugRule", reference, cfg.RULE_STATUS_COMPLETED
        )

        # Profiler rule evaluation completed.
        self._assert_training_rule_eval_status_in_sync(
            job_name, "ProfilerRule", reference, cfg.RULE_STATUS_COMPLETED
        )
        final_sync = k8s.wait_on_condition(
            reference, "ACK.ResourceSynced", "True"
        )
        assert final_sync

        spec_tags = resource["spec"].get("tags", None)
        assert_tags_in_sync(expected_arn, spec_tags)

        # A completed resource must be deletable from k8s.
        _unused, was_deleted = k8s.delete_custom_resource(
            reference,
            cfg.JOB_DELETE_WAIT_PERIODS,
            cfg.JOB_DELETE_WAIT_LENGTH,
        )
        assert was_deleted is True
    def test_create_model(self, xgboost_model):
        """Exercise creation and deletion of a model resource.

        Compares the resource ARN with the ``ModelArn`` from the SageMaker
        description, checks the tags after a short pause, deletes the custom
        resource and asserts that ``get_sagemaker_model`` returns ``None``
        for the name afterwards.
        """
        reference, resource = xgboost_model
        assert k8s.get_resource_exists(reference)

        name = resource["spec"].get("modelName", None)
        sagemaker_desc = get_sagemaker_model(name)
        expected_arn = sagemaker_desc["ModelArn"]
        actual_arn = k8s.get_resource_arn(resource)
        assert actual_arn == expected_arn

        # Pause for cfg.TAG_DELAY_SLEEP before the tag check to reduce
        # test flakiness.
        time.sleep(cfg.TAG_DELAY_SLEEP)
        spec_tags = resource["spec"].get("tags", None)
        assert_tags_in_sync(expected_arn, spec_tags)

        # Remove the k8s resource, then look the model up again.
        _unused, was_deleted = k8s.delete_custom_resource(reference, 3, 10)
        assert was_deleted

        leftover = get_sagemaker_model(name)
        assert leftover is None
    def create_endpoint_test(self, xgboost_endpoint):
        """Check that the endpoint resource is created and goes in service.

        Takes the first two items of the ``xgboost_endpoint`` triple (the
        third is ignored), compares the resource ARN with the SageMaker
        description, checks the creating and in-service phases together with
        the ``ACK.ResourceSynced`` condition, and checks the tags. The
        resource is not deleted here.
        """
        reference, resource, _ignored = xgboost_endpoint
        assert k8s.get_resource_exists(reference)

        # The endpoint must have a name and the expected ARN.
        name = resource["spec"].get("endpointName", None)
        assert name is not None

        sagemaker_desc = get_sagemaker_endpoint(name)
        expected_arn = sagemaker_desc["EndpointArn"]
        actual_arn = k8s.get_resource_arn(resource)
        assert actual_arn == expected_arn

        # Creating phase: the condition is expected to be "False".
        assert_endpoint_status_in_sync(
            name, reference, cfg.ENDPOINT_STATUS_CREATING
        )
        pending_sync = k8s.wait_on_condition(
            reference, "ACK.ResourceSynced", "False"
        )
        assert pending_sync

        # InService phase: the condition is expected to be "True".
        assert_endpoint_status_in_sync(
            name, reference, cfg.ENDPOINT_STATUS_INSERVICE
        )
        final_sync = k8s.wait_on_condition(
            reference, "ACK.ResourceSynced", "True"
        )
        assert final_sync

        spec_tags = resource["spec"].get("tags", None)
        assert_tags_in_sync(expected_arn, spec_tags)
    def test_unversioned_model_package_completed(
            self, xgboost_unversioned_model_package):
        """Follow the unversioned model package resource to completion.

        Takes the ``(reference, resource)`` pair from the
        ``xgboost_unversioned_model_package`` fixture and asserts, in order,
        that:

        * the custom resource exists and its ARN equals the
          ``ModelPackageArn`` in the SageMaker description (the resource
          status is logged when the ARN is ``None``);
        * the status is in sync at ``cfg.JOB_STATUS_INPROGRESS`` while the
          ``ACK.ResourceSynced`` condition is ``"False"``;
        * the status is in sync at ``cfg.JOB_STATUS_COMPLETED`` and the
          condition becomes ``"True"``;
        * the spec tags pass ``assert_tags_in_sync``;
        * the custom resource can be deleted and
          ``get_sagemaker_model_package`` returns ``None`` afterwards.
        """
        (reference, resource) = xgboost_unversioned_model_package
        assert k8s.get_resource_exists(reference)

        model_package_name = resource["spec"].get("modelPackageName", None)
        assert model_package_name is not None

        model_package_desc = get_sagemaker_model_package(model_package_name)
        model_package_arn = model_package_desc["ModelPackageArn"]

        # Look the ARN up once so the logged check and the asserted value
        # are the same lookup.
        resource_arn = k8s.get_resource_arn(resource)
        if resource_arn is None:
            logging.error(
                f"ARN for this resource is None, resource status is: {resource['status']}"
            )

        assert resource_arn == model_package_arn

        self._assert_model_package_status_in_sync(model_package_name,
                                                  reference,
                                                  cfg.JOB_STATUS_INPROGRESS)
        assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

        self._assert_model_package_status_in_sync(model_package_name,
                                                  reference,
                                                  cfg.JOB_STATUS_COMPLETED)
        assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

        resource_tags = resource["spec"].get("tags", None)
        assert_tags_in_sync(model_package_arn, resource_tags)

        # Check that you can delete a completed resource from k8s
        _, deleted = k8s.delete_custom_resource(reference, DELETE_WAIT_PERIOD,
                                                DELETE_WAIT_LENGTH)
        assert deleted is True
        assert get_sagemaker_model_package(model_package_name) is None
    def test_smoke(
        self, sagemaker_client, xgboost_churn_data_quality_monitoring_schedule
    ):
        """Smoke-test create, update and delete of a monitoring schedule.

        Takes the ``(reference, resource, spec)`` triple from the fixture,
        compares the resource ARN with the SageMaker description, checks the
        pending and scheduled phases, checks the tags, patches the schedule
        expression and checks that SageMaker reports the new value, then
        deletes the custom resource and asserts that the schedule lookup
        returns ``None`` afterwards.
        """
        reference, resource, spec = xgboost_churn_data_quality_monitoring_schedule
        assert k8s.get_resource_exists(reference)

        schedule_name = resource["spec"].get("monitoringScheduleName")
        sagemaker_desc = get_sagemaker_monitoring_schedule(
            sagemaker_client, schedule_name
        )
        expected_arn = sagemaker_desc["MonitoringScheduleArn"]
        actual_arn = k8s.get_resource_arn(resource)
        assert actual_arn == expected_arn

        # The schedule transitions Pending -> Scheduled.
        # Pending is short-lived (about 30 seconds) because the baselining
        # job has already been run. If this makes the test flaky, either
        # remove the Pending checks (the main objective is the Scheduled
        # status) or create the schedule with an on-going baselining job so
        # that it waits for that job to complete.
        observed_status = wait_resource_monitoring_schedule_status(
            reference, self.STATUS_PENDING, 5, 2
        )
        assert observed_status == self.STATUS_PENDING
        pending_sync = k8s.wait_on_condition(
            reference, "ACK.ResourceSynced", "False", 5, 2
        )
        assert pending_sync

        self._assert_monitoring_schedule_status_in_sync(
            sagemaker_client, schedule_name, reference, self.STATUS_SCHEDULED
        )
        scheduled_sync = k8s.wait_on_condition(
            reference, "ACK.ResourceSynced", "True"
        )
        assert scheduled_sync

        spec_tags = resource["spec"].get("tags", None)
        assert_tags_in_sync(expected_arn, spec_tags)

        # Update the resource with a new schedule expression.
        updated_cron = "cron(0 * * * ? *)"
        schedule_config = spec["spec"]["monitoringScheduleConfig"]["scheduleConfig"]
        schedule_config["scheduleExpression"] = updated_cron
        k8s.patch_custom_resource(reference, spec)
        resource = k8s.wait_resource_consumed_by_controller(reference)
        assert resource is not None

        self._assert_monitoring_schedule_status_in_sync(
            sagemaker_client, schedule_name, reference, self.STATUS_SCHEDULED
        )
        updated_sync = k8s.wait_on_condition(
            reference, "ACK.ResourceSynced", "True"
        )
        assert updated_sync

        # SageMaker must now report the patched schedule expression.
        latest_desc = get_sagemaker_monitoring_schedule(
            sagemaker_client, schedule_name
        )
        reported_cron = latest_desc["MonitoringScheduleConfig"]["ScheduleConfig"][
            "ScheduleExpression"
        ]
        assert reported_cron == updated_cron

        # Remove the k8s resource, then look the schedule up again.
        _unused, was_deleted = k8s.delete_custom_resource(
            reference, cfg.DELETE_WAIT_PERIOD, cfg.DELETE_WAIT_LENGTH
        )
        assert was_deleted

        leftover = get_sagemaker_monitoring_schedule(
            sagemaker_client, schedule_name
        )
        assert leftover is None