Пример #1
0
def xgboost_transformjob(xgboost_model_for_transform):
    """Create a resource from the ``xgboost_transformjob`` spec, yield it, clean up.

    Generator in setup / yield / teardown form (presumably registered as a
    pytest fixture outside this view).

    Args:
        xgboost_model_for_transform: pair of
            ``(transform_resource_name, model_resource_name)``.

    Yields:
        ``(reference, resource)`` as returned by ``create_sagemaker_resource``.
    """
    transform_name, model_name = xgboost_model_for_transform

    substitutions = REPLACEMENT_VALUES.copy()
    substitutions["MODEL_NAME"] = model_name
    substitutions["TRANSFORM_JOB_NAME"] = transform_name

    reference, _, resource = create_sagemaker_resource(
        resource_plural=RESOURCE_PLURAL,
        resource_name=transform_name,
        spec_file="xgboost_transformjob",
        replacements=substitutions,
    )
    assert resource is not None

    # Log the resource status before failing so a missing ARN can be diagnosed.
    arn = k8s.get_resource_arn(resource)
    if arn is None:
        logging.error(
            f"ARN for this resource is None, resource status is: {resource['status']}"
        )
    assert arn is not None

    yield (reference, resource)

    # Teardown: nothing to do when the consumer already removed the resource.
    if not k8s.get_resource_exists(reference):
        return
    _, deleted = k8s.delete_custom_resource(
        reference,
        cfg.JOB_DELETE_WAIT_PERIODS,
        cfg.JOB_DELETE_WAIT_LENGTH,
    )
    assert deleted
def multi_variant_config(name_suffix, single_container_model):
    """Create a resource from the ``endpoint_config_multi_variant`` spec.

    Generator in setup / yield / teardown form (presumably registered as a
    pytest fixture outside this view).

    Args:
        name_suffix: string prefix used to build the config resource name.
        single_container_model: pair whose second element is a model resource
            dict; its ``spec.modelName`` is substituted as ``MODEL_NAME``.

    Yields:
        ``(config_reference, config_resource)``.
    """
    config_name = name_suffix + "-multi-variant-config"
    _, model_resource = single_container_model
    model_name = model_resource["spec"].get("modelName")

    substitutions = REPLACEMENT_VALUES.copy()
    substitutions["ENDPOINT_CONFIG_NAME"] = config_name
    substitutions["MODEL_NAME"] = model_name

    config_reference, _, config_resource = create_sagemaker_resource(
        resource_plural=cfg.ENDPOINT_CONFIG_RESOURCE_PLURAL,
        resource_name=config_name,
        spec_file="endpoint_config_multi_variant",
        replacements=substitutions,
    )
    assert config_resource is not None

    # Log the resource status before failing so a missing ARN can be diagnosed.
    config_arn = k8s.get_resource_arn(config_resource)
    if config_arn is None:
        logging.error(
            f"ARN for this resource is None, resource status is: {config_resource['status']}"
        )
    assert config_arn is not None

    yield (config_reference, config_resource)

    # Teardown: the delete is unconditional here (no existence check first).
    _, deleted = k8s.delete_custom_resource(config_reference, 3, 10)
    assert deleted
def xgboost_model_package_group():
    """Create a resource from the ``xgboost_model_package_group`` spec.

    Generator in setup / yield / teardown form (presumably registered as a
    pytest fixture outside this view). The resource name is produced by
    ``random_suffix_name("xgboost-model-package-group", 38)``.

    Yields:
        ``(model_package_group_reference, model_package_group_resource)``.
    """
    group_name = random_suffix_name("xgboost-model-package-group", 38)

    substitutions = REPLACEMENT_VALUES.copy()
    substitutions["MODEL_PACKAGE_GROUP_NAME"] = group_name

    group_reference, _, group_resource = create_sagemaker_resource(
        resource_plural=cfg.MODEL_PACKAGE_GROUP_RESOURCE_PLURAL,
        resource_name=group_name,
        spec_file="xgboost_model_package_group",
        replacements=substitutions,
    )
    assert group_resource is not None

    # Log the resource status before failing so a missing ARN can be diagnosed.
    group_arn = k8s.get_resource_arn(group_resource)
    if group_arn is None:
        logging.error(
            f"ARN for this resource is None, resource status is: {group_resource['status']}"
        )
    assert group_arn is not None

    yield (group_reference, group_resource)

    # Delete the k8s resource unless a test has already deleted it.
    if not k8s.get_resource_exists(group_reference):
        return
    _, deleted = k8s.delete_custom_resource(
        group_reference,
        DELETE_WAIT_PERIOD,
        DELETE_WAIT_LENGTH,
    )
    assert deleted
def xgboost_training_job():
    """Create a resource from the ``xgboost_trainingjob`` spec, yield it, clean up.

    Generator in setup / yield / teardown form (presumably registered as a
    pytest fixture outside this view). The resource name is produced by
    ``random_suffix_name("xgboost-trainingjob", 32)``.

    Yields:
        ``(reference, resource)`` as returned by ``create_sagemaker_resource``.
    """
    job_name = random_suffix_name("xgboost-trainingjob", 32)

    substitutions = REPLACEMENT_VALUES.copy()
    substitutions["TRAINING_JOB_NAME"] = job_name

    reference, _, resource = create_sagemaker_resource(
        resource_plural=RESOURCE_PLURAL,
        resource_name=job_name,
        spec_file="xgboost_trainingjob",
        replacements=substitutions,
    )
    assert resource is not None

    # Log the resource status before failing so a missing ARN can be diagnosed.
    arn = k8s.get_resource_arn(resource)
    if arn is None:
        logging.error(
            f"ARN for this resource is None, resource status is: {resource['status']}"
        )
    assert arn is not None

    yield (reference, resource)

    # Teardown: nothing to do when the consumer already removed the resource.
    if not k8s.get_resource_exists(reference):
        return
    _, deleted = k8s.delete_custom_resource(
        reference,
        cfg.JOB_DELETE_WAIT_PERIODS,
        cfg.JOB_DELETE_WAIT_LENGTH,
    )
    assert deleted
    def test_versioned_model_package_completed(
            self, xgboost_versioned_model_package):
        """Follow a versioned model package to completion, patch it, delete it.

        Steps: check the resource ARN against the first package listed for
        its group, wait for the in-progress and completed states, patch the
        approval status and description, verify both on SageMaker and on the
        resource, then delete the resource and check the package is gone.
        """
        reference, spec, resource = xgboost_versioned_model_package
        assert k8s.get_resource_exists(reference)

        group_name = resource["spec"].get("modelPackageGroupName")
        # Model package name for Versioned Model packages is the ARN of the resource
        listing = sagemaker_client().list_model_packages(
            ModelPackageGroupName=group_name)
        model_package_name = listing["ModelPackageSummaryList"][0][
            "ModelPackageArn"]

        # NOTE(review): this first describe result is not read before it is
        # reassigned further down; the call is kept as in the original.
        model_package_desc = get_sagemaker_model_package(model_package_name)

        resource_arn = k8s.get_resource_arn(resource)
        if resource_arn is None:
            logging.error(
                f"ARN for this resource is None, resource status is: {resource['status']}"
            )
        assert resource_arn == model_package_name

        # In progress goes with ResourceSynced False, completed with True.
        for job_status, synced in ((cfg.JOB_STATUS_INPROGRESS, "False"),
                                   (cfg.JOB_STATUS_COMPLETED, "True")):
            self._assert_model_package_status_in_sync(model_package_name,
                                                      reference, job_status)
            assert k8s.wait_on_condition(reference, "ACK.ResourceSynced",
                                         synced)

        # Update the resource
        approved_status = "Approved"
        approval_note = "Approved modelpackage"
        spec["spec"]["modelApprovalStatus"] = approved_status
        spec["spec"]["approvalDescription"] = approval_note
        k8s.patch_custom_resource(reference, spec)
        resource = k8s.wait_resource_consumed_by_controller(reference)
        assert resource is not None

        self._assert_model_package_status_in_sync(model_package_name,
                                                  reference,
                                                  cfg.JOB_STATUS_COMPLETED)
        assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

        # The patch must be visible both on SageMaker and on the resource spec.
        model_package_desc = get_sagemaker_model_package(model_package_name)
        assert model_package_desc["ModelApprovalStatus"] == approved_status
        assert model_package_desc["ApprovalDescription"] == approval_note

        patched_spec = resource["spec"]
        assert patched_spec.get("modelApprovalStatus") == approved_status
        assert patched_spec.get("approvalDescription") == approval_note

        # Check that you can delete a completed resource from k8s
        _, deleted = k8s.delete_custom_resource(reference, DELETE_WAIT_PERIOD,
                                                DELETE_WAIT_LENGTH)
        assert deleted is True
        assert get_sagemaker_model_package(model_package_name) is None
def faulty_config(name_suffix, single_container_model):
    """Create a model backed by deleted S3 data plus an endpoint config for it.

    Generator in setup / yield / teardown form (presumably registered as a
    pytest fixture outside this view). The model archive is copied to a
    temporary S3 key, a model resource pointing at that key is created, and
    the temporary copy is then deleted. An endpoint config is created from
    the ``endpoint_config_multi_variant`` spec with ``MODEL_NAME`` set to
    that model.

    Args:
        name_suffix: string prefix used to build both resource names.
        single_container_model: not read by this function; kept so the
            signature (and any dependency ordering it expresses) is unchanged.

    Yields:
        ``(config_reference, config_resource)``.
    """
    replacements = REPLACEMENT_VALUES.copy()

    # copy model data to a temp S3 location and delete it after model is created on SageMaker
    model_bucket = replacements["SAGEMAKER_DATA_BUCKET"]
    copy_source = {
        "Bucket": model_bucket,
        "Key": "sagemaker/model/xgboost-mnist-model.tar.gz",
    }
    model_destination_key = "sagemaker/model/delete/xgboost-mnist-model.tar.gz"
    s3.copy_object(model_bucket, copy_source, model_destination_key)

    model_resource_name = name_suffix + "faulty-model"
    replacements["MODEL_NAME"] = model_resource_name
    replacements[
        "MODEL_LOCATION"] = f"s3://{model_bucket}/{model_destination_key}"
    try:
        model_reference, _, model_resource = create_sagemaker_resource(
            resource_plural=cfg.MODEL_RESOURCE_PLURAL,
            resource_name=model_resource_name,
            spec_file="xgboost_model_with_model_location",
            replacements=replacements,
        )
        assert model_resource is not None
        model_arn = k8s.get_resource_arn(model_resource)
        if model_arn is None:
            logging.error(
                f"ARN for this resource is None, resource status is: {model_resource['status']}"
            )
        assert model_arn is not None
    finally:
        # Remove the temporary copy even when model creation or one of the
        # checks above fails, so a failed setup does not leave it behind.
        s3.delete_object(model_bucket, model_destination_key)

    config_resource_name = name_suffix + "-faulty-config"
    # MODEL_NAME deliberately still refers to the faulty model created above.
    replacements["ENDPOINT_CONFIG_NAME"] = config_resource_name

    config_reference, _, config_resource = create_sagemaker_resource(
        resource_plural=cfg.ENDPOINT_CONFIG_RESOURCE_PLURAL,
        resource_name=config_resource_name,
        spec_file="endpoint_config_multi_variant",
        replacements=replacements,
    )
    assert config_resource is not None
    config_arn = k8s.get_resource_arn(config_resource)
    if config_arn is None:
        logging.error(
            f"ARN for this resource is None, resource status is: {config_resource['status']}"
        )
    assert config_arn is not None

    yield (config_reference, config_resource)

    # Teardown: delete the model first, then the endpoint config.
    for cr in (model_reference, config_reference):
        _, deleted = k8s.delete_custom_resource(cr, 3, 10)
        assert deleted
    def test_smoke(
        self, sagemaker_client, xgboost_churn_model_explainability_job_definition
    ):
        """Check ARN and tags of a model explainability job definition, then delete it.

        After deleting the k8s resource, the job definition lookup is expected
        to return ``None``.
        """
        reference, resource = xgboost_churn_model_explainability_job_definition
        assert k8s.get_resource_exists(reference)

        definition_name = resource["spec"].get("jobDefinitionName")
        description = get_sagemaker_model_explainability_job_definition(
            sagemaker_client, definition_name
        )
        definition_arn = description["JobDefinitionArn"]
        assert k8s.get_resource_arn(resource) == definition_arn

        # Pause for cfg.TAG_DELAY_SLEEP before checking tags to reduce test flakiness.
        time.sleep(cfg.TAG_DELAY_SLEEP)
        assert_tags_in_sync(definition_arn, resource["spec"].get("tags"))

        # Delete the k8s resource.
        _, deleted = k8s.delete_custom_resource(reference, 3, 10)
        assert deleted

        remaining = get_sagemaker_model_explainability_job_definition(
            sagemaker_client, definition_name
        )
        assert remaining is None
    def test_create_model_package_group(self, xgboost_model_package_group):
        """Check a model package group's ARN, status and tags, then delete it.

        After deleting the k8s resource, the group lookup is expected to
        return ``None``.
        """
        reference, resource = xgboost_model_package_group
        assert k8s.get_resource_exists(reference)

        group_name = resource["spec"].get("modelPackageGroupName")
        assert group_name is not None

        group_desc = get_sagemaker_model_package_group(group_name)
        group_arn = group_desc["ModelPackageGroupArn"]
        assert k8s.get_resource_arn(resource) == group_arn

        self._assert_model_package_group_status_in_sync(
            group_name, reference, cfg.JOB_STATUS_COMPLETED)
        assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

        assert_tags_in_sync(group_arn, resource["spec"].get("tags"))

        # Check that you can delete a completed resource from k8s
        _, deleted = k8s.delete_custom_resource(
            reference,
            cfg.DELETE_WAIT_PERIOD,
            cfg.DELETE_WAIT_LENGTH,
        )
        assert deleted is True

        assert get_sagemaker_model_package_group(group_name) is None
    def test_stopped(self, xgboost_training_job):
        """Delete an in-progress training job resource and expect a stopped status.

        The job must be in ``cfg.JOB_STATUS_INPROGRESS`` before the delete and
        in one of ``cfg.LIST_JOB_STATUS_STOPPED`` after it.
        """
        reference, resource = xgboost_training_job
        assert k8s.get_resource_exists(reference)

        job_name = resource["spec"].get("trainingJobName")
        assert job_name is not None

        description = get_sagemaker_training_job(job_name)
        assert k8s.get_resource_arn(resource) == description["TrainingJobArn"]
        assert description["TrainingJobStatus"] == cfg.JOB_STATUS_INPROGRESS
        assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

        assert_training_status_in_sync(job_name, reference,
                                       cfg.JOB_STATUS_INPROGRESS)

        # Delete the k8s resource.
        _, deleted = k8s.delete_custom_resource(
            reference,
            cfg.JOB_DELETE_WAIT_PERIODS,
            cfg.JOB_DELETE_WAIT_LENGTH,
        )
        assert deleted is True

        # Describe again: the status after the delete must be a stopped one.
        description = get_sagemaker_training_job(job_name)
        assert description["TrainingJobStatus"] in cfg.LIST_JOB_STATUS_STOPPED
    def test_completed(self, xgboost_training_job):
        """Follow a training job to completion, check its outputs, delete it.

        Verifies the resource ARN, the in-progress then completed status
        transitions, that ``status.modelArtifacts.s3ModelArtifacts`` is
        populated on the refreshed resource, that tags are in sync, and that
        the completed resource can be deleted from k8s.
        """
        (reference, resource) = xgboost_training_job
        assert k8s.get_resource_exists(reference)

        training_job_name = resource["spec"].get("trainingJobName", None)
        assert training_job_name is not None

        training_job_desc = get_sagemaker_training_job(training_job_name)
        training_job_arn = training_job_desc["TrainingJobArn"]
        assert k8s.get_resource_arn(resource) == training_job_arn

        assert training_job_desc[
            "TrainingJobStatus"] == cfg.JOB_STATUS_INPROGRESS
        assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

        assert_training_status_in_sync(training_job_name, reference,
                                       cfg.JOB_STATUS_COMPLETED)
        assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

        # model artifact URL is populated
        # (this comparison was previously a bare expression and never checked)
        resource = k8s.get_resource(reference)
        assert resource["status"]["modelArtifacts"][
            "s3ModelArtifacts"] is not None

        resource_tags = resource["spec"].get("tags", None)
        assert_tags_in_sync(training_job_arn, resource_tags)

        # Check that you can delete a completed resource from k8s
        _, deleted = k8s.delete_custom_resource(reference,
                                                cfg.JOB_DELETE_WAIT_PERIODS,
                                                cfg.JOB_DELETE_WAIT_LENGTH)
        assert deleted is True
Пример #11
0
    def test_stopped(self, xgboost_hpojob):
        """Delete an in-progress tuning job resource and expect a stopped status.

        The job must be in ``cfg.JOB_STATUS_INPROGRESS`` before the delete and
        in one of ``cfg.LIST_JOB_STATUS_STOPPED`` after it.
        """
        reference, resource = xgboost_hpojob
        assert k8s.get_resource_exists(reference)

        job_name = resource["spec"].get("hyperParameterTuningJobName")
        assert job_name is not None

        description = get_sagemaker_hpo_job(job_name)
        assert (k8s.get_resource_arn(resource) ==
                description["HyperParameterTuningJobArn"])
        assert (description["HyperParameterTuningJobStatus"] ==
                cfg.JOB_STATUS_INPROGRESS)
        assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

        self._assert_hpo_status_in_sync(job_name, reference,
                                        cfg.JOB_STATUS_INPROGRESS)

        # Delete the k8s resource.
        _, deleted = k8s.delete_custom_resource(
            reference,
            cfg.JOB_DELETE_WAIT_PERIODS,
            cfg.JOB_DELETE_WAIT_LENGTH,
        )
        assert deleted is True

        # Describe again: the status after the delete must be a stopped one.
        description = get_sagemaker_hpo_job(job_name)
        final_status = description["HyperParameterTuningJobStatus"]
        assert final_status in cfg.LIST_JOB_STATUS_STOPPED
    def test_create_feature_group(self, feature_group):
        """Check that a feature group can be created and then deleted.

        Verifies the ARN, the creating then created status transition with
        the matching ``ACK.ResourceSynced`` condition, and the tags. After the
        delete, the feature group lookup is expected to return ``None``.
        """
        reference, resource = feature_group
        assert k8s.get_resource_exists(reference)

        group_name = resource["spec"].get("featureGroupName")
        assert group_name is not None

        description = get_sagemaker_feature_group(group_name)
        group_arn = description["FeatureGroupArn"]
        assert k8s.get_resource_arn(resource) == group_arn

        # Right after creation the group is expected to still be creating.
        assert description["FeatureGroupStatus"] == FEATURE_GROUP_STATUS_CREATING
        assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

        self._assert_feature_group_status_in_sync(
            group_name, reference, FEATURE_GROUP_STATUS_CREATED)
        assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

        assert_tags_in_sync(group_arn, resource["spec"].get("tags"))

        # Delete the k8s resource.
        _, deleted = k8s.delete_custom_resource(
            reference,
            WAIT_PERIOD_COUNT,
            WAIT_PERIOD_LENGTH,
        )
        assert deleted

        assert get_sagemaker_feature_group(group_name) is None
Пример #13
0
    def test_completed(self, xgboost_hpojob):
        """Follow a tuning job to completion, check its tags, then delete it.

        Verifies the resource ARN, the in-progress then completed status
        transition with the matching ``ACK.ResourceSynced`` condition, and
        that the completed resource can be deleted from k8s.
        """
        reference, resource = xgboost_hpojob
        assert k8s.get_resource_exists(reference)

        job_name = resource["spec"].get("hyperParameterTuningJobName")
        assert job_name is not None

        description = get_sagemaker_hpo_job(job_name)
        job_arn = description["HyperParameterTuningJobArn"]
        assert k8s.get_resource_arn(resource) == job_arn

        assert (description["HyperParameterTuningJobStatus"] ==
                cfg.JOB_STATUS_INPROGRESS)
        assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

        self._assert_hpo_status_in_sync(job_name, reference,
                                        cfg.JOB_STATUS_COMPLETED)
        assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

        assert_tags_in_sync(job_arn, resource["spec"].get("tags"))

        # Check that you can delete a completed resource from k8s
        _, deleted = k8s.delete_custom_resource(
            reference,
            cfg.JOB_DELETE_WAIT_PERIODS,
            cfg.JOB_DELETE_WAIT_LENGTH,
        )
        assert deleted is True
Пример #14
0
    def test_completed(self, kmeans_processing_job):
        """Follow a processing job to completion, check its tags, then delete it.

        Verifies the resource ARN, the in-progress then completed status
        transition with the matching ``ACK.ResourceSynced`` condition, and
        that the completed resource can be deleted from k8s.
        """
        reference, resource = kmeans_processing_job
        assert k8s.get_resource_exists(reference)

        job_name = resource["spec"].get("processingJobName")
        assert job_name is not None

        description = get_sagemaker_processing_job(job_name)
        job_arn = description["ProcessingJobArn"]
        assert k8s.get_resource_arn(resource) == job_arn

        assert description["ProcessingJobStatus"] == cfg.JOB_STATUS_INPROGRESS
        assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

        self._assert_processing_status_in_sync(
            job_name, reference, cfg.JOB_STATUS_COMPLETED
        )
        assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

        assert_tags_in_sync(job_arn, resource["spec"].get("tags"))

        # Check that you can delete a completed resource from k8s
        _, deleted = k8s.delete_custom_resource(
            reference,
            cfg.JOB_DELETE_WAIT_PERIODS,
            cfg.JOB_DELETE_WAIT_LENGTH,
        )
        assert deleted is True
Пример #15
0
def user_profile_fixture(domain_fixture):
    """Patch the domain's kernel instance, create a user profile, clean it up.

    Generator in setup / yield / teardown form (presumably registered as a
    pytest fixture outside this view). Setup waits for the domain to report
    ``InService``, patches its kernel instance to ``ml.t3.large``, waits for
    that value and for ``InService`` again, then applies the user profile
    YAML for the domain.

    Args:
        domain_fixture: triple of
            ``(domain_reference, domain_resource, domain_spec)``.

    Yields:
        ``(domain_reference, domain_resource, domain_spec,
        user_profile_reference, user_profile_resource, user_profile_spec)``,
        where ``domain_resource`` is the value returned by the patch call.
    """
    (domain_reference, domain_resource, domain_spec) = domain_fixture
    assert k8s.get_resource_exists(domain_reference)

    domain_id = domain_resource["status"].get("domainID", None)
    assert domain_id is not None

    assert_domain_status_in_sync(domain_id, domain_reference, "InService")

    domain_resource = patch_domain_kernel_instance(
        domain_reference, domain_spec, "ml.t3.large"
    )
    wait_for_status("ml.t3.large", 10, 30, get_domain_kernel_instance, domain_id)
    assert_domain_status_in_sync(domain_id, domain_reference, "InService")

    resource_name = random_suffix_name("profile", 15)
    (
        user_profile_reference,
        user_profile_resource,
        user_profile_spec,
    ) = apply_user_profile_yaml(resource_name, domain_id)

    assert user_profile_resource is not None
    if k8s.get_resource_arn(user_profile_resource) is None:
        logging.error(
            f"ARN for this resource is None, resource status is: {user_profile_resource['status']}"
        )
    assert k8s.get_resource_arn(user_profile_resource) is not None

    yield (
        domain_reference,
        domain_resource,
        domain_spec,
        user_profile_reference,
        user_profile_resource,
        user_profile_spec,
    )

    # Teardown: only delete, and only check the outcome, when the profile
    # still exists. `deleted` is unbound when the consumer already removed it.
    if k8s.get_resource_exists(user_profile_reference):
        _, deleted = k8s.delete_custom_resource(
            user_profile_reference,
            cfg.JOB_DELETE_WAIT_PERIODS,
            cfg.JOB_DELETE_WAIT_LENGTH,
        )
        assert deleted
Пример #16
0
def domain_fixture():
    """Apply the domain YAML, yield the created domain, and delete it afterwards.

    Generator in setup / yield / teardown form (presumably registered as a
    pytest fixture outside this view). The resource name is produced by
    ``random_suffix_name("sm-domain", 15)``.

    Yields:
        ``(reference, resource, spec)`` as returned by ``apply_domain_yaml``.
    """
    domain_name = random_suffix_name("sm-domain", 15)
    reference, resource, spec = apply_domain_yaml(domain_name)
    assert resource is not None

    # Log the resource status before failing so a missing ARN can be diagnosed.
    arn = k8s.get_resource_arn(resource)
    if arn is None:
        logging.error(
            f"ARN for this resource is None, resource status is: {resource['status']}"
        )
    assert arn is not None

    yield (reference, resource, spec)

    # Teardown: nothing to do when the consumer already removed the domain.
    if not k8s.get_resource_exists(reference):
        return
    _, deleted = k8s.delete_custom_resource(
        reference,
        cfg.JOB_DELETE_WAIT_PERIODS,
        cfg.JOB_DELETE_WAIT_LENGTH,
    )
    assert deleted
    def test_smoke(self, dynamodb_client, dynamodb_table):
        """Create a backup resource for a table, check it exists, then delete it.

        The backup is checked through ``self.backup_exists`` both after the
        resource reports ``backupStatus`` ``AVAILABLE`` and again after the
        k8s resource has been deleted.
        """
        _, table_resource = dynamodb_table
        backup_name = random_suffix_name("backup", 32)
        table_name = table_resource["spec"]["tableName"]

        substitutions = REPLACEMENT_VALUES.copy()
        substitutions["TABLE_NAME"] = table_name
        substitutions["BACKUP_NAME"] = backup_name

        # Load Backup CR
        backup_manifest = load_dynamodb_resource(
            "backup",
            additional_replacements=substitutions,
        )
        logging.debug(backup_manifest)

        # Create k8s resource
        ref = k8s.CustomResourceReference(
            CRD_GROUP,
            CRD_VERSION,
            RESOURCE_PLURAL,
            backup_name,
            namespace="default",
        )
        k8s.create_custom_resource(ref, backup_manifest)
        cr = k8s.wait_resource_consumed_by_controller(ref)

        assert cr is not None
        assert k8s.get_resource_exists(ref)

        wait_for_cr_status(ref, "backupStatus", "AVAILABLE", 10, 5)

        backup_arn = k8s.get_resource_arn(cr)
        # Check DynamoDB Backup exists
        assert self.backup_exists(dynamodb_client, backup_arn)

        # Delete k8s resource
        _, deleted = k8s.delete_custom_resource(ref)
        assert deleted is True

        time.sleep(DELETE_WAIT_AFTER_SECONDS)

        # Check DynamoDB Backup no longer exists
        assert not self.backup_exists(dynamodb_client, backup_arn)
    def test_processing_job_has_correct_arn(self, sagemaker_client,
                                            kmeans_processing_job):
        """Compare the processing job resource's ARN with the one SageMaker reports.

        The resource is re-read through ``k8s.get_resource`` rather than taken
        from the fixture tuple.
        """
        reference, _ = kmeans_processing_job
        resource = k8s.get_resource(reference)

        job_name = resource["spec"].get("processingJobName")
        assert job_name is not None

        actual_arn = k8s.get_resource_arn(resource)
        expected_arn = self._get_sagemaker_processing_job_arn(
            sagemaker_client, job_name)

        assert actual_arn == expected_arn
    def test_trainingjob_has_correct_arn(self, sagemaker_client,
                                         xgboost_trainingjob):
        """Compare the training job resource's ARN with the one SageMaker reports.

        The resource is re-read through ``k8s.get_resource`` rather than taken
        from the fixture tuple.
        """
        reference, _ = xgboost_trainingjob
        resource = k8s.get_resource(reference)

        job_name = resource["spec"].get("trainingJobName")
        assert job_name is not None

        actual_arn = k8s.get_resource_arn(resource)
        expected_arn = self._get_sagemaker_trainingjob_arn(
            sagemaker_client, job_name)

        assert actual_arn == expected_arn
def single_container_model(name_suffix):
    """Create a resource from the ``xgboost_model`` spec, yield it, then delete it.

    Generator in setup / yield / teardown form (presumably registered as a
    pytest fixture outside this view).

    Args:
        name_suffix: string prefix; the resource is named
            ``name_suffix + "-model"``.

    Yields:
        ``(model_reference, model_resource)``.
    """
    model_name = name_suffix + "-model"

    substitutions = REPLACEMENT_VALUES.copy()
    substitutions["MODEL_NAME"] = model_name

    model_reference, _, model_resource = create_sagemaker_resource(
        resource_plural=cfg.MODEL_RESOURCE_PLURAL,
        resource_name=model_name,
        spec_file="xgboost_model",
        replacements=substitutions,
    )
    assert model_resource is not None

    # Log the resource status before failing so a missing ARN can be diagnosed.
    model_arn = k8s.get_resource_arn(model_resource)
    if model_arn is None:
        logging.error(
            f"ARN for this resource is None, resource status is: {model_resource['status']}"
        )
    assert model_arn is not None

    yield (model_reference, model_resource)

    # Teardown: the delete is unconditional here (no existence check first).
    _, deleted = k8s.delete_custom_resource(model_reference, 3, 10)
    assert deleted
    def test_unversioned_model_package_completed(
            self, xgboost_unversioned_model_package):
        """Follow an unversioned model package to completion, then delete it.

        Verifies the resource ARN, the in-progress then completed status
        transition with the matching ``ACK.ResourceSynced`` condition, and the
        tags. After the delete, the package lookup is expected to return
        ``None``.
        """
        reference, resource = xgboost_unversioned_model_package
        assert k8s.get_resource_exists(reference)

        package_name = resource["spec"].get("modelPackageName")
        assert package_name is not None

        description = get_sagemaker_model_package(package_name)
        package_arn = description["ModelPackageArn"]

        # Log the resource status before failing so a missing ARN can be diagnosed.
        resource_arn = k8s.get_resource_arn(resource)
        if resource_arn is None:
            logging.error(
                f"ARN for this resource is None, resource status is: {resource['status']}"
            )
        assert resource_arn == package_arn

        # In progress goes with ResourceSynced False, completed with True.
        for job_status, synced in ((cfg.JOB_STATUS_INPROGRESS, "False"),
                                   (cfg.JOB_STATUS_COMPLETED, "True")):
            self._assert_model_package_status_in_sync(package_name, reference,
                                                      job_status)
            assert k8s.wait_on_condition(reference, "ACK.ResourceSynced",
                                         synced)

        assert_tags_in_sync(package_arn, resource["spec"].get("tags"))

        # Check that you can delete a completed resource from k8s
        _, deleted = k8s.delete_custom_resource(reference, DELETE_WAIT_PERIOD,
                                                DELETE_WAIT_LENGTH)
        assert deleted is True
        assert get_sagemaker_model_package(package_name) is None
Пример #22
0
def single_variant_config():
    """Yield-style fixture providing a single-variant endpoint config.

    Creates an XGBoost model resource and then an endpoint config resource
    that refers to it by name (both names are derived from one random
    suffix), yields the config, and removes both resources afterwards.

    Yields:
        A ``(config_reference, config_resource)`` tuple.

    Raises:
        AssertionError: if the model or the config cannot be created, if the
            model has no ARN, or if the config still exists at teardown and
            its deletion is not confirmed.
    """
    config_resource_name = random_suffix_name("single-variant-config", 32)
    model_resource_name = config_resource_name + "-model"

    replacements = REPLACEMENT_VALUES.copy()
    replacements["ENDPOINT_CONFIG_NAME"] = config_resource_name
    replacements["MODEL_NAME"] = model_resource_name

    model_reference, _, model_resource = create_sagemaker_resource(
        resource_plural=cfg.MODEL_RESOURCE_PLURAL,
        resource_name=model_resource_name,
        spec_file="xgboost_model",
        replacements=replacements,
    )
    assert model_resource is not None

    config_reference = None
    try:
        if k8s.get_resource_arn(model_resource) is None:
            logging.error(
                f"ARN for this resource is None, resource status is: {model_resource['status']}"
            )
        assert k8s.get_resource_arn(model_resource) is not None

        config_reference, _, config_resource = create_sagemaker_resource(
            resource_plural=cfg.ENDPOINT_CONFIG_RESOURCE_PLURAL,
            resource_name=config_resource_name,
            spec_file="endpoint_config_single_variant",
            replacements=replacements,
        )
        assert config_resource is not None
    except Exception:
        # When setup fails before the yield, the teardown code after the
        # yield never runs, so release what was already created here to
        # avoid leaking resources, then re-raise the original failure.
        k8s.delete_custom_resource(model_reference, 3, 10)
        if config_reference is not None and k8s.get_resource_exists(
                config_reference):
            k8s.delete_custom_resource(config_reference, 3, 10)
        raise

    yield (config_reference, config_resource)

    k8s.delete_custom_resource(model_reference, 3, 10)
    # Delete the k8s resource if not already deleted by tests
    if k8s.get_resource_exists(config_reference):
        _, deleted = k8s.delete_custom_resource(config_reference, 3, 10)
        assert deleted
    def test_create_cross_region_model(self, cross_region_model):
        """Check a cross-region model's ARN and its removal on delete.

        SageMaker lookups use a client built for the region returned by
        ``get_cross_region()``. After the custom resource is deleted, the
        model must no longer be returned by SageMaker.
        """
        k8s_ref, resource = cross_region_model
        assert k8s.get_resource_exists(k8s_ref)

        regional_client = sagemaker_client(get_cross_region())
        name = resource["spec"].get("modelName")
        described = get_sagemaker_model(name, regional_client)
        expected_arn = described["ModelArn"]
        assert k8s.get_resource_arn(resource) == expected_arn

        # Remove the custom resource, then confirm the model is gone too.
        _, was_deleted = k8s.delete_custom_resource(k8s_ref, 3, 10)
        assert was_deleted

        assert get_sagemaker_model(name, regional_client) is None
Пример #24
0
    def test_create_endpoint_config(self, single_variant_config):
        """Check ARN, tags and deletion of a single-variant endpoint config.

        The ARN on the Kubernetes resource must equal the one SageMaker
        reports, the spec tags must be in sync, and deleting the custom
        resource must remove the endpoint config from SageMaker.
        """
        k8s_ref, resource = single_variant_config
        assert k8s.get_resource_exists(k8s_ref)

        name = resource["spec"].get("endpointConfigName")
        described = get_sagemaker_endpoint_config(name)
        expected_arn = described["EndpointConfigArn"]
        assert k8s.get_resource_arn(resource) == expected_arn

        # Pause before the tag comparison; this delay exists to reduce
        # test flakiness.
        time.sleep(cfg.TAG_DELAY_SLEEP)
        assert_tags_in_sync(expected_arn, resource["spec"].get("tags"))

        # Remove the custom resource, then confirm the config is gone too.
        _, was_deleted = k8s.delete_custom_resource(k8s_ref, 3, 10)
        assert was_deleted

        assert get_sagemaker_endpoint_config(name) is None
Пример #25
0
    def test_completed(self, xgboost_training_job_debugger):
        """Follow a debugger-enabled training job through to completion.

        The job is expected to start in progress and unsynced. Once the job
        itself is completed the resource is still expected to be unsynced;
        only after both the debugger and the profiler rule evaluations are
        completed must ``ACK.ResourceSynced`` become "True". Finally the
        tags are compared and the completed resource is deleted.
        """
        reference, resource = xgboost_training_job_debugger
        assert k8s.get_resource_exists(reference)

        job_name = resource["spec"].get("trainingJobName")
        assert job_name is not None

        described = get_sagemaker_training_job(job_name)
        expected_arn = described["TrainingJobArn"]

        actual_arn = k8s.get_resource_arn(resource)
        if actual_arn is None:
            # Surface the status block so a missing ARN can be diagnosed
            # from the log before the comparison below fails.
            logging.error(
                f"ARN for this resource is None, resource status is: {resource['status']}"
            )
        assert actual_arn == expected_arn

        assert described["TrainingJobStatus"] == cfg.JOB_STATUS_INPROGRESS
        assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

        # Job completion alone does not make the resource synced: the rule
        # evaluations checked below still have to finish.
        assert_training_status_in_sync(
            job_name, reference, cfg.JOB_STATUS_COMPLETED)
        assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

        # Debugger rule first, then profiler rule; both must be completed.
        for rule_kind in ("DebugRule", "ProfilerRule"):
            self._assert_training_rule_eval_status_in_sync(
                job_name, rule_kind, reference, cfg.RULE_STATUS_COMPLETED)
        assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

        assert_tags_in_sync(expected_arn, resource["spec"].get("tags"))

        # A completed resource must still be deletable through k8s.
        _, deleted = k8s.delete_custom_resource(
            reference,
            cfg.JOB_DELETE_WAIT_PERIODS,
            cfg.JOB_DELETE_WAIT_LENGTH,
        )
        assert deleted is True
    def test_create_model(self, xgboost_model):
        """Check ARN, tags and deletion of an XGBoost model resource.

        The ARN on the Kubernetes resource must equal the one SageMaker
        reports, the spec tags must be in sync, and deleting the custom
        resource must remove the model from SageMaker.
        """
        k8s_ref, resource = xgboost_model
        assert k8s.get_resource_exists(k8s_ref)

        name = resource["spec"].get("modelName")
        described = get_sagemaker_model(name)
        expected_arn = described["ModelArn"]
        assert k8s.get_resource_arn(resource) == expected_arn

        # Pause before the tag comparison; this delay exists to reduce
        # test flakiness.
        time.sleep(cfg.TAG_DELAY_SLEEP)
        assert_tags_in_sync(expected_arn, resource["spec"].get("tags"))

        # Remove the custom resource, then confirm the model is gone too.
        _, was_deleted = k8s.delete_custom_resource(k8s_ref, 3, 10)
        assert was_deleted

        assert get_sagemaker_model(name) is None
Пример #27
0
    def create_notebook_test(self, notebook_instance):
        """Check that a new notebook instance moves from Pending to InService.

        While the instance is Pending the ``ACK.ResourceSynced`` condition
        must be "False"; once it reaches InService the condition must become
        "True". The third element of the fixture tuple is not used here.
        """
        k8s_ref, resource, _ = notebook_instance
        assert k8s.get_resource_exists(k8s_ref)
        assert k8s.get_resource_arn(resource) is not None

        instance_name = resource["spec"].get("notebookInstanceName")
        assert instance_name is not None

        # Right after creation SageMaker should report the instance as Pending.
        described = get_notebook_instance(instance_name)
        assert described["NotebookInstanceStatus"] == "Pending"

        assert k8s.wait_on_condition(k8s_ref, "ACK.ResourceSynced", "False")
        self._assert_notebook_status_in_sync(
            instance_name, k8s_ref, "Pending")

        # Wait for InService and make sure the resource ends up synced.
        self._assert_notebook_status_in_sync(
            instance_name, k8s_ref, "InService")
        assert k8s.wait_on_condition(k8s_ref, "ACK.ResourceSynced", "True")
Пример #28
0
    def test_create_hpo(self, xgboost_hpojob):
        """Check a hyperparameter tuning job is created and stops on delete.

        After creation the ARN must match SageMaker's and the job status must
        be one of ``HPO_JOB_STATUS_CREATED``. After the custom resource is
        deleted the status must be one of ``HPO_JOB_STATUS_STOPPED``.
        """
        k8s_ref, resource = xgboost_hpojob
        assert k8s.get_resource_exists(k8s_ref)

        job_name = resource["spec"].get("hyperParameterTuningJobName")
        assert job_name is not None

        before_delete = get_sagemaker_hpo_job(job_name)
        assert (k8s.get_resource_arn(resource) ==
                before_delete["HyperParameterTuningJobArn"])
        created_status = before_delete["HyperParameterTuningJobStatus"]
        assert created_status in HPO_JOB_STATUS_CREATED

        # Remove the custom resource, then look the job up again.
        _, was_deleted = k8s.delete_custom_resource(k8s_ref)
        assert was_deleted is True

        after_delete = get_sagemaker_hpo_job(job_name)
        stopped_status = after_delete["HyperParameterTuningJobStatus"]
        assert stopped_status in HPO_JOB_STATUS_STOPPED
    def test_create_update_delete(self, notebook_instance_lifecycleConfig):
        """Create, update and delete a notebook instance lifecycle config.

        The three phases share one test because the update check needs the
        ``lastModifiedTime`` recorded at creation. The test verifies the ARN
        against SageMaker, patches the ``onStart`` script and waits for the
        update to be observed, confirms SageMaker holds the new content, and
        finally checks that deleting the custom resource removes the
        lifecycle config from SageMaker.
        """
        (reference, resource, spec) = notebook_instance_lifecycleConfig
        assert k8s.get_resource_exists(reference)

        # Getting the resource name
        notebook_instance_lfc_name = resource["spec"].get(
            "notebookInstanceLifecycleConfigName", None)
        assert notebook_instance_lfc_name is not None
        notebook_instance_lfc_desc = get_notebook_instance_lifecycle_config(
            notebook_instance_lfc_name)
        assert (
            k8s.get_resource_arn(resource) ==
            notebook_instance_lfc_desc["NotebookInstanceLifecycleConfigArn"])

        # Remember the modification time from creation so the update below
        # can be detected.
        assert "lastModifiedTime" in resource["status"]
        last_modified_time = resource["status"]["lastModifiedTime"]

        # Base64 encoding of "pip install six".
        update_content = "cGlwIGluc3RhbGwgc2l4"
        spec["spec"]["onStart"] = [{"content": update_content}]
        k8s.patch_custom_resource(reference, spec)

        # Plain truthiness check instead of comparing against True with ==.
        assert self.wait_until_update(reference, last_modified_time)

        # Verifying that an update was successful
        notebook_instance_lfc_desc = get_notebook_instance_lifecycle_config(
            notebook_instance_lfc_name)
        assert notebook_instance_lfc_desc["OnStart"][0][
            "Content"] == update_content

        # Deleting the resource
        _, deleted = k8s.delete_custom_resource(reference, DELETE_WAIT_PERIOD,
                                                DELETE_PERIOD_LENGTH)
        assert deleted is True
        assert (
            get_notebook_instance_lifecycle_config(notebook_instance_lfc_name)
            is None)
    def create_endpoint_test(self, xgboost_endpoint):
        (reference, resource, _) = xgboost_endpoint
        assert k8s.get_resource_exists(reference)

        # endpoint has correct arn and status
        endpoint_name = resource["spec"].get("endpointName", None)
        assert endpoint_name is not None

        endpoint_desc = get_sagemaker_endpoint(endpoint_name)
        endpoint_arn = endpoint_desc["EndpointArn"]
        assert k8s.get_resource_arn(resource) == endpoint_arn

        # endpoint transitions Creating -> InService state
        assert_endpoint_status_in_sync(endpoint_name, reference,
                                       cfg.ENDPOINT_STATUS_CREATING)
        assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

        assert_endpoint_status_in_sync(endpoint_name, reference,
                                       cfg.ENDPOINT_STATUS_INSERVICE)
        assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

        resource_tags = resource["spec"].get("tags", None)
        assert_tags_in_sync(endpoint_arn, resource_tags)