示例#1
0
    def test_rg_largecluster(self, rg_largecluster_input, rg_largecluster):
        """Check the large-cluster resource after creation and after scale-in.

        In both phases the resource must reach ``ACK.ResourceSynced == "True"``,
        report a status of ``"available"``, and expose node-group and
        member-cluster counts matching the desired topology. The update phase
        removes 10 node groups via a spec patch.
        """
        reference, _ = rg_largecluster

        def synced():
            # Same condition and wait budget for the create and update phases.
            return k8s.wait_on_condition(reference,
                                         "ACK.ResourceSynced",
                                         "True",
                                         wait_periods=240)

        def check_topology(node_groups, total_nodes):
            # Fetch a fresh copy so the assertions see the latest status.
            status = k8s.get_resource(reference)['status']
            assert status['status'] == "available"
            assert len(status['nodeGroups']) == node_groups
            assert len(status['memberClusters']) == total_nodes

        assert synced()

        # Phase 1: state right after initial creation.
        node_groups = int(rg_largecluster_input['NUM_NODE_GROUPS'])
        replicas_per_group = int(
            rg_largecluster_input['REPLICAS_PER_NODE_GROUP'])
        nodes_per_group = 1 + replicas_per_group
        check_topology(node_groups, node_groups * nodes_per_group)

        # Phase 2: shrink by 10 node groups and wait for the resource to sync.
        node_groups -= 10
        scale_in_patch = {
            "spec": {
                "numNodeGroups": node_groups,
                "nodeGroupConfiguration":
                provide_node_group_configuration(node_groups),
            }
        }
        k8s.patch_custom_resource(reference, scale_in_patch)
        # required as controller has likely not placed the resource in modifying
        sleep(DEFAULT_WAIT_SECS)
        assert synced()

        # New state after scaling in.
        check_topology(node_groups, node_groups * nodes_per_group)
示例#2
0
    def test_model_has_correct_arn(self, sagemaker_client, xgboost_model):
        """Compare the ARN read from the k8s resource with the SageMaker one.

        The model name is taken from ``spec.modelName`` and must be present.
        """
        reference, _ = xgboost_model
        k8s_resource = k8s.get_resource(reference)
        name = k8s_resource["spec"].get("modelName")

        assert name is not None

        # Resource-side ARN is read first, then the SageMaker-side lookup.
        arn_on_resource = self._get_resource_model_arn(k8s_resource)
        arn_in_sagemaker = self._get_sagemaker_model_arn(sagemaker_client, name)
        assert arn_on_resource == arn_in_sagemaker
示例#3
0
    def test_config_is_deleted(self, sagemaker_client, single_variant_config):
        """Delete the k8s resource and check the endpoint config lookup is None.

        The config name is read from ``spec.endpointConfigName`` before the
        resource is deleted, because the resource is no longer available
        afterwards.
        """
        (reference, _) = single_variant_config
        resource = k8s.get_resource(reference)
        config_name = resource["spec"].get("endpointConfigName", None)

        # Guard against a vacuous pass: without a name, the final lookup
        # would not be checking the config this test created.
        assert config_name is not None

        # Delete the k8s resource.
        _, deleted = k8s.delete_custom_resource(reference)
        assert deleted is True

        assert (self._get_sagemaker_endpoint_config_arn(
            sagemaker_client, config_name) is None)
示例#4
0
    def test_config_has_correct_arn(self, sagemaker_client,
                                    single_variant_config):
        """Check the endpoint-config ARN on the resource against SageMaker.

        The config name comes from ``spec.endpointConfigName`` and must be set.
        """
        reference, _ = single_variant_config
        k8s_resource = k8s.get_resource(reference)
        name = k8s_resource["spec"].get("endpointConfigName")

        assert name is not None

        # Resource-side ARN first, then the SageMaker-side lookup.
        arn_on_resource = self._get_resource_endpoint_config_arn(k8s_resource)
        arn_in_sagemaker = self._get_sagemaker_endpoint_config_arn(
            sagemaker_client, name)
        assert arn_on_resource == arn_in_sagemaker
示例#5
0
    def test_model_is_deleted(self, sagemaker_client, xgboost_model):
        """Delete the k8s resource and check the model ARN lookup yields None.

        The model name is read from ``spec.modelName`` before the resource is
        deleted so it can still be used for the lookup afterwards.
        """
        (reference, _) = xgboost_model
        resource = k8s.get_resource(reference)
        model_name = resource["spec"].get("modelName", None)

        # Guard against a vacuous pass: without a name, the final lookup
        # would not be checking the model this test created.
        assert model_name is not None

        # Delete the k8s resource.
        _, deleted = k8s.delete_custom_resource(reference)
        assert deleted is True

        assert self._get_sagemaker_model_arn(sagemaker_client,
                                             model_name) is None
    def test_delete_endpoint(self, sagemaker_client, single_variant_xgboost_endpoint):
        """Delete the k8s resource and check the endpoint describe call yields None.

        The endpoint name is read from ``spec.endpointName`` before the
        resource is deleted so it can still be used for the lookup afterwards.
        """
        (reference, _, _, _) = single_variant_xgboost_endpoint
        resource = k8s.get_resource(reference)
        endpoint_name = resource["spec"].get("endpointName", None)

        # Guard against a vacuous pass: without a name, the final lookup
        # would not be checking the endpoint this test created.
        assert endpoint_name is not None

        # Delete the k8s resource.
        _, deleted = k8s.delete_custom_resource(reference)
        assert deleted is True

        assert (
            self._describe_sagemaker_endpoint(sagemaker_client, endpoint_name) is None
        )
示例#7
0
    def test_model_has_correct_arn(self, sagemaker_client, xgboost_model):
        """Assert the resource's model ARN equals the ARN looked up in SageMaker.

        Requires ``spec.modelName`` to be present on the k8s resource.
        """
        reference, _ = xgboost_model
        k8s_resource = k8s.get_resource(reference)
        name = k8s_resource['spec'].get('modelName')

        assert name is not None

        # Left operand (resource ARN) is evaluated before the SageMaker lookup.
        assert self._get_resource_model_arn(
            k8s_resource) == self._get_sagemaker_model_arn(
                sagemaker_client, name)
    def test_trainingjob_has_created_status(self, sagemaker_client,
                                            xgboost_trainingjob):
        """Check the training job's SageMaker status is one of the created ones.

        The job name comes from ``spec.trainingJobName`` and must be set.
        """
        reference, _ = xgboost_trainingjob
        k8s_resource = k8s.get_resource(reference)
        job_name = k8s_resource['spec'].get('trainingJobName')

        assert job_name is not None

        observed = self._get_sagemaker_trainingjob_status(
            sagemaker_client, job_name)
        acceptable = self._get_created_trainingjob_status_list()
        assert observed in acceptable
    def test_trainingjob_has_correct_arn(self, sagemaker_client,
                                         xgboost_trainingjob):
        """Assert the resource's training-job ARN equals the SageMaker lookup.

        Requires ``spec.trainingJobName`` to be present on the k8s resource.
        """
        reference, _ = xgboost_trainingjob
        k8s_resource = k8s.get_resource(reference)
        job_name = k8s_resource['spec'].get('trainingJobName')

        assert job_name is not None

        # Left operand (resource ARN) is evaluated before the SageMaker lookup.
        assert self._get_resource_trainingjob_arn(
            k8s_resource) == self._get_sagemaker_trainingjob_arn(
                sagemaker_client, job_name)
    def test_processing_job_has_created_status(self, sagemaker_client,
                                               kmeans_processing_job):
        """Check the processing job's SageMaker status is one of the created ones.

        The job name comes from ``spec.processingJobName`` and must be set.
        """
        reference, _ = kmeans_processing_job
        k8s_resource = k8s.get_resource(reference)
        job_name = k8s_resource["spec"].get("processingJobName")

        assert job_name is not None

        observed = self._get_sagemaker_processing_job_status(
            sagemaker_client, job_name)
        acceptable = self._get_created_processing_job_status_list()
        assert observed in acceptable
    def test_processing_job_has_correct_arn(self, sagemaker_client,
                                            kmeans_processing_job):
        """Assert ``k8s.get_resource_arn`` matches the processing-job ARN lookup.

        Requires ``spec.processingJobName`` to be present on the k8s resource.
        """
        reference, _ = kmeans_processing_job
        k8s_resource = k8s.get_resource(reference)
        job_name = k8s_resource["spec"].get("processingJobName")

        assert job_name is not None

        # Left operand (resource ARN) is evaluated before the SageMaker lookup.
        assert k8s.get_resource_arn(
            k8s_resource) == self._get_sagemaker_processing_job_arn(
                sagemaker_client, job_name)
    def test_processing_job_has_stopped_status(self, sagemaker_client,
                                               kmeans_processing_job):
        """Delete the k8s resource, then check the job status is a stopped one.

        The job name is captured from ``spec.processingJobName`` before the
        deletion and must be set.
        """
        reference, _ = kmeans_processing_job
        job_name = k8s.get_resource(reference)["spec"].get("processingJobName")

        assert job_name is not None

        # Remove the k8s resource; the second tuple element reports success.
        outcome = k8s.delete_custom_resource(reference)
        _, deleted = outcome
        assert deleted is True

        observed = self._get_sagemaker_processing_job_status(
            sagemaker_client, job_name)
        acceptable = self._get_stopped_processing_job_status_list()
        assert observed in acceptable
    def test_trainingjob_has_stopped_status(self, sagemaker_client,
                                            xgboost_trainingjob):
        """Delete the k8s resource, pause, then check the job status is a stopped one.

        The job name is captured from ``spec.trainingJobName`` before the
        deletion and must be set. The result of the delete call is not
        inspected; a fixed 5-second pause follows it instead.
        """
        reference, _ = xgboost_trainingjob
        job_name = k8s.get_resource(reference)['spec'].get('trainingJobName')

        assert job_name is not None

        # Remove the k8s resource.
        k8s.delete_custom_resource(reference)
        # TODO: This sleep could be replaced by a wait loop but this is sufficient for now.
        time.sleep(5)

        observed = self._get_sagemaker_trainingjob_status(
            sagemaker_client, job_name)
        acceptable = self._get_stopped_trainingjob_status_list()
        assert observed in acceptable
    def test_trainingjob_has_stopped_status(
        self, sagemaker_client, xgboost_trainingjob
    ):
        """Delete the k8s resource, then check the job status is a stopped one.

        The job name is captured from ``spec.trainingJobName`` before the
        deletion and must be set.
        """
        reference, _ = xgboost_trainingjob
        job_name = k8s.get_resource(reference)["spec"].get("trainingJobName")

        assert job_name is not None

        # Remove the k8s resource; the second tuple element reports success.
        outcome = k8s.delete_custom_resource(reference)
        _, deleted = outcome
        assert deleted is True

        observed = self._get_sagemaker_trainingjob_status(
            sagemaker_client, job_name
        )
        acceptable = self._get_stopped_trainingjob_status_list()
        assert observed in acceptable
    def _wait_resource_endpoint_status(
        self,
        reference: k8s.CustomResourceReference,
        expected_status: str,
        wait_periods: int = 18,
        period_length: int = 30,
    ):
        """Poll the resource until ``status.endpointStatus`` equals ``expected_status``.

        Args:
            reference: Reference to the custom resource to poll.
            expected_status: Value of ``status.endpointStatus`` to wait for.
            wait_periods: Maximum number of polls before giving up.
            period_length: Seconds to sleep before each poll. Defaults to 30,
                the interval that was previously hard-coded.

        Returns:
            The last observed ``endpointStatus``, or ``None`` if no poll was
            made (``wait_periods`` <= 0). A timeout is only logged, not
            raised, so callers must compare the result with
            ``expected_status`` themselves.

        Raises:
            AssertionError: If a polled resource has no ``endpointStatus``
                key under ``status``.
        """
        resource_status = None
        for _ in range(wait_periods):
            # The sleep comes before the read, so even an already-matching
            # resource costs one full period.
            time.sleep(period_length)
            resource = k8s.get_resource(reference)
            assert "endpointStatus" in resource["status"]
            resource_status = resource["status"]["endpointStatus"]
            if resource_status == expected_status:
                break
        else:
            # Reached only when the loop ran out without a match.
            logging.error(
                "Wait for endpoint resource status: %s timed out. Actual status: %s",
                expected_status,
                resource_status,
            )

        return resource_status
    def test_endpoint_has_correct_arn_and_status(
        self, sagemaker_client, single_variant_xgboost_endpoint
    ):
        """Check the endpoint ARN and that its status stays in sync.

        The ARN from ``_get_resource_endpoint_arn`` must equal the
        ``EndpointArn`` field of the SageMaker endpoint description. The
        status sync is then asserted for ``self.status_creating`` followed by
        ``self.status_inservice``.
        """
        (reference, _, _, _) = single_variant_xgboost_endpoint
        resource = k8s.get_resource(reference)
        endpoint_name = resource["spec"].get("endpointName", None)

        assert endpoint_name is not None

        resource_endpoint_arn = self._get_resource_endpoint_arn(resource)
        endpoint_description = self._describe_sagemaker_endpoint(
            sagemaker_client, endpoint_name
        )
        # The describe helper can yield None (e.g. for an endpoint that does
        # not exist); fail with a clear assertion instead of a TypeError from
        # subscripting None.
        assert endpoint_description is not None
        assert resource_endpoint_arn == endpoint_description["EndpointArn"]

        self._assert_endpoint_status_in_sync(
            sagemaker_client, endpoint_name, reference, self.status_creating
        )
        self._assert_endpoint_status_in_sync(
            sagemaker_client, endpoint_name, reference, self.status_inservice
        )