def test_rg_update_cpg(self, rg_update_cpg_input, rg_update_cpg, bootstrap_resources):
    """Patch the replication group's cache parameter group and verify it sticks."""
    (reference, _) = rg_update_cpg
    # Wait for the resource to finish its initial sync.
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True", wait_periods=90)

    # Apply the new cache parameter group and wait for the resource to re-sync.
    cpg_patch = {"spec": {"cacheParameterGroupName": bootstrap_resources.CPGName}}
    _ = k8s.patch_custom_resource(reference, cpg_patch)
    sleep(DEFAULT_WAIT_SECS)
    # should be immediate
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True", wait_periods=5)

    # Confirm the backing cache cluster now reports the new parameter group.
    cluster = retrieve_cache_cluster(rg_update_cpg_input['RG_ID'])
    assert cluster['CacheParameterGroup']['CacheParameterGroupName'] == bootstrap_resources.CPGName
def test_completed(self, xgboost_hpojob):
    """Verify an HPO job runs to Completed, tags sync, and the resource is deletable."""
    (reference, resource) = xgboost_hpojob
    assert k8s.get_resource_exists(reference)

    job_name = resource["spec"].get("hyperParameterTuningJobName", None)
    assert job_name is not None

    sm_desc = get_sagemaker_hpo_job(job_name)
    job_arn = sm_desc["HyperParameterTuningJobArn"]
    assert k8s.get_resource_arn(resource) == job_arn
    assert sm_desc["HyperParameterTuningJobStatus"] == cfg.JOB_STATUS_INPROGRESS
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

    self._assert_hpo_status_in_sync(job_name, reference, cfg.JOB_STATUS_COMPLETED)
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

    assert_tags_in_sync(job_arn, resource["spec"].get("tags", None))

    # Check that you can delete a completed resource from k8s
    _, deleted = k8s.delete_custom_resource(
        reference, cfg.JOB_DELETE_WAIT_PERIODS, cfg.JOB_DELETE_WAIT_LENGTH)
    assert deleted is True
def test_completed(self, xgboost_training_job):
    """Verify a training job runs to Completed, populates model artifacts,
    syncs tags, and can then be deleted from k8s.
    """
    (reference, resource) = xgboost_training_job
    assert k8s.get_resource_exists(reference)

    training_job_name = resource["spec"].get("trainingJobName", None)
    assert training_job_name is not None

    training_job_desc = get_sagemaker_training_job(training_job_name)
    training_job_arn = training_job_desc["TrainingJobArn"]
    assert k8s.get_resource_arn(resource) == training_job_arn
    assert training_job_desc["TrainingJobStatus"] == cfg.JOB_STATUS_INPROGRESS
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

    assert_training_status_in_sync(training_job_name, reference, cfg.JOB_STATUS_COMPLETED)
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

    # model artifact URL is populated
    resource = k8s.get_resource(reference)
    # BUG FIX: the original line was missing its `assert`, so this check was a
    # no-op expression statement that could never fail the test.
    assert resource["status"]["modelArtifacts"]["s3ModelArtifacts"] is not None

    resource_tags = resource["spec"].get("tags", None)
    assert_tags_in_sync(training_job_arn, resource_tags)

    # Check that you can delete a completed resource from k8s
    _, deleted = k8s.delete_custom_resource(
        reference, cfg.JOB_DELETE_WAIT_PERIODS, cfg.JOB_DELETE_WAIT_LENGTH)
    assert deleted is True
def test_create_feature_group(self, feature_group):
    """Create a feature group via the controller, wait for Created,
    verify tags, then delete it and confirm it is gone in SageMaker.
    """
    (reference, resource) = feature_group
    assert k8s.get_resource_exists(reference)

    fg_name = resource["spec"].get("featureGroupName", None)
    assert fg_name is not None

    fg_desc = get_sagemaker_feature_group(fg_name)
    fg_arn = fg_desc["FeatureGroupArn"]
    assert k8s.get_resource_arn(resource) == fg_arn
    assert fg_desc["FeatureGroupStatus"] == FEATURE_GROUP_STATUS_CREATING
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

    self._assert_feature_group_status_in_sync(fg_name, reference, FEATURE_GROUP_STATUS_CREATED)
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

    assert_tags_in_sync(fg_arn, resource["spec"].get("tags", None))

    # Delete the k8s resource.
    _, deleted = k8s.delete_custom_resource(reference, WAIT_PERIOD_COUNT, WAIT_PERIOD_LENGTH)
    assert deleted
    assert get_sagemaker_feature_group(fg_name) is None
def test_rg_upgrade_ev(self, rg_upgrade_ev_input, rg_upgrade_ev):
    """Upgrade the replication group's engine version and verify spec,
    status, and the backing cache cluster all agree afterwards.
    """
    (reference, _) = rg_upgrade_ev
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True", wait_periods=90)

    # Initial state: cluster exists and runs the version from the test input.
    cluster = retrieve_cache_cluster(rg_upgrade_ev_input['RG_ID'])
    assert cluster is not None
    assert cluster['EngineVersion'] == rg_upgrade_ev_input['ENGINE_VERSION']

    # Upgrade the engine version and wait for the resource to re-sync.
    target_version = "5.0.6"
    _ = k8s.patch_custom_resource(reference, {"spec": {"engineVersion": target_version}})
    sleep(DEFAULT_WAIT_SECS)
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True", wait_periods=90)

    # New state: spec, status, and the cache cluster reflect the upgrade.
    resource = k8s.get_resource(reference)
    assert resource['status']['status'] == "available"
    assert resource['spec']['engineVersion'] == target_version
    cluster = retrieve_cache_cluster(rg_upgrade_ev_input['RG_ID'])
    assert cluster is not None
    assert cluster['EngineVersion'] == target_version
def test_completed(self, kmeans_processing_job):
    """Verify a processing job runs to Completed, tags sync, and the resource is deletable."""
    (reference, resource) = kmeans_processing_job
    assert k8s.get_resource_exists(reference)

    job_name = resource["spec"].get("processingJobName", None)
    assert job_name is not None

    job_desc = get_sagemaker_processing_job(job_name)
    job_arn = job_desc["ProcessingJobArn"]
    assert k8s.get_resource_arn(resource) == job_arn
    assert job_desc["ProcessingJobStatus"] == cfg.JOB_STATUS_INPROGRESS
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

    self._assert_processing_status_in_sync(job_name, reference, cfg.JOB_STATUS_COMPLETED)
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

    assert_tags_in_sync(job_arn, resource["spec"].get("tags", None))

    # Check that you can delete a completed resource from k8s
    _, deleted = k8s.delete_custom_resource(
        reference, cfg.JOB_DELETE_WAIT_PERIODS, cfg.JOB_DELETE_WAIT_LENGTH)
    assert deleted is True
def test_versioned_model_package_completed(self, xgboost_versioned_model_package):
    """Create a versioned model package, wait for Completed, patch its
    approval status, verify the update in SageMaker, then delete it.
    """
    (reference, spec, resource) = xgboost_versioned_model_package
    assert k8s.get_resource_exists(reference)

    model_package_group_name = resource["spec"].get("modelPackageGroupName")
    # Model package name for Versioned Model packages is the ARN of the resource
    model_package_name = sagemaker_client().list_model_packages(
        ModelPackageGroupName=model_package_group_name
    )["ModelPackageSummaryList"][0]["ModelPackageArn"]
    model_package_desc = get_sagemaker_model_package(model_package_name)

    if k8s.get_resource_arn(resource) is None:
        logging.error(
            f"ARN for this resource is None, resource status is: {resource['status']}"
        )
    assert k8s.get_resource_arn(resource) == model_package_name

    self._assert_model_package_status_in_sync(
        model_package_name, reference, cfg.JOB_STATUS_INPROGRESS)
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")
    self._assert_model_package_status_in_sync(
        model_package_name, reference, cfg.JOB_STATUS_COMPLETED)
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

    # Update the resource
    new_model_approval_status = "Approved"
    approval_description = "Approved modelpackage"
    spec["spec"]["modelApprovalStatus"] = new_model_approval_status
    spec["spec"]["approvalDescription"] = approval_description
    resource = k8s.patch_custom_resource(reference, spec)
    resource = k8s.wait_resource_consumed_by_controller(reference)
    assert resource is not None

    self._assert_model_package_status_in_sync(
        model_package_name, reference, cfg.JOB_STATUS_COMPLETED)
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

    # Both SageMaker and the k8s spec must reflect the approval update.
    model_package_desc = get_sagemaker_model_package(model_package_name)
    assert model_package_desc["ModelApprovalStatus"] == new_model_approval_status
    assert model_package_desc["ApprovalDescription"] == approval_description
    assert resource["spec"].get("modelApprovalStatus", None) == new_model_approval_status
    assert resource["spec"].get("approvalDescription", None) == approval_description

    # Check that you can delete a completed resource from k8s
    _, deleted = k8s.delete_custom_resource(reference, DELETE_WAIT_PERIOD, DELETE_WAIT_LENGTH)
    assert deleted is True
    assert get_sagemaker_model_package(model_package_name) is None
def test_rg_auth_token(self, rg_auth_token, secrets):
    """Rotate the replication group's auth token to a second secret and re-sync."""
    (reference, _) = rg_auth_token
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True", wait_periods=30)

    token_patch = {
        "spec": {
            "authToken": {"name": secrets['NAME2'], "key": secrets['KEY2']}
        }
    }
    k8s.patch_custom_resource(reference, token_patch)
    sleep(DEFAULT_WAIT_SECS)
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True", wait_periods=30)
def test_rg_associate_resources(self, rg_associate_resources_input, rg_associate_resources, bootstrap_resources):
    """Associate security groups, an SNS topic, and user groups with the
    replication group, then swap them for a second set, verifying each time.
    """
    (reference, _) = rg_associate_resources
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True", wait_periods=90)

    # First association: two security groups, topic 1, user group 1.
    sg_list = [bootstrap_resources.SecurityGroup1, bootstrap_resources.SecurityGroup2]
    sns_topic = bootstrap_resources.SnsTopic1
    ug_list = [bootstrap_resources.UserGroup1]
    _ = k8s.patch_custom_resource(reference, {
        "spec": {
            "securityGroupIDs": sg_list,
            "notificationTopicARN": sns_topic,
            "userGroupIDs": ug_list
        }
    })
    sleep(DEFAULT_WAIT_SECS)
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True", wait_periods=90)
    assert_associated_resources(rg_associate_resources_input['RG_ID'], sg_list, sns_topic, ug_list)

    # Second association: swap every resource for its alternate.
    sg_list = [bootstrap_resources.SecurityGroup2]
    sns_topic = bootstrap_resources.SnsTopic2
    ug_list = [bootstrap_resources.UserGroup2]
    _ = k8s.patch_custom_resource(reference, {
        "spec": {
            "securityGroupIDs": sg_list,
            "notificationTopicARN": sns_topic,
            "userGroupIDs": ug_list
        }
    })
    sleep(DEFAULT_WAIT_SECS)
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True", wait_periods=90)
    assert_associated_resources(rg_associate_resources_input['RG_ID'], sg_list, sns_topic, ug_list)
def update_endpoint_failed_test(self, sagemaker_client, single_variant_config, faulty_config, xgboost_endpoint):
    """Point the endpoint at a faulty config, verify the update fails with a
    terminal condition, and confirm the endpoint rolled back to the old config.
    """
    (endpoint_reference, _, endpoint_spec) = xgboost_endpoint
    (_, faulty_config_resource) = faulty_config
    faulty_config_name = faulty_config_resource["spec"].get("endpointConfigName", None)

    endpoint_spec["spec"]["endpointConfigName"] = faulty_config_name
    endpoint_resource = k8s.patch_custom_resource(endpoint_reference, endpoint_spec)
    endpoint_resource = k8s.wait_resource_consumed_by_controller(endpoint_reference)
    assert endpoint_resource is not None

    # endpoint transitions Updating -> InService state
    # NOTE(review): "status_udpating" looks misspelled, but the attribute is
    # defined elsewhere in the class — left as-is to match its definition.
    self._assert_endpoint_status_in_sync(
        sagemaker_client,
        endpoint_reference.name,
        endpoint_reference,
        self.status_udpating,
    )
    assert k8s.wait_on_condition(endpoint_reference, "ACK.ResourceSynced", "False")

    endpoint_resource = k8s.get_resource(endpoint_reference)
    assert (endpoint_resource["status"].get("lastEndpointConfigNameForUpdate", None)
            == faulty_config_name)

    self._assert_endpoint_status_in_sync(
        sagemaker_client,
        endpoint_reference.name,
        endpoint_reference,
        self.status_inservice,
    )
    assert k8s.wait_on_condition(endpoint_reference, "ACK.ResourceSynced", "True")
    assert k8s.assert_condition_state_message(
        endpoint_reference,
        "ACK.Terminal",
        "True",
        "Unable to update Endpoint. Check FailureReason",
    )

    endpoint_resource = k8s.get_resource(endpoint_reference)
    assert endpoint_resource["status"].get("failureReason", None) is not None

    # additional check: endpoint using old endpoint config
    (_, old_config_resource) = single_variant_config
    current_config_name = endpoint_resource["status"].get("latestEndpointConfigName")
    assert current_config_name is not None
    assert current_config_name == old_config_resource["spec"].get("endpointConfigName", None)
def update_endpoint_failed_test(self, single_variant_config, faulty_config, xgboost_endpoint):
    """Point the endpoint at a faulty config and verify the failed update is
    surfaced via the update annotation, a terminal condition, and failureReason.
    """
    (endpoint_reference, _, endpoint_spec) = xgboost_endpoint
    (_, faulty_config_resource) = faulty_config
    faulty_config_name = faulty_config_resource["spec"].get("endpointConfigName", None)

    endpoint_spec["spec"]["endpointConfigName"] = faulty_config_name
    endpoint_resource = k8s.patch_custom_resource(endpoint_reference, endpoint_spec)
    endpoint_resource = k8s.wait_resource_consumed_by_controller(endpoint_reference)
    assert endpoint_resource is not None

    # endpoint transitions Updating -> InService state
    assert_endpoint_status_in_sync(
        endpoint_reference.name,
        endpoint_reference,
        cfg.ENDPOINT_STATUS_UPDATING,
    )
    assert k8s.wait_on_condition(endpoint_reference, "ACK.ResourceSynced", "False")

    # The in-flight config name is recorded as an annotation.
    endpoint_resource = k8s.get_resource(endpoint_reference)
    annotations = endpoint_resource["metadata"].get("annotations", None)
    assert annotations is not None
    assert annotations[LAST_ENDPOINTCONFIG_UPDATE_ANNOTATION] == faulty_config_name

    assert_endpoint_status_in_sync(
        endpoint_reference.name,
        endpoint_reference,
        cfg.ENDPOINT_STATUS_INSERVICE,
    )
    assert k8s.wait_on_condition(endpoint_reference, "ACK.ResourceSynced", "False")

    # Terminal message names the old (still-active) endpoint config.
    (_, old_config_resource) = single_variant_config
    current_config_name = old_config_resource["spec"].get("endpointConfigName", None)
    assert k8s.assert_condition_state_message(
        endpoint_reference,
        "ACK.Terminal",
        "True",
        FAIL_UPDATE_ERROR_MESSAGE + current_config_name,
    )

    endpoint_resource = k8s.get_resource(endpoint_reference)
    assert endpoint_resource["status"].get("failureReason", None) is not None
def test_create_model_package_group(self, xgboost_model_package_group):
    """Create a model package group, verify completion and tags, then delete
    it and confirm it is gone in SageMaker.
    """
    (reference, resource) = xgboost_model_package_group
    assert k8s.get_resource_exists(reference)

    group_name = resource["spec"].get("modelPackageGroupName", None)
    assert group_name is not None

    group_desc = get_sagemaker_model_package_group(group_name)
    group_arn = group_desc["ModelPackageGroupArn"]
    assert k8s.get_resource_arn(resource) == group_arn

    self._assert_model_package_group_status_in_sync(group_name, reference, cfg.JOB_STATUS_COMPLETED)
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

    assert_tags_in_sync(group_arn, resource["spec"].get("tags", None))

    # Check that you can delete a completed resource from k8s
    _, deleted = k8s.delete_custom_resource(reference, cfg.DELETE_WAIT_PERIOD, cfg.DELETE_WAIT_LENGTH)
    assert deleted is True
    assert get_sagemaker_model_package_group(group_name) is None
def test_user_nopass(self, user_nopass, user_nopass_input):
    """Verify the user's access string syncs initially and after a patch."""
    (reference, resource) = user_nopass
    assert k8s.get_resource_exists(reference)
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True", wait_periods=5)

    resource = k8s.get_resource(reference)
    assert resource["status"]["lastRequestedAccessString"] == user_nopass_input["ACCESS_STRING"]

    # Patch the access string and confirm the status reflects the new value.
    new_access_string = "on ~app::* -@all +@read +@write"
    _ = k8s.patch_custom_resource(reference, {"spec": {"accessString": new_access_string}})
    sleep(DEFAULT_WAIT_SECS)
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True", wait_periods=5)

    resource = k8s.get_resource(reference)
    assert resource["status"]["lastRequestedAccessString"] == new_access_string
def test_stopped(self, xgboost_training_job):
    """Delete an in-progress training job resource and verify SageMaker stops the job."""
    (reference, resource) = xgboost_training_job
    assert k8s.get_resource_exists(reference)

    job_name = resource["spec"].get("trainingJobName", None)
    assert job_name is not None

    job_desc = get_sagemaker_training_job(job_name)
    assert k8s.get_resource_arn(resource) == job_desc["TrainingJobArn"]
    assert job_desc["TrainingJobStatus"] == cfg.JOB_STATUS_INPROGRESS
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

    assert_training_status_in_sync(job_name, reference, cfg.JOB_STATUS_INPROGRESS)

    # Delete the k8s resource.
    _, deleted = k8s.delete_custom_resource(
        reference, cfg.JOB_DELETE_WAIT_PERIODS, cfg.JOB_DELETE_WAIT_LENGTH)
    assert deleted is True

    # Deletion of the resource must stop the SageMaker job.
    job_desc = get_sagemaker_training_job(job_name)
    assert job_desc["TrainingJobStatus"] in cfg.LIST_JOB_STATUS_STOPPED
def test_stopped(self, xgboost_hpojob):
    """Delete an in-progress HPO job resource and verify SageMaker stops the job."""
    (reference, resource) = xgboost_hpojob
    assert k8s.get_resource_exists(reference)

    job_name = resource["spec"].get("hyperParameterTuningJobName", None)
    assert job_name is not None

    sm_desc = get_sagemaker_hpo_job(job_name)
    assert k8s.get_resource_arn(resource) == sm_desc["HyperParameterTuningJobArn"]
    assert sm_desc["HyperParameterTuningJobStatus"] == cfg.JOB_STATUS_INPROGRESS
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

    self._assert_hpo_status_in_sync(job_name, reference, cfg.JOB_STATUS_INPROGRESS)

    # Delete the k8s resource.
    _, deleted = k8s.delete_custom_resource(
        reference, cfg.JOB_DELETE_WAIT_PERIODS, cfg.JOB_DELETE_WAIT_LENGTH)
    assert deleted is True

    # Deletion of the resource must stop the SageMaker job.
    sm_desc = get_sagemaker_hpo_job(job_name)
    assert sm_desc["HyperParameterTuningJobStatus"] in cfg.LIST_JOB_STATUS_STOPPED
def test_crud_authorizer(self, api_resource):
    """CRUD cycle for an HTTP API authorizer: create, verify in API Gateway,
    update its title, then delete and verify removal.
    """
    api_ref, api_cr = api_resource
    api_id = api_cr['status']['apiID']

    test_data = REPLACEMENT_VALUES.copy()
    authorizer_name = random_suffix_name("ack-test-authorizer", 25)
    test_data['AUTHORIZER_NAME'] = authorizer_name
    test_data['AUTHORIZER_TITLE'] = authorizer_name
    test_data['API_ID'] = api_id
    test_data['AUTHORIZER_URI'] = f'arn:aws:apigateway:{get_region()}:lambda:path/2015-03-31/functions/{get_bootstrap_resources().AuthorizerFunctionArn}/invocations'
    authorizer_ref, authorizer_data = helper.authorizer_ref_and_data(
        authorizer_resource_name=authorizer_name, replacement_values=test_data)
    logging.debug(f"http api authorizer resource. name: {authorizer_name}, data: {authorizer_data}")

    # test create
    k8s.create_custom_resource(authorizer_ref, authorizer_data)
    time.sleep(CREATE_WAIT_AFTER_SECONDS)
    assert k8s.wait_on_condition(authorizer_ref, "ACK.ResourceSynced", "True", wait_periods=10)
    cr = k8s.get_resource(authorizer_ref)
    assert cr is not None
    authorizer_id = cr['status']['authorizerID']

    # Let's check that the HTTP Api integration appears in Amazon API Gateway
    apigw_validator.assert_authorizer_is_present(api_id=api_id, authorizer_id=authorizer_id)
    apigw_validator.assert_authorizer_name(
        api_id=api_id,
        authorizer_id=authorizer_id,
        expected_authorizer_name=authorizer_name
    )

    # test update
    updated_authorizer_title = 'updated-' + authorizer_name
    test_data['AUTHORIZER_TITLE'] = updated_authorizer_title
    updated_authorizer_resource_data = load_apigatewayv2_resource(
        "authorizer",
        additional_replacements=test_data,
    )
    logging.debug(f"updated http api authorizer resource: {updated_authorizer_resource_data}")
    k8s.patch_custom_resource(authorizer_ref, updated_authorizer_resource_data)
    time.sleep(UPDATE_WAIT_AFTER_SECONDS)
    condition.assert_synced(authorizer_ref)

    # The authorizer in API Gateway must now carry the updated title.
    apigw_validator.assert_authorizer_name(
        api_id=api_id,
        authorizer_id=authorizer_id,
        expected_authorizer_name=updated_authorizer_title
    )

    # test delete
    k8s.delete_custom_resource(authorizer_ref)
    time.sleep(DELETE_WAIT_AFTER_SECONDS)
    assert not k8s.get_resource_exists(authorizer_ref)
    # HTTP Api authorizer should no longer appear in Amazon API Gateway
    apigw_validator.assert_authorizer_is_deleted(api_id=api_id, authorizer_id=authorizer_id)
def test_user_password(self, user_password, user_password_input):
    """Verify a password-authenticated user syncs and reports two passwords."""
    (reference, resource) = user_password
    assert k8s.get_resource_exists(reference)
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True", wait_periods=5)

    resource = k8s.get_resource(reference)
    auth = resource["status"]["authentication"]
    assert auth is not None
    assert auth["type_"] == "password"
    assert auth["passwordCount"] == 2
def test_crud_integration(self, api_resource):
    """CRUD cycle for an HTTP API integration: create, verify its URI in
    API Gateway, update the URI, then delete and verify removal.
    """
    api_ref, api_cr = api_resource
    api_id = api_cr['status']['apiID']

    test_data = REPLACEMENT_VALUES.copy()
    integration_name = random_suffix_name("ack-test-integration", 25)
    test_data['INTEGRATION_NAME'] = integration_name
    test_data['API_ID'] = api_id
    integration_ref, integration_data = helper.integration_ref_and_data(
        integration_resource_name=integration_name, replacement_values=test_data)
    logging.debug(f"http api integration resource. name: {integration_name}, data: {integration_data}")

    # test create
    k8s.create_custom_resource(integration_ref, integration_data)
    time.sleep(CREATE_WAIT_AFTER_SECONDS)
    assert k8s.wait_on_condition(integration_ref, "ACK.ResourceSynced", "True", wait_periods=10)
    cr = k8s.get_resource(integration_ref)
    assert cr is not None
    integration_id = cr['status']['integrationID']

    # Let's check that the HTTP Api integration appears in Amazon API Gateway
    apigw_validator.assert_integration_is_present(api_id=api_id, integration_id=integration_id)
    apigw_validator.assert_integration_uri(
        api_id=api_id,
        integration_id=integration_id,
        expected_uri=test_data['INTEGRATION_URI']
    )

    # test update
    updated_uri = 'https://httpbin.org/post'
    test_data['INTEGRATION_URI'] = updated_uri
    updated_integration_resource_data = load_apigatewayv2_resource(
        "integration",
        additional_replacements=test_data,
    )
    logging.debug(f"updated http api integration resource: {updated_integration_resource_data}")
    k8s.patch_custom_resource(integration_ref, updated_integration_resource_data)
    time.sleep(UPDATE_WAIT_AFTER_SECONDS)
    condition.assert_synced(integration_ref)

    # The integration in API Gateway must now carry the updated URI.
    apigw_validator.assert_integration_uri(
        api_id=api_id,
        integration_id=integration_id,
        expected_uri=updated_uri
    )

    # test delete
    k8s.delete_custom_resource(integration_ref)
    time.sleep(DELETE_WAIT_AFTER_SECONDS)
    assert not k8s.get_resource_exists(integration_ref)
    # HTTP Api integration should no longer appear in Amazon API Gateway
    apigw_validator.assert_integration_is_deleted(api_id=api_id, integration_id=integration_id)
def test_completed(self, xgboost_training_job_debugger):
    """Verify a training job with debugger/profiler rules completes, the rule
    evaluations finish, tags sync, and the resource can be deleted.
    """
    (reference, resource) = xgboost_training_job_debugger
    assert k8s.get_resource_exists(reference)

    training_job_name = resource["spec"].get("trainingJobName", None)
    assert training_job_name is not None

    training_job_desc = get_sagemaker_training_job(training_job_name)
    training_job_arn = training_job_desc["TrainingJobArn"]
    resource_arn = k8s.get_resource_arn(resource)
    if resource_arn is None:
        logging.error(
            f"ARN for this resource is None, resource status is: {resource['status']}"
        )
    assert resource_arn == training_job_arn
    assert training_job_desc["TrainingJobStatus"] == cfg.JOB_STATUS_INPROGRESS
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

    assert_training_status_in_sync(training_job_name, reference, cfg.JOB_STATUS_COMPLETED)
    # NOTE(review): still expecting ResourceSynced=False here — presumably the
    # resource stays unsynced while rule evaluations are running; confirm.
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")

    # Assert debugger rule evaluation completed
    self._assert_training_rule_eval_status_in_sync(
        training_job_name, "DebugRule", reference, cfg.RULE_STATUS_COMPLETED)
    # Assert profiler rule evaluation completed
    self._assert_training_rule_eval_status_in_sync(
        training_job_name, "ProfilerRule", reference, cfg.RULE_STATUS_COMPLETED)
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")

    assert_tags_in_sync(training_job_arn, resource["spec"].get("tags", None))

    # Check that you can delete a completed resource from k8s
    _, deleted = k8s.delete_custom_resource(
        reference, cfg.JOB_DELETE_WAIT_PERIODS, cfg.JOB_DELETE_WAIT_LENGTH)
    assert deleted is True
def test_rg_largecluster(self, rg_largecluster_input, rg_largecluster):
    """Create a large replication group, then scale in by 10 node groups and
    verify node-group and member-cluster counts after each step.
    """
    (reference, _) = rg_largecluster
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True", wait_periods=240)

    # assertions after initial creation
    desired_node_groups = int(rg_largecluster_input['NUM_NODE_GROUPS'])
    desired_replica_count = int(rg_largecluster_input['REPLICAS_PER_NODE_GROUP'])
    desired_total_nodes = desired_node_groups * (1 + desired_replica_count)
    resource = k8s.get_resource(reference)
    assert resource['status']['status'] == "available"
    assert len(resource['status']['nodeGroups']) == desired_node_groups
    assert len(resource['status']['memberClusters']) == desired_total_nodes

    # update, wait for resource to sync
    desired_node_groups = desired_node_groups - 10
    desired_total_nodes = desired_node_groups * (1 + desired_replica_count)
    scale_in_patch = {
        "spec": {
            "numNodeGroups": desired_node_groups,
            "nodeGroupConfiguration": provide_node_group_configuration(desired_node_groups)
        }
    }
    _ = k8s.patch_custom_resource(reference, scale_in_patch)
    # required as controller has likely not placed the resource in modifying
    sleep(DEFAULT_WAIT_SECS)
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True", wait_periods=240)

    # assert new state after scaling in
    resource = k8s.get_resource(reference)
    assert resource['status']['status'] == "available"
    assert len(resource['status']['nodeGroups']) == desired_node_groups
    assert len(resource['status']['memberClusters']) == desired_total_nodes
def test_rg_cmd_update(self, rg_cmd_update_input, rg_cmd_update):
    """Verify a cluster-mode-disabled replication group can add a replica and
    then upgrade its engine version, checking state after each change.
    """
    (reference, _) = rg_cmd_update
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True", wait_periods=30)

    # assertions after initial creation
    desired_node_groups = int(rg_cmd_update_input['NUM_NODE_GROUPS'])
    desired_replica_count = int(rg_cmd_update_input['REPLICAS_PER_NODE_GROUP'])
    desired_total_nodes = desired_node_groups * (1 + desired_replica_count)
    resource = k8s.get_resource(reference)
    assert resource['status']['status'] == "available"
    assert len(resource['status']['nodeGroups']) == desired_node_groups
    assert len(resource['status']['memberClusters']) == desired_total_nodes
    cluster = retrieve_cache_cluster(rg_cmd_update_input['RG_ID'])
    assert cluster is not None
    assert cluster['EngineVersion'] == rg_cmd_update_input['ENGINE_VERSION']

    # increase replica count, wait for resource to sync
    desired_replica_count += 1
    desired_total_nodes = desired_node_groups * (1 + desired_replica_count)
    _ = k8s.patch_custom_resource(reference, {"spec": {"replicasPerNodeGroup": desired_replica_count}})
    # required as controller has likely not placed the resource in modifying
    sleep(DEFAULT_WAIT_SECS)
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True", wait_periods=30)

    # assert new state after increasing replica count
    resource = k8s.get_resource(reference)
    assert resource['status']['status'] == "available"
    assert len(resource['status']['nodeGroups']) == desired_node_groups
    assert len(resource['status']['memberClusters']) == desired_total_nodes

    # upgrade engine version, wait for resource to sync
    desired_engine_version = "5.0.6"
    _ = k8s.patch_custom_resource(reference, {"spec": {"engineVersion": desired_engine_version}})
    sleep(DEFAULT_WAIT_SECS)
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True", wait_periods=30)

    # assert new state after upgrading engine version
    resource = k8s.get_resource(reference)
    assert resource['status']['status'] == "available"
    assert resource['spec']['engineVersion'] == desired_engine_version
    cluster = retrieve_cache_cluster(rg_cmd_update_input['RG_ID'])
    assert cluster is not None
    assert cluster['EngineVersion'] == desired_engine_version
def test_rg_cme_even_shards(self, rg_cme_even_shards, rg_cme_even_shards_input):
    """Scale replicas per node group up by one and then down by two on a
    cluster-mode-enabled group with evenly sized shards.
    """
    (reference, _) = rg_cme_even_shards
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True", wait_periods=90)

    num_node_groups = int(rg_cme_even_shards_input['NUM_NODE_GROUPS'])
    replicas_per_group = int(rg_cme_even_shards_input['REPLICAS_PER_NODE_GROUP'])

    # assert initial state
    resource = k8s.get_resource(reference)
    assert len(resource['status']['nodeGroups']) == num_node_groups
    assert_even_shards_replica_count(resource, replicas_per_group)

    # increase replica count, wait for resource to sync
    replicas_per_group += 1
    _ = k8s.patch_custom_resource(reference, {"spec": {"replicasPerNodeGroup": replicas_per_group}})
    sleep(DEFAULT_WAIT_SECS)
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True", wait_periods=90)

    # assert replica count has increased
    resource = k8s.get_resource(reference)
    assert len(resource['status']['nodeGroups']) == num_node_groups
    assert_even_shards_replica_count(resource, replicas_per_group)

    # decrease replica count, wait for resource to sync
    replicas_per_group -= 2
    _ = k8s.patch_custom_resource(reference, {"spec": {"replicasPerNodeGroup": replicas_per_group}})
    sleep(DEFAULT_WAIT_SECS)
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True", wait_periods=90)

    # assert replica count has decreased
    resource = k8s.get_resource(reference)
    assert len(resource['status']['nodeGroups']) == num_node_groups
    assert_even_shards_replica_count(resource, replicas_per_group)
def test_crud_stage(self, api_resource):
    """CRUD cycle for an HTTP API stage: create, verify its description in
    API Gateway, update the description, then delete and verify removal.
    """
    api_ref, api_cr = api_resource
    api_id = api_cr['status']['apiID']

    test_data = REPLACEMENT_VALUES.copy()
    stage_name = random_suffix_name("ack-test-stage", 25)
    test_data['STAGE_NAME'] = stage_name
    test_data['API_ID'] = api_id
    stage_ref, stage_data = helper.stage_ref_and_data(
        stage_resource_name=stage_name, replacement_values=test_data)
    logging.debug(f"http api stage resource. name: {stage_name}, data: {stage_data}")

    # test create
    k8s.create_custom_resource(stage_ref, stage_data)
    time.sleep(CREATE_WAIT_AFTER_SECONDS)
    assert k8s.wait_on_condition(stage_ref, "ACK.ResourceSynced", "True", wait_periods=10)
    cr = k8s.get_resource(stage_ref)
    assert cr is not None

    # Let's check that the HTTP Api integration appears in Amazon API Gateway
    apigw_validator.assert_stage_is_present(api_id=api_id, stage_name=stage_name)
    stage_description = test_data['STAGE_DESCRIPTION']
    apigw_validator.assert_stage_description(
        api_id=api_id,
        stage_name=stage_name,
        expected_description=stage_description
    )

    # test update
    updated_description = 'updated' + stage_description
    test_data['STAGE_DESCRIPTION'] = updated_description
    updated_stage_resource_data = load_apigatewayv2_resource(
        "stage",
        additional_replacements=test_data,
    )
    logging.debug(f"updated http api stage resource: {updated_stage_resource_data}")
    k8s.patch_custom_resource(stage_ref, updated_stage_resource_data)
    time.sleep(UPDATE_WAIT_AFTER_SECONDS)
    condition.assert_synced(stage_ref)

    # The stage in API Gateway must now carry the updated description.
    apigw_validator.assert_stage_description(
        api_id=api_id,
        stage_name=stage_name,
        expected_description=updated_description
    )

    # test delete
    k8s.delete_custom_resource(stage_ref)
    time.sleep(DELETE_WAIT_AFTER_SECONDS)
    assert not k8s.get_resource_exists(stage_ref)
    # HTTP Api stage should no longer appear in Amazon API Gateway
    apigw_validator.assert_stage_is_deleted(api_id=api_id, stage_name=stage_name)
def create_endpoint_test(self, sagemaker_client, xgboost_endpoint):
    """Verify a newly created endpoint has the right ARN and transitions
    Creating -> InService while the controller syncs accordingly.
    """
    (reference, resource, _) = xgboost_endpoint
    assert k8s.get_resource_exists(reference)

    # endpoint has correct arn and status
    endpoint_name = resource["spec"].get("endpointName", None)
    assert endpoint_name is not None
    expected_arn = self._describe_sagemaker_endpoint(sagemaker_client, endpoint_name)["EndpointArn"]
    assert self._get_resource_endpoint_arn(resource) == expected_arn

    # endpoint transitions Creating -> InService state
    self._assert_endpoint_status_in_sync(
        sagemaker_client, endpoint_name, reference, self.status_creating)
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")
    self._assert_endpoint_status_in_sync(
        sagemaker_client, endpoint_name, reference, self.status_inservice)
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")
def create_notebook_test(self, notebook_instance):
    """Verify a notebook instance starts Pending and reaches InService
    with the controller synced at each stage.
    """
    (reference, resource, _) = notebook_instance
    assert k8s.get_resource_exists(reference)
    assert k8s.get_resource_arn(resource) is not None

    # Create the resource and verify that its Pending
    notebook_name = resource["spec"].get("notebookInstanceName", None)
    assert notebook_name is not None
    notebook_description = get_notebook_instance(notebook_name)
    assert notebook_description["NotebookInstanceStatus"] == "Pending"
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "False")
    self._assert_notebook_status_in_sync(notebook_name, reference, "Pending")

    # wait for the resource to go to the InService state and make sure the operator is synced with sagemaker.
    self._assert_notebook_status_in_sync(notebook_name, reference, "InService")
    assert k8s.wait_on_condition(reference, "ACK.ResourceSynced", "True")
def test_crud_httpapi_using_import(self):
    """Import an HTTP API from a spec, update its title, then delete it,
    checking Amazon API Gateway after every phase."""
    replacements = REPLACEMENT_VALUES.copy()
    api_name = random_suffix_name("ack-test-importapi", 25)
    replacements['IMPORT_API_NAME'] = api_name
    replacements['IMPORT_API_TITLE'] = api_name
    api_ref, api_data = helper.import_api_ref_and_data(
        api_resource_name=api_name,
        replacement_values=replacements,
    )
    logging.debug(f"imported http api resource. name: {api_name}, data: {api_data}")

    # test create
    k8s.create_custom_resource(api_ref, api_data)
    time.sleep(CREATE_API_WAIT_AFTER_SECONDS)
    assert k8s.wait_on_condition(api_ref, "ACK.ResourceSynced", "True", wait_periods=10)
    cr = k8s.get_resource(api_ref)
    assert cr is not None
    api_id = cr['status']['apiID']

    # the imported API must exist server-side under its original name
    apigw_validator.assert_api_is_present(api_id=api_id)
    apigw_validator.assert_api_name(api_id=api_id, expected_api_name=api_name)

    # test update
    updated_api_title = 'updated-' + api_name
    replacements['IMPORT_API_TITLE'] = updated_api_title
    updated_api_resource_data = load_apigatewayv2_resource(
        "import_api",
        additional_replacements=replacements,
    )
    logging.debug(f"updated import http api resource: {updated_api_resource_data}")
    k8s.patch_custom_resource(api_ref, updated_api_resource_data)
    time.sleep(UPDATE_WAIT_AFTER_SECONDS)
    condition.assert_synced(api_ref)

    # API Gateway must now show the updated title
    apigw_validator.assert_api_name(api_id=api_id, expected_api_name=updated_api_title)

    # test delete
    k8s.delete_custom_resource(api_ref)
    time.sleep(DELETE_WAIT_AFTER_SECONDS)
    assert not k8s.get_resource_exists(api_ref)
    apigw_validator.assert_api_is_deleted(api_id=api_id)
def test_rg_update_misc(self, rg_update_misc_input, rg_update_misc):
    """Patch several miscellaneous replication-group fields at once and
    confirm the controller drives them all to the new values."""
    (ref, _) = rg_update_misc
    assert k8s.wait_on_condition(ref, "ACK.ResourceSynced", "True", wait_periods=90)

    rg_id = rg_update_misc_input['RG_ID']

    # assert the fields start out at their fixture-provided values
    maintenance_window = rg_update_misc_input['PMW']
    desc = rg_update_misc_input['DESCRIPTION']
    retention = int(rg_update_misc_input['SRL'])
    snapshot_window = rg_update_misc_input['SW']
    assert_misc_fields(ref, rg_id, maintenance_window, desc, retention, snapshot_window)

    # patch every field to a new value and wait for the resource to sync
    maintenance_window = "wed:10:00-wed:14:00"
    desc = "description2"
    retention = 0
    snapshot_window = "15:00-17:00"
    patch = {
        "spec": {
            "preferredMaintenanceWindow": maintenance_window,
            "description": desc,
            "snapshotRetentionLimit": retention,
            "snapshotWindow": snapshot_window,
        }
    }
    _ = k8s.patch_custom_resource(ref, patch)
    sleep(DEFAULT_WAIT_SECS)
    assert k8s.wait_on_condition(ref, "ACK.ResourceSynced", "True", wait_periods=90)

    # all four fields must now reflect the patched values
    assert_misc_fields(ref, rg_id, maintenance_window, desc, retention, snapshot_window)
def test_rg_scale_horizontally(self, rg_scale_horizontally_input, rg_scale_horizontally):
    """Scale a replication group out by one shard, then in by two,
    verifying the node-group count after each resize."""
    (ref, _) = rg_scale_horizontally
    assert k8s.wait_on_condition(ref, "ACK.ResourceSynced", "True", wait_periods=90)

    rg_id = rg_scale_horizontally_input['RG_ID']

    # assert initial shard count
    shard_count = int(rg_scale_horizontally_input['NUM_NODE_GROUPS'])
    assert len(retrieve_replication_group(rg_id)['NodeGroups']) == shard_count

    def resize_and_verify(target):
        # patch the desired shard count, wait for sync, confirm server-side
        _ = k8s.patch_custom_resource(ref, {"spec": {"numNodeGroups": target}})
        sleep(DEFAULT_WAIT_SECS)
        assert k8s.wait_on_condition(ref, "ACK.ResourceSynced", "True", wait_periods=90)
        assert len(retrieve_replication_group(rg_id)['NodeGroups']) == target

    # scale out by one shard
    shard_count += 1
    resize_and_verify(shard_count)

    # scale in by two shards (one below the starting count)
    shard_count -= 2
    resize_and_verify(shard_count)
def test_rg_scale_vertically(self, rg_scale_vertically_input, rg_scale_vertically):
    """Scale a replication group's node type up and then back down,
    verifying the instance class after each change."""
    (ref, _) = rg_scale_vertically
    assert k8s.wait_on_condition(ref, "ACK.ResourceSynced", "True", wait_periods=90)

    rg_id = rg_scale_vertically_input['RG_ID']

    # assert the fixture created the group on the expected starting type
    assert retrieve_replication_group(rg_id)['CacheNodeType'] == "cache.t3.micro"

    def change_node_type(node_type):
        # patch the desired node type, wait for sync, confirm server-side
        _ = k8s.patch_custom_resource(ref, {"spec": {"cacheNodeType": node_type}})
        sleep(DEFAULT_WAIT_SECS)
        assert k8s.wait_on_condition(ref, "ACK.ResourceSynced", "True", wait_periods=90)
        assert retrieve_replication_group(rg_id)['CacheNodeType'] == node_type

    # scale up
    change_node_type("cache.t3.medium")

    # scale down
    change_node_type("cache.t3.small")
def create_endpoint_test(self, xgboost_endpoint):
    """Verify endpoint creation: the CR's ARN matches SageMaker, the status
    transitions Creating -> InService, and the spec tags are propagated."""
    (ref, cr, _) = xgboost_endpoint
    assert k8s.get_resource_exists(ref)

    name = cr["spec"].get("endpointName", None)
    assert name is not None

    # the ARN recorded on the CR must match what SageMaker reports
    arn = get_sagemaker_endpoint(name)["EndpointArn"]
    assert k8s.get_resource_arn(cr) == arn

    # Creating phase: the resource is reported as not-yet-synced
    assert_endpoint_status_in_sync(name, ref, cfg.ENDPOINT_STATUS_CREATING)
    assert k8s.wait_on_condition(ref, "ACK.ResourceSynced", "False")

    # InService phase: the synced condition flips to True
    assert_endpoint_status_in_sync(name, ref, cfg.ENDPOINT_STATUS_INSERVICE)
    assert k8s.wait_on_condition(ref, "ACK.ResourceSynced", "True")

    # tags on the spec must be mirrored onto the SageMaker resource
    assert_tags_in_sync(arn, cr["spec"].get("tags", None))