def test_marketplace_transform_job_from_model_package(sagemaker_session, cpu_instance_type):
    """Run a batch transform job straight from a Marketplace model package.

    The iris CSV from the marketplace training data is loaded, its first
    column is dropped, and the result is written into the transform work
    directory. That directory is uploaded to S3 and used as the input of a
    transform job created from a ``ModelPackage``.

    Args:
        sagemaker_session: Session used for the S3 upload and the SageMaker calls.
        cpu_instance_type: Instance type used for the transform job.
    """
    training_dir = os.path.join(DATA_DIR, "marketplace", "training")
    workdir = DATA_DIR + "/marketplace/transform"

    # Column 0 is removed before writing the transform input
    # (presumably the label column -- TODO confirm against the data set).
    frame = pandas.read_csv(training_dir + "/iris.csv", header=None)
    features = frame.drop([0], axis=1)
    features.to_csv(workdir + "/batchtransform_test.csv", index=False, header=False)

    s3_input = sagemaker_session.upload_data(
        workdir,
        key_prefix="integ-test-data/marketplace/transform",
    )

    region = sagemaker_session.boto_region_name
    account_id = REGION_ACCOUNT_MAP[region]
    package_arn = MODEL_PACKAGE_ARN.format(
        partition=_aws_partition(region),
        region=region,
        account=account_id,
    )

    package = ModelPackage(
        role="SageMakerRole",
        model_package_arn=package_arn,
        sagemaker_session=sagemaker_session,
    )

    job = package.transformer(1, cpu_instance_type)
    job.transform(s3_input, content_type="text/csv")
    job.wait()
def test_marketplace_model(sagemaker_session, cpu_instance_type):
    """Deploy a Marketplace model package and invoke the endpoint with iris rows.

    The endpoint is created inside ``timeout_and_delete_endpoint_by_name`` and
    the prediction is issued inside that same context, so the request is made
    before the context manager exits.

    Args:
        sagemaker_session: Session used for all SageMaker calls.
        cpu_instance_type: Instance type used for the endpoint.
    """
    region = sagemaker_session.boto_region_name
    account_id = REGION_ACCOUNT_MAP[region]
    package_arn = MODEL_PACKAGE_ARN.format(
        partition=_aws_partition(region),
        region=region,
        account=account_id,
    )

    def _csv_predictor(endpoint, session):
        # Predictor factory handed to ModelPackage: requests go out CSV-serialized.
        return sagemaker.Predictor(endpoint, session, serializer=CSVSerializer())

    package = ModelPackage(
        role="SageMakerRole",
        model_package_arn=package_arn,
        sagemaker_session=sagemaker_session,
        predictor_cls=_csv_predictor,
    )

    endpoint_name = "test-marketplace-model-endpoint{}".format(sagemaker_timestamp())
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20):
        predictor = package.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)

        iris_csv = os.path.join(DATA_DIR, "marketplace", "training", "iris.csv")
        frame = pandas.read_csv(iris_csv, header=None)

        # Row positions 40-49, 90-99 and 140-149; the final one is dropped below.
        row_ids = [base + offset for base in (0, 50, 100) for offset in range(40, 50)]
        sample = frame.iloc[row_ids[:-1]]
        # Everything except column 0 is sent to the endpoint.
        payload = sample.iloc[:, 1:].values

        response = predictor.predict(payload)
        print(response.decode("utf-8"))
def test_marketplace_transform_job(sagemaker_session, cpu_instance_type):
    """Train a Marketplace algorithm, then run a batch transform job with it.

    The training directory is uploaded as the ``training`` channel. The
    transform input is the iris CSV with its first column dropped, written to
    the transform work directory and uploaded separately.

    Args:
        sagemaker_session: Session used for the S3 uploads and SageMaker calls.
        cpu_instance_type: Instance type used for both training and transform.
    """
    data_path = os.path.join(DATA_DIR, "marketplace", "training")
    region = sagemaker_session.boto_region_name
    account = REGION_ACCOUNT_MAP[region]
    algorithm_arn = ALGORITHM_ARN.format(
        partition=_aws_partition(region), region=region, account=account
    )

    # Use the ``instance_count`` / ``instance_type`` keyword names, matching
    # test_marketplace_tuning_job in this file, instead of the deprecated
    # ``train_instance_*`` spellings.
    algo = AlgorithmEstimator(
        algorithm_arn=algorithm_arn,
        role="SageMakerRole",
        instance_count=1,
        instance_type=cpu_instance_type,
        sagemaker_session=sagemaker_session,
        base_job_name="test-marketplace",
    )

    train_input = algo.sagemaker_session.upload_data(
        path=data_path, key_prefix="integ-test-data/marketplace/train"
    )

    # Column 0 is removed before writing the transform input
    # (presumably the label column -- TODO confirm against the data set).
    shape = pandas.read_csv(data_path + "/iris.csv", header=None).drop([0], axis=1)
    transform_workdir = DATA_DIR + "/marketplace/transform"
    shape.to_csv(transform_workdir + "/batchtransform_test.csv", index=False, header=False)
    transform_input = algo.sagemaker_session.upload_data(
        transform_workdir, key_prefix="integ-test-data/marketplace/transform"
    )

    algo.fit({"training": train_input})

    transformer = algo.transformer(1, cpu_instance_type)
    transformer.transform(transform_input, content_type="text/csv")
    transformer.wait()
def _create_kms_key(
    kms_client,
    account_id,
    region,
    role_arn=None,
    sagemaker_role="SageMakerRole",
    alias=KEY_ALIAS,
):
    """Create a KMS key for the integ tests and optionally attach an alias.

    Args:
        kms_client: Client exposing ``create_key`` and ``create_alias``.
        account_id: Account id inserted into the key policy principal.
        region: Region name; used to look up the partition for the principal.
        role_arn: When truthy, the principal is rendered from
            ``PRINCIPAL_TEMPLATE``; otherwise it is just the quoted account id.
        sagemaker_role: Role name passed to the principal and policy templates.
        alias: Alias name (without the ``alias/`` prefix). A falsy value
            skips alias creation.

    Returns:
        The ARN of the newly created key.
    """
    if not role_arn:
        principal = '"{account_id}"'.format(account_id=account_id)
    else:
        principal = PRINCIPAL_TEMPLATE.format(
            partition=utils._aws_partition(region),
            account_id=account_id,
            role_arn=role_arn,
            sagemaker_role=sagemaker_role,
        )

    policy_document = KEY_POLICY.format(
        id=POLICY_NAME,
        principal=principal,
        sagemaker_role=sagemaker_role,
    )
    created = kms_client.create_key(
        Policy=policy_document,
        Description="KMS key for SageMaker Python SDK integ tests",
    )
    key_arn = created["KeyMetadata"]["Arn"]

    if not alias:
        return key_arn

    kms_client.create_alias(AliasName="alias/" + alias, TargetKeyId=key_arn)
    return key_arn
def bucket_with_encryption(sagemaker_session, sagemaker_role):
    """Yield an S3 bucket URI whose default encryption uses a fresh KMS key.

    This is a generator that yields exactly once. A new KMS key is created
    (without an alias), the bucket ``sagemaker-<region>-<account>-with-kms``
    is created if missing, and default encryption plus the KMS bucket policy
    are applied to it. The key is scheduled for deletion when the generator
    is resumed or closed, including when an exception is thrown into it or
    bucket setup fails after the key was created.

    Args:
        sagemaker_session: Session providing ``boto_session`` and the bucket
            creation helper.
        sagemaker_role: Role name forwarded to ``_create_kms_key``.

    Yields:
        A ``(bucket_uri, kms_key_arn)`` tuple, where ``bucket_uri`` has the
        form ``s3://<bucket_name>``.
    """
    boto_session = sagemaker_session.boto_session
    region = boto_session.region_name

    sts_client = boto_session.client(
        "sts", region_name=region, endpoint_url=utils.sts_regional_endpoint(region)
    )
    # One STS round trip supplies both the account id and the caller ARN.
    identity = sts_client.get_caller_identity()
    account = identity["Account"]
    role_arn = identity["Arn"]

    kms_client = boto_session.client("kms")
    kms_key_arn = _create_kms_key(kms_client, account, region, role_arn, sagemaker_role, None)

    # From here on the key exists, so always schedule its deletion -- even if
    # bucket configuration fails or the consumer of the yielded value raises.
    try:
        bucket_name = "sagemaker-{}-{}-with-kms".format(region, account)

        sagemaker_session._create_s3_bucket_if_it_does_not_exist(
            bucket_name=bucket_name, region=region
        )

        s3_client = boto_session.client("s3", region_name=region)
        s3_client.put_bucket_encryption(
            Bucket=bucket_name,
            ServerSideEncryptionConfiguration={
                "Rules": [
                    {
                        "ApplyServerSideEncryptionByDefault": {
                            # The S3 API value is the literal "aws:kms" in every
                            # partition; it must not be derived from the partition
                            # name (e.g. "aws-cn:kms" is rejected).
                            "SSEAlgorithm": "aws:kms",
                            "KMSMasterKeyID": kms_key_arn,
                        }
                    }
                ]
            },
        )

        s3_client.put_bucket_policy(
            Bucket=bucket_name,
            Policy=KMS_BUCKET_POLICY.format(
                partition=utils._aws_partition(region), bucket_name=bucket_name
            ),
        )

        yield "s3://" + bucket_name, kms_key_arn
    finally:
        kms_client.schedule_key_deletion(KeyId=kms_key_arn, PendingWindowInDays=7)
def test_marketplace_attach(sagemaker_session, cpu_instance_type):
    """Start Marketplace training, re-attach to the job, deploy and predict.

    Training is started with ``wait=False``; after a fixed 20 second pause the
    job is re-attached by name via ``AlgorithmEstimator.attach`` and the
    attached estimator is deployed to an endpoint that is then invoked with
    rows from the iris CSV.

    Args:
        sagemaker_session: Session used for the S3 upload and SageMaker calls.
        cpu_instance_type: Instance type used for training and for the endpoint.
    """
    with timeout(minutes=15):
        data_path = os.path.join(DATA_DIR, "marketplace", "training")
        region = sagemaker_session.boto_region_name
        account = REGION_ACCOUNT_MAP[region]
        algorithm_arn = ALGORITHM_ARN.format(
            partition=_aws_partition(region), region=region, account=account
        )

        # Use the ``instance_count`` / ``instance_type`` keyword names, matching
        # test_marketplace_tuning_job in this file, instead of the deprecated
        # ``train_instance_*`` spellings.
        mktplace = AlgorithmEstimator(
            algorithm_arn=algorithm_arn,
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
            base_job_name="test-marketplace",
        )

        train_input = mktplace.sagemaker_session.upload_data(
            path=data_path, key_prefix="integ-test-data/marketplace/train"
        )

        mktplace.fit({"training": train_input}, wait=False)
        training_job_name = mktplace.latest_training_job.name

        print("Waiting to re-attach to the training job: %s" % training_job_name)
        time.sleep(20)

    endpoint_name = "test-marketplace-estimator{}".format(sagemaker_timestamp())
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20):
        print("Re-attaching now to: %s" % training_job_name)
        estimator = AlgorithmEstimator.attach(
            training_job_name=training_job_name, sagemaker_session=sagemaker_session
        )
        # CSVSerializer() is the serializer already used by
        # test_marketplace_model in this file; it replaces the legacy
        # ``sagemaker.predictor.csv_serializer`` object.
        predictor = estimator.deploy(
            1,
            cpu_instance_type,
            endpoint_name=endpoint_name,
            serializer=CSVSerializer(),
        )

        shape = pandas.read_csv(os.path.join(data_path, "iris.csv"), header=None)
        # Row positions 40-49, 90-99 and 140-149; the final one is dropped below.
        a = [50 * i for i in range(3)]
        b = [40 + i for i in range(10)]
        indices = [i + j for i, j in itertools.product(a, b)]

        test_data = shape.iloc[indices[:-1]]
        # Everything except column 0 is sent to the endpoint.
        test_x = test_data.iloc[:, 1:]

        print(predictor.predict(test_x.values).decode("utf-8"))
def test_marketplace_estimator(sagemaker_session, cpu_instance_type):
    """Train a Marketplace algorithm, deploy it and invoke the endpoint.

    Training runs under a 15 minute timeout. The endpoint is then created
    inside ``timeout_and_delete_endpoint_by_name`` and invoked with rows from
    the iris CSV before that context manager exits.

    Args:
        sagemaker_session: Session used for the S3 upload and SageMaker calls.
        cpu_instance_type: Instance type used for training and for the endpoint.
    """
    with timeout(minutes=15):
        data_path = os.path.join(DATA_DIR, "marketplace", "training")
        region = sagemaker_session.boto_region_name
        account = REGION_ACCOUNT_MAP[region]
        algorithm_arn = ALGORITHM_ARN.format(
            partition=_aws_partition(region), region=region, account=account
        )

        # Use the ``instance_count`` / ``instance_type`` keyword names, matching
        # test_marketplace_tuning_job in this file, instead of the deprecated
        # ``train_instance_*`` spellings.
        algo = AlgorithmEstimator(
            algorithm_arn=algorithm_arn,
            role="SageMakerRole",
            instance_count=1,
            instance_type=cpu_instance_type,
            sagemaker_session=sagemaker_session,
        )

        train_input = algo.sagemaker_session.upload_data(
            path=data_path, key_prefix="integ-test-data/marketplace/train"
        )

        algo.fit({"training": train_input})

    endpoint_name = "test-marketplace-estimator{}".format(sagemaker_timestamp())
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=20):
        predictor = algo.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)

        shape = pandas.read_csv(os.path.join(data_path, "iris.csv"), header=None)
        # Row positions 40-49, 90-99 and 140-149; the final one is dropped below.
        a = [50 * i for i in range(3)]
        b = [40 + i for i in range(10)]
        indices = [i + j for i, j in itertools.product(a, b)]

        test_data = shape.iloc[indices[:-1]]
        # Everything except column 0 is sent to the endpoint.
        test_x = test_data.iloc[:, 1:]

        print(predictor.predict(test_x.values).decode("utf-8"))
def test_marketplace_tuning_job(sagemaker_session, cpu_instance_type):
    """Run a two-job hyperparameter tuning job over a Marketplace algorithm.

    ``max_leaf_nodes`` is set to 10 on the estimator and tuned over the integer
    range 1..100000 against the ``validation:accuracy`` metric.

    Args:
        sagemaker_session: Session used for the S3 upload and SageMaker calls.
        cpu_instance_type: Instance type used for the training jobs.
    """
    training_dir = os.path.join(DATA_DIR, "marketplace", "training")

    region = sagemaker_session.boto_region_name
    account_id = REGION_ACCOUNT_MAP[region]
    arn = ALGORITHM_ARN.format(
        partition=_aws_partition(region),
        region=region,
        account=account_id,
    )

    estimator = AlgorithmEstimator(
        algorithm_arn=arn,
        role="SageMakerRole",
        instance_count=1,
        instance_type=cpu_instance_type,
        sagemaker_session=sagemaker_session,
        base_job_name="test-marketplace",
    )

    s3_train = estimator.sagemaker_session.upload_data(
        path=training_dir,
        key_prefix="integ-test-data/marketplace/train",
    )

    estimator.set_hyperparameters(max_leaf_nodes=10)
    search_space = {"max_leaf_nodes": IntegerParameter(1, 100000)}

    tuning_job = HyperparameterTuner(
        estimator=estimator,
        base_tuning_job_name="byo",
        objective_metric_name="validation:accuracy",
        hyperparameter_ranges=search_space,
        max_jobs=2,
        max_parallel_jobs=2,
    )
    tuning_job.fit({"training": s3_train}, include_cls_metadata=False)

    # Fixed pause before blocking on the job
    # (presumably lets the tuning job register -- TODO confirm).
    time.sleep(15)
    tuning_job.wait()
def _add_role_to_policy(
    kms_client, account_id, role_arn, region, alias=KEY_ALIAS, sagemaker_role="SageMakerRole"
):
    """Ensure the aliased key's policy names both the caller and the SageMaker role.

    The current key policy is fetched and the ``AWS`` principal of its first
    statement inspected. If either ``role_arn`` or ``sagemaker_role`` does not
    appear in it, the policy is replaced with one rendered from
    ``KEY_POLICY`` / ``PRINCIPAL_TEMPLATE``; otherwise nothing is written.

    Args:
        kms_client: Client exposing ``get_key_policy`` and ``put_key_policy``.
        account_id: Account id inserted into the principal template.
        role_arn: ARN that must appear among the policy principals.
        region: Region name; used to look up the partition for the principal.
        alias: Key alias resolved to a key id via ``_get_kms_key_id``.
        sagemaker_role: Role name that must appear among the policy principals.
    """
    key_id = _get_kms_key_id(kms_client, alias)
    policy = kms_client.get_key_policy(KeyId=key_id, PolicyName=POLICY_NAME)
    policy = json.loads(policy["Policy"])
    current = policy["Statement"][0]["Principal"]["AWS"]

    # A policy's AWS principal may be a single string or a list of strings.
    # Normalise to a list so the role-name check is a substring match on each
    # entry; a plain ``in`` on a list would compare the bare role name against
    # whole ARNs and never match, rewriting the policy on every call.
    principals = [current] if isinstance(current, str) else list(current)
    has_caller = any(role_arn in entry for entry in principals)
    has_sagemaker_role = any(sagemaker_role in entry for entry in principals)
    if has_caller and has_sagemaker_role:
        return

    principal = PRINCIPAL_TEMPLATE.format(
        partition=utils._aws_partition(region),
        account_id=account_id,
        role_arn=role_arn,
        sagemaker_role=sagemaker_role,
    )

    kms_client.put_key_policy(
        KeyId=key_id,
        PolicyName=POLICY_NAME,
        # Pass sagemaker_role as _create_kms_key does, so both callers render
        # KEY_POLICY with the same set of fields.
        Policy=KEY_POLICY.format(
            id=POLICY_NAME, principal=principal, sagemaker_role=sagemaker_role
        ),
    )