# Example 1
def test_classpath_script_works():
    """The sagemakerpyspark-jars console script must emit the same jar list
    as sagemaker_pyspark.classpath_jars(), comma-separated by default.
    """
    p = subprocess.Popen(["sagemakerpyspark-jars"], stdout=subprocess.PIPE)
    output, _ = p.communicate()
    # A non-zero exit code means the script itself failed; surface that
    # instead of a confusing downstream length mismatch.
    assert p.returncode == 0

    jars = sagemaker_pyspark.classpath_jars()

    # strip() drops the trailing newline the script prints; without it the
    # newline stays attached to the last jar path in the split list.
    script_jars = output.decode('utf-8').strip().split(",")
    assert len(jars) == len(script_jars)
    assert jars[0] == script_jars[0]
# Example 2
def test_classpath_script_can_use_separators():
    """Passing a separator argument (here ':') to sagemakerpyspark-jars must
    change the delimiter used to join the jar paths.
    """
    p = subprocess.Popen("sagemakerpyspark-jars :".split(),
                         stdout=subprocess.PIPE)
    output, _ = p.communicate()
    # Fail fast if the console script itself exited with an error.
    assert p.returncode == 0

    jars = sagemaker_pyspark.classpath_jars()

    # strip() removes the trailing newline so it is not glued onto the
    # final jar path after splitting on ':'.
    script_jars = output.decode('utf-8').strip().split(":")
    assert len(jars) == len(script_jars)
    assert jars[0] == script_jars[0]
def with_spark_context():
    """Yield a SparkContext configured with the SageMaker Spark jars.

    Creates a context on first use (reusing any already-active one) and
    guarantees it is stopped on teardown, even when the consuming test
    raises — the original version skipped teardown on test failure.
    """
    os.environ['SPARK_CLASSPATH'] = ":".join(classpath_jars())
    conf = (SparkConf().set("spark.driver.extraClassPath",
                            os.environ['SPARK_CLASSPATH']))

    if SparkContext._active_spark_context is None:
        SparkContext(conf=conf)

    sc = SparkContext._active_spark_context
    try:
        yield sc
    finally:
        # TearDown: always stop the context so state never leaks between tests.
        sc.stop()
import os
import boto3

from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession

import sagemaker
from sagemaker import get_execution_role
import sagemaker_pyspark

# Resolve the IAM role this notebook/execution environment runs under.
role = get_execution_role()

# Configure Spark to use the SageMaker Spark dependency jars
jars = sagemaker_pyspark.classpath_jars()

# Reuse the jar list computed above instead of calling classpath_jars() twice.
classpath = ":".join(jars)

# See the SageMaker Spark Github to learn how to connect to EMR from a notebook instance
spark = SparkSession.builder.config("spark.driver.extraClassPath", classpath)\
    .master("local[*]").getOrCreate()

spark

# China regions use the '.com.cn' S3 endpoint domain; all others use '.com'.
# (boto3 is already imported at the top of the file.)
cn_regions = ['cn-north-1', 'cn-northwest-1']
region = boto3.Session().region_name
endpoint_domain = 'com.cn' if region in cn_regions else 'com'
spark._jsc.hadoopConfiguration().set(
    'fs.s3a.endpoint', 's3.{}.amazonaws.{}'.format(region, endpoint_domain))