Example No. 1
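These snippets are excerpts from a larger DC/OS Spark test suite. Names such as utils, shakedown, sdk_cmd, s3, THIS_DIR, TERASORT_JAR, COMMON_ARGS, KERBEROS_ARGS, SOAK_SPARK_APP_NAME, KAFKA_SERVICE_NAME, KAFKA_KRB5, and LOGGER are presumably imported or defined at module level in the original files and are not repeated in each example.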
def test_driver_executor_tls():
    '''
    Put keystore and truststore as secrets in DC/OS secret store.
    Run SparkPi job with TLS enabled, referencing those secrets.
    Also pass a plain secret alongside the TLS ones and make sure it still shows up.
    '''
    python_script_path = os.path.join(THIS_DIR, 'jobs', 'python',
                                      'pi_with_secret.py')
    python_script_url = utils.upload_file(python_script_path)
    resources_folder = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'resources')
    keystore_file = 'server.jks'
    truststore_file = 'trust.jks'
    keystore_path = os.path.join(resources_folder,
                                 '{}.base64'.format(keystore_file))
    truststore_path = os.path.join(resources_folder,
                                   '{}.base64'.format(truststore_file))
    keystore_secret = '__dcos_base64__keystore'
    truststore_secret = '__dcos_base64__truststore'
    my_secret = 'mysecret'
    my_secret_content = 'secretcontent'
    shakedown.run_dcos_command(
        'security secrets create /{} --value-file {}'.format(
            keystore_secret, keystore_path))
    shakedown.run_dcos_command(
        'security secrets create /{} --value-file {}'.format(
            truststore_secret, truststore_path))
    shakedown.run_dcos_command('security secrets create /{} --value {}'.format(
        my_secret, my_secret_content))
    password = '******'
    try:
        utils.run_tests(
            app_url=python_script_url,
            app_args="30 {} {}".format(my_secret, my_secret_content),
            expected_output="Pi is roughly 3",
            args=[
                "--keystore-secret-path",
                keystore_secret,
                "--truststore-secret-path",
                truststore_secret,
                "--private-key-password",
                password,
                "--keystore-password",
                password,
                "--truststore-password",
                password,
                "--conf",
                "spark.mesos.driver.secret.names={}".format(my_secret),
                "--conf",
                "spark.mesos.driver.secret.filenames={}".format(my_secret),
                "--conf",
                "spark.mesos.driver.secret.envkeys={}".format(my_secret),
            ])
    finally:
        shakedown.run_dcos_command(
            'security secrets delete /{}'.format(keystore_secret))
        shakedown.run_dcos_command(
            'security secrets delete /{}'.format(truststore_secret))
        shakedown.run_dcos_command(
            'security secrets delete /{}'.format(my_secret))
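The create/delete pairs above lend themselves to a small context manager so cleanup happens even when the test fails. A minimal sketch, assuming the same shakedown.run_dcos_command helper and DC/OS Enterprise "security secrets" CLI used in the test; the dcos_secret name is hypothetical:

from contextlib import contextmanager

@contextmanager
def dcos_secret(name, value=None, value_file=None):
    # Hypothetical helper: create a DC/OS secret for the duration of a test,
    # then delete it in teardown regardless of the test outcome.
    if value_file is not None:
        shakedown.run_dcos_command(
            'security secrets create /{} --value-file {}'.format(name, value_file))
    else:
        shakedown.run_dcos_command(
            'security secrets create /{} --value {}'.format(name, value))
    try:
        yield name
    finally:
        shakedown.run_dcos_command('security secrets delete /{}'.format(name))

With such a helper, the keystore, truststore, and plain secrets above could each be created in a with block, and the explicit finally teardown would no longer be needed.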
Example No. 2
def test_cli_multiple_spaces():
    utils.run_tests(app_url=utils.SPARK_EXAMPLES,
                    app_args="30",
                    expected_output="Pi is roughly 3",
                    args=[
                        # The extra spaces inside the flag strings below are
                        # intentional: this test checks that the CLI tolerates
                        # multiple spaces between arguments.
                        "--conf ", "spark.cores.max=2", " --class  ",
                        "org.apache.spark.examples.SparkPi"
                    ])
Example No. 3
def test_cni():
    utils.run_tests(app_url=utils.SPARK_EXAMPLES,
                    app_args="",
                    expected_output="Pi is roughly 3",
                    args=[
                        "--conf", "spark.mesos.network.name=dcos", "--class",
                        "org.apache.spark.examples.SparkPi"
                    ])
Example No. 4
def _run_terasort():
    jar_url = TERASORT_JAR
    utils.run_tests(
        app_url=jar_url,
        app_args="hdfs:///terasort_in hdfs:///terasort_out",
        expected_output="",
        app_name=SOAK_SPARK_APP_NAME,
        args=(["--class", "com.github.ehiggs.spark.terasort.TeraSort"] +
              COMMON_ARGS))
Example No. 5
def _run_teravalidate():
    jar_url = TERASORT_JAR
    utils.run_tests(
        app_url=jar_url,
        app_args="hdfs:///terasort_out hdfs:///terasort_validate",
        expected_output="partitions are properly sorted",
        app_name=SOAK_SPARK_APP_NAME,
        args=(["--class", "com.github.ehiggs.spark.terasort.TeraValidate"] +
              COMMON_ARGS))
Example No. 6
def _run_teragen():
    jar_url = TERASORT_JAR
    input_size = os.getenv('TERASORT_INPUT_SIZE', '1g')
    utils.run_tests(
        app_url=jar_url,
        app_args="{} hdfs:///terasort_in".format(input_size),
        expected_output="Number of records written",
        app_name=SOAK_SPARK_APP_NAME,
        args=(["--class", "com.github.ehiggs.spark.terasort.TeraGen"] +
              COMMON_ARGS))
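Examples 4 through 6 are the three stages of a TeraSort soak run. A minimal sketch of how they might be chained, assuming the three helpers above live in the same module; the run_terasort_suite name is hypothetical:

def run_terasort_suite():
    # Hypothetical driver (not in the original suite). Order matters:
    # TeraGen writes hdfs:///terasort_in, TeraSort reads it and writes
    # hdfs:///terasort_out, and TeraValidate checks the sorted output.
    _run_teragen()
    _run_terasort()
    _run_teravalidate()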
Example No. 7
def test_python():
    python_script_path = os.path.join(THIS_DIR, 'jobs', 'python',
                                      'pi_with_include.py')
    python_script_url = utils.upload_file(python_script_path)
    py_file_path = os.path.join(THIS_DIR, 'jobs', 'python',
                                'PySparkTestInclude.py')
    py_file_url = utils.upload_file(py_file_path)
    utils.run_tests(app_url=python_script_url,
                    app_args="30",
                    expected_output="Pi is roughly 3",
                    args=["--py-files", py_file_url])
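The two Python files referenced above are not shown. A minimal sketch of what such a pair could look like, assuming PySparkTestInclude.py exposes a helper that the main script imports (made resolvable on the executors by --py-files); both file bodies here are hypothetical:

# PySparkTestInclude.py (hypothetical): helper shipped via --py-files.
import random

def throw_dart(_):
    # Return 1 if a random point in the unit square lands inside the quarter circle.
    x, y = random.random(), random.random()
    return 1 if x * x + y * y < 1 else 0

# pi_with_include.py (hypothetical): Monte Carlo Pi that imports the helper above.
import sys
from operator import add
from pyspark.sql import SparkSession
from PySparkTestInclude import throw_dart

if __name__ == "__main__":
    partitions = int(sys.argv[1]) if len(sys.argv) > 1 else 2
    n = 100000 * partitions
    spark = SparkSession.builder.appName("PythonPiWithInclude").getOrCreate()
    count = spark.sparkContext.parallelize(range(n), partitions) \
        .map(throw_dart).reduce(add)
    print("Pi is roughly %f" % (4.0 * count / n))
    spark.stop()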
Example No. 8
def test_history():
    job_args = [
        "--class", "org.apache.spark.examples.SparkPi", "--conf",
        "spark.eventLog.enabled=true", "--conf",
        "spark.eventLog.dir=hdfs://hdfs/history"
    ]
    utils.run_tests(app_url=utils.SPARK_EXAMPLES,
                    app_args="100",
                    expected_output="Pi is roughly 3",
                    app_name="/spark",
                    args=(job_args + KERBEROS_ARGS))
Example No. 9
def test_jar(app_name=utils.SPARK_APP_NAME):
    master_url = ("https"
                  if utils.is_strict() else "http") + "://leader.mesos:5050"
    spark_job_runner_args = '{} dcos \\"*\\" spark:only 2 --auth-token={}'.format(
        master_url, shakedown.dcos_acs_token())
    jar_url = utils.upload_file(os.getenv('TEST_JAR_PATH'))
    utils.run_tests(
        app_url=jar_url,
        app_args=spark_job_runner_args,
        expected_output="All tests passed",
        app_name=app_name,
        args=[
            "--class",
            'com.typesafe.spark.test.mesos.framework.runners.SparkJobRunner'
        ])
Example No. 10
def test_rpc_auth():
    secret_name = "sparkauth"

    rc, stdout, stderr = sdk_cmd.run_raw_cli("{pkg} secret /{secret}".format(
        pkg=utils.SPARK_PACKAGE_NAME, secret=secret_name))
    assert rc == 0, "Failed to generate Spark auth secret, stderr {err} stdout {out}".format(
        err=stderr, out=stdout)

    args = [
        "--executor-auth-secret", secret_name, "--class",
        "org.apache.spark.examples.SparkPi"
    ]

    utils.run_tests(app_url=utils.SPARK_EXAMPLES,
                    app_args="100",
                    expected_output="Pi is roughly 3",
                    app_name="/spark",
                    args=args)
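The sparkauth secret generated above is never removed, so a rerun against the same cluster may collide with the existing entry. A sketch of a teardown that could be appended after the utils.run_tests call, assuming the DC/OS Enterprise "security secrets delete" subcommand (used in Example No. 1) is reachable through the same sdk_cmd.run_raw_cli helper:

    # Hypothetical cleanup (not in the original test): drop the RPC-auth secret
    # so repeated runs do not collide with an existing /sparkauth entry.
    rc, stdout, stderr = sdk_cmd.run_raw_cli(
        "security secrets delete /{}".format(secret_name))
    assert rc == 0, "Failed to delete secret {sec}, stderr {err} stdout {out}".format(
        sec=secret_name, err=stderr, out=stdout)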
Example No. 11
def test_terasort_suite():
    jar_url = 'https://downloads.mesosphere.io/spark/examples/spark-terasort-1.1-jar-with-dependencies_2.11.jar'

    teragen_args = ["--class", "com.github.ehiggs.spark.terasort.TeraGen"
                    ] + KERBEROS_ARGS
    utils.run_tests(app_url=jar_url,
                    app_args="1g hdfs:///terasort_in",
                    expected_output="Number of records written",
                    args=teragen_args)

    terasort_args = ["--class", "com.github.ehiggs.spark.terasort.TeraSort"
                     ] + KERBEROS_ARGS
    utils.run_tests(app_url=jar_url,
                    app_args="hdfs:///terasort_in hdfs:///terasort_out",
                    expected_output="",
                    args=terasort_args)

    teravalidate_args = [
        "--class", "com.github.ehiggs.spark.terasort.TeraValidate"
    ] + KERBEROS_ARGS
    utils.run_tests(app_url=jar_url,
                    app_args="hdfs:///terasort_out hdfs:///terasort_validate",
                    expected_output="partitions are properly sorted",
                    args=teravalidate_args)
Example No. 12
def test_sparkPi(app_name=utils.SPARK_APP_NAME):
    utils.run_tests(app_url=utils.SPARK_EXAMPLES,
                    app_args="100",
                    expected_output="Pi is roughly 3",
                    app_name=app_name,
                    args=["--class org.apache.spark.examples.SparkPi"])
Example No. 13
def test_s3():
    def make_credential_secret(envvar, secret_path):
        rc, stdout, stderr = sdk_cmd.run_raw_cli(
            "security secrets create {p} -v {e}".format(p=secret_path,
                                                        e=os.environ[envvar]))
        assert rc == 0, "Failed to create secret {secret} from envvar {envvar}, stderr: {err}, stdout: {out}".format(
            secret=secret_path, envvar=envvar, err=stderr, out=stdout)

    LOGGER.info("Creating AWS secrets")

    aws_access_key_secret_path = "aws_access_key_id"
    aws_secret_access_key_path = "aws_secret_access_key"

    make_credential_secret(
        envvar="AWS_ACCESS_KEY_ID",
        secret_path="/{}".format(aws_access_key_secret_path))
    make_credential_secret(
        envvar="AWS_SECRET_ACCESS_KEY",
        secret_path="/{}".format(aws_secret_access_key_path))

    linecount_path = os.path.join(THIS_DIR, 'resources', 'linecount.txt')
    s3.upload_file(linecount_path)

    app_args = "--readUrl {} --writeUrl {}".format(s3.s3n_url('linecount.txt'),
                                                   s3.s3n_url("linecount-out"))

    args = [
        "--conf", "spark.mesos.containerizer=mesos", "--conf",
        "spark.mesos.driver.secret.names=/{key},/{secret}".format(
            key=aws_access_key_secret_path,
            secret=aws_secret_access_key_path), "--conf",
        "spark.mesos.driver.secret.envkeys=AWS_ACCESS_KEY_ID,AWS_SECRET_ACCESS_KEY",
        "--class", "S3Job"
    ]
    utils.run_tests(app_url=utils._scala_test_jar_url(),
                    app_args=app_args,
                    expected_output="Read 3 lines",
                    args=args)

    assert len(list(s3.list("linecount-out"))) > 0

    app_args = "--readUrl {} --countOnly".format(s3.s3n_url('linecount.txt'))

    args = [
        "--conf", "spark.mesos.driverEnv.AWS_ACCESS_KEY_ID={}".format(
            os.environ["AWS_ACCESS_KEY_ID"]), "--conf",
        "spark.mesos.driverEnv.AWS_SECRET_ACCESS_KEY={}".format(
            os.environ["AWS_SECRET_ACCESS_KEY"]), "--class", "S3Job"
    ]
    utils.run_tests(app_url=utils._scala_test_jar_url(),
                    app_args=app_args,
                    expected_output="Read 3 lines",
                    args=args)

    app_args = "--countOnly --readUrl {}".format(s3.s3n_url('linecount.txt'))

    args = [
        "--conf", "spark.mesos.driverEnv.AWS_ACCESS_KEY_ID={}".format(
            os.environ["AWS_ACCESS_KEY_ID"]), "--conf",
        "spark.mesos.driverEnv.AWS_SECRET_ACCESS_KEY={}".format(
            os.environ["AWS_SECRET_ACCESS_KEY"]), "--class", "S3Job"
    ]
    utils.run_tests(app_url=utils._scala_test_jar_url(),
                    app_args=app_args,
                    expected_output="Read 3 lines",
                    args=args)
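The two AWS credential secrets created at the start of this test are likewise left in place. A sketch of a matching teardown, assuming the same sdk_cmd.run_raw_cli helper and the "security secrets delete" subcommand shown in Example No. 1:

    # Hypothetical cleanup (not in the original test): remove the credential
    # secrets created by make_credential_secret above.
    for path in (aws_access_key_secret_path, aws_secret_access_key_path):
        sdk_cmd.run_raw_cli("security secrets delete /{}".format(path))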
Example No. 14
def test_r():
    r_script_path = os.path.join(THIS_DIR, 'jobs', 'R', 'dataframe.R')
    r_script_url = utils.upload_file(r_script_path)
    utils.run_tests(app_url=r_script_url,
                    app_args='',
                    expected_output="Justin")
Example No. 15
def test_pipeline(kerberos_flag,
                  stop_count,
                  jar_uri,
                  keytab_secret,
                  jaas_uri=None):
    stop_count = str(stop_count)
    kerberized = (kerberos_flag == "true")
    broker_dns = _kafka_broker_dns()
    topic = "top1"

    big_file = "file:///mnt/mesos/sandbox/big.txt"
    big_file_url = "http://norvig.com/big.txt"

    # arguments to the application
    producer_args = " ".join([broker_dns, big_file, topic, kerberos_flag])

    uris = "spark.mesos.uris={}".format(big_file_url)

    if kerberized and jaas_uri is None:
        # No JAAS config supplied: upload the bundled one and use its URL.
        jaas_path = os.path.join(THIS_DIR, "resources",
                                 "spark-kafka-client-jaas.conf")
        s3.upload_file(jaas_path)
        jaas_uri = s3.s3_http_url("spark-kafka-client-jaas.conf")

    if jaas_uri is not None:
        uris += ",{}".format(jaas_uri)

    common_args = [
        "--conf", "spark.mesos.containerizer=mesos", "--conf",
        "spark.scheduler.maxRegisteredResourcesWaitingTime=2400s", "--conf",
        "spark.scheduler.minRegisteredResourcesRatio=1.0", "--conf", uris
    ]

    kerberos_args = [
        "--conf",
        "spark.mesos.driver.secret.names={}".format(keytab_secret),
        "--conf",
        "spark.mesos.driver.secret.filenames=kafka-client.keytab",
        "--conf",
        "spark.mesos.executor.secret.names={}".format(keytab_secret),
        "--conf",
        "spark.mesos.executor.secret.filenames=kafka-client.keytab",
        "--conf",
        "spark.mesos.task.labels=DCOS_SPACE:{}".format(utils.SPARK_APP_NAME),
        "--conf",
        "spark.executorEnv.KRB5_CONFIG_BASE64={}".format(KAFKA_KRB5),
        "--conf",
        "spark.mesos.driverEnv.KRB5_CONFIG_BASE64={}".format(KAFKA_KRB5),
        "--conf",
        "spark.driver.extraJavaOptions=-Djava.security.auth.login.config="
        "/mnt/mesos/sandbox/spark-kafka-client-jaas.conf",
        "--conf",
        "spark.executor.extraJavaOptions="
        "-Djava.security.auth.login.config=/mnt/mesos/sandbox/spark-kafka-client-jaas.conf",
    ]

    producer_config = [
        "--conf", "spark.cores.max=2", "--conf", "spark.executor.cores=2",
        "--class", "KafkaFeeder"
    ] + common_args

    if kerberized:
        producer_config += kerberos_args

    producer_id = utils.submit_job(app_url=jar_uri,
                                   app_args=producer_args,
                                   app_name=utils.SPARK_APP_NAME,
                                   args=producer_config)

    shakedown.wait_for(lambda: _producer_launched(),
                       ignore_exceptions=False,
                       timeout_seconds=600)
    shakedown.wait_for(lambda: utils.is_service_ready(KAFKA_SERVICE_NAME, 1),
                       ignore_exceptions=False,
                       timeout_seconds=600)

    consumer_config = [
        "--conf", "spark.cores.max=4", "--class", "KafkaConsumer"
    ] + common_args

    if kerberized:
        consumer_config += kerberos_args

    consumer_args = " ".join([broker_dns, topic, stop_count, kerberos_flag])

    utils.run_tests(app_url=jar_uri,
                    app_args=consumer_args,
                    expected_output="Read {} words".format(stop_count),
                    app_name=utils.SPARK_APP_NAME,
                    args=consumer_config)

    utils.kill_driver(producer_id, utils.SPARK_APP_NAME)
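test_pipeline is parameterized rather than self-contained. A hypothetical invocation, with every value below a placeholder rather than something taken from the original suite:

# Hypothetical call site: a Kerberized run that stops after reading 1000 words.
test_pipeline(kerberos_flag="true",
              stop_count=1000,
              jar_uri="http://example.com/dcos-spark-scala-tests-assembly.jar",
              keytab_secret="__dcos_base64__kafka_client_keytab")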