def upload_file(file_path):
    """Upload a local file to the configured S3 bucket/prefix and return its HTTP URL."""
    LOGGER.info("Uploading {} to s3://{}/{}".format(
        file_path, os.environ['S3_BUCKET'], os.environ['S3_PREFIX']))
    spark_s3.upload_file(file_path)
    basename = os.path.basename(file_path)
    return spark_s3.http_url(basename)

def _scala_test_jar_url():
    return spark_s3.http_url(os.path.basename(os.environ["SCALA_TEST_JAR_PATH"]))

def test_pipeline(kerberos_flag, stop_count, jar_uri, keytab_secret, spark_service_name, jaas_uri=None):
    """Run a KafkaFeeder producer and a KafkaConsumer consumer (both from jar_uri)
    against the Kafka service, optionally Kerberized, and verify that the consumer
    reads the expected number of words before the producer is killed."""
    stop_count = str(stop_count)
    kerberized = (kerberos_flag == "true")
    broker_dns = sdk_cmd.svc_cli(KAFKA_PACKAGE_NAME, KAFKA_SERVICE_NAME,
                                 'endpoints broker', json=True)['dns'][0]
    topic = "top1"

    big_file, big_file_url = "file:///mnt/mesos/sandbox/big.txt", "http://norvig.com/big.txt"

    # arguments to the application
    producer_args = " ".join([broker_dns, big_file, topic, kerberos_flag])

    uris = "spark.mesos.uris={}".format(big_file_url)

    # For a Kerberized run with no explicit JAAS URI, upload the bundled JAAS
    # config and use its URL. Only append a JAAS URI when one actually exists
    # (the original unconditionally appended jaas_uri in the else branch, which
    # produced a bogus ",None" entry for non-Kerberized runs).
    if kerberized and jaas_uri is None:
        jaas_path = os.path.join(THIS_DIR, "resources", "spark-kafka-client-jaas.conf")
        s3.upload_file(jaas_path)
        jaas_uri = s3.http_url("spark-kafka-client-jaas.conf")
    if jaas_uri is not None:
        uris += ",{}".format(jaas_uri)

    common_args = [
        "--conf", "spark.mesos.containerizer=mesos",
        "--conf", "spark.scheduler.maxRegisteredResourcesWaitingTime=2400s",
        "--conf", "spark.scheduler.minRegisteredResourcesRatio=1.0",
        "--conf", uris
    ]

    kerberos_args = [
        "--conf", "spark.mesos.driver.secret.names={}".format(keytab_secret),
        "--conf", "spark.mesos.driver.secret.filenames=kafka-client.keytab",
        "--conf", "spark.mesos.executor.secret.names={}".format(keytab_secret),
        "--conf", "spark.mesos.executor.secret.filenames=kafka-client.keytab",
        "--conf", "spark.mesos.task.labels=DCOS_SPACE:/{}".format(spark_service_name),
        "--conf", "spark.executorEnv.KRB5_CONFIG_BASE64={}".format(KAFKA_KRB5),
        "--conf", "spark.mesos.driverEnv.KRB5_CONFIG_BASE64={}".format(KAFKA_KRB5),
        "--conf", "spark.driver.extraJavaOptions="
                  "-Djava.security.auth.login.config=/mnt/mesos/sandbox/spark-kafka-client-jaas.conf",
        "--conf", "spark.executor.extraJavaOptions="
                  "-Djava.security.auth.login.config=/mnt/mesos/sandbox/spark-kafka-client-jaas.conf",
    ]

    producer_config = [
        "--conf", "spark.cores.max=2",
        "--conf", "spark.executor.cores=2",
        "--class", "KafkaFeeder"
    ] + common_args

    if kerberized:
        producer_config += kerberos_args

    producer_id = utils.submit_job(app_url=jar_uri,
                                   app_args=producer_args,
                                   service_name=spark_service_name,
                                   args=producer_config)

    sdk_tasks.check_running(KAFKA_SERVICE_NAME, 1, timeout_seconds=600)

    consumer_config = [
        "--conf", "spark.cores.max=4",
        "--class", "KafkaConsumer"
    ] + common_args

    if kerberized:
        consumer_config += kerberos_args

    consumer_args = " ".join([broker_dns, topic, stop_count, kerberos_flag])

    utils.run_tests(app_url=jar_uri,
                    app_args=consumer_args,
                    expected_output="Read {} words".format(stop_count),
                    service_name=spark_service_name,
                    args=consumer_config)

    utils.kill_driver(producer_id, spark_service_name)

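# Hedged usage sketch (not part of the original suite): how a test case might
# drive test_pipeline for a plaintext, non-Kerberized run. The stop count and
# the "spark" service name are illustrative assumptions, as is the test name.
def test_kafka_pipeline_plaintext():
    test_pipeline(kerberos_flag="false",        # skips the kerberos_args block above
                  stop_count=1000,              # consumer passes on "Read 1000 words"
                  jar_uri=_scala_test_jar_url(),
                  keytab_secret=None,           # ignored when kerberos_flag is "false"
                  spark_service_name="spark")   # assumed Spark dispatcher service name
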
def dcos_test_jar_url():
    """Return the dcos-spark test jar URL: the explicit DCOS_SPARK_TEST_JAR_URL
    override if set, otherwise the S3 HTTP URL derived from DCOS_SPARK_TEST_JAR_PATH."""
    _check_tests_assembly()
    if DCOS_SPARK_TEST_JAR_URL is None:
        return spark_s3.http_url(os.path.basename(DCOS_SPARK_TEST_JAR_PATH))
    return DCOS_SPARK_TEST_JAR_URL

def upload_file(file_path):
    spark_s3.upload_file(file_path)
    return spark_s3.http_url(os.path.basename(file_path))

def upload_jaas():
    """Upload the Kafka client JAAS config to S3 and return its HTTP URL."""
    jaas_path = os.path.join(THIS_DIR, "..", "resources", "spark-kafka-client-jaas.conf")
    s3.upload_file(jaas_path)
    return s3.http_url("spark-kafka-client-jaas.conf")
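

# Hedged sketch (illustrative, not from the original module): the URL returned
# by upload_jaas() is typically fed to spark.mesos.uris so the Mesos fetcher
# places spark-kafka-client-jaas.conf in each task sandbox, where the JVM loads
# it via java.security.auth.login.config. The helper name is hypothetical; the
# conf keys and sandbox path mirror those used in test_pipeline above.
def _jaas_submit_confs():
    jaas_uri = upload_jaas()
    return [
        "--conf", "spark.mesos.uris={}".format(jaas_uri),
        "--conf", "spark.driver.extraJavaOptions="
                  "-Djava.security.auth.login.config=/mnt/mesos/sandbox/spark-kafka-client-jaas.conf",
        "--conf", "spark.executor.extraJavaOptions="
                  "-Djava.security.auth.login.config=/mnt/mesos/sandbox/spark-kafka-client-jaas.conf",
    ]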