示例#1
0
 def test_preconfigured_spark_app_submit(self):
     """Submit a SparkApplication driven purely by a Configuration section."""
     _section_name = str(uuid.uuid4())
     _config = self.spark_app_config_template(
         master=SparkMaster.local(1),
         name=_section_name)
     _application = SparkApplication(config=_config, name=_section_name)
     self._run_(application=_application)
示例#2
0
    def test_spark_submit_command_generation(self):
        """Verify the exact spark-submit command line emitted by the fluent builder."""
        # The executor mock asserts it receives exactly this command string.
        expected = " ".join([
            "spark-submit",
            "--master local[10]",
            "--class test.SparkApp",
            "--name test_app",
            "--jars lib001.jar,lib002.jar,lib003.jar",
            "--files dim001.cache.txt,dim002.cache.txt",
            "--properties-file spark.app.configs",
            "--conf \"spark.app.name=test_app spark.executor.memory=512m\"",
            "application.jar",
            "10",
        ])

        app = SparkApplication(executor=mock_executor(expected_command=expected))
        app = app.master(SparkMaster.local(10))
        app = app.application(application_jar='application.jar',
                              app_name="test_app",
                              main_class="test.SparkApp")
        # classpath() accumulates across calls.
        app = app.classpath("lib001.jar", "lib002.jar")
        app = app.classpath("lib003.jar")
        app = app.add_files("dim001.cache.txt")
        app = app.add_files("dim002.cache.txt")
        app = app.config_file(path="spark.app.configs")
        app = app.with_config_option("spark.app.name", "test_app")
        app = app.with_config_option("spark.executor.memory", "512m")
        app.run(10)
示例#3
0
 def test_preconfigured_spark_app_submit(self):
     """Build app config for a unique section name, then submit it."""
     _name = str(uuid.uuid4())
     _config = self.spark_app_config_template(master=SparkMaster.local(1),
                                              name=_name)
     self._run_(application=SparkApplication(config=_config, name=_name))
示例#4
0
 def test_spark_app_submit(self):
     """Run the example application against a local[1] Spark master."""
     _application = self._spark_application_template_(SparkMaster.local(1))
     self._run_(application=_application)
示例#5
0
class TestSparkAppSubmit(TestCase):
    """Integration tests that submit the bundled Spark word-count example
    via the ``spark-submit`` command-line utility.

    Tests are skipped when ``spark-submit`` is not on the PATH.
    """

    # Masters the example could target; only local[1] is exercised below
    # (the YARN modes would need a running cluster).
    masters = [
        SparkMaster.local(),
        SparkMaster.local(1),
        SparkMaster.yarn_client(),
        SparkMaster.yarn_cluster()
    ]

    # Sample input file shipped with the test resources.
    input_path = os.path.join(os.path.dirname(__file__), "resources", "spark",
                              "input.txt")

    def _spark_application_template_(self, master):
        """Return a SparkApplication for the example jar, built via the fluent API."""
        return SparkApplication().application(
            application_jar=os.path.join(os.path.dirname(__file__),
                                         "resources", "spark",
                                         "SparkExample.jar"),
            main_class="example.spark.WordCounter").master(master)

    def spark_app_config_template(self, master, name=None):
        """Build a Configuration section describing the example application.

        :param master: Spark master the application should be submitted to.
        :param name: section name; a fresh UUID is generated when omitted.

        Note: the default used to be ``name=str(uuid.uuid4())``, which Python
        evaluates once at definition time — every call that omitted *name*
        silently shared the same section. Generate a new UUID per call instead.
        """
        if name is None:
            name = str(uuid.uuid4())
        _config = Configuration.create()
        _config.set(section=name,
                    key=TaskOptions.SPARK_APP_CONFIG_MASTER,
                    value=master)
        _config.set(section=name,
                    key=TaskOptions.SPARK_APP_CONFIG_APPLICATION_JAR,
                    value=os.path.join(os.path.dirname(__file__), "resources",
                                       "spark", "SparkExample.jar"))
        _config.set(section=name,
                    key=TaskOptions.SPARK_APP_CONFIG_MAIN_CLASS,
                    value="example.spark.WordCounter")
        return _config

    @skipUnless(has_command('spark-submit'),
                "Cannot find spark-submit command-line utility")
    def test_spark_app_submit(self):
        """Submit the fluently-configured example app to a local[1] master."""
        self._run_(application=self._spark_application_template_(
            SparkMaster.local(1)))

    @skipUnless(has_command('spark-submit'),
                "Cannot find spark-submit command-line utility")
    def test_preconfigured_spark_app_submit(self):
        """Submit the example app configured via a Configuration section."""
        section = str(uuid.uuid4())
        _app_config = self.spark_app_config_template(
            master=SparkMaster.local(1), name=section)
        self._run_(
            application=SparkApplication(config=_app_config, name=section))

    def _run_(self, application, test_id=None):
        """Run *application* against the sample input in an isolated temp dir.

        :param application: configured SparkApplication to submit.
        :param test_id: directory name under /tmp/test_spark; a fresh UUID is
            generated when omitted (previously a def-time default meant every
            call shared one directory, breaking test isolation).
        """
        if test_id is None:
            test_id = str(uuid.uuid4())
        basedir = LocalFS(os.path.join("/tmp", "test_spark", test_id))
        try:
            basedir.create_directory()
            _app_input = self.input_path
            _app_output_dir = os.path.join(basedir.path, "output")
            status = application.run('file:' + _app_input,
                                     'file:' + _app_output_dir)
            self.assertTrue(status.is_ok(), status.stderr())
            self.assertTrue(os.path.exists(_app_output_dir), status.stderr())
        finally:
            # Always clean up the temp directory, even on assertion failure.
            basedir.delete_directory()
示例#6
0
 def test_spark_app_submit(self):
     """Submit the templated example application to a local[1] master."""
     self._run_(
         application=self._spark_application_template_(SparkMaster.local(1)))