# Example 1
class TestSparkAppSubmit(TestCase):
    """Integration tests that submit the bundled WordCounter Spark job.

    The tests build a ``SparkApplication`` either through the fluent API
    (:meth:`_spark_application_template_`) or from a pre-built
    ``Configuration`` section (:meth:`spark_app_config_template`) and run it
    against a sample input file, asserting that the submission succeeds and
    produces an output directory.  Tests are skipped when the
    ``spark-submit`` command-line utility is not installed.
    """

    # Candidate cluster managers for the example application.
    # NOTE(review): only local(1) is currently exercised by the tests below;
    # the yarn entries are kept for manual experimentation.
    masters = [
        SparkMaster.local(),
        SparkMaster.local(1),
        SparkMaster.yarn_client(),
        SparkMaster.yarn_cluster()
    ]

    # Sample input text file shipped with the test resources.
    input_path = os.path.join(os.path.dirname(__file__), "resources", "spark",
                              "input.txt")

    def _spark_application_template_(self, master):
        """Return a ``SparkApplication`` for the example jar, built fluently.

        :param master: Spark master descriptor (e.g. ``SparkMaster.local(1)``).
        """
        return SparkApplication().application(
            application_jar=os.path.join(os.path.dirname(__file__),
                                         "resources", "spark",
                                         "SparkExample.jar"),
            main_class="example.spark.WordCounter").master(master)

    def spark_app_config_template(self, master, name=None):
        """Return a ``Configuration`` section describing the example app.

        :param master: Spark master descriptor to store in the config.
        :param name: section name; a fresh uuid is generated when omitted.

        Bug fix: the original signature used ``name=str(uuid.uuid4())``,
        which Python evaluates ONCE at definition time — every defaulted
        call shared the same "unique" section name.  The ``None`` sentinel
        generates a fresh uuid per call.
        """
        if name is None:
            name = str(uuid.uuid4())
        _config = Configuration.create()
        _config.set(section=name,
                    key=TaskOptions.SPARK_APP_CONFIG_MASTER,
                    value=master)
        _config.set(section=name,
                    key=TaskOptions.SPARK_APP_CONFIG_APPLICATION_JAR,
                    value=os.path.join(os.path.dirname(__file__), "resources",
                                       "spark", "SparkExample.jar"))
        _config.set(section=name,
                    key=TaskOptions.SPARK_APP_CONFIG_MAIN_CLASS,
                    value="example.spark.WordCounter")
        return _config

    @skipUnless(has_command('spark-submit'),
                "Cannot find spark-submit command-line utility")
    def test_spark_app_submit(self):
        """Submit the fluently-built application on a local master."""
        self._run_(application=self._spark_application_template_(
            SparkMaster.local(1)))

    @skipUnless(has_command('spark-submit'),
                "Cannot find spark-submit command-line utility")
    def test_preconfigured_spark_app_submit(self):
        """Submit an application constructed from a Configuration section."""
        section = str(uuid.uuid4())
        _app_config = self.spark_app_config_template(
            master=SparkMaster.local(1), name=section)
        self._run_(
            application=SparkApplication(config=_app_config, name=section))

    def _run_(self, application, test_id=None):
        """Run *application* against the sample input in a scratch directory.

        :param application: configured ``SparkApplication`` to submit.
        :param test_id: scratch-directory suffix; a fresh uuid per call
            when omitted.

        Bug fix: the original default ``test_id=str(uuid.uuid4())`` was
        evaluated once at definition time, so every defaulted run shared
        one /tmp directory — runs could collide and the ``finally`` cleanup
        of one run could delete another run's output.
        """
        if test_id is None:
            test_id = str(uuid.uuid4())
        basedir = LocalFS(os.path.join("/tmp", "test_spark", test_id))
        try:
            basedir.create_directory()
            _app_input = self.input_path
            _app_output_dir = os.path.join(basedir.path, "output")
            status = application.run('file:' + _app_input,
                                     'file:' + _app_output_dir)
            self.assertTrue(status.is_ok(), status.stderr())
            self.assertTrue(os.path.exists(_app_output_dir), status.stderr())
        finally:
            # Always remove the scratch directory, even on assertion failure.
            basedir.delete_directory()