Пример #1
0
    def test_spark_step_without_mr_job_script(self):
        """EMRJobRunner must raise ValueError when built from step
        descriptions and stdin alone."""
        step_descs = MRNullSpark()._steps_desc()
        empty_stdin = BytesIO()

        # the runner needs to be able to call the script's spark() method
        with self.assertRaises(ValueError):
            EMRJobRunner(steps=step_descs, stdin=empty_stdin)
Пример #2
0
    def test_spark_mr_job(self):
        """Check the script args the runner builds for step 0 of a job
        created with no extra command-line options."""
        mr_job = MRNullSpark()
        mr_job.sandbox()

        expected_args = [
            '--step-num=0',
            '--spark',
            '<step 0 input>',
            '<step 0 output>',
        ]

        with mr_job.make_runner() as runner:
            actual_args = runner._spark_script_args(0)
            self.assertEqual(actual_args, expected_args)
Пример #3
0
    def test_spark_passthrough_arg(self):
        """An option given to the job should show up in the script args,
        between the ``--spark`` switch and the input/output placeholders."""
        passthrough = '--extra-spark-arg=--verbose'

        mr_job = MRNullSpark([passthrough])
        mr_job.sandbox()

        expected_args = [
            '--step-num=0',
            '--spark',
            passthrough,
            '<step 0 input>',
            '<step 0 output>',
        ]

        with mr_job.make_runner() as runner:
            actual_args = runner._spark_script_args(0)
            self.assertEqual(actual_args, expected_args)
Пример #4
0
    def test_dont_upload_mrjob_zip(self):
        """After a run, the mrjob zip should exist and appear in the
        arguments from ``_spark_submit_args(0)``, but must not be
        registered with the upload manager."""
        mr_job = MRNullSpark(['-r', 'spark', '--spark-master', 'yarn'])
        mr_job.sandbox()

        with mr_job.make_runner() as runner:
            runner.run()

            zip_path = runner._mrjob_zip_path

            # the zip was actually created on disk...
            self.assertTrue(exists(zip_path))

            # ...but the upload manager knows nothing about it...
            uploaded = runner._upload_mgr.path_to_uri()
            self.assertNotIn(zip_path, uploaded)

            # ...while the submit args still refer to it
            submit_args = runner._spark_submit_args(0)
            self.assertIn(zip_path, submit_args)
Пример #5
0
    def test_eggs(self):
        """Eggs passed via ``--py-files`` should appear in the submit args
        as one comma-separated value, followed by the mrjob zip path."""
        dragon_egg = self.makefile('dragon.egg')
        horton_egg = self.makefile('horton.egg')

        # comma-separated list, exactly as handed to --py-files
        eggs_opt = '%s,%s' % (dragon_egg, horton_egg)

        mr_job = MRNullSpark(['-r', 'spark', '--py-files', eggs_opt])
        mr_job.sandbox()

        with mr_job.make_runner() as runner:
            runner.run()

            # the mrjob zip is expected after the user-supplied eggs
            expected_opt = '%s,%s' % (eggs_opt, runner._mrjob_zip_path)
            submit_args = runner._spark_submit_args(0)
            self.assertIn(expected_opt, submit_args)
Пример #6
0
    def test_spark_file_arg(self):
        """A file given via ``--extra-file`` should be referred to by its
        name ``foo`` in the script args, and that name should map back to
        the original path in the working dir manager."""
        source_path = self.makefile('foo')

        mr_job = MRNullSpark(['--extra-file', source_path])
        mr_job.sandbox()

        expected_args = [
            '--step-num=0',
            '--spark',
            '--extra-file',
            'foo',
            '<step 0 input>',
            '<step 0 output>',
        ]

        with mr_job.make_runner() as runner:
            self.assertEqual(runner._spark_script_args(0), expected_args)

            # the name 'foo' must be registered as a 'file' and point at
            # the path we created
            files_by_name = runner._working_dir_mgr.name_to_path('file')
            self.assertIn('foo', files_by_name)
            self.assertEqual(files_by_name['foo'], source_path)
Пример #7
0
    def test_no_spark_steps(self):
        """Making a runner for the job with ``-r local`` should raise
        NotImplementedError."""
        # only a sanity check; _STEP_TYPES is tested in a lot of ways
        mr_job = MRNullSpark(['-r', 'local'])
        mr_job.sandbox()

        with self.assertRaises(NotImplementedError):
            mr_job.make_runner()