示例#1
0
 def test_jobconf(self):
     # With only jobconf set, every entry should surface as a
     # '-jobconf', 'KEY=VALUE' pair; the expected list is in sorted key
     # order.
     runner = MRJobRunner(
         conf_path=False,
         jobconf={'FOO': 'bar', 'BAZ': 'qux', 'BAX': 'Arnold'})

     actual_args = runner._hadoop_conf_args(0, 1)
     expected_args = [
         '-jobconf', 'BAX=Arnold',
         '-jobconf', 'BAZ=qux',
         '-jobconf', 'FOO=bar',
     ]
     assert_equal(actual_args, expected_args)
示例#2
0
 def test_cmdenv(self):
     # With only cmdenv set, every entry should surface as a
     # '-cmdenv', 'KEY=VALUE' pair; the expected list is in sorted key
     # order.
     runner = MRJobRunner(
         conf_path=False,
         cmdenv={'FOO': 'bar', 'BAZ': 'qux', 'BAX': 'Arnold'})

     actual_args = runner._hadoop_conf_args(0, 1)
     expected_args = [
         '-cmdenv', 'BAX=Arnold',
         '-cmdenv', 'BAZ=qux',
         '-cmdenv', 'FOO=bar',
     ]
     assert_equal(actual_args, expected_args)
示例#3
0
    def test_two_files(self):
        # Sorting the two fixture files into self.out should leave the
        # lines below in the output, in exactly this order.
        expected_lines = [
            "A\n", "B\n", "alligator\n", "apple\n", "ball\n", "banana\n",
        ]

        runner = MRJobRunner(conf_paths=[])
        self.addCleanup(runner.cleanup)
        runner._invoke_sort([self.a, self.b], self.out)

        # read everything first so the handle is closed before asserting
        with open(self.out) as sorted_f:
            actual_lines = list(sorted_f)

        self.assertEqual(actual_lines, expected_lines)
示例#4
0
    def test_one_file(self):
        # Sorting the single fixture file self.a into self.out should
        # leave the lines below in the output, in exactly this order.
        runner = MRJobRunner(conf_paths=[])
        # register runner.cleanup so it runs even if the assertion fails
        self.addCleanup(runner.cleanup)

        runner._invoke_sort([self.a], self.out)

        # use a context manager so the output file handle is closed
        # deterministically instead of being left to the garbage collector
        with open(self.out) as out_f:
            self.assertEqual(list(out_f),
                             ['A\n',
                              'alligator\n',
                              'apple\n'])
示例#5
0
 def test_hadoop_output_format(self):
     # hadoop_output_format should be emitted as '-outputformat'. The two
     # arguments to _hadoop_conf_args are presumably (step index, number
     # of steps) -- confirm against the runner.
     output_format = 'org.apache.hadoop.mapred.SequenceFileOutputFormat'
     expected_args = ['-outputformat', output_format]
     runner = MRJobRunner(conf_path=False,
                          hadoop_output_format=output_format)

     # single-step job
     assert_equal(runner._hadoop_conf_args(0, 1), expected_args)

     # multi-step job: nothing for the first call, the output format for
     # the second
     assert_equal(runner._hadoop_conf_args(0, 2), [])
     assert_equal(runner._hadoop_conf_args(1, 2), expected_args)
示例#6
0
    def test_one_file(self):
        # Sorting the single fixture file self.a into self.out should
        # leave the lines below in the output, in exactly this order.
        expected_lines = ['A\n', 'alligator\n', 'apple\n']

        runner = MRJobRunner(conf_paths=[])
        self.addCleanup(runner.cleanup)
        runner._invoke_sort([self.a], self.out)

        # read everything first so the handle is closed before asserting
        with open(self.out) as sorted_f:
            actual_lines = list(sorted_f)

        self.assertEqual(actual_lines, expected_lines)
示例#7
0
    def test_two_files(self):
        # Sorting the two fixture files into self.out should leave the
        # lines below in the output, in exactly this order.
        runner = MRJobRunner(conf_paths=[])
        # register runner.cleanup so it runs even if the assertion fails
        self.addCleanup(runner.cleanup)

        runner._invoke_sort([self.a, self.b], self.out)

        # use a context manager so the output file handle is closed
        # deterministically instead of being left to the garbage collector
        with open(self.out) as out_f:
            self.assertEqual(list(out_f),
                             ['A\n',
                              'B\n',
                              'alligator\n',
                              'apple\n',
                              'ball\n',
                              'banana\n'])
示例#8
0
 def test_hadoop_extra_args_comes_first(self):
     # When several option types are set at once, the hadoop_extra_args
     # values should lead the generated argument list.
     runner_options = dict(
         conf_path=False,
         cmdenv={'FOO': 'bar'},
         hadoop_input_format='FooInputFormat',
         hadoop_output_format='BarOutputFormat',
         jobconf={'baz': 'quz'},
         hadoop_extra_args=['-libjar', 'qux.jar'],
     )
     runner = MRJobRunner(**runner_options)

     all_args = runner._hadoop_conf_args(0, 1)

     # hadoop_extra_args should come first
     leading_pair = all_args[:2]
     assert_equal(leading_pair, ['-libjar', 'qux.jar'])

     # presumably two arguments for each of the five options set above
     arg_count = len(all_args)
     assert_equal(arg_count, 10)
示例#9
0
    def test_bad_sort(self):
        # use_bad_sort() presumably swaps in a sort command that fails;
        # invoking the sort afterwards should raise.
        self.use_bad_sort()

        runner = MRJobRunner(conf_paths=[])
        self.addCleanup(runner.cleanup)

        # sometimes we get a broken pipe error (IOError) on PyPy
        acceptable_errors = (CalledProcessError, IOError)
        input_paths = [self.a, self.b]

        with no_handlers_for_logger():
            self.assertRaises(acceptable_errors,
                              runner._invoke_sort, input_paths, self.out)
示例#10
0
 def test_default(self):
     # With no options given, _interpreter() should equal
     # default_python_bin() and the steps=True variant should be
     # [sys.executable].
     runner = MRJobRunner()

     regular_interpreter = runner._interpreter()
     self.assertEqual(regular_interpreter, self.default_python_bin())

     steps_interpreter = runner._interpreter(steps=True)
     self.assertEqual(steps_interpreter, [sys.executable])
示例#11
0
 def test_environment_variables_non_windows(self):
     # Run the shared environment_variable_checks() helper against a
     # default runner with the variable names TEMP and TMPDIR.
     expected_var_names = ['TEMP', 'TMPDIR']
     runner = MRJobRunner(conf_path=False)
     self.environment_variable_checks(runner, expected_var_names)
示例#12
0
 def test_hadoop_extra_args(self):
     # With only hadoop_extra_args set, the generated arguments should
     # be exactly the extra args that were passed in.
     extra_args = ['-foo', 'bar']
     runner = MRJobRunner(hadoop_extra_args=extra_args, conf_path=False)

     generated_args = runner._hadoop_conf_args(0, 1)
     assert_equal(generated_args, extra_args)
示例#13
0
    def test_environment_variables_non_windows(self):
        # Run the shared environment_variable_checks() helper against a
        # default runner with the variable names TEMP and TMPDIR.
        expected_var_names = ['TEMP', 'TMPDIR']

        runner = MRJobRunner(conf_paths=[])
        self.addCleanup(runner.cleanup)

        self.environment_variable_checks(runner, expected_var_names)
示例#14
0
 def test_no_files(self):
     # Asking the runner to sort an empty list of inputs should raise
     # ValueError.
     runner = MRJobRunner(conf_paths=[])
     no_inputs = []
     self.assertRaises(ValueError,
                       runner._invoke_sort, no_inputs, self.out)
示例#15
0
 def test_default(self):
     # With no relevant options set, _bootstrap_mrjob() should be True.
     runner = MRJobRunner(conf_paths=[])
     should_bootstrap = runner._bootstrap_mrjob()
     self.assertEqual(should_bootstrap, True)
示例#16
0
 def test_interpreter(self):
     # Setting a custom interpreter (and nothing else) should make
     # _bootstrap_mrjob() return False.
     runner = MRJobRunner(interpreter=['ruby'], conf_paths=[])
     should_bootstrap = runner._bootstrap_mrjob()
     self.assertEqual(should_bootstrap, False)
示例#17
0
 def test_steps_python_bin(self):
     # steps_python_bin should only affect the steps=True interpreter;
     # the regular one should still match default_python_bin().
     runner = MRJobRunner(steps_python_bin=['python', '-v'])

     regular_interpreter = runner._interpreter()
     self.assertEqual(regular_interpreter, self.default_python_bin())

     steps_interpreter = runner._interpreter(steps=True)
     self.assertEqual(steps_interpreter, ['python', '-v'])
示例#18
0
 def test_steps_interpreter(self):
     # including whether steps_interpreter overrides interpreter
     runner_options = {
         'interpreter': ['ruby', '-v'],
         'steps_interpreter': ['ruby'],
     }
     runner = MRJobRunner(**runner_options)

     regular_interpreter = runner._interpreter()
     self.assertEqual(regular_interpreter, ['ruby', '-v'])

     steps_interpreter = runner._interpreter(steps=True)
     self.assertEqual(steps_interpreter, ['ruby'])
示例#19
0
 def test_default(self):
     # A runner built with no options should use default_python_bin()
     # as its interpreter and [sys.executable] for steps=True.
     runner = MRJobRunner()

     plain = runner._interpreter()
     self.assertEqual(plain, self.default_python_bin())

     for_steps = runner._interpreter(steps=True)
     self.assertEqual(for_steps, [sys.executable])
示例#20
0
 def test_steps_interpreter(self):
     # including whether steps_interpreter overrides interpreter
     runner = MRJobRunner(
         interpreter=["ruby", "-v"],
         steps_interpreter=["ruby"],
     )

     plain = runner._interpreter()
     self.assertEqual(plain, ["ruby", "-v"])

     for_steps = runner._interpreter(steps=True)
     self.assertEqual(for_steps, ["ruby"])
示例#21
0
 def test_interpreter_overrides_steps_python_bin(self):
     # When both interpreter and steps_python_bin are given, both the
     # regular and the steps=True interpreter should be the interpreter
     # value.
     runner = MRJobRunner(
         interpreter=["ruby"],
         steps_python_bin=["python", "-v"],
     )

     plain = runner._interpreter()
     self.assertEqual(plain, ["ruby"])

     for_steps = runner._interpreter(steps=True)
     self.assertEqual(for_steps, ["ruby"])
示例#22
0
    def test_bad_sort(self):
        # use_bad_sort() presumably swaps in a sort command that fails;
        # invoking the sort afterwards should raise CalledProcessError.
        self.use_bad_sort()

        runner = MRJobRunner(conf_path=False)
        input_paths = [self.a, self.b]

        assert_raises(CalledProcessError,
                      runner._invoke_sort, input_paths, self.out)
示例#23
0
 def test_no_files(self):
     # Asking the runner to sort an empty list of inputs should raise
     # ValueError.
     runner = MRJobRunner(conf_path=False)
     no_inputs = []
     assert_raises(ValueError, runner._invoke_sort, no_inputs, self.out)
示例#24
0
 def test_environment_variables_windows(self):
     # Flag the runner as using the Windows sort, then run the shared
     # environment_variable_checks() helper with the variable name TMP.
     expected_var_names = ['TMP']
     runner = MRJobRunner(conf_path=False)
     runner._sort_is_windows_sort = True
     self.environment_variable_checks(runner, expected_var_names)
示例#25
0
 def test_python_bin(self):
     # python_bin should change the regular interpreter only; the
     # steps=True interpreter should remain [sys.executable].
     runner = MRJobRunner(python_bin=['python', '-v'])

     regular_interpreter = runner._interpreter()
     self.assertEqual(regular_interpreter, ['python', '-v'])

     steps_interpreter = runner._interpreter(steps=True)
     self.assertEqual(steps_interpreter, [sys.executable])
示例#26
0
    def test_environment_variables_windows(self):
        # Flag the runner as using the Windows sort, then run the shared
        # environment_variable_checks() helper with the variable name TMP.
        expected_var_names = ['TMP']

        runner = MRJobRunner(conf_paths=[])
        self.addCleanup(runner.cleanup)
        runner._sort_is_windows_sort = True

        self.environment_variable_checks(runner, expected_var_names)
示例#27
0
 def test_steps_python_bin(self):
     # Only the steps=True interpreter should pick up steps_python_bin;
     # the plain interpreter should still equal default_python_bin().
     runner = MRJobRunner(steps_python_bin=['python', '-v'])

     plain = runner._interpreter()
     self.assertEqual(plain, self.default_python_bin())

     for_steps = runner._interpreter(steps=True)
     self.assertEqual(for_steps, ['python', '-v'])
示例#28
0
 def test_python_bin(self):
     # Only the plain interpreter should pick up python_bin; the
     # steps=True interpreter should still be [sys.executable].
     runner = MRJobRunner(python_bin=['python', '-v'])

     plain = runner._interpreter()
     self.assertEqual(plain, ['python', '-v'])

     for_steps = runner._interpreter(steps=True)
     self.assertEqual(for_steps, [sys.executable])
示例#29
0
 def test_interpreter(self):
     # A custom interpreter should be used both for the regular
     # interpreter and for the steps=True variant.
     runner = MRJobRunner(interpreter=['ruby'])

     regular_interpreter = runner._interpreter()
     self.assertEqual(regular_interpreter, ['ruby'])

     steps_interpreter = runner._interpreter(steps=True)
     self.assertEqual(steps_interpreter, ['ruby'])
示例#30
0
 def test_interpreter(self):
     # Setting interpreter alone should determine both the plain and
     # the steps=True interpreter.
     runner = MRJobRunner(interpreter=['ruby'])

     plain = runner._interpreter()
     self.assertEqual(plain, ['ruby'])

     for_steps = runner._interpreter(steps=True)
     self.assertEqual(for_steps, ['ruby'])
示例#31
0
 def test_steps_interpreter(self):
     # including whether steps_interpreter overrides interpreter
     runner = MRJobRunner(
         interpreter=['ruby', '-v'],
         steps_interpreter=['ruby'],
     )

     regular_interpreter = runner._interpreter()
     self.assertEqual(regular_interpreter, ['ruby', '-v'])

     steps_interpreter = runner._interpreter(steps=True)
     self.assertEqual(steps_interpreter, ['ruby'])
示例#32
0
 def test_interpreter_overrides_steps_python_bin(self):
     # When both interpreter and steps_python_bin are given, both the
     # regular and the steps=True interpreter should be the interpreter
     # value.
     runner_options = {
         'interpreter': ['ruby'],
         'steps_python_bin': ['python', '-v'],
     }
     runner = MRJobRunner(**runner_options)

     regular_interpreter = runner._interpreter()
     self.assertEqual(regular_interpreter, ['ruby'])

     steps_interpreter = runner._interpreter(steps=True)
     self.assertEqual(steps_interpreter, ['ruby'])
示例#33
0
 def test_interpreter_overrides_steps_python_bin(self):
     # interpreter should win over steps_python_bin: both the plain and
     # the steps=True interpreter are expected to be ['ruby'].
     runner = MRJobRunner(
         steps_python_bin=['python', '-v'],
         interpreter=['ruby'],
     )

     plain = runner._interpreter()
     self.assertEqual(plain, ['ruby'])

     for_steps = runner._interpreter(steps=True)
     self.assertEqual(for_steps, ['ruby'])
示例#34
0
 def test_no_bootstrap_mrjob(self):
     # Explicitly passing bootstrap_mrjob=False should make
     # _bootstrap_mrjob() return False.
     runner = MRJobRunner(bootstrap_mrjob=False, conf_paths=[])
     should_bootstrap = runner._bootstrap_mrjob()
     self.assertEqual(should_bootstrap, False)
示例#35
0
 def test_default(self):
     # A runner with no bootstrap-related options should report True
     # from _bootstrap_mrjob().
     runner = MRJobRunner(conf_paths=[])
     bootstrap_flag = runner._bootstrap_mrjob()
     self.assertEqual(bootstrap_flag, True)
示例#36
0
 def test_bootstrap_mrjob_overrides_interpreter(self):
     # An explicit bootstrap_mrjob=True should win even when a custom
     # interpreter is also set.
     runner_options = {
         'conf_paths': [],
         'interpreter': ['ruby'],
         'bootstrap_mrjob': True,
     }
     runner = MRJobRunner(**runner_options)

     should_bootstrap = runner._bootstrap_mrjob()
     self.assertEqual(should_bootstrap, True)
示例#37
0
 def test_no_bootstrap_mrjob(self):
     # bootstrap_mrjob=False should be reported back unchanged by
     # _bootstrap_mrjob().
     runner = MRJobRunner(bootstrap_mrjob=False, conf_paths=[])
     bootstrap_flag = runner._bootstrap_mrjob()
     self.assertEqual(bootstrap_flag, False)
示例#38
0
    def test_environment_variables_windows(self):
        # With _sort_is_windows_sort forced on, hand the runner to the
        # shared environment_variable_checks() helper along with TMP.
        windows_var_names = ['TMP']

        runner = MRJobRunner(conf_paths=[])
        self.addCleanup(runner.cleanup)
        runner._sort_is_windows_sort = True

        self.environment_variable_checks(runner, windows_var_names)
示例#39
0
 def test_interpreter(self):
     # With a custom interpreter and no explicit bootstrap_mrjob,
     # _bootstrap_mrjob() is expected to be False.
     runner = MRJobRunner(interpreter=['ruby'], conf_paths=[])
     bootstrap_flag = runner._bootstrap_mrjob()
     self.assertEqual(bootstrap_flag, False)
示例#40
0
    def test_one_file(self):
        # Sorting the single fixture file self.a into self.out should
        # leave the lines below in the output, in exactly this order.
        runner = MRJobRunner(conf_paths=[])
        # register runner.cleanup so it runs even if the assertion fails
        self.addCleanup(runner.cleanup)

        runner._invoke_sort([self.a], self.out)

        # use a context manager so the output file handle is closed
        # deterministically instead of being left to the garbage collector
        with open(self.out) as out_f:
            self.assertEqual(list(out_f),
                             ['A\n', 'alligator\n', 'apple\n'])
示例#41
0
 def test_bootstrap_mrjob_overrides_interpreter(self):
     # Even with a custom interpreter, bootstrap_mrjob=True should make
     # _bootstrap_mrjob() return True.
     runner = MRJobRunner(
         bootstrap_mrjob=True,
         interpreter=['ruby'],
         conf_paths=[],
     )
     bootstrap_flag = runner._bootstrap_mrjob()
     self.assertEqual(bootstrap_flag, True)
示例#42
0
 def test_environment_variables_windows(self):
     # With _sort_is_windows_sort forced on, hand the runner to the
     # shared environment_variable_checks() helper along with TMP.
     windows_var_names = ['TMP']
     runner = MRJobRunner(conf_path=False)
     runner._sort_is_windows_sort = True
     self.environment_variable_checks(runner, windows_var_names)
示例#43
0
 def test_empty(self):
     # A runner with no hadoop-related options should generate no
     # arguments at all.
     runner = MRJobRunner(conf_path=False)
     generated_args = runner._hadoop_conf_args(0, 1)
     assert_equal(generated_args, [])