Exemplo n.º 1
0
    def test_two_files(self):
        # Sort two input files into self.out and check the resulting lines.
        runner = MRJobRunner(conf_paths=[])
        runner._invoke_sort([self.a, self.b], self.out)

        # Read the output through a context manager so the file handle is
        # closed even when the assertion fails.
        with open(self.out) as out_f:
            self.assertEqual(
                list(out_f),
                ['A\n', 'B\n', 'alligator\n', 'apple\n', 'ball\n',
                 'banana\n'])
Exemplo n.º 2
0
 def test_jobconf(self):
     # Each jobconf entry is expected as a '-jobconf', 'KEY=value' pair;
     # the expected list names the keys in alphabetical order.
     expected_args = [
         '-jobconf', 'BAX=Arnold',
         '-jobconf', 'BAZ=qux',
         '-jobconf', 'FOO=bar',
     ]
     runner = MRJobRunner(
         conf_path=False,
         jobconf={'FOO': 'bar', 'BAZ': 'qux', 'BAX': 'Arnold'})
     actual_args = runner._hadoop_conf_args(0, 1)
     assert_equal(actual_args, expected_args)
Exemplo n.º 3
0
 def test_cmdenv(self):
     # Each cmdenv entry is expected as a '-cmdenv', 'KEY=value' pair;
     # the expected list names the keys in alphabetical order.
     expected_args = [
         '-cmdenv', 'BAX=Arnold',
         '-cmdenv', 'BAZ=qux',
         '-cmdenv', 'FOO=bar',
     ]
     runner = MRJobRunner(
         conf_path=False,
         cmdenv={'FOO': 'bar', 'BAZ': 'qux', 'BAX': 'Arnold'})
     actual_args = runner._hadoop_conf_args(0, 1)
     assert_equal(actual_args, expected_args)
Exemplo n.º 4
0
    def test_bad_sort(self):
        # After the fixture swaps in a bad sort, invoking the sort is
        # expected to raise CalledProcessError.
        self.use_bad_sort()

        runner = MRJobRunner(conf_paths=[])
        sort_under_test = runner._invoke_sort

        with no_handlers_for_logger():
            input_paths = [self.a, self.b]
            self.assertRaises(
                CalledProcessError, sort_under_test, input_paths, self.out)
Exemplo n.º 5
0
    def test_one_file(self):
        # Sort a single input file into self.out and check the resulting
        # lines.
        runner = MRJobRunner(conf_paths=[])
        runner._invoke_sort([self.a], self.out)

        # Read the output through a context manager so the file handle is
        # closed even when the assertion fails.
        with open(self.out) as out_f:
            self.assertEqual(list(out_f),
                             ['A\n',
                              'alligator\n',
                              'apple\n'])
Exemplo n.º 6
0
    def test_one_file(self):
        # Sort a single input file into self.out and check the resulting
        # lines; runner.cleanup is registered to run after the test.
        expected_lines = ['A\n', 'alligator\n', 'apple\n']

        runner = MRJobRunner(conf_paths=[])
        self.addCleanup(runner.cleanup)

        runner._invoke_sort([self.a], self.out)

        with open(self.out) as sorted_f:
            actual_lines = list(sorted_f)
            self.assertEqual(actual_lines, expected_lines)
Exemplo n.º 7
0
 def test_hadoop_output_format(self):
     # Named 'output_format' rather than 'format' so the builtin format()
     # is not shadowed inside this test.
     output_format = 'org.apache.hadoop.mapred.SequenceFileOutputFormat'
     runner = MRJobRunner(conf_path=False,
                          hadoop_output_format=output_format)
     expected_args = ['-outputformat', output_format]

     # NOTE(review): the two arguments are presumably (step number, total
     # number of steps) -- confirm against _hadoop_conf_args().
     assert_equal(runner._hadoop_conf_args(0, 1), expected_args)
     # test multi-step job
     assert_equal(runner._hadoop_conf_args(0, 2), [])
     assert_equal(runner._hadoop_conf_args(1, 2), expected_args)
Exemplo n.º 8
0
    def test_bad_sort(self):
        # After the fixture swaps in a bad sort, invoking the sort is
        # expected to fail; runner.cleanup is registered to run afterwards.
        self.use_bad_sort()

        runner = MRJobRunner(conf_paths=[])
        self.addCleanup(runner.cleanup)

        with no_handlers_for_logger():
            # sometimes we get a broken pipe error (IOError) on PyPy
            with self.assertRaises((CalledProcessError, IOError)):
                runner._invoke_sort([self.a, self.b], self.out)
Exemplo n.º 9
0
 def test_hadoop_extra_args_comes_first(self):
     # Configure several options at once so the position of the extra
     # args within the generated list can be checked.
     runner_options = dict(
         conf_path=False,
         cmdenv={'FOO': 'bar'},
         hadoop_input_format='FooInputFormat',
         hadoop_output_format='BarOutputFormat',
         jobconf={'baz': 'quz'},
         hadoop_extra_args=['-libjar', 'qux.jar'])
     runner = MRJobRunner(**runner_options)

     generated_args = runner._hadoop_conf_args(0, 1)

     # hadoop_extra_args should come first
     leading_pair = generated_args[:2]
     assert_equal(leading_pair, ['-libjar', 'qux.jar'])
     # ten args are expected in total for the five options set above
     assert_equal(len(generated_args), 10)
Exemplo n.º 10
0
 def test_python_bin(self):
     # python_bin is expected back from _interpreter(), while the
     # steps=True variant is expected to be [sys.executable].
     runner = MRJobRunner(python_bin=['python', '-v'])

     default_cmd = runner._interpreter()
     self.assertEqual(default_cmd, ['python', '-v'])

     steps_cmd = runner._interpreter(steps=True)
     self.assertEqual(steps_cmd, [sys.executable])
     self.assertEqual(runner._interpreter(steps=True), [sys.executable])
Exemplo n.º 11
0
 def test_bootstrap_mrjob_overrides_interpreter(self):
     # An explicit bootstrap_mrjob=True is expected to win even though a
     # custom interpreter is also configured.
     runner_options = dict(
         conf_paths=[], interpreter=['ruby'], bootstrap_mrjob=True)
     runner = MRJobRunner(**runner_options)

     bootstrap_flag = runner._bootstrap_mrjob()
     self.assertEqual(bootstrap_flag, True)
Exemplo n.º 12
0
 def test_interpreter(self):
     # With only a custom interpreter configured, _bootstrap_mrjob() is
     # expected to return False.
     runner_options = dict(conf_paths=[], interpreter=['ruby'])
     runner = MRJobRunner(**runner_options)

     bootstrap_flag = runner._bootstrap_mrjob()
     self.assertEqual(bootstrap_flag, False)
Exemplo n.º 13
0
 def test_no_bootstrap_mrjob(self):
     # An explicit bootstrap_mrjob=False is expected to be reported back
     # by _bootstrap_mrjob().
     runner_options = dict(conf_paths=[], bootstrap_mrjob=False)
     runner = MRJobRunner(**runner_options)

     bootstrap_flag = runner._bootstrap_mrjob()
     self.assertEqual(bootstrap_flag, False)
Exemplo n.º 14
0
 def test_default(self):
     # With no bootstrap-related options given, _bootstrap_mrjob() is
     # expected to return True.
     runner = MRJobRunner(conf_paths=[])

     bootstrap_flag = runner._bootstrap_mrjob()
     self.assertEqual(bootstrap_flag, True)
Exemplo n.º 15
0
 def test_hadoop_extra_args(self):
     # With nothing else configured, the generated args are expected to
     # equal the extra args that were passed in.
     extra_args = ['-foo', 'bar']
     runner = MRJobRunner(hadoop_extra_args=extra_args, conf_path=False)

     generated_args = runner._hadoop_conf_args(0, 1)
     assert_equal(generated_args, extra_args)
Exemplo n.º 16
0
 def test_environment_variables_non_windows(self):
     # Delegate to the shared helper with the TEMP and TMPDIR variable
     # names.
     variable_names = ['TEMP', 'TMPDIR']
     runner = MRJobRunner(conf_path=False)
     self.environment_variable_checks(runner, variable_names)
Exemplo n.º 17
0
    def test_environment_variables_windows(self):
        # Flag the runner as using Windows sort, then delegate to the
        # shared helper with the TMP variable name.
        variable_names = ['TMP']

        runner = MRJobRunner(conf_paths=[])
        self.addCleanup(runner.cleanup)

        setattr(runner, '_sort_is_windows_sort', True)
        self.environment_variable_checks(runner, variable_names)
Exemplo n.º 18
0
 def test_default(self):
     # With no options, _interpreter() is expected to match the fixture's
     # default python bin, and the steps=True variant [sys.executable].
     runner = MRJobRunner()

     default_cmd = runner._interpreter()
     expected_default_cmd = self.default_python_bin()
     self.assertEqual(default_cmd, expected_default_cmd)

     steps_cmd = runner._interpreter(steps=True)
     self.assertEqual(steps_cmd, [sys.executable])
Exemplo n.º 19
0
    def test_bad_sort(self):
        # After the fixture swaps in a bad sort, invoking the sort is
        # expected to raise CalledProcessError.
        self.use_bad_sort()

        runner = MRJobRunner(conf_path=False)
        sort_under_test = runner._invoke_sort
        input_paths = [self.a, self.b]
        assert_raises(
            CalledProcessError, sort_under_test, input_paths, self.out)
Exemplo n.º 20
0
 def test_no_files(self):
     # Invoking the sort with an empty list of input paths is expected to
     # raise ValueError.
     runner = MRJobRunner(conf_path=False)
     sort_under_test = runner._invoke_sort
     no_inputs = []
     assert_raises(ValueError, sort_under_test, no_inputs, self.out)
Exemplo n.º 21
0
    def test_environment_variables_non_windows(self):
        # Delegate to the shared helper with the TEMP and TMPDIR variable
        # names; runner.cleanup is registered to run after the test.
        variable_names = ['TEMP', 'TMPDIR']

        runner = MRJobRunner(conf_paths=[])
        self.addCleanup(runner.cleanup)

        self.environment_variable_checks(runner, variable_names)
Exemplo n.º 22
0
 def test_steps_python_bin(self):
     # steps_python_bin is expected to affect only the steps=True variant
     # of _interpreter(); the plain call should match the fixture default.
     runner = MRJobRunner(steps_python_bin=['python', '-v'])

     default_cmd = runner._interpreter()
     expected_default_cmd = self.default_python_bin()
     self.assertEqual(default_cmd, expected_default_cmd)

     steps_cmd = runner._interpreter(steps=True)
     self.assertEqual(steps_cmd, ['python', '-v'])
Exemplo n.º 23
0
 def test_no_files(self):
     # Invoking the sort with an empty list of input paths is expected to
     # raise ValueError.
     runner = MRJobRunner(conf_paths=[])
     sort_under_test = runner._invoke_sort
     no_inputs = []
     self.assertRaises(ValueError, sort_under_test, no_inputs, self.out)
Exemplo n.º 24
0
 def test_interpreter(self):
     # A custom interpreter is expected back from both the plain and the
     # steps=True variants of _interpreter().
     runner = MRJobRunner(interpreter=['ruby'])

     default_cmd = runner._interpreter()
     self.assertEqual(default_cmd, ['ruby'])

     steps_cmd = runner._interpreter(steps=True)
     self.assertEqual(steps_cmd, ['ruby'])
Exemplo n.º 25
0
 def test_steps_interpreter(self):
     # including whether steps_interpreter overrides interpreter
     runner_options = dict(interpreter=['ruby', '-v'],
                           steps_interpreter=['ruby'])
     runner = MRJobRunner(**runner_options)

     default_cmd = runner._interpreter()
     self.assertEqual(default_cmd, ['ruby', '-v'])

     steps_cmd = runner._interpreter(steps=True)
     self.assertEqual(steps_cmd, ['ruby'])
Exemplo n.º 26
0
 def test_interpreter_overrides_steps_python_bin(self):
     # When both are given, interpreter is expected to win over
     # steps_python_bin for the plain and the steps=True variants alike.
     runner_options = dict(interpreter=['ruby'],
                           steps_python_bin=['python', '-v'])
     runner = MRJobRunner(**runner_options)

     default_cmd = runner._interpreter()
     self.assertEqual(default_cmd, ['ruby'])

     steps_cmd = runner._interpreter(steps=True)
     self.assertEqual(steps_cmd, ['ruby'])
Exemplo n.º 27
0
 def test_environment_variables_windows(self):
     # Flag the runner as using Windows sort, then delegate to the shared
     # helper with the TMP variable name.
     variable_names = ['TMP']
     runner = MRJobRunner(conf_path=False)
     setattr(runner, '_sort_is_windows_sort', True)
     self.environment_variable_checks(runner, variable_names)
Exemplo n.º 28
0
 def test_empty(self):
     # With no options configured, no hadoop conf args are expected.
     runner = MRJobRunner(conf_path=False)
     generated_args = runner._hadoop_conf_args(0, 1)
     assert_equal(generated_args, [])