Example #1
 def test_hadoop_output_format(self):
     format = "org.apache.hadoop.mapred.SequenceFileOutputFormat"
     runner = LocalMRJobRunner(conf_paths=[], hadoop_output_format=format)
     self.assertEqual(runner._hadoop_conf_args({}, 0, 1), ["-outputformat", format])
     # test multi-step job
     self.assertEqual(runner._hadoop_conf_args({}, 0, 2), [])
     self.assertEqual(runner._hadoop_conf_args({}, 1, 2), ["-outputformat", format])
Example #2
 def test_jobconf(self):
     jobconf = {"FOO": "bar", "BAZ": "qux", "BAX": "Arnold"}
     runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf)
     self.assertEqual(runner._hadoop_conf_args({}, 0, 1), ["-D", "BAX=Arnold", "-D", "BAZ=qux", "-D", "FOO=bar"])
     runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf, hadoop_version="0.18")
     self.assertEqual(
         runner._hadoop_conf_args({}, 0, 1), ["-jobconf", "BAX=Arnold", "-jobconf", "BAZ=qux", "-jobconf", "FOO=bar"]
     )
Example #3
 def test_hadoop_output_format(self):
     format = 'org.apache.hadoop.mapred.SequenceFileOutputFormat'
     runner = LocalMRJobRunner(conf_path=False, hadoop_output_format=format)
     assert_equal(runner._hadoop_conf_args(0, 1),
                  ['-outputformat', format])
     # test multi-step job
     assert_equal(runner._hadoop_conf_args(0, 2), [])
     assert_equal(runner._hadoop_conf_args(1, 2),
                  ['-outputformat', format])
Example #4
 def test_hadoop_input_format(self):
     format = 'org.apache.hadoop.mapred.SequenceFileInputFormat'
     runner = LocalMRJobRunner(conf_paths=[], hadoop_input_format=format)
     self.assertEqual(runner._hadoop_conf_args(0, 1),
                      ['-inputformat', format])
     # test multi-step job
     self.assertEqual(runner._hadoop_conf_args(0, 2),
                      ['-inputformat', format])
     self.assertEqual(runner._hadoop_conf_args(1, 2), [])
Example #5
 def test_hadoop_output_format(self):
     format = 'org.apache.hadoop.mapred.SequenceFileOutputFormat'
     runner = LocalMRJobRunner(conf_paths=[], hadoop_output_format=format)
     self.assertEqual(runner._hadoop_conf_args({}, 0, 1),
                      ['-outputformat', format])
     # test multi-step job
     self.assertEqual(runner._hadoop_conf_args({}, 0, 2), [])
     self.assertEqual(runner._hadoop_conf_args({}, 1, 2),
                      ['-outputformat', format])
Example #6
 def test_hadoop_output_format(self):
     format = 'org.apache.hadoop.mapred.SequenceFileOutputFormat'
     runner = LocalMRJobRunner(conf_paths=[], hadoop_output_format=format)
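     # note: this runner version also injects a default mapred.job.name for each step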
     self.assertEqual(runner._hadoop_conf_args({}, 0, 1),
                      ['-D', 'mapred.job.name=None > None',
                       '-outputformat', format])
     # test multi-step job
     self.assertEqual(runner._hadoop_conf_args({}, 0, 2),
                      ['-D', 'mapred.job.name=None > None (step 1 of 2)'])
     self.assertEqual(runner._hadoop_conf_args({}, 1, 2),
                      ['-D', 'mapred.job.name=None > None (step 2 of 2)',
                       '-outputformat', format
                       ])
Example #7
 def test_jobconf(self):
     jobconf = {'FOO': 'bar', 'BAZ': 'qux', 'BAX': 'Arnold'}
     runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf)
     self.assertEqual(runner._hadoop_conf_args(0, 1),
                      ['-D', 'BAX=Arnold',
                       '-D', 'BAZ=qux',
                       '-D', 'FOO=bar',
                       ])
     runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf,
                               hadoop_version='0.18')
     self.assertEqual(runner._hadoop_conf_args(0, 1),
                      ['-jobconf', 'BAX=Arnold',
                       '-jobconf', 'BAZ=qux',
                       '-jobconf', 'FOO=bar',
                       ])
Example #8
 def test_jobconf(self):
     jobconf = {'FOO': 'bar', 'BAZ': 'qux', 'BAX': 'Arnold'}
     runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf)
     self.assertEqual(runner._hadoop_conf_args({}, 0, 1),
                      ['-D', 'BAX=Arnold',
                       '-D', 'BAZ=qux',
                       '-D', 'FOO=bar',
                       ])
     runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf,
                               hadoop_version='0.18')
     self.assertEqual(runner._hadoop_conf_args({}, 0, 1),
                      ['-jobconf', 'BAX=Arnold',
                       '-jobconf', 'BAZ=qux',
                       '-jobconf', 'FOO=bar',
                       ])
Example #9
    def test_empty_jobconf_values(self):
        # value of None means to omit that jobconf
        jobconf = {'foo': '', 'bar': None}
        runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf)

        self.assertEqual(runner._hadoop_conf_args({}, 0, 1),
                         ['-D', 'foo='])
Example #10
 def test_cmdenv(self):
     cmdenv = {'FOO': 'bar', 'BAZ': 'qux', 'BAX': 'Arnold'}
     runner = LocalMRJobRunner(conf_paths=[], cmdenv=cmdenv)
     self.assertEqual(runner._hadoop_conf_args({}, 0, 1),
                      ['-cmdenv', 'BAX=Arnold',
                       '-cmdenv', 'BAZ=qux',
                       '-cmdenv', 'FOO=bar',
                       ])
Example #11
 def test_cmdenv(self):
     cmdenv = {'FOO': 'bar', 'BAZ': 'qux', 'BAX': 'Arnold'}
     runner = LocalMRJobRunner(conf_paths=[], cmdenv=cmdenv)
     self.assertEqual(runner._hadoop_conf_args(0, 1),
                      ['-cmdenv', 'BAX=Arnold',
                       '-cmdenv', 'BAZ=qux',
                       '-cmdenv', 'FOO=bar',
                       ])
Example #12
    def test_partitioner(self):
        partitioner = 'org.apache.hadoop.mapreduce.Partitioner'

        runner = LocalMRJobRunner(conf_paths=[], partitioner=partitioner)
        self.assertEqual(runner._hadoop_conf_args({}, 0, 1),
                         ['-D', 'mapred.job.name=None > None',
                          '-partitioner', partitioner,
                          ])
Example #13
 def test_jobconf_job_name_custom(self):
     jobconf = {'BAX': 'Arnold', 'mapred.job.name': 'Foo'}
     runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf,
                               hadoop_version='0.18')
     self.assertEqual(runner._hadoop_conf_args({}, 0, 1),
                      ['-jobconf', 'BAX=Arnold',
                       '-jobconf', 'mapred.job.name=Foo'
                       ])
Example #14
 def test_jobconf_from_step(self):
     jobconf = {'FOO': 'bar', 'BAZ': 'qux'}
     runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf)
     step = {'jobconf': {'BAZ': 'quux', 'BAX': 'Arnold'}}
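     # the step-level value for BAZ ('quux') overrides the runner-level 'qux'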
     self.assertEqual(runner._hadoop_conf_args(step, 0, 1),
                      ['-D', 'BAX=Arnold',
                       '-D', 'BAZ=quux',
                       '-D', 'FOO=bar',
                       ])
Example #15
 def test_configuration_translation(self):
     jobconf = {'mapred.jobtracker.maxtasks.per.job': 1}
     with no_handlers_for_logger('mrjob.compat'):
         runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf,
                                   hadoop_version='0.21')
     self.assertEqual(runner._hadoop_conf_args({}, 0, 1),
                      ['-D', 'mapred.jobtracker.maxtasks.per.job=1',
                       '-D', 'mapreduce.jobtracker.maxtasks.perjob=1'
                       ])
Example #16
 def test_jobconf_from_step(self):
     jobconf = {'FOO': 'bar', 'BAZ': 'qux'}
     runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf)
     step = {'jobconf': {'BAZ': 'quux', 'BAX': 'Arnold'}}
     self.assertEqual(runner._hadoop_conf_args(step, 0, 1),
                      ['-D', 'BAX=Arnold',
                       '-D', 'BAZ=quux',
                       '-D', 'FOO=bar',
                       ])
Example #17
 def test_hadoop_extra_args_comes_first(self):
     runner = LocalMRJobRunner(
         cmdenv={"FOO": "bar"},
         conf_paths=[],
         hadoop_extra_args=["-libjar", "qux.jar"],
         hadoop_input_format="FooInputFormat",
         hadoop_output_format="BarOutputFormat",
         jobconf={"baz": "quz"},
         partitioner="java.lang.Object",
     )
     # hadoop_extra_args should come first
     conf_args = runner._hadoop_conf_args({}, 0, 1)
     self.assertEqual(conf_args[:2], ["-libjar", "qux.jar"])
     self.assertEqual(len(conf_args), 12)
Example #18
 def test_hadoop_extra_args_comes_first(self):
     runner = LocalMRJobRunner(
         cmdenv={'FOO': 'bar'},
         conf_paths=[],
         hadoop_extra_args=['-libjar', 'qux.jar'],
         hadoop_input_format='FooInputFormat',
         hadoop_output_format='BarOutputFormat',
         jobconf={'baz': 'quz'},
         partitioner='java.lang.Object',
     )
     # hadoop_extra_args should come first
     conf_args = runner._hadoop_conf_args({}, 0, 1)
     self.assertEqual(conf_args[:2], ['-libjar', 'qux.jar'])
     self.assertEqual(len(conf_args), 14)
Example #19
 def test_hadoop_extra_args_comes_first(self):
     runner = LocalMRJobRunner(
         cmdenv={'FOO': 'bar'},
         conf_paths=[],
         hadoop_extra_args=['-libjar', 'qux.jar'],
         hadoop_input_format='FooInputFormat',
         hadoop_output_format='BarOutputFormat',
         jobconf={'baz': 'quz'},
         partitioner='java.lang.Object',
     )
     # hadoop_extra_args should come first
     conf_args = runner._hadoop_conf_args({}, 0, 1)
     self.assertEqual(conf_args[:2], ['-libjar', 'qux.jar'])
     self.assertEqual(len(conf_args), 12)
Example #20
 def test_hadoop_extra_args(self):
     extra_args = ['-foo', 'bar']
     runner = LocalMRJobRunner(conf_path=False,
                               hadoop_extra_args=extra_args)
     assert_equal(runner._hadoop_conf_args(0, 1), extra_args)
Example #21
 def test_jobconf_job_name_default(self):
     runner = LocalMRJobRunner(conf_paths=[], hadoop_version='0.18')
     self.assertEqual(runner._hadoop_conf_args({}, 0, 1),
                      ['-jobconf', 'mapred.job.name=None > None'
                       ])
Example #22
    def test_partitioner(self):
        partitioner = 'org.apache.hadoop.mapreduce.Partitioner'

        runner = LocalMRJobRunner(conf_path=False, partitioner=partitioner)
        assert_equal(runner._hadoop_conf_args(0, 1),
                     ['-partitioner', partitioner])
Example #23
 def test_hadoop_extra_args(self):
     extra_args = ['-foo', 'bar']
     runner = LocalMRJobRunner(conf_paths=[],
                               hadoop_extra_args=extra_args)
     self.assertEqual(runner._hadoop_conf_args({}, 0, 1),
                      extra_args + ['-D', 'mapred.job.name=None > None'])
Example #24
 def test_empty(self):
     runner = LocalMRJobRunner(conf_paths=[])
     self.assertEqual(runner._hadoop_conf_args({}, 0, 1),
         ['-D', 'mapred.job.name=None > None'])
Example #25
    def test_partitioner(self):
        partitioner = 'org.apache.hadoop.mapreduce.Partitioner'

        runner = LocalMRJobRunner(conf_paths=[], partitioner=partitioner)
        self.assertEqual(runner._hadoop_conf_args(0, 1),
                         ['-partitioner', partitioner])
Example #26
 def test_empty(self):
     runner = LocalMRJobRunner(conf_paths=[])
     self.assertEqual(runner._hadoop_conf_args({}, 0, 1), [])
Example #27
 def test_cmdenv(self):
     cmdenv = {"FOO": "bar", "BAZ": "qux", "BAX": "Arnold"}
     runner = LocalMRJobRunner(conf_paths=[], cmdenv=cmdenv)
     self.assertEqual(
         runner._hadoop_conf_args({}, 0, 1), ["-cmdenv", "BAX=Arnold", "-cmdenv", "BAZ=qux", "-cmdenv", "FOO=bar"]
     )
Example #28
 def test_jobconf_from_step(self):
     jobconf = {"FOO": "bar", "BAZ": "qux"}
     runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf)
     step = {"jobconf": {"BAZ": "quux", "BAX": "Arnold"}}
     self.assertEqual(runner._hadoop_conf_args(step, 0, 1), ["-D", "BAX=Arnold", "-D", "BAZ=quux", "-D", "FOO=bar"])
Example #29
 def test_hadoop_extra_args(self):
     extra_args = ['-foo', 'bar']
     runner = LocalMRJobRunner(conf_paths=[],
                               hadoop_extra_args=extra_args)
     self.assertEqual(runner._hadoop_conf_args(0, 1), extra_args)
Example #30
 def test_empty(self):
     runner = LocalMRJobRunner(conf_paths=[])
     self.assertEqual(runner._hadoop_conf_args(0, 1), [])
Example #31
    def test_partitioner(self):
        partitioner = 'org.apache.hadoop.mapreduce.Partitioner'

        runner = LocalMRJobRunner(conf_paths=[], partitioner=partitioner)
        self.assertEqual(runner._hadoop_conf_args({}, 0, 1),
                         ['-partitioner', partitioner])
Example #32
 def test_empty(self):
     runner = LocalMRJobRunner(conf_path=False)
     assert_equal(runner._hadoop_conf_args(0, 1), [])
Example #33
 def test_hadoop_extra_args(self):
     extra_args = ["-foo", "bar"]
     runner = LocalMRJobRunner(conf_paths=[], hadoop_extra_args=extra_args)
     self.assertEqual(runner._hadoop_conf_args({}, 0, 1), extra_args)
Example #34
    def test_empty_jobconf_values(self):
        # value of None means to omit that jobconf
        jobconf = {'foo': '', 'bar': None}
        runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf)

        self.assertEqual(runner._hadoop_conf_args({}, 0, 1), ['-D', 'foo='])
Example #35
 def test_hadoop_extra_args(self):
     extra_args = ['-foo', 'bar']
     runner = LocalMRJobRunner(conf_paths=[],
                               hadoop_extra_args=extra_args)
     self.assertEqual(runner._hadoop_conf_args({}, 0, 1), extra_args)
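A note on running these snippets: each example above is a single test method lifted out of its TestCase class, so it will not run on its own. Below is a minimal self-contained sketch built from Example #10. It assumes mrjob is installed, that LocalMRJobRunner is importable from mrjob.local, and that the installed version still exposes the private _hadoop_conf_args(step, step_num, num_steps) helper with the three-argument signature used in the newer examples (older snippets above call the two-argument form _hadoop_conf_args(step_num, num_steps)). The exact expected list is also version-dependent: some versions prepend a '-D mapred.job.name=...' pair, as in Examples #6 and #23.

import unittest

from mrjob.local import LocalMRJobRunner  # assumption: mrjob is installed


class HadoopConfArgsTestCase(unittest.TestCase):

    def test_cmdenv(self):
        # -cmdenv pairs are emitted in sorted key order (see Example #10);
        # the exact output depends on the installed mrjob version
        cmdenv = {'FOO': 'bar', 'BAZ': 'qux', 'BAX': 'Arnold'}
        runner = LocalMRJobRunner(conf_paths=[], cmdenv=cmdenv)
        self.assertEqual(runner._hadoop_conf_args({}, 0, 1),
                         ['-cmdenv', 'BAX=Arnold',
                          '-cmdenv', 'BAZ=qux',
                          '-cmdenv', 'FOO=bar'])


if __name__ == '__main__':
    unittest.main()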