Пример #1
0
    def test_per_step_jobconf_can_set_number_of_tasks(self):
        """Check that map task counts honor --jobconf and per-step overrides.

        The first step is expected to report 2 ``mapper_init`` counts (from
        ``mapred.map.tasks=2`` on the command line); the second step is
        expected to report 4, the value the job sets for itself.
        """
        cli_args = ["-r", self.RUNNER, "--jobconf", "mapred.map.tasks=2"]
        job = MRTestPerStepJobConf(cli_args)
        # two input records are the minimum needed to get two map tasks
        # NOTE(review): stdin is supplied as text here -- confirm sandbox()
        # accepts a text stream on the mrjob/Python version being targeted.
        job.sandbox(StringIO("foo\nbar\n"))

        with job.make_runner() as job_runner:
            job_runner.run()

            # sanity check: the command-line --jobconf must take effect
            first_step = job_runner.counters()[0]
            self.assertEqual(first_step["count"]["mapper_init"], 2)

            # step two overrides mapred.map.tasks with its own value of 4
            second_step = job_runner.counters()[1]
            self.assertEqual(second_step["count"]["mapper_init"], 4)
Пример #2
0
    def test_per_step_jobconf_can_set_number_of_tasks(self):
        """Check that map task counts honor --jobconf and per-step overrides.

        Step one should report 2 ``mapper_init`` counts, matching the
        ``mapred.map.tasks=2`` passed on the command line. Step two should
        report 4, the value the job itself configures for that step.
        """
        cli_args = ['-r', self.RUNNER, '--jobconf', 'mapred.map.tasks=2']
        job = MRTestPerStepJobConf(cli_args)

        # at least two input records are required to get two map tasks
        stdin = BytesIO(b'foo\nbar\n')
        job.sandbox(stdin)

        with job.make_runner() as job_runner:
            job_runner.run()

            # sanity check: the command-line --jobconf must take effect
            first_step = job_runner.counters()[0]
            self.assertEqual(first_step['count']['mapper_init'], 2)

            # the job assigns mapred.map.tasks=4 to its second step
            second_step = job_runner.counters()[1]
            self.assertEqual(second_step['count']['mapper_init'], 4)
Пример #3
0
    def test_per_step_jobconf(self):
        """Check that step two replaces a value supplied via --jobconf.

        ``user.defined`` is passed as ``something`` on the command line and
        is expected to read ``nothing`` in the second step. Output keys look
        like (step index, jobconf name) pairs -- inferred from the lookups
        below, confirm against MRTestPerStepJobConf.
        """
        cli_args = ['-r', self.RUNNER, '--jobconf', 'user.defined=something']
        job = MRTestPerStepJobConf(cli_args)
        job.sandbox()

        with job.make_runner() as job_runner:
            job_runner.run()

            # keys are turned into tuples so they can serve as dict keys
            parsed = job.parse_output(job_runner.cat_output())
            results = {tuple(key): value for key, value in parsed}

        # the second step redefines user.defined
        step_one_value = results[(0, 'user.defined')]
        self.assertEqual(step_one_value, 'something')
        step_two_value = results[(1, 'user.defined')]
        self.assertEqual(step_two_value, 'nothing')
Пример #4
0
    def test_per_step_jobconf(self):
        """Check that step two replaces a value supplied via --jobconf.

        ``user.defined`` is passed as ``something`` on the command line and
        is expected to read ``nothing`` in the second step.
        """
        cli_args = ["-r", self.RUNNER, "--jobconf", "user.defined=something"]
        job = MRTestPerStepJobConf(cli_args)
        job.sandbox()

        with job.make_runner() as job_runner:
            job_runner.run()

            # NOTE(review): newer mrjob releases appear to favor
            # cat_output()/parse_output() over this line-based API --
            # confirm which mrjob version this test targets.
            parsed = (
                job.parse_output_line(raw_line)
                for raw_line in job_runner.stream_output()
            )
            # keys are turned into tuples so they can serve as dict keys
            results = {tuple(key): value for key, value in parsed}

        # the second step redefines user.defined
        step_one_value = results[(0, "user.defined")]
        self.assertEqual(step_one_value, "something")
        step_two_value = results[(1, "user.defined")]
        self.assertEqual(step_two_value, "nothing")
Пример #5
0
    def test_per_step_jobconf(self):
        """Check that a --jobconf value is overridden for the second step.

        The command line sets ``user.defined=something``; the first step is
        expected to see that value and the second step to see ``nothing``.
        """
        job = MRTestPerStepJobConf([
            '-r', self.RUNNER,
            '--jobconf', 'user.defined=something',
        ])
        job.sandbox()

        with job.make_runner() as job_runner:
            job_runner.run()

            # collect the output, keyed by each record's key as a tuple
            output_pairs = job.parse_output(job_runner.cat_output())
            results = {tuple(key): value for key, value in output_pairs}

        # the second step redefines user.defined
        expected_by_step = (
            ((0, 'user.defined'), 'something'),
            ((1, 'user.defined'), 'nothing'),
        )
        for result_key, expected_value in expected_by_step:
            self.assertEqual(results[result_key], expected_value)
Пример #6
0
    def test_per_step_jobconf_can_set_number_of_tasks(self):
        """Check that the number of map tasks can be set globally and per step.

        ``mapred.map.tasks=2`` from the command line should yield 2
        ``mapper_init`` counts in step one; the job's own setting of 4 should
        apply to step two.
        """
        job = MRTestPerStepJobConf(
            ['-r', self.RUNNER, '--jobconf', 'mapred.map.tasks=2'])

        # at least two input records are required to get two map tasks
        two_line_input = BytesIO(b'foo\nbar\n')
        job.sandbox(two_line_input)

        with job.make_runner() as job_runner:
            job_runner.run()

            # sanity check: the command-line --jobconf must take effect
            inits_in_step_one = job_runner.counters()[0]['count']['mapper_init']
            self.assertEqual(inits_in_step_one, 2)

            # the job assigns mapred.map.tasks=4 to its second step
            inits_in_step_two = job_runner.counters()[1]['count']['mapper_init']
            self.assertEqual(inits_in_step_two, 4)