def test_per_step_jobconf_can_set_number_of_tasks(self):
    """Run the per-step jobconf job with ``--jobconf mapred.map.tasks=2``
    and check the ``mapper_init`` counter of each of the first two steps.
    """
    cli_args = ["-r", self.RUNNER, "--jobconf", "mapred.map.tasks=2"]
    job = MRTestPerStepJobConf(cli_args)

    # two map tasks require at least two items of input
    job.sandbox(StringIO("foo\nbar\n"))

    with job.make_runner() as runner:
        runner.run()

        def mapper_inits(step_num):
            # number of times mapper_init was counted for the given step
            return runner.counters()[step_num]["count"]["mapper_init"]

        # sanity test: --jobconf should definitely work
        self.assertEqual(mapper_inits(0), 2)

        # the job sets its own mapred.map.tasks to 4 for the 2nd step
        self.assertEqual(mapper_inits(1), 4)
def test_per_step_jobconf_can_set_number_of_tasks(self):
    """Run the per-step jobconf job with ``--jobconf mapred.map.tasks=2``
    on two lines of bytes input and check each step's ``mapper_init``
    counter.
    """
    job = MRTestPerStepJobConf(
        ['-r', self.RUNNER, '--jobconf', 'mapred.map.tasks=2'])

    # two map tasks require at least two items of input
    job.sandbox(BytesIO(b'foo\nbar\n'))

    # step 0: sanity test, --jobconf should definitely work (2 tasks)
    # step 1: the job sets its own mapred.map.tasks to 4
    expected_mapper_inits = (2, 4)

    with job.make_runner() as runner:
        runner.run()

        for step_num, expected in enumerate(expected_mapper_inits):
            self.assertEqual(
                runner.counters()[step_num]['count']['mapper_init'],
                expected)
def test_per_step_jobconf(self):
    """Pass ``user.defined=something`` via ``--jobconf`` and check the
    value the job reports for it under keys ``(0, ...)`` and ``(1, ...)``.
    """
    job = MRTestPerStepJobConf(
        ['-r', self.RUNNER, '--jobconf', 'user.defined=something'])
    job.sandbox()

    with job.make_runner() as runner:
        runner.run()

        # output keys are converted to tuples so they can be dict keys
        results = {
            tuple(out_key): out_value
            for out_key, out_value in job.parse_output(runner.cat_output())
        }

    # user.defined gets re-defined in the second step
    self.assertEqual(results[(0, 'user.defined')], 'something')
    self.assertEqual(results[(1, 'user.defined')], 'nothing')
def test_per_step_jobconf(self):
    """Pass ``user.defined=something`` via ``--jobconf`` and check the
    value the job reports for it under keys ``(0, ...)`` and ``(1, ...)``.

    Output is read line by line from ``runner.stream_output()`` and
    decoded with ``parse_output_line``.
    """
    cli_args = ["-r", self.RUNNER, "--jobconf", "user.defined=something"]
    job = MRTestPerStepJobConf(cli_args)
    job.sandbox()

    with job.make_runner() as runner:
        runner.run()

        # parse lazily, but consume everything while the runner is open
        parsed_pairs = map(job.parse_output_line, runner.stream_output())
        # output keys are converted to tuples so they can be dict keys
        results = {tuple(out_key): out_value
                   for out_key, out_value in parsed_pairs}

    # user.defined gets re-defined in the second step
    self.assertEqual(results[(0, "user.defined")], "something")
    self.assertEqual(results[(1, "user.defined")], "nothing")
def test_per_step_jobconf(self):
    """Pass ``user.defined=something`` via ``-D`` and check the value the
    job reports for it under keys ``(0, ...)`` and ``(1, ...)``.
    """
    cli_args = ['-r', self.RUNNER, '-D', 'user.defined=something']
    job = MRTestPerStepJobConf(cli_args)
    job.sandbox()

    with job.make_runner() as runner:
        runner.run()

        output_pairs = job.parse_output(runner.cat_output())
        # output keys are converted to tuples so they can be dict keys
        results = dict(
            (tuple(out_key), out_value)
            for out_key, out_value in output_pairs)

    # user.defined gets re-defined in the second step
    self.assertEqual(results[(0, 'user.defined')], 'something')
    self.assertEqual(results[(1, 'user.defined')], 'nothing')