def test_one_file(self):
    # A single input file, with --emulate-map-input-file passed on the
    # command line: the job should report that file's line count keyed
    # by its file:// URI.
    input_path = self.makefile('two_lines', b'line\nother line\n')

    args = ['-r', 'spark', '--emulate-map-input-file', input_path]
    job = MRCountLinesByFile(args)

    with job.make_runner() as runner:
        runner.run()

        # read the output while the runner is still open
        counts = dict(job.parse_output(runner.cat_output()))

        expected = {'file://' + input_path: 2}
        self.assertEqual(counts, expected)
def test_emulate_map_input_file_in_conf(self):
    # Same job as the command-line case, but emulate_map_input_file is
    # switched on through a patched mrjob.conf instead of a switch.
    conf = {'runners': {'spark': {'emulate_map_input_file': True}}}
    self.start(mrjob_conf_patcher(conf))

    input_path = self.makefile('two_lines', b'line\nother line\n')

    args = ['-r', 'spark', input_path]
    job = MRCountLinesByFile(args)

    with job.make_runner() as runner:
        runner.run()

        # read the output while the runner is still open
        counts = dict(job.parse_output(runner.cat_output()))

        expected = {'file://' + input_path: 2}
        self.assertEqual(counts, expected)
def test_override_emulate_map_input_file_in_conf(self):
    # The patched mrjob.conf turns emulate_map_input_file on, and the
    # command line turns it back off with --no-emulate-map-input-file.
    conf = {'runners': {'spark': {'emulate_map_input_file': True}}}
    self.start(mrjob_conf_patcher(conf))

    input_path = self.makefile('two_lines', b'line\nother line\n')

    args = ['-r', 'spark', '--no-emulate-map-input-file', input_path]
    job = MRCountLinesByFile(args)

    with job.make_runner() as runner:
        runner.run()

        # read the output while the runner is still open
        counts = dict(job.parse_output(runner.cat_output()))

        # without emulate_map_input_file, there is no input file path,
        # so both lines are counted under the key None
        expected = {None: 2}
        self.assertEqual(counts, expected)
def test_input_dir(self):
    # Pass a directory as input: with --emulate-map-input-file, each
    # file inside it should get its own line count, keyed by its
    # file:// URI.
    input_dir = self.makedirs('input')

    two_lines_path = self.makefile('input/two_lines', b'line 1\nline 2\n')
    three_lines_path = self.makefile('input/three_lines', b'A\nBB\nCCC\n')

    args = ['-r', 'spark', '--emulate-map-input-file', input_dir]
    job = MRCountLinesByFile(args)

    with job.make_runner() as runner:
        runner.run()

        # read the output while the runner is still open
        counts = dict(job.parse_output(runner.cat_output()))

        expected = {
            'file://' + two_lines_path: 2,
            'file://' + three_lines_path: 3,
        }
        self.assertEqual(counts, expected)