def test_one_file(self):
    """Count lines of one file with ``--emulate-map-input-file`` on Spark.

    The output is expected to key the 2-line count by the input file's
    ``file://`` URI.
    """
    input_path = self.makefile('two_lines', b'line\nother line\n')
    cli_args = ['-r', 'spark', '--emulate-map-input-file', input_path]
    job = MRCountLinesByFile(cli_args)

    with job.make_runner() as runner:
        runner.run()

        raw_output = runner.cat_output()
        counts_by_file = dict(job.parse_output(raw_output))

        expected = {'file://' + input_path: 2}
        self.assertEqual(counts_by_file, expected)
def test_emulate_map_input_file_in_conf(self):
    """Enable ``emulate_map_input_file`` through the patched mrjob conf.

    No command-line switch is passed; the output is still expected to be
    keyed by the input file's ``file://`` URI.
    """
    spark_conf = {'runners': {'spark': {'emulate_map_input_file': True}}}
    self.start(mrjob_conf_patcher(spark_conf))

    input_path = self.makefile('two_lines', b'line\nother line\n')
    cli_args = ['-r', 'spark', input_path]
    job = MRCountLinesByFile(cli_args)

    with job.make_runner() as runner:
        runner.run()

        raw_output = runner.cat_output()
        counts_by_file = dict(job.parse_output(raw_output))

        expected = {'file://' + input_path: 2}
        self.assertEqual(counts_by_file, expected)
def test_override_emulate_map_input_file_in_conf(self):
    """``--no-emulate-map-input-file`` overrides the conf-file setting.

    The patched conf turns ``emulate_map_input_file`` on, while the command
    line turns it off again; the count is then expected under the ``None``
    key.
    """
    spark_conf = {'runners': {'spark': {'emulate_map_input_file': True}}}
    self.start(mrjob_conf_patcher(spark_conf))

    input_path = self.makefile('two_lines', b'line\nother line\n')
    cli_args = ['-r', 'spark', '--no-emulate-map-input-file', input_path]
    job = MRCountLinesByFile(cli_args)

    with job.make_runner() as runner:
        runner.run()

        raw_output = runner.cat_output()
        counts_by_file = dict(job.parse_output(raw_output))

        # without emulate_map_input_file, there is no input file path
        expected = {None: 2}
        self.assertEqual(counts_by_file, expected)
def test_input_dir(self):
    """Pass a directory as input and expect one count per file inside it.

    Each file's line count is expected under that file's own ``file://``
    URI, not under the directory path.
    """
    input_dir = self.makedirs('input')
    two_lines_path = self.makefile('input/two_lines', b'line 1\nline 2\n')
    three_lines_path = self.makefile('input/three_lines', b'A\nBB\nCCC\n')

    cli_args = ['-r', 'spark', '--emulate-map-input-file', input_dir]
    job = MRCountLinesByFile(cli_args)

    with job.make_runner() as runner:
        runner.run()

        raw_output = runner.cat_output()
        counts_by_file = dict(job.parse_output(raw_output))

        expected = {
            'file://' + two_lines_path: 2,
            'file://' + three_lines_path: 3,
        }
        self.assertEqual(counts_by_file, expected)
def test_files(self):
    """Run the job over three files, one of them empty, via ``run_job``.

    The expected output has an entry for each non-empty file and none for
    the empty one.
    """
    cat_file = self.makefile('cats.txt', b'cats are the best')
    dog_file = self.makefile('dogs.txt', b'woof woof woof\nwoof woof')
    empty_file = self.makefile('empty.txt')

    job = MRCountLinesByFile([cat_file, dog_file, empty_file])
    actual = run_job(job)

    expected = {
        'file://' + cat_file: 1,
        'file://' + dog_file: 2,
    }
    self.assertEqual(actual, expected)
def test_empty(self):
    """A job given no arguments is expected to produce an empty result."""
    job = MRCountLinesByFile([])
    actual = run_job(job)
    self.assertEqual(actual, {})