def test_cat_output(self): a_dir_path = os.path.join(self.tmp_dir, 'a') b_dir_path = os.path.join(self.tmp_dir, 'b') l_dir_path = os.path.join(self.tmp_dir, '_logs') os.mkdir(a_dir_path) os.mkdir(b_dir_path) os.mkdir(l_dir_path) a_file_path = os.path.join(a_dir_path, 'part-00000') b_file_path = os.path.join(b_dir_path, 'part-00001') c_file_path = os.path.join(self.tmp_dir, 'part-00002') x_file_path = os.path.join(l_dir_path, 'log.xml') y_file_path = os.path.join(self.tmp_dir, '_SUCCESS') with open(a_file_path, 'w') as f: f.write('A') with open(b_file_path, 'w') as f: f.write('B') with open(c_file_path, 'w') as f: f.write('C') with open(x_file_path, 'w') as f: f.write('<XML XML XML/>') with open(y_file_path, 'w') as f: f.write('I win') runner = InlineMRJobRunner(conf_paths=[], output_dir=self.tmp_dir) self.assertEqual(sorted(to_lines(runner.cat_output())), [b'A', b'B', b'C'])
def test_cat_output(self): a_dir_path = os.path.join(self.tmp_dir, 'a') b_dir_path = os.path.join(self.tmp_dir, 'b') l_dir_path = os.path.join(self.tmp_dir, '_logs') os.mkdir(a_dir_path) os.mkdir(b_dir_path) os.mkdir(l_dir_path) a_file_path = os.path.join(a_dir_path, 'part-00000') b_file_path = os.path.join(b_dir_path, 'part-00001') c_file_path = os.path.join(self.tmp_dir, 'part-00002') x_file_path = os.path.join(l_dir_path, 'log.xml') y_file_path = os.path.join(self.tmp_dir, '_SUCCESS') with open(a_file_path, 'w') as f: f.write('A') with open(b_file_path, 'w') as f: f.write('B') with open(c_file_path, 'w') as f: f.write('C') with open(x_file_path, 'w') as f: f.write('<XML XML XML/>') with open(y_file_path, 'w') as f: f.write('I win') runner = InlineMRJobRunner(conf_paths=[], output_dir=self.tmp_dir) self.assertEqual(sorted(to_lines(runner.cat_output())), [b'A', b'B', b'C'])
def test_output_dir_not_considered_hidden(self): output_dir = os.path.join(self.tmp_dir, '_hidden', '_output_dir') self.makefile(os.path.join(output_dir, 'part-00000'), b'cats\n') runner = InlineMRJobRunner(conf_paths=[], output_dir=output_dir) self.assertEqual(sorted(to_lines(runner.cat_output())), [b'cats\n'])
class TestCatOutput(SandboxedTestCase): def setUp(self): super(TestCatOutput, self).setUp() self.output_dir = os.path.join(self.tmp_dir, 'job_output') os.mkdir(self.output_dir) self.runner = InlineMRJobRunner( conf_paths=[], output_dir=self.output_dir) def test_empty(self): self.assertEqual(list(self.runner.cat_output()), []) def test_typical_output(self): # actual output self.makefile(os.path.join(self.output_dir, 'part-00000'), b'line0\n') self.makefile(os.path.join(self.output_dir, 'part-00001'), b'line1\n') # hidden .crc file self.makefile(os.path.join(self.output_dir, '.crc.part-00000'), b'42\n') # hidden _SUCCESS file (ignore) self.makefile(os.path.join(self.output_dir, '_SUCCESS'), b'such a relief!\n') # hidden _logs dir self.makefile(os.path.join(self.output_dir, '_logs', 'log.xml'), b'pretty much the usual\n') self.assertEqual(sorted(to_lines(self.runner.cat_output())), [b'line0\n', b'line1\n']) def test_output_in_subdirs(self): # test for output being placed in subdirs, for example with nicknack self.makefile(os.path.join(self.output_dir, 'a', 'part-00000'), b'line-a0\n') self.makefile(os.path.join(self.output_dir, 'a', 'part-00001'), b'line-a1\n') self.makefile(os.path.join(self.output_dir, 'b', 'part-00000'), b'line-b0\n') self.makefile(os.path.join(self.output_dir, 'b', '.crc.part-00000'), b'42\n') self.assertEqual(sorted(to_lines(self.runner.cat_output())), [b'line-a0\n', b'line-a1\n', b'line-b0\n']) def test_read_all_non_hidden_files(self): self.makefile(os.path.join(self.output_dir, 'baz'), b'qux\n') self.makefile(os.path.join(self.output_dir, 'foo', 'bar'), b'baz\n') self.assertEqual(sorted(to_lines(self.runner.cat_output())), [b'baz\n', b'qux\n']) def test_empty_string_between_files(self): self.makefile(os.path.join(self.output_dir, 'part-00000'), b'A') self.makefile(os.path.join(self.output_dir, 'part-00001'), b'\n') self.makefile(os.path.join(self.output_dir, 'part-00002'), b'C') # order isn't guaranteed, but there should be 3 chunks separated # by two empty strings chunks = list(self.runner.cat_output()) self.assertEqual(len(chunks), 5) self.assertEqual(chunks[1], b'') self.assertEqual(chunks[3], b'') def test_output_dir_not_considered_hidden(self): output_dir = os.path.join(self.tmp_dir, '_hidden', '_output_dir') self.makefile(os.path.join(output_dir, 'part-00000'), b'cats\n') runner = InlineMRJobRunner(conf_paths=[], output_dir=output_dir) self.assertEqual(sorted(to_lines(runner.cat_output())), [b'cats\n'])
class TestCatOutput(SandboxedTestCase): def setUp(self): super(TestCatOutput, self).setUp() self.output_dir = os.path.join(self.tmp_dir, 'job_output') os.mkdir(self.output_dir) self.runner = InlineMRJobRunner( conf_paths=[], output_dir=self.output_dir) def test_empty(self): self.assertEqual(list(self.runner.cat_output()), []) def test_typical_output(self): # actual output self.makefile(os.path.join(self.output_dir, 'part-00000'), b'line0\n') self.makefile(os.path.join(self.output_dir, 'part-00001'), b'line1\n') # hidden .crc file self.makefile(os.path.join(self.output_dir, '.crc.part-00000'), b'42\n') # hidden _SUCCESS file (ignore) self.makefile(os.path.join(self.output_dir, '_SUCCESS'), b'such a relief!\n') # hidden _logs dir self.makefile(os.path.join(self.output_dir, '_logs', 'log.xml'), b'pretty much the usual\n') self.assertEqual(sorted(to_lines(self.runner.cat_output())), [b'line0\n', b'line1\n']) def test_output_in_subdirs(self): # test for output being placed in subdirs, for example with nicknack self.makefile(os.path.join(self.output_dir, 'a', 'part-00000'), b'line-a0\n') self.makefile(os.path.join(self.output_dir, 'a', 'part-00001'), b'line-a1\n') self.makefile(os.path.join(self.output_dir, 'b', 'part-00000'), b'line-b0\n') self.makefile(os.path.join(self.output_dir, 'b', '.crc.part-00000'), b'42\n') self.assertEqual(sorted(to_lines(self.runner.cat_output())), [b'line-a0\n', b'line-a1\n', b'line-b0\n']) def test_read_all_non_hidden_files(self): self.makefile(os.path.join(self.output_dir, 'baz'), b'qux\n') self.makefile(os.path.join(self.output_dir, 'foo', 'bar'), b'baz\n') self.assertEqual(sorted(to_lines(self.runner.cat_output())), [b'baz\n', b'qux\n']) def test_empty_string_between_files(self): self.makefile(os.path.join(self.output_dir, 'part-00000'), b'A') self.makefile(os.path.join(self.output_dir, 'part-00001'), b'\n') self.makefile(os.path.join(self.output_dir, 'part-00002'), b'C') # order isn't guaranteed, but there should be 3 chunks separated # by two empty strings chunks = list(self.runner.cat_output()) self.assertEqual(len(chunks), 5) self.assertEqual(chunks[1], b'') self.assertEqual(chunks[3], b'') def test_output_dir_not_considered_hidden(self): output_dir = os.path.join(self.tmp_dir, '_hidden', '_output_dir') self.makefile(os.path.join(output_dir, 'part-00000'), b'cats\n') runner = InlineMRJobRunner(conf_paths=[], output_dir=output_dir) self.assertEqual(sorted(to_lines(runner.stream_output())), [b'cats\n']) def test_deprecated_stream_output(self): self.makefile(os.path.join(self.output_dir, 'part-00000'), b'1\n2') self.makefile(os.path.join(self.output_dir, 'part-00001'), b'3\n4\n') log = self.start(patch('mrjob.runner.log')) # should group output into lines, but not join across files self.assertEqual(sorted(self.runner.stream_output()), [b'1\n', b'2', b'3\n', b'4\n']) # should issue deprecation warning self.assertEqual(log.warning.call_count, 1)