def test_bad_sort_bin(self):
    # A job configured with a sort binary that always fails ('false')
    # must still produce correctly grouped output, and both the
    # check_call and _sort_lines_in_memory test doubles on self must
    # have been invoked.
    #
    # patching check_call to raise an exception causes pickling issues in
    # multiprocessing, so just use the false command
    stdin = BytesIO(b'apples\nbuffaloes\nbears')
    expected = [('a', ['apples']), ('b', ['buffaloes', 'bears'])]

    job = MRGroup(['-r', 'local', '--sort-bin', 'false'])
    job.sandbox(stdin=stdin)

    with job.make_runner() as runner:
        runner.run()

        actual = sorted(job.parse_output(runner.cat_output()))
        self.assertEqual(actual, expected)

    # the sort binary was attempted, and in-memory sorting was used too
    self.assertTrue(self.check_call.called)
    self.assertTrue(self._sort_lines_in_memory.called)
def _test_environment_variables(self, *args):
    # Inspect the ``env`` keyword argument of the most recent check_call
    # invocation: LC_ALL must be 'C', TMP and TMPDIR must both point at
    # the runner's local tmp dir, and TEMP must be absent.
    #
    # NOTE(review): the leading underscore means unittest's default
    # loader will not collect this method -- confirm whether it is
    # disabled on purpose or invoked from elsewhere.
    job = MRGroup(['-r', 'local'])
    job.sandbox(stdin=BytesIO(b'apples\nbuffaloes\nbears'))

    with job.make_runner() as runner:
        runner.run()

        # don't bother with output; already tested this above
        self.assertTrue(self.check_call.called)

        # call_args is (positional args, keyword args)
        call_kwargs = self.check_call.call_args[1]
        env = call_kwargs['env']

        self.assertEqual(env['LC_ALL'], 'C')

        # both temp-dir variables must match the runner's local tmp dir
        for tmp_var in ('TMP', 'TMPDIR'):
            self.assertEqual(env[tmp_var], runner._get_local_tmp_dir())

        # this was for Windows sort
        self.assertNotIn('TEMP', env)
def test_custom_sort_bin(self):
    # With ``--sort-bin 'sort -r'`` the values within each group are
    # expected in reverse order, and the command handed to check_call
    # must begin with the custom binary and its flag.
    stdin = BytesIO(b'apples\nbabies\nbuffaloes\nbears\nbicycles')
    expected = [
        ('a', ['apples']),
        ('b', ['buffaloes', 'bicycles', 'bears', 'babies']),
    ]

    job = MRGroup(['-r', 'local', '--sort-bin', 'sort -r'])
    job.sandbox(stdin=stdin)

    with job.make_runner() as runner:
        runner.run()

        actual = sorted(job.parse_output(runner.cat_output()))
        self.assertEqual(actual, expected)

    self.assertTrue(self.check_call.called)

    # first positional argument of the last check_call is the sort command
    positional = self.check_call.call_args[0]
    sort_args = positional[0]
    self.assertEqual(sort_args[:2], ['sort', '-r'])
def test_default_sort_bin(self):
    # Without ``--sort-bin`` the job must go through check_call (and not
    # through _sort_lines_in_memory), and the command must start with
    # ``sort -t <TAB> -k 1,1 -s``.
    stdin = BytesIO(b'apples\nbuffaloes\nbears')
    expected = [('a', ['apples']), ('b', ['buffaloes', 'bears'])]
    expected_sort_prefix = ['sort', '-t', '\t', '-k', '1,1', '-s']

    job = MRGroup(['-r', 'local'])
    job.sandbox(stdin=stdin)

    with job.make_runner() as runner:
        runner.run()

        actual = sorted(job.parse_output(runner.cat_output()))
        self.assertEqual(actual, expected)

    # sort binary used; in-memory sorting not used
    self.assertTrue(self.check_call.called)
    self.assertFalse(self._sort_lines_in_memory.called)

    # first positional argument of the last check_call is the sort command
    positional = self.check_call.call_args[0]
    sort_args = positional[0]
    self.assertEqual(sort_args[:6], expected_sort_prefix)