def test_multiple_output_pardo(self):
    """Run the multiple-output ParDo example and verify its three outputs.

    The sample text is written to a temporary file, the example is run
    with ``--input`` / ``--output`` pointing at that file, and then the
    ``-chars``, ``-short-words`` and ``-words`` outputs are compared with
    the expected values held on the test case.
    """
    input_path = self.create_temp_file(self.SAMPLE_TEXT)
    output_prefix = input_path + '.result'

    pipeline_args = [
        '--input=%s*' % input_path,
        '--output=%s' % output_prefix,
    ]
    multiple_output_pardo.run(pipeline_args)

    # The expected character total is the sample length with every
    # newline removed.
    expected_char_count = len(self.SAMPLE_TEXT.replace('\n', ''))
    # Shard names are matched with a wildcard pattern rather than a fixed
    # index; presumably open_shards reads every matching shard -- confirm
    # against its definition.
    with open_shards(output_prefix + '-chars-*-of-*') as chars_file:
        reported_char_count = int(chars_file.read())
    self.assertEqual(expected_char_count, reported_char_count)

    # Word outputs are compared order-insensitively by sorting both sides.
    short_word_counts = self.get_wordcount_results(
        output_prefix + '-short-words-*-of-*')
    self.assertEqual(
        sorted(short_word_counts), sorted(self.EXPECTED_SHORT_WORDS))

    word_counts = self.get_wordcount_results(
        output_prefix + '-words-*-of-*')
    self.assertEqual(sorted(word_counts), sorted(self.EXPECTED_WORDS))
def test_multiple_output_pardo(self):
    """Run the multiple-output ParDo example and verify its three outputs.

    The sample text is written to a temporary file, the example is run
    with ``--input`` / ``--output`` pointing at that file, and the test
    blocks on the returned result's ``wait_until_finish()`` before reading
    the ``-chars``, ``-short-words`` and ``-words`` output files.
    """
    input_path = self.create_temp_file(self.SAMPLE_TEXT)
    output_prefix = input_path + '.result'

    pipeline_args = [
        '--input=%s*' % input_path,
        '--output=%s' % output_prefix,
    ]
    pipeline_result = multiple_output_pardo.run(pipeline_args)
    # Block until the run has completed before touching the output files.
    pipeline_result.wait_until_finish()

    # NOTE(review): the file names below hard-code shard 00000 of 00001,
    # i.e. they assume each output is written as exactly one shard --
    # confirm this holds for the runner used by the test.
    chars_path = output_prefix + '-chars-00000-of-00001'
    short_words_path = output_prefix + '-short-words-00000-of-00001'
    words_path = output_prefix + '-words-00000-of-00001'

    # The expected character total is the sample length with every
    # newline removed.
    expected_char_count = len(self.SAMPLE_TEXT.replace('\n', ''))
    with open(chars_path) as chars_file:
        reported_char_count = int(chars_file.read())
    self.assertEqual(expected_char_count, reported_char_count)

    # Word outputs are compared order-insensitively by sorting both sides.
    short_word_counts = self.get_wordcount_results(short_words_path)
    self.assertEqual(
        sorted(short_word_counts), sorted(self.EXPECTED_SHORT_WORDS))

    word_counts = self.get_wordcount_results(words_path)
    self.assertEqual(sorted(word_counts), sorted(self.EXPECTED_WORDS))