def test_long_lines(self):
    """Lines much longer than one chunk are reassembled intact.

    The text is fed in as 1024-character slices, so the first line is
    spread across several consecutive chunks.
    """
    chunk_size = 1024
    expected = ['a' * 10000 + '\n', 'b' * 1000 + '\n', 'last\n']
    text = ''.join(expected)
    chunks = (text[start:start + chunk_size]
              for start in range(0, len(text), chunk_size))

    lines = list(buffer_iterator_to_line_iterator(chunks))

    self.assertEqual(lines, expected)
def test_add_trailing_newline(self):
    """A final line with no newline is returned with one appended."""
    chunks = [b'Alouette,\ngentille', b' Alouette.']
    expected = [b'Alouette,\n', b'gentille Alouette.\n']

    lines = list(buffer_iterator_to_line_iterator(c for c in chunks))

    self.assertEqual(lines, expected)
def test_buffered_lines(self):
    """Lines split across chunk boundaries are stitched back together."""
    # 'jumped' and 'dogs.' are each cut in two by a chunk boundary.
    chunks = [
        'The quick\nbrown fox\nju',
        'mped over\nthe lazy\ndog',
        's.\n',
    ]
    expected = [
        'The quick\n',
        'brown fox\n',
        'jumped over\n',
        'the lazy\n',
        'dogs.\n',
    ]

    lines = list(buffer_iterator_to_line_iterator(c for c in chunks))

    self.assertEqual(lines, expected)
def test_buffered_lines(self):
    """Chunks that end mid-line still produce whole lines."""
    pieces = ['The quick\nbrown fox\nju', 'mped over\nthe lazy\ndog', 's.\n']

    # Hand the pieces over lazily, one at a time, via a generator.
    result = list(
        buffer_iterator_to_line_iterator(piece for piece in pieces))

    self.assertEqual(
        result,
        ['The quick\n', 'brown fox\n', 'jumped over\n', 'the lazy\n',
         'dogs.\n'])
def test_buffered_lines(self):
    """Byte chunks that break mid-line are joined into complete lines."""
    # b"jumped" and b"dogs." each straddle a chunk boundary.
    chunks = [
        b"The quick\nbrown fox\nju",
        b"mped over\nthe lazy\ndog",
        b"s.\n",
    ]
    expected = [
        b"The quick\n",
        b"brown fox\n",
        b"jumped over\n",
        b"the lazy\n",
        b"dogs.\n",
    ]

    lines = list(buffer_iterator_to_line_iterator(c for c in chunks))

    self.assertEqual(lines, expected)
def test_long_lines(self):
    """Byte lines much longer than one chunk are reassembled intact.

    The data is fed in as 1024-byte slices, so the first line is spread
    across several consecutive chunks.
    """
    chunk_size = 1024
    expected = [b"a" * 10000 + b"\n", b"b" * 1000 + b"\n", b"last\n"]
    data = b"".join(expected)
    chunks = (
        data[offset:offset + chunk_size]
        for offset in range(0, len(data), chunk_size)
    )

    lines = list(buffer_iterator_to_line_iterator(chunks))

    self.assertEqual(lines, expected)
def _cat_file(self, filename):
    """Return the contents of the S3 key for *filename*, split into lines.

    The key is first copied in full into an in-memory file object, which is
    then handed to read_file(); the resulting buffers are passed through
    buffer_iterator_to_line_iterator().
    """
    key = self.get_s3_key(filename)

    # Download the key into memory and rewind so reading starts at offset 0.
    contents = StringIO()
    key.get_file(contents)
    contents.seek(0)

    chunks = read_file(s3_key_to_uri(key), fileobj=contents)
    return buffer_iterator_to_line_iterator(chunks)
def test_deprecated_alias(self):
    """The function still splits lines and logs a rename notice.

    Output of the 'mrjob.util' logger is captured in memory so the
    'has been renamed' message can be checked.
    """
    chunks = [b'The quick\nbrown fox\njumped over\nthe lazy\ndogs.\n']
    expected = [
        b'The quick\n',
        b'brown fox\n',
        b'jumped over\n',
        b'the lazy\n',
        b'dogs.\n',
    ]

    with no_handlers_for_logger('mrjob.util'):
        log_output = StringIO()
        log_to_stream('mrjob.util', log_output)

        lines = list(buffer_iterator_to_line_iterator(c for c in chunks))

        self.assertEqual(lines, expected)
        self.assertIn('has been renamed', log_output.getvalue())
def test_deprecated_alias(self):
    """Lines split across chunks are joined, and a rename notice is logged.

    Output of the 'mrjob.util' logger is captured in memory so the
    'has been renamed' message can be checked.
    """
    chunks = [
        b'The quick\nbrown fox\nju',
        b'mped over\nthe lazy\ndog',
        b's.\n',
    ]
    expected = [
        b'The quick\n',
        b'brown fox\n',
        b'jumped over\n',
        b'the lazy\n',
        b'dogs.\n',
    ]

    with no_handlers_for_logger('mrjob.util'):
        log_output = StringIO()
        log_to_stream('mrjob.util', log_output)

        lines = list(buffer_iterator_to_line_iterator(c for c in chunks))

        self.assertEqual(lines, expected)
        self.assertIn('has been renamed', log_output.getvalue())
def _cat_file(self, filename):
    """Return the contents of the S3 key for *filename*, split into lines.

    The key object itself is passed to read_file() as the file object, and
    the resulting buffers go through buffer_iterator_to_line_iterator().
    """
    key = self.get_s3_key(filename)
    uri = s3_key_to_uri(key)
    chunks = read_file(uri, fileobj=key)
    return buffer_iterator_to_line_iterator(chunks)
def _cat_file(self, filename):
    """Look up the S3 key for *filename* and return its contents as lines.

    read_file() is given the key's URI and the key itself as the file
    object; its output is split by buffer_iterator_to_line_iterator().
    """
    key = self.get_s3_key(filename)
    return buffer_iterator_to_line_iterator(
        read_file(s3_key_to_uri(key), fileobj=key))
def test_empty(self):
    """An iterator that yields no chunks produces no lines."""
    no_chunks = (chunk for chunk in ())
    lines = list(buffer_iterator_to_line_iterator(no_chunks))
    self.assertEqual(lines, [])
def test_add_trailing_newline(self):
    """A final line with no newline is returned with one appended."""
    chunks = ['Alouette,\ngentille', ' Alouette.']
    expected = ['Alouette,\n', 'gentille Alouette.\n']

    lines = list(buffer_iterator_to_line_iterator(c for c in chunks))

    self.assertEqual(lines, expected)
def test_empty(self):
    """With no input chunks at all, the result is an empty list of lines."""
    empty_input = (piece for piece in ())
    result = list(buffer_iterator_to_line_iterator(empty_input))
    self.assertEqual(result, [])
def test_add_trailing_newline(self):
    """Input ending without a newline still yields a newline-ended line."""
    pieces = [b"Alouette,\ngentille", b" Alouette."]

    result = list(
        buffer_iterator_to_line_iterator(piece for piece in pieces)
    )

    self.assertEqual(result, [b"Alouette,\n", b"gentille Alouette.\n"])