def test_gh606():
    """Check ``textblock`` on UTF-16-LE data read through an open file handle.

    The temporary file holds three lines -- ten euro signs, ten yen signs and
    ten euro signs -- joined by ``os.linesep`` and encoded as UTF-16-LE.  The
    results of ``textblock`` are compared against encoded bytes.

    NOTE(review): the name presumably refers to GitHub issue #606 -- confirm.
    """
    encoding = 'utf-16-le'
    euro = u'\u20ac'
    yen = u'\u00a5'
    linesep = os.linesep

    euro_line = euro * 10
    yen_line = yen * 10
    data = euro_line + linesep + yen_line + linesep + euro_line
    bin_data = data.encode(encoding)

    # Encoded length of the first line together with its separator.  Both
    # calls below use the same value, so it is computed only once.
    stop = len(euro.encode(encoding)) * 10 + len(linesep.encode(encoding))

    with tmpfile() as fn:
        with open(fn, 'w+b') as f:
            f.write(bin_data)
            # Rewind so the handle passed to ``textblock`` starts at the top.
            f.seek(0)

            res = textblock(f, 1, stop, encoding=encoding)
            assert res == (yen_line + linesep).encode(encoding)

            res = textblock(f, 0, stop, encoding=encoding)
            expected = (euro_line + linesep + yen_line + linesep).encode(encoding)
            assert res == expected
def test_gh606():
    """Check ``textblock`` on a UTF-16-LE file that is addressed by its path.

    The temporary file holds three lines -- ten euro signs, ten yen signs and
    ten euro signs -- joined by ``os.linesep`` and encoded as UTF-16-LE.  The
    pieces produced by ``textblock`` are joined and re-encoded before being
    compared with the expected bytes.

    NOTE(review): the name presumably refers to GitHub issue #606 -- confirm.
    """
    encoding = 'utf-16-le'
    euro = u'\u20ac'
    yen = u'\u00a5'
    linesep = os.linesep

    euro_line = euro * 10
    yen_line = yen * 10
    data = euro_line + linesep + yen_line + linesep + euro_line
    bin_data = data.encode(encoding)

    # Encoded length of the first line plus its separator, plus one.  Both
    # calls below use the same value, so it is computed only once.
    stop = len(euro.encode(encoding)) * 10 + len(linesep.encode(encoding)) + 1

    with tmpfile() as fn:
        # The writing handle is closed before ``textblock`` is given the
        # path, so the data has been flushed by then.
        with open(fn, 'wb') as f:
            f.write(bin_data)

        res = ''.join(textblock(fn, 1, stop, encoding=encoding)).encode(encoding)
        assert res == (yen_line + linesep).encode(encoding)

        res = ''.join(textblock(fn, 0, stop, encoding=encoding)).encode(encoding)
        expected = (euro_line + linesep + yen_line + linesep).encode(encoding)
        assert res == expected
def test_textblock(myopen, compression):
    """Check ``textblock`` on a six-line file written through ``myopen``.

    ``myopen`` is forwarded to ``filetext`` as its ``open`` argument and
    ``compression`` is forwarded positionally to ``textblock``.
    NOTE(review): both are presumably supplied by pytest parametrization that
    is not visible in this block -- confirm.
    """
    nl = os.linesep
    raw = b'123 456 789 abc def ghi'.replace(b' ', nl.encode())

    with filetext(raw, open=myopen, mode='wb') as fn:

        def read(start, stop):
            # Join the pieces yielded by ``textblock`` and re-encode them.
            return ''.join(textblock(fn, start, stop, compression)).encode()

        block = read(1, 11)
        assert block == ('456 789 '.replace(' ', nl)).encode()
        # Every whitespace-separated token in the block is three bytes long.
        assert {len(token) for token in block.split()} == {3}

        k = 3 + len(nl)
        first_line = read(0, k)
        assert first_line == ('123' + nl).encode()
        nothing = read(k, k)
        assert nothing == b''
def test_textblock(myopen, compression):
    """Check ``textblock`` on a six-line file written through ``myopen``.

    ``myopen`` is forwarded to ``filetext`` as its ``open`` argument and
    ``compression`` is forwarded positionally to ``textblock``.
    NOTE(review): both are presumably supplied by pytest parametrization that
    is not visible in this block -- confirm.
    """
    nl = os.linesep
    raw = b"123 456 789 abc def ghi".replace(b" ", nl.encode())

    with filetext(raw, open=myopen, mode="wb") as fn:
        pieces = textblock(fn, 1, 11, compression)
        block = "".join(pieces).encode()
        assert block == ("456 789 ".replace(" ", nl)).encode()
        # Every whitespace-separated token in the block is three bytes long.
        token_lengths = {len(token) for token in block.split()}
        assert token_lengths == {3}

        k = 3 + len(nl)
        first_line = "".join(textblock(fn, 0, k, compression)).encode()
        assert first_line == ("123" + nl).encode()

        nothing = "".join(textblock(fn, k, k, compression)).encode()
        assert nothing == b""
def test_textblock():
    """Check ``textblock`` against an open binary handle of a six-line file.

    Besides comparing the returned bytes with literal expectations, the test
    also checks that passing the handle and passing the path give the same
    result for the same range.
    """
    nl = os.linesep
    raw = b'123 456 789 abc def ghi'.replace(b' ', nl.encode())

    with filetext(raw, mode='wb') as fn:
        with open(fn, 'rb') as f:
            block = textblock(f, 1, 11)
            assert block == ('456 789 '.replace(' ', nl)).encode()
            # Every whitespace-separated token is three bytes long.
            assert {len(token) for token in block.split()} == {3}

            # Handle-based and path-based calls must agree.
            via_handle = textblock(f, 1, 10)
            via_path = textblock(fn, 1, 10)
            assert via_handle == via_path

            first_line = textblock(f, 0, 3)
            assert first_line == ('123' + nl).encode()
            nothing = textblock(f, 3, 3)
            assert nothing == b''
def test_textblock_multibyte_linesep():
    """Check ``textblock`` with a CRLF line separator and ``buffersize=2``.

    The file contains ``12``, ``34``, ``56`` and ``78`` joined by CRLF; the
    range 5..13 is expected to yield the last two lines.
    """
    raw = b'12 34 56 78'.replace(b' ', b'\r\n')
    with filetext(raw, mode='wb') as fn:
        got = []
        for line in textblock(fn, 5, 13, linesep='\r\n', buffersize=2):
            got.append(line.encode())
        wanted = [line.encode() for line in ('56\r\n', '78')]
        assert got == wanted
def test_gh606():
    """Check ``textblock`` on a UTF-16-LE file that is addressed by its path.

    The temporary file holds three lines -- ten euro signs, ten yen signs and
    ten euro signs -- joined by ``os.linesep`` and encoded as UTF-16-LE.  The
    pieces produced by ``textblock`` are joined and re-encoded before being
    compared with the expected bytes.

    NOTE(review): the name presumably refers to GitHub issue #606 -- confirm.
    """
    encoding = "utf-16-le"
    euro = u"\u20ac"
    yen = u"\u00a5"
    linesep = os.linesep

    euro_line = euro * 10
    yen_line = yen * 10
    data = euro_line + linesep + yen_line + linesep + euro_line
    bin_data = data.encode(encoding)

    # Encoded length of the first line plus its separator, plus one.  Both
    # calls below use the same value, so it is computed only once.
    stop = len(euro.encode(encoding)) * 10 + len(linesep.encode(encoding)) + 1

    with tmpfile() as fn:
        # The writing handle is closed before ``textblock`` is given the
        # path, so the data has been flushed by then.
        with open(fn, "wb") as f:
            f.write(bin_data)

        res = "".join(textblock(fn, 1, stop, encoding=encoding)).encode(encoding)
        assert res == (yen_line + linesep).encode(encoding)

        res = "".join(textblock(fn, 0, stop, encoding=encoding)).encode(encoding)
        expected = (euro_line + linesep + yen_line + linesep).encode(encoding)
        assert res == expected
def test_textblock_multibyte_linesep():
    """Check ``textblock`` with a CRLF line separator and ``buffersize=2``.

    The file contains ``12``, ``34``, ``56`` and ``78`` joined by CRLF; the
    range 5..13 is expected to yield the last two lines.
    """
    raw = b'12 34 56 78'.replace(b' ', b'\r\n')
    with filetext(raw, mode='wb') as fn:
        lines = textblock(fn, 5, 13, linesep='\r\n', buffersize=2)
        got = [line.encode() for line in lines]
        wanted = [line.encode() for line in ('56\r\n', '78')]
        assert got == wanted
def test_textblock_multibyte_linesep():
    """Check ``textblock`` with a CRLF line separator and ``buffersize=2``.

    The file contains ``12``, ``34``, ``56`` and ``78`` joined by CRLF; the
    range 5..13 is expected to yield the last two lines.
    """
    crlf = "\r\n"
    raw = b"12 34 56 78".replace(b" ", b"\r\n")
    with filetext(raw, mode="wb") as fn:
        got = [
            line.encode()
            for line in textblock(fn, 5, 13, linesep=crlf, buffersize=2)
        ]
        wanted = [line.encode() for line in ("56\r\n", "78")]
        assert got == wanted