Exemplo n.º 1
0
 def _open(self):
     """
     Open the stream for ``self._fileid`` and store it on ``self._stream``.

     Three kinds of source are handled:

     - a ``PathPointer`` whose string form matches the ``.gz`` suffix
       pattern is opened with ``gzip.open`` in binary mode
       (``self._encoding`` is not consulted on this path);
     - any other ``PathPointer`` is opened through its own ``open``
       method, which receives ``self._encoding``;
     - anything else is handed to the builtin ``open`` in binary mode
       and, when ``self._encoding`` is truthy, wrapped in a
       ``SeekableUnicodeStreamReader`` using that encoding.
     """
     source = self._fileid

     if isinstance(source, PathPointer):
         path = str(source)
         if re.match(r'.*\.gz$', path) is None:
             # Not gzip-named: let the pointer open itself.
             self._stream = source.open(self._encoding)
         else:
             self._stream = gzip.open(path, 'rb')
         return

     codec = self._encoding
     if not codec:
         # No encoding configured: expose the raw binary file object.
         self._stream = open(source, 'rb')
         return
     self._stream = SeekableUnicodeStreamReader(open(source, 'rb'), codec)
Exemplo n.º 2
0
 def _open(self):
     """
     Open the file stream backing this corpus view.

     This is invoked when a value is read from the view while its
     file stream is closed.  The resulting stream is stored on
     ``self._stream``:

     - if ``self._fileid`` is a ``PathPointer``, the pointer's own
       ``open`` method is used and receives ``self._encoding``;
     - otherwise ``self._fileid`` is opened with the builtin ``open``
       in binary mode, and wrapped in a ``SeekableUnicodeStreamReader``
       when ``self._encoding`` is truthy.
     """
     fileid = self._fileid
     if isinstance(fileid, PathPointer):
         opened = fileid.open(self._encoding)
     else:
         encoding = self._encoding
         opened = open(fileid, "rb")
         if encoding:
             # Decode on the fly using the configured encoding.
             opened = SeekableUnicodeStreamReader(opened, encoding)
     self._stream = opened
Exemplo n.º 3
0
    assert list(c3[s:]) == l3[s:]
# Every prefix slice c3[:e] must equal the same slice of l3
# (l3 is presumably the reference list built earlier -- defined outside this view).
for e in indices:
    assert list(c3[:e]) == l3[:e]
# Full-slice comparison; the boolean result is printed rather than asserted.
print(list(c3[:]) == list(l3[:]))
# Rebind c3 to a new view over f3, passing read_whitespace_block as the
# second constructor argument.
c3 = StreamBackedCorpusView(f3, read_whitespace_block)
# Four independent iterators over the same view, obtained from
# iterate_from() with the arguments 0, 15, 30 and 45.
iterators = [c3.iterate_from(n) for n in [0, 15, 30, 45]]
# Advance the iterators in lockstep for 15 rounds: each round pulls one item
# from every iterator in turn, so their reads on the shared view interleave.
# NOTE(review): next() raises StopIteration if any iterator yields fewer
# than 15 items -- confirm the fixture is long enough.
for i in range(15):
    for iterator in iterators:
        print('%-15s' % next(iterator))  # left-justified, padded to 15 columns
    print()  # blank line between rounds
# SeekableUnicodeStreamReader
# Walk through read / seek / readline / iteration / readlines / close on a
# reader wrapped around an in-memory ASCII byte stream.  Statement order
# matters: each call continues from the position left by the previous one.
# The decode/encode pair round-trips the bytes literal through str and back
# to ASCII bytes before it is wrapped in BytesIO.
stream = BytesIO(b"""
This is a test file.
It is encoded in ascii.
""".decode('ascii').encode('ascii'))
reader = SeekableUnicodeStreamReader(stream, 'ascii')
print(reader.read())  # read the entire file.
print(reader.seek(0))  # rewind to the start (prints seek()'s return value).
print(reader.read(5))  # read at most 5 bytes.
print(reader.readline())  # read to the end of the line.
print(reader.seek(0))  # rewind to the start.
for line in reader:
    print(repr(line))  # iterate over lines
print(reader.seek(0))  # rewind to the start.
print(reader.readlines())  # read a list of line strings
print(reader.close())  # close the reader (prints close()'s return value).
# size argument to read()
# Rebind `stream` to an in-memory fixture whose text is decoded from the
# ASCII bytes literal and re-encoded as UTF-16 before being wrapped in
# BytesIO.  Presumably this feeds the read(size) checks that follow; the
# code that consumes it lies outside this view.
stream = BytesIO(b"""
This is a test file.
It is encoded in utf-16.
""".decode('ascii').encode('utf-16'))