Exemplo n.º 1
0
    def __init__(self, filename):
        """Read and cache the leading bytes of ``filename`` for sniffing.

        Up to ``SNIFF_PREFIX_BYTES`` bytes are read in binary mode through
        ``compression_utils.get_fileobj_raw`` and then decoded as UTF-8.

        Attributes set on the instance:
            truncated: True when exactly ``SNIFF_PREFIX_BYTES`` bytes were read,
                i.e. the file may continue past the cached prefix.
            filename: the path that was passed in.
            non_utf8_error: the ``UnicodeDecodeError`` raised while decoding
                the prefix, or None when decoding succeeded.
            binary: True when the prefix could not be decoded as UTF-8.
            compressed_format: first element of the tuple returned by
                ``get_fileobj_raw`` (None if that call itself failed to decode).
            contents_header: decoded text of the prefix, or None on failure.
            contents_header_bytes: raw bytes of the prefix.

        Exceptions other than ``UnicodeDecodeError`` propagate to the caller.
        """
        # Function-scope import so this block stays self-contained.
        import codecs

        non_utf8_error = None
        compressed_format = None
        contents_header_bytes = None
        contents_header = None  # First SNIFF_PREFIX_BYTES of the file, decoded.
        truncated = False
        # A future direction to optimize sniffing even more for sniffers at the top of the list
        # is to lazy load contents_header based on what interface is requested. For instance instead
        # of returning a StringIO directly in string_io() return an object that reads the contents and
        # populates contents_header while providing a StringIO-like interface until the file is read
        # but then would fallback to native string_io()
        try:
            compressed_format, f = compression_utils.get_fileobj_raw(
                filename, "rb")
            try:
                contents_header_bytes = f.read(SNIFF_PREFIX_BYTES)
                truncated = len(contents_header_bytes) == SNIFF_PREFIX_BYTES
                # A prefix cut at a fixed byte count can end in the middle of a
                # multi-byte UTF-8 character. A strict one-shot decode would
                # then raise and mislabel valid text as binary. With
                # final=False the incremental decoder holds back an incomplete
                # trailing sequence instead of raising; invalid bytes anywhere
                # in the prefix still raise UnicodeDecodeError. When the whole
                # file was read (not truncated) decoding stays fully strict.
                decoder = codecs.getincrementaldecoder("utf-8")()
                contents_header = decoder.decode(
                    contents_header_bytes, final=not truncated)
            finally:
                f.close()
        except UnicodeDecodeError as e:
            non_utf8_error = e

        self.truncated = truncated
        self.filename = filename
        self.non_utf8_error = non_utf8_error
        # Failure to decode as UTF-8 is used as a proxy for "binary"; the
        # original author flagged this heuristic as "obviously wrong".
        self.binary = non_utf8_error is not None
        self.compressed_format = compressed_format
        self.contents_header = contents_header
        self.contents_header_bytes = contents_header_bytes
        self._file_size = None
Exemplo n.º 2
0
    def __init__(self, filename):
        """Cache the first ``SNIFF_PREFIX_BYTES`` of ``filename`` for sniffing.

        The file is opened via ``compression_utils.get_fileobj_raw`` using that
        helper's default mode. If a ``UnicodeDecodeError`` is raised while
        opening or reading, the instance is marked ``binary`` and
        ``contents_header`` is left as None. Any other exception propagates.
        """
        is_binary = False
        detected_compression = None
        header = None  # Leading SNIFF_PREFIX_BYTES of the file.
        hit_prefix_limit = False
        # Possible future optimisation for sniffers near the top of the list:
        # load the header lazily depending on which interface is requested.
        # For example, string_io() could return an object that reads the
        # contents and fills in contents_header while exposing a StringIO-like
        # interface, falling back to the native string_io() once the file has
        # been read.
        try:
            detected_compression, handle = compression_utils.get_fileobj_raw(filename)
            try:
                header = handle.read(SNIFF_PREFIX_BYTES)
                # Reading exactly the limit means the file may continue past it.
                hit_prefix_limit = SNIFF_PREFIX_BYTES == len(header)
            finally:
                handle.close()
        except UnicodeDecodeError:
            is_binary = True

        self.truncated = hit_prefix_limit
        self.filename = filename
        self.binary = is_binary
        self.compressed_format = detected_compression
        self.contents_header = header
        self._file_size = None
Exemplo n.º 3
0
 def assert_format_detected(self, path, expected_fmt, allowed_fmts=None):
     """Assert that ``get_fileobj_raw`` reports ``expected_fmt`` for ``path``.

     The file is opened once per mode in ``'r'``, ``'rb'``, ``'rt'`` and
     ``'U'``. For each mode the reported format must equal ``expected_fmt``
     and a zero-length read must return ``bytes`` for binary modes and
     ``str`` otherwise.

     Args:
         path: file to open.
         expected_fmt: value the first element of the returned tuple must equal.
         allowed_fmts: forwarded unchanged as the third positional argument
             of ``get_fileobj_raw``.

     Raises:
         AssertionError: if the format or the read type does not match.
     """
     # NOTE(review): builtin open() rejects mode 'U' since Python 3.11;
     # whether get_fileobj_raw still accepts it is not visible here - confirm.
     for mode in ['r', 'rb', 'rt', 'U']:
         expected_type = bytes if 'b' in mode else str
         fmt, fh = get_fileobj_raw(path, mode, allowed_fmts)
         try:
             assert fmt == expected_fmt
             assert isinstance(fh.read(0), expected_type)
         finally:
             # Always release the handle, including when an assertion fails;
             # previously every iteration leaked an open file object.
             fh.close()