示例#1
0
def guess_file_type(filename):
    """Guess the MIME type of a file (deprecated entry point).

    A DeprecationWarning is issued on every call. The type is first looked up
    from the file name with mimetypes.guess_type() in non-strict mode. Only
    when that yields nothing, and the module-level `magic` name is truthy, is
    magic.from_file() asked instead.

    Args:
      filename: A string, the name of the file to guess the type for.
    Returns:
      A mimetype string, or None if no guess could be made.
    """
    warnings.warn(
        'beangulp.file_type.guess_file_type() is deprecated. '
        'Use the beangulp.mimetypes module instead.',
        DeprecationWarning,
        stacklevel=2)

    guessed = mimetypes.guess_type(filename, strict=False)[0]
    # NOTE(review): `magic` is presumably an optional import that is falsy
    # when unavailable -- confirm at the top of the module.
    if not guessed and magic:
        guessed = magic.from_file(filename, mime=True)
        # The result may come back as bytes; normalize it to str.
        if isinstance(guessed, bytes):
            guessed = guessed.decode('utf8')
    return guessed
示例#2
0
def mimetype(filename):
    """Compute the MIME type of a file from its name.

    The lookup uses mimetypes.guess_type() in non-strict mode; the encoding
    part of its result is discarded.

    Args:
      filename: A string, the name of the file.
    Returns:
      The guessed MIME type string, or None if the type is unknown.
    """
    return mimetypes.guess_type(filename, strict=False)[0]
示例#3
0
 def identify(self, filepath):
     """Tell whether the file is a CSV export with the expected header.

     Args:
       filepath: A string, the path of the file to check.
     Returns:
       True if the file name maps to the 'text/csv' MIME type and the file
       content starts with the expected column header; False otherwise.
     """
     guessed_type = mimetypes.guess_type(filepath)[0]
     if guessed_type == 'text/csv':
         # Only the first 1024 characters are needed to see the header.
         with open(filepath) as stream:
             prefix = stream.read(1024)
         expected_header = ('Details,Posting Date,"Description",'
                            'Amount,Type,Balance,Check or Slip #,')
         return prefix.startswith(expected_header)
     return False
示例#4
0
文件: acme.py 项目: huruka/beangulp
    def identify(self, filepath):
        """Tell whether the file looks like a PDF statement from ACME Bank.

        Args:
          filepath: A string, the path of the file to check.
        Returns:
          True if the file name maps to the 'application/pdf' MIME type and
          the text extracted from the file begins with 'ACME Bank'; False
          otherwise, including when no text could be extracted.
        """
        mimetype, _ = mimetypes.guess_type(filepath)
        if mimetype != 'application/pdf':
            return False

        # Look for some words in the PDF file to figure out if it's a statement
        # from ACME. The filename they provide (Statement.pdf) isn't useful.
        text = pdf_to_text(filepath)
        if not text:
            # Answer with a bool on every path instead of falling through
            # and implicitly returning None.
            return False
        # re.match() anchors at the beginning of the extracted text.
        return re.match('ACME Bank', text) is not None
示例#5
0
def is_mimetype(filepath: str,
                check_mimetypes: Union[str, Set[str]],
                regexp: Optional[bool] = False) -> bool:
    """Check if a file is of one of many mimetypes.

    The MIME type is guessed from the file name with mimetypes.guess_type().

    Args:
      filepath: The name or path of the file to check.
      check_mimetypes: A single MIME type, or a set of MIME types, to accept.
        When `regexp` is true, each entry is a regular expression instead.
      regexp: If true, every entry of `check_mimetypes` is matched against the
        whole guessed type with re.fullmatch(). If false (the default), the
        guessed type must be literally equal to one of the entries.
    Returns:
      True if the guessed MIME type is accepted; False if it is not, or if no
      type could be guessed.
    """
    if isinstance(check_mimetypes, str):
        check_mimetypes = {check_mimetypes}
    mtype, _ = mimetypes.guess_type(filepath)
    if mtype is None:
        return False
    if regexp:
        return any(re.fullmatch(pattern, mtype) for pattern in check_mimetypes)
    # Literal comparison: characters such as '+' or '.' in a MIME type must
    # not be interpreted as regular expression syntax.
    return mtype in check_mimetypes
示例#6
0
    def identify(self, filepath):
        """Tell whether the file is an OFX-like file for the expected account.

        Args:
          filepath: A string, the path of the file to check.
        Returns:
          False if the MIME type guessed from the file name is not one of the
          accepted OFX/QBO/QFX types. Otherwise, True if any account id found
          in the file content matches self.acctid_regexp, else False.
        """
        # Match for a compatible MIME type. guess_type() returns a
        # (type, encoding) tuple, so the type has to be unpacked before it is
        # compared; the tuple itself is never a member of the set.
        # NOTE(review): these types are presumably registered with the
        # mimetypes module elsewhere in the project -- confirm.
        mimetype, _ = mimetypes.guess_type(filepath)
        if mimetype not in {
                'application/x-ofx', 'application/vnd.intu.qbo',
                'application/vnd.intu.qfx'
        }:
            return False

        # Match the account id.
        with open(filepath) as fd:
            contents = fd.read()
        return any(
            re.match(self.acctid_regexp, acctid)
            for acctid in find_acctids(contents))
示例#7
0
文件: utils.py 项目: huruka/beangulp
 def identify(self, filepath):
     """Tell whether the file's guessed MIME type equals self._mimetype.

     The type is guessed from the file name in non-strict mode.

     Args:
       filepath: A string, the path of the file to check.
     Returns:
       True if the guessed MIME type is equal to self._mimetype, else False.
     """
     return mimetypes.guess_type(filepath, strict=False)[0] == self._mimetype