def guess_file_type(filename):
    """Guess the MIME type of a file (deprecated entry point).

    The file name is tried first via the ``mimetypes`` tables (non-strict
    lookup). Only when that yields nothing, and the optional ``magic``
    module is available, is the file itself inspected.

    Args:
      filename: A string, the name of the file whose type is wanted.
    Returns:
      A MIME type string, or None when no guess could be made.
    """
    warnings.warn(
        'beangulp.file_type.guess_file_type() is deprecated. '
        'Use the beangulp.mimetypes module instead.',
        DeprecationWarning, stacklevel=2)

    # Cheap path: decide from the file name alone.
    by_name = mimetypes.guess_type(filename, strict=False)[0]
    if by_name:
        return by_name

    # Without the optional magic module there is nothing more to try.
    if not magic:
        return None

    # Fallback: let magic look at the file.
    detected = magic.from_file(filename, mime=True)
    if isinstance(detected, bytes):
        # Normalize a bytes answer to str so callers always get text.
        return detected.decode('utf8')
    return detected
def mimetype(filename):
    """Compute the MIME type of a file from its name.

    The lookup is non-strict, so the extra non-standard types known to the
    ``mimetypes`` module are considered as well.

    Args:
      filename: A string, the name of the file.
    Returns:
      The guessed MIME type string, or None if the type cannot be guessed.
    """
    # guess_type() yields (type, encoding); only the type is of interest.
    return mimetypes.guess_type(filename, strict=False)[0]
def identify(self, filepath):
    """Tell whether the file is a CSV export with the expected header.

    Args:
      filepath: A string, the path of the file to check.
    Returns:
      True if the name maps to 'text/csv' and the file begins with the
      expected column header, False otherwise.
    """
    expected_header = ('Details,Posting Date,"Description",'
                       'Amount,Type,Balance,Check or Slip #,')

    guessed_type = mimetypes.guess_type(filepath)[0]
    if guessed_type == 'text/csv':
        # Only the beginning of the file is needed to see the header row.
        with open(filepath) as infile:
            return infile.read(1024).startswith(expected_header)
    return False
def identify(self, filepath):
    """Tell whether the file is a PDF statement from ACME Bank.

    Args:
      filepath: A string, the path of the file to check.
    Returns:
      True if the name maps to 'application/pdf' and the text extracted
      from the file starts with 'ACME Bank'; False otherwise, including
      when no text could be extracted.
    """
    mimetype, _ = mimetypes.guess_type(filepath)
    if mimetype != 'application/pdf':
        return False

    # Look for some words in the PDF file to figure out if it's a statement
    # from ACME. The filename they provide (Statement.pdf) isn't useful.
    text = pdf_to_text(filepath)
    if not text:
        # Return an explicit False rather than falling off the end with
        # None, so every path yields a bool.
        return False

    # NOTE(review): re.match() only matches at the very start of the text;
    # confirm that the extracted text always begins with the bank name.
    return re.match('ACME Bank', text) is not None
def is_mimetype(filepath: str,
                check_mimetypes: Union[str, Set[str]],
                regexp: Optional[bool] = False) -> bool:
    """Check if a file is of one of many mimetypes.

    Args:
      filepath: The name of the file; its MIME type is guessed from the name.
      check_mimetypes: A single MIME type, or a set of them. When `regexp`
        is true, each entry is a regular expression instead.
      regexp: If true, each entry must fully match the guessed type as a
        regular expression. If false (the default), the guessed type must
        be equal to one of the entries.
    Returns:
      True if the guessed MIME type is accepted, False otherwise (including
      when no type can be guessed).
    """
    if isinstance(check_mimetypes, str):
        check_mimetypes = {check_mimetypes}

    mtype, _ = mimetypes.guess_type(filepath)
    if mtype is None:
        return False

    # The two branches used to be swapped: patterns were applied when
    # `regexp` was false and ignored when it was true.
    if regexp:
        return any(re.fullmatch(pattern, mtype) for pattern in check_mimetypes)
    return mtype in check_mimetypes
def identify(self, filepath):
    """Tell whether the file is an OFX-family file for this account.

    Args:
      filepath: A string, the path of the file to check.
    Returns:
      True if the name maps to one of the accepted OFX/QBO/QFX MIME types
      and at least one account id found in the contents matches
      `self.acctid_regexp`; False otherwise.
    """
    # Match for a compatible MIME type. guess_type() returns a
    # (type, encoding) tuple, so the type must be unpacked before the
    # membership test; comparing the tuple itself could never succeed.
    mimetype, _ = mimetypes.guess_type(filepath)
    if mimetype not in {
        'application/x-ofx',
        'application/vnd.intu.qbo',
        'application/vnd.intu.qfx',
    }:
        return False

    # Match the account id.
    with open(filepath) as fd:
        contents = fd.read()
    return any(
        re.match(self.acctid_regexp, acctid)
        for acctid in find_acctids(contents))
def identify(self, filepath):
    """Tell whether the file's guessed MIME type is the configured one.

    Args:
      filepath: A string, the path of the file to check.
    Returns:
      True if the type guessed from the file name (non-strict lookup)
      equals `self._mimetype`, False otherwise.
    """
    guessed_type = mimetypes.guess_type(filepath, strict=False)[0]
    return guessed_type == self._mimetype