def clean_text(
    self,
    text: str,
    fuzzy: bool = False,
    format: Optional[str] = None,
    proxy: Optional["EntityProxy"] = None,
) -> Optional[str]:
    """Parse and normalize an email address.

    The mailbox part is taken over unchanged from the quote-stripped input;
    the domain part is reduced to a bare host name (when it parses as a
    URL), lower-cased, stripped of trailing dots and IDNA-encoded to ASCII.

    ``fuzzy``, ``format`` and ``proxy`` are accepted for interface
    compatibility but are not used by this implementation.

    Returns None if this is not an email address, including when the domain
    cannot be parsed or IDNA-encoded.
    """
    email = strip_quotes(text)
    if email is None or not self.REGEX.match(email):
        return None
    # rpartition yields the same split as rsplit("@", 1) when an "@" is
    # present, but does not blow up on unpacking when it is absent.
    mailbox, _, domain = email.rpartition("@")
    # TODO: https://pypi.python.org/pypi/publicsuffix/
    try:
        # handle URLs by extracting the domain name
        domain = urlparse(domain).hostname or domain
        domain = domain.lower().rstrip(".")
        # handle unicode
        domain = domain.encode("idna").decode("ascii")
    except ValueError:
        # urlparse signals malformed hosts with ValueError and the idna
        # codec raises UnicodeError (a ValueError subclass) for labels that
        # are empty or too long: such input is not a usable address.
        return None
    if not mailbox or not domain:
        return None
    return "@".join((mailbox, domain))
def clean_text(
    self,
    text: str,
    fuzzy: bool = False,
    format: Optional[str] = None,
    proxy: Optional["EntityProxy"] = None,
) -> Optional[str]:
    """Tidy up a name value.

    The text is first passed through ``strip_quotes`` and the outcome is
    then handed to ``collapse_spaces``, whose result is returned as-is.
    ``fuzzy``, ``format`` and ``proxy`` are not used here.
    """
    return collapse_spaces(strip_quotes(text))
def clean_text(self, email, **kwargs):
    """Parse and normalize an email address.

    The domain part is cleaned with ``self.domains.clean`` (which receives
    ``**kwargs``); the mailbox part is kept unchanged.

    Returns None if this is not an email address or if the domain is
    rejected by the domain cleaner.
    """
    email = strip_quotes(email)
    # Never run the regex on None, in case strip_quotes hands back no value.
    if email is None or not self.EMAIL_REGEX.match(email):
        return None
    mailbox, domain = email.rsplit('@', 1)
    domain = self.domains.clean(domain, **kwargs)
    if domain is None or mailbox is None:
        return None
    return '@'.join((mailbox, domain))
def clean_text(self, name, **kwargs):
    """Tidy up a name value.

    Applies ``strip_quotes`` and then ``collapse_spaces`` to the input and
    returns the result. Extra keyword arguments are accepted but ignored.
    """
    return collapse_spaces(strip_quotes(name))